<a href="https://colab.research.google.com/github/alexgill321/EKFAC-Influence-Benchmarks/blob/main/PBRF_OVER_cancer_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
x = data['data']
y = data['target']
print("shape of x: {}\nshape of y: {}".format(x.shape,y.shape))

shape of x: (569, 30)
shape of y: (569,)


In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)

In [49]:

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)


from torch.utils.data import Dataset, DataLoader
import torch

class dataset(Dataset):
  def __init__(self,x,y):
    self.x = torch.tensor(x,dtype=torch.float32)
    self.y = torch.tensor(y,dtype=torch.float32)
    self.length = self.x.shape[0]

  def __getitem__(self,idx):
    return self.x[idx],self.y[idx]
  def __len__(self):
    return self.length

trainset = dataset(X_train,Y_train)
trainloader = DataLoader(trainset,batch_size=60,shuffle=False)


valset = dataset(X_test,Y_test)
valloader = DataLoader(valset,batch_size=60,shuffle=False)

In [59]:
from torch import nn
from torch.nn import functional as F
class Net(nn.Module):
  def __init__(self,input_shape):
    super(Net,self).__init__()
    self.fc1 = nn.Linear(input_shape,32)
    self.fc2 = nn.Linear(32,64)
    self.fc3 = nn.Linear(64,1)
  def forward(self,x):
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = self.fc3(x)
    return x


In [60]:
from torch import optim
network = Net(x.shape[1])
optimizer = optim.Adam(network.parameters(), lr=0.001)

In [61]:
# Define training function
def train(model, train_loader, optimizer, criterion, get_preds_only = False):
    model.train()
    running_loss = 0.0

    if get_preds_only == False:

      output_grads = []
      for inputs, labels in train_loader:
          optimizer.zero_grad()
          outputs = model(inputs)
          loss = criterion(outputs, labels.view(-1, 1))  # Assuming binary classification

          outputs.retain_grad()
          loss.backward()

          output_grads.append(outputs.grad)
          optimizer.step()

          running_loss += loss.item()
      return running_loss / len(train_loader), output_grads

    else:
      model.eval()
      all_preds_array = []
      for inputs, labels in train_loader:
        outputs = model(inputs)
        all_preds_array.append(outputs)
      model.train()
      return all_preds_array

# Define validation function
def validate(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels.view(-1, 1))
            running_loss += loss.item()
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predicted == labels.view(-1, 1)).sum().item()
            total += labels.size(0)
    val_loss = running_loss / len(val_loader)
    val_acc = correct / total
    return val_loss, val_acc

# Training loop
num_epochs = 20
criterion = nn.BCEWithLogitsLoss()  # Binary Cross Entropy Loss


training_preds_on_untrained_model = train(network, trainloader, optimizer, criterion, get_preds_only = True)


output_grads_global = []



for epoch in range(num_epochs):

    train_loss, output_grads = train(network, trainloader, optimizer, criterion)
    if epoch == 1:
      untrained_model_params = network.fc1.weight.grad

    output_grads_global = output_grads

    val_loss, val_accuracy = validate(network, valloader, criterion)
    print(f"Epoch [{epoch + 1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2%}")

training_preds_on_trained_model = train(network, trainloader, optimizer, criterion, get_preds_only = True)
trained_model_params = network.fc1.weight.grad


print(torch.equal(untrained_model_params, trained_model_params))

Epoch [1/20] - Train Loss: 0.6897, Val Loss: 0.6535, Val Accuracy: 90.21%
Epoch [2/20] - Train Loss: 0.6245, Val Loss: 0.5867, Val Accuracy: 90.91%
Epoch [3/20] - Train Loss: 0.5485, Val Loss: 0.5036, Val Accuracy: 93.01%
Epoch [4/20] - Train Loss: 0.4566, Val Loss: 0.4070, Val Accuracy: 93.01%
Epoch [5/20] - Train Loss: 0.3570, Val Loss: 0.3136, Val Accuracy: 93.01%
Epoch [6/20] - Train Loss: 0.2663, Val Loss: 0.2413, Val Accuracy: 93.01%
Epoch [7/20] - Train Loss: 0.1982, Val Loss: 0.1941, Val Accuracy: 93.01%
Epoch [8/20] - Train Loss: 0.1533, Val Loss: 0.1650, Val Accuracy: 92.31%
Epoch [9/20] - Train Loss: 0.1253, Val Loss: 0.1463, Val Accuracy: 93.71%
Epoch [10/20] - Train Loss: 0.1071, Val Loss: 0.1333, Val Accuracy: 93.01%
Epoch [11/20] - Train Loss: 0.0945, Val Loss: 0.1240, Val Accuracy: 93.71%
Epoch [12/20] - Train Loss: 0.0853, Val Loss: 0.1172, Val Accuracy: 93.71%
Epoch [13/20] - Train Loss: 0.0784, Val Loss: 0.1122, Val Accuracy: 93.71%
Epoch [14/20] - Train Loss: 0.0732

In [62]:
print((training_preds_on_trained_model[0][4]))

tensor([-2.1708], grad_fn=<SelectBackward0>)


## **Take Initial PBRF over untrained dataset**

In [63]:
def calculate_bergman_divergance(trainloader, example_prediction, trained_model_example_prediction):

  #let's do only for one batch, hence break
  for inputs, labels in trainloader:

    itr = 0

    for individual_inputs_in_batch, individual_label_in_batch in zip(inputs, labels):

      loss_on_untrained_examples = criterion(example_prediction[0][itr], individual_label_in_batch.unsqueeze(dim = 0))

      loss_on_trained_examples = criterion(trained_model_example_prediction[0][itr], individual_label_in_batch.unsqueeze(dim = 0))

      #grad of final prediction wrt loss (not sure):
      output_grads_colum_vector = output_grads_global[0][itr].t()

      #difference_in_preds_before_vs_after_training
      output_difference_vector = example_prediction[0][itr] - trained_model_example_prediction[0][itr]

      output_matmul = output_grads_colum_vector * output_difference_vector

      final_bergman_for_current_input = loss_on_untrained_examples  - loss_on_trained_examples - output_matmul

      print(final_bergman_for_current_input)


      itr+=1
      break
    break


  return final_bergman_for_current_input



def pbrf_from_bergman(bergman_divergance):

  pbrf_for_curr_example = bergman_divergance.item() - torch.sum(torch.square(untrained_model_params - trained_model_params))
  return []



one_example_bergman_cov = calculate_bergman_divergance(trainloader, training_preds_on_untrained_model, training_preds_on_trained_model)
pbrf_from_bergman(one_example_bergman_cov)



tensor([0.7209], grad_fn=<SubBackward0>)
tensor([[ 3.5539e-02, -2.4995e-03,  3.5185e-02,  3.1117e-02,  1.3292e-02,
          1.9750e-02,  1.4400e-02,  2.4263e-02,  1.9477e-02, -2.1316e-02,
          1.2991e-02, -8.8752e-02,  1.3538e-02,  1.6757e-02,  6.4057e-03,
          1.0079e-02,  1.5489e-03,  2.3873e-02, -1.7038e-02,  6.8573e-04,
          3.5793e-02, -7.5127e-03,  3.5941e-02,  3.1242e-02,  1.9469e-02,
          1.8692e-02,  1.3061e-02,  3.0947e-02,  2.4895e-02,  8.3003e-04],
        [-1.3182e-03, -8.9619e-04, -1.1012e-03, -1.6254e-03,  4.7391e-03,
          2.8862e-03,  1.3953e-03,  1.9101e-04,  3.1318e-03,  2.6078e-03,
         -3.3650e-03, -1.0149e-02, -2.9982e-03, -2.4240e-03,  5.8581e-03,
          3.3209e-03,  2.6222e-03,  2.7097e-03, -1.7009e-03,  3.4707e-03,
         -2.3673e-03,  1.0393e-03, -2.2478e-03, -2.7163e-03,  6.4336e-03,
          3.0606e-03,  2.5300e-03,  5.7849e-04,  4.3197e-03,  4.5485e-03],
        [-1.6142e-02, -4.7555e-03, -1.5125e-02, -1.5518e-02,  5.7431e

[]

In [22]:
import torch

# Assuming you have the true and predicted values
Y = torch.tensor([1.0, 2.0, 3.0])  # Example true values
Y_hat = torch.tensor([1.2, 1.8, 2.9], requires_grad=True)  # Example predicted values

# Assuming a simple loss function (mean squared error)
loss_function = torch.nn.MSELoss()

# Calculating the loss
loss = loss_function(Y_hat, Y)
print(Y_hat.grad)
loss.backward()  # Compute gradients

# Get the gradient with respect to predicted values
gradient_wrt_Yhat = Y_hat.grad
print(gradient_wrt_Yhat)

# Transpose the gradient (reshaping it to a column vector)
gradient_transposed = gradient_wrt_Yhat.view(-1, 1)

print("Gradient with respect to Y_hat:\n", gradient_wrt_Yhat)
print("Transposed Gradient:\n", gradient_transposed)

None
tensor([ 0.1333, -0.1333, -0.0667])
Gradient with respect to Y_hat:
 tensor([ 0.1333, -0.1333, -0.0667])
Transposed Gradient:
 tensor([[ 0.1333],
        [-0.1333],
        [-0.0667]])
