# Kernel Calculation Test
This notebook is used to test the functionality of the kernel calc methods that (if working) will be rewritten in Python script.

In [23]:
import torch
import resources as rs

### Loading Model Data

In [24]:
model_path = 'ex_models.pt'
state_dict = torch.load(model_path, map_location=torch.device('cpu')) # MODEL

### Unpacking the params

**NOTE:** 
The model was trained on the MNIST digits of 0 1

Unpacking the features params

In [25]:
import torch.nn as nn

# get the weights and biases of the quantized model (for the features layer)
f_weights_quant = state_dict['features.hidden_layer._packed_params._packed_params'][0]
f_bias_quant = state_dict['features.hidden_layer._packed_params._packed_params'][1]

# dequantize the weights and biases
f_weights_float = torch.dequantize(f_weights_quant)
f_bias_float = torch.dequantize(f_bias_quant)
# print the float values of weights and biases
print(f_weights_float)
print(f_bias_float)

tensor([[ 0.0212, -0.0311, -0.0050,  ..., -0.0343, -0.0193,  0.0000],
        [ 0.0112, -0.0050,  0.0149,  ..., -0.0093, -0.0106,  0.0336],
        [-0.0324, -0.0006, -0.0062,  ...,  0.0311,  0.0137, -0.0324],
        ...,
        [-0.0305,  0.0168, -0.0293,  ...,  0.0237, -0.0280, -0.0112],
        [-0.0349,  0.0056, -0.0174,  ..., -0.0093, -0.0031,  0.0212],
        [ 0.0349,  0.0168, -0.0206,  ..., -0.0149,  0.0044, -0.0187]])
Parameter containing:
tensor([ 0.0235, -0.0103, -0.0210,  ..., -0.0244,  0.0028, -0.0313],
       grad_fn=<NotImplemented>)


Unpacking the readout params.

In [26]:
# get the weights and biases of the quantized model (for the readout layer)
r_weights_quant = state_dict['readout._packed_params._packed_params'][0]
r_bias_quant = state_dict['readout._packed_params._packed_params'][1]

# dequantize the weights and bises
r_weights_float = torch.dequantize(r_weights_quant)
r_bias_float = torch.dequantize(r_bias_quant)

print(r_weights_float)
print(r_bias_float)

tensor([[ 0.0219,  0.0389,  0.0024,  ..., -0.0073,  0.0182, -0.0158],
        [-0.0195, -0.0219, -0.0024,  ...,  0.0073, -0.0170,  0.0134]])
Parameter containing:
tensor([0.4623, 0.4824], grad_fn=<NotImplemented>)


### Manually updating the model

In [27]:
model = rs.NN()
model

NN(
  (features): Sequential(
    (hidden_layer): Linear(in_features=784, out_features=2048, bias=True)
    (hidden_activation): ReLU()
  )
  (readout): Linear(in_features=2048, out_features=2, bias=True)
)

In [28]:
params = list(model.parameters())
params

[Parameter containing:
 tensor([[ 0.0225, -0.0102, -0.0162,  ..., -0.0345, -0.0016, -0.0135],
         [ 0.0154,  0.0070,  0.0239,  ..., -0.0002, -0.0327,  0.0221],
         [ 0.0165,  0.0316,  0.0331,  ...,  0.0279, -0.0249,  0.0317],
         ...,
         [ 0.0123, -0.0321,  0.0080,  ..., -0.0237,  0.0114, -0.0168],
         [-0.0163, -0.0265, -0.0123,  ...,  0.0102, -0.0143,  0.0307],
         [ 0.0350,  0.0323,  0.0201,  ..., -0.0036,  0.0276,  0.0264]],
        requires_grad=True),
 Parameter containing:
 tensor([ 0.0306,  0.0139,  0.0259,  ...,  0.0266, -0.0032, -0.0260],
        requires_grad=True),
 Parameter containing:
 tensor([[-0.0169, -0.0136, -0.0182,  ...,  0.0027,  0.0116,  0.0066],
         [ 0.0040,  0.0094,  0.0049,  ..., -0.0106,  0.0022, -0.0215]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.0002, -0.0009], requires_grad=True)]

In [29]:
params[0].data = f_weights_float
params[1].data = f_bias_float
params[2].data = r_weights_float
params[3].data = r_bias_float

### Loading MNIST dataset

In [30]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
def mnist_dataset(batch_size, train=True, values=list(range(10))):
    # Initializing MNIST data set.
    dataset = datasets.MNIST(root='dataset/', train=train, transform=transforms.ToTensor(), download=True)

    targets_list = dataset.targets.tolist()
    values_index = [i for i in range(len(dataset)) if targets_list[i] in values]

    # Creating a subset of ### MNIST targets.
    subset = torch.utils.data.Subset(dataset, values_index)
    loader = DataLoader(dataset=subset, shuffle=True)

    return loader

In [31]:
MNIST = rs.mnist_dataset(batch_size=0, train=True, values=[0,1])
data, targets = next(iter(MNIST))

Next, in order to perform the CKA calc. we will need to reshape the data into a batch_size X features tenso (12665, 784).

In [32]:
data = torch.squeeze(data, dim=1)
data = data.view(data.size(0), -1)
data.shape

torch.Size([12665, 784])

### CKA Calc.
At this point we can now calculate the CKA for the model state.

In [33]:
model.features(data).shape

torch.Size([12665, 1024])

In [34]:
targets.shape

torch.Size([12665])

In [35]:
rs.kernel_calc(targets, model.features(data))

tensor(0.9497, grad_fn=<DivBackward0>)

#TODO using this notebook create a python script that will do this for each of the model states (135 * 512)...

### Accuracy

In [37]:
import torch.optim as optim

In [38]:
device = torch.device('cpu')
loss = nn.MSELoss()
model.eval()
losses = rs.train(MNIST, device, model, loss, values=[0, 1], backwards=False, record_loss=True)

In [39]:
losses

8.250704013335053e-06

### CKA Test.

In [52]:
import time
def kernel_calc(y, phi):
    # Output Kernel
    start = time.time()
    y = torch.t(torch.unsqueeze(y, -1))
    end = time.time()
    print(f"UNSQUEEZE: {end - start}s")
    
    start = time.time()
    K1 = torch.matmul(torch.t(y), y)
    end = time.time()
    print(f"K1 MATMUL: {end - start}s")
    
    start = time.time()
    K1c = kernel_centering(K1.float())
    end = time.time()
    print(f"K1 CENTERING: {end - start}")

    # Feature Kernel
    start = time.time()
    K2 = torch.mm(phi, torch.t(phi))
    end = time.time()
    print(f"K2 MM: {end - start}")
    
    start = time.time()
    K2c = kernel_centering(K2)
    end = time.time()
    print(f"K2 CENTERING: {end - start}")

    start = time.time()
    ka = kernel_alignment(K1c, K2c)
    end = time.time()
    print(f"ALIGNMENT {end - start}")
    return ka


def frobenius_product(K1, K2):
    return torch.sum(K1 * K2)
    #return torch.trace(torch.mm(K2, torch.t(K1)))


def kernel_alignment(K1, K2):
    return frobenius_product(K1, K2) / ((torch.norm(K1, p='fro') * torch.norm(K2, p='fro')))


def kernel_centering(K):
#    # Lemmna 1
#    
#    print("=" * 80)
#    print("CENTERING")
#
#    start = time.time()
#    # I - ll^T / m
#    m = K.size()[0]
#    I = torch.eye(m)
#    l = torch.ones(m, 1)
#    mat = I - torch.matmul(l, torch.t(l)) / m
#    end = time.time()
#    print(f"I MATRIX: {end - start}s")
#
#    start = time.time()
#    j = torch.matmul(torch.matmul(mat, K), mat)
#    end = time.time()
#    print(f"MATMUL I: {end - start}s")
#    print("=" * 80)
#    return j
    row_means = K.mean(dim=1, keepdim=True)
    col_means = K.mean(dim=0, keepdim=True)
    total_mean = K.mean()
    
    K_centered = K - row_means - col_means + total_mean

In [53]:
start = time.time()
cka = kernel_calc(targets, model.features(data))
end = time.time()
print(f"TOTAL TIME: {end - start}s")

UNSQUEEZE: 0.0005209445953369141s
K1 MATMUL: 1.5471091270446777s
K1 CENTERING: 1.5976200103759766
K2 MM: 1.0305120944976807
K2 CENTERING: 0.6956150531768799


TypeError: unsupported operand type(s) for *: 'NoneType' and 'NoneType'

In [51]:
cka

tensor(0.9497, grad_fn=<DivBackward0>)