# Objective

This experiment checks the following for a simple convnet on MNIST.
1. Verify the relation between the AGOP (average gradient outer product) and the NFM (neural feature matrix) for the conv layers.
2. Run RFM to construct matrices similar to the ones above. (TBD)

The model and its trained weights are taken from `trained_models/MNIST/model2`.

# Setup

In [1]:
import os

# Root of the DLR project tree. Defaults to the original hard-coded location;
# set the DLR_ROOT environment variable to run this notebook from another checkout.
DLR_ROOT = os.environ.get('DLR_ROOT', '/work/DLR')

# Directory holding the trained MNIST/model2 checkpoints. The trailing slash is
# kept on purpose: a later cell builds the checkpoint path by plain string
# concatenation (model_dir + filename).
model_dir = os.path.join(DLR_ROOT, 'trained_models', 'MNIST', 'model2', 'nn_models/')


In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from utils import agop_conv as agc
from torch.utils.data import Dataset
import random
import torch.backends.cudnn as cudnn
import rfm
import numpy as np
from trained_models.MNIST.model2 import trainer as t
import numpy as np
from sklearn.model_selection import train_test_split
from torch.linalg import norm
from torchvision import models
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from copy import deepcopy

Setting up a new session...
Without the incoming socket you cannot receive events from the server or register event handlers to your Visdom client.


In [3]:
# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
# To force CPU execution, overwrite `device` with 'cpu' right after this block.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda:0


In [16]:
# Ask PyTorch to release cached GPU memory that is currently unused.
# NOTE(review): this cell's execution count (16) is out of order with its
# neighbours, so it was presumably run ad hoc between experiments — confirm
# whether it is still needed for a clean top-to-bottom run.
torch.cuda.empty_cache()

In [5]:
# Data loaders and a freshly initialised network from the MNIST/model2 trainer module.
trainloader, valloader, testloader = t.get_loaders()
net = t.get_untrained_net()

# Snapshot the untrained weights *before* the checkpoint is loaded, so that
# `init_net` stays at initialisation while `net` receives the trained weights.
init_net = deepcopy(net)

# Build the checkpoint path once (model_dir already ends with a slash).
checkpoint_path = model_dir + 'mnist_conv_trained_nn.pth'

# Fail loudly when the checkpoint is missing: merely printing a hint and
# continuing would let every later "trained" cell silently run on random weights.
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError(
        f"Train the network first: no checkpoint found at {checkpoint_path}"
    )

# weights_only=True restricts unpickling to tensors and plain containers.
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
net.load_state_dict(checkpoint['state_dict'])  # the weights live under the 'state_dict' key
print("Model weights loaded successfully.")

Model weights loaded successfully.


# Verify NFA for conv layers:

In [6]:
# NFA check for the first conv layer (layer_idx=0).
# The trained and the untrained network are both moved to `device` and passed
# together with the training loader; the returned value is kept in `G`.
G = agc.verify_NFA(
    net.to(device),
    init_net.to(device),
    trainloader,
    layer_idx=0,
)

Computing GOP for sample 0 out of 10
Computing GOP for sample 1 out of 10
Computing GOP for sample 2 out of 10
Computing GOP for sample 3 out of 10
Computing GOP for sample 4 out of 10
Computing GOP for sample 5 out of 10
Computing GOP for sample 6 out of 10
Computing GOP for sample 7 out of 10
Computing GOP for sample 8 out of 10
Computing GOP for sample 9 out of 10
Computing GOP for sample 10 out of 10
Shpae after gradients:  torch.Size([9, 9])
Correlation between Initial and Trained CNFM:  tensor(0.8931, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Initial CNFM and Trained AGOP:  tensor(0.8596, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Trained CNFM and Trained AGOP:  tensor(0.9853, device='cuda:0')


In [7]:
# NFA check for the second conv layer.
# layer_idx is zero-based over the conv layers, i.e. layer_idx=k selects the
# (k+1)-th conv layer, so layer_idx=1 is the second one.
# Note that this overwrites the `G` returned by the layer_idx=0 call.
G = agc.verify_NFA(
    net.to(device),
    init_net.to(device),
    trainloader,
    layer_idx=1,
)

Computing GOP for sample 0 out of 10
Computing GOP for sample 1 out of 10
Computing GOP for sample 2 out of 10
Computing GOP for sample 3 out of 10
Computing GOP for sample 4 out of 10
Computing GOP for sample 5 out of 10
Computing GOP for sample 6 out of 10
Computing GOP for sample 7 out of 10
Computing GOP for sample 8 out of 10
Computing GOP for sample 9 out of 10
Computing GOP for sample 10 out of 10
Shpae after gradients:  torch.Size([288, 288])
Correlation between Initial and Trained CNFM:  tensor(0.2294, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Initial CNFM and Trained AGOP:  tensor(0.3062, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Trained CNFM and Trained AGOP:  tensor(0.9106, device='cuda:0')


In [21]:
# NOTE(review): `imgs` is not defined in any cell visible in this notebook, so
# this cell fails with a NameError on a fresh kernel. It was presumably created
# in a since-deleted cell (e.g. a batch drawn from `trainloader`) — restore that
# definition above before relying on Restart & Run All.

# Visualise the second conv layer (layer_idx=1) on imgs[0] twice:
# first with no matrix supplied (None) ...
agc.vis_transform_image(
    net,
    imgs[0],
    None,
    layer_idx=1,
)
# ... then with the matrix `G` returned by the most recent verify_NFA call.
agc.vis_transform_image(
    net,
    imgs[0],
    G,
    layer_idx=1,
)

torch.Size([64, 32, 3, 3])
Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 0
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 1
torch.Size([64, 14, 14, 32, 3, 3])
torch.Size([64, 32, 3, 3])
Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 0
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 1
torch.Size([64, 14, 14, 32, 3, 3])


# Init AGOP vs Init CNFM

In [13]:
import torch.nn.init as init

# Show the architecture so the indices used below can be checked against it.
print(init_net)

# (index into init_net.features, lower bound, upper bound) for each conv layer
# whose weights are replaced by draws from U(lower, upper). Biases are untouched.
uniform_ranges = (
    (0, 100.0, 1000.0),
    (3, 300.0, 500.0),
)

# CAUTION: this mutates `init_net` in place. Any verify_NFA cell re-run after
# this one sees these large uniform weights instead of the default initialisation.
for feature_idx, low, high in uniform_ranges:
    conv = init_net.features[feature_idx]
    rand = torch.empty(conv.weight.shape)
    init.uniform_(rand, a=low, b=high)
    conv.weight = nn.Parameter(rand)

ConvNet(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [15]:
# First conv layer (layer_idx=0), with `init_net` passed as BOTH networks.
# The two models are the same object, so the first printed correlation
# (initial vs. trained CNFM) is trivially 1 and the remaining two coincide.
# The call is left as the cell's last expression so its return value is displayed.
agc.verify_NFA(
    init_net.to(device),
    init_net.to(device),
    trainloader,
    layer_idx=0,
)

Computing GOP for sample 0 out of 10
Computing GOP for sample 1 out of 10
Computing GOP for sample 2 out of 10
Computing GOP for sample 3 out of 10
Computing GOP for sample 4 out of 10
Computing GOP for sample 5 out of 10
Computing GOP for sample 6 out of 10
Computing GOP for sample 7 out of 10
Computing GOP for sample 8 out of 10
Computing GOP for sample 9 out of 10
Computing GOP for sample 10 out of 10
Shpae after gradients:  torch.Size([9, 9])
Correlation between Initial and Trained CNFM:  tensor(1.0000, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Initial CNFM and Trained AGOP:  tensor(0.9769, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Trained CNFM and Trained AGOP:  tensor(0.9769, device='cuda:0')


tensor([[8.1617e+07, 8.3983e+07, 7.5531e+07, 7.2400e+07, 6.6662e+07, 7.8938e+07,
         7.5001e+07, 7.9150e+07, 6.4897e+07],
        [8.3983e+07, 1.0506e+08, 8.8824e+07, 8.1690e+07, 7.8068e+07, 8.8005e+07,
         9.0400e+07, 9.0718e+07, 7.3648e+07],
        [7.5531e+07, 8.8824e+07, 9.7246e+07, 7.8248e+07, 7.3248e+07, 8.2366e+07,
         8.3714e+07, 8.4572e+07, 6.6701e+07],
        [7.2401e+07, 8.1690e+07, 7.8248e+07, 8.1883e+07, 7.0577e+07, 7.4775e+07,
         7.5540e+07, 7.8246e+07, 6.4738e+07],
        [6.6664e+07, 7.8068e+07, 7.3248e+07, 7.0577e+07, 7.5668e+07, 7.5522e+07,
         7.5591e+07, 7.5660e+07, 6.5375e+07],
        [7.8937e+07, 8.8005e+07, 8.2366e+07, 7.4775e+07, 7.5522e+07, 9.7536e+07,
         9.0096e+07, 8.7065e+07, 7.6509e+07],
        [7.5001e+07, 9.0400e+07, 8.3714e+07, 7.5540e+07, 7.5591e+07, 9.0096e+07,
         1.0382e+08, 9.1732e+07, 7.2190e+07],
        [7.9150e+07, 9.0718e+07, 8.4572e+07, 7.8246e+07, 7.5660e+07, 8.7065e+07,
         9.1732e+07, 1.0059e+0

In [14]:
# Second conv layer (layer_idx=1), with `init_net` passed as BOTH networks.
# The two models are the same object, so the first printed correlation
# (initial vs. trained CNFM) is trivially 1 and the remaining two coincide.
# The call is left as the cell's last expression so its return value is displayed.
agc.verify_NFA(
    init_net.to(device),
    init_net.to(device),
    trainloader,
    layer_idx=1,
)

Computing GOP for sample 0 out of 10
Computing GOP for sample 1 out of 10
Computing GOP for sample 2 out of 10
Computing GOP for sample 3 out of 10
Computing GOP for sample 4 out of 10
Computing GOP for sample 5 out of 10
Computing GOP for sample 6 out of 10
Computing GOP for sample 7 out of 10
Computing GOP for sample 8 out of 10
Computing GOP for sample 9 out of 10
Computing GOP for sample 10 out of 10
Shpae after gradients:  torch.Size([288, 288])
Correlation between Initial and Trained CNFM:  tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Initial CNFM and Trained AGOP:  tensor(0.2695, device='cuda:0', grad_fn=<DivBackward0>)
Correlation between Trained CNFM and Trained AGOP:  tensor(0.2695, device='cuda:0')


tensor([[896.3096, 391.1142, 378.9974,  ..., 287.5947, 357.4277, 434.5674],
        [387.7697, 787.4019, 310.4827,  ..., 245.9232, 381.1740, 403.9646],
        [379.6405, 311.6460, 861.4999,  ..., 395.1466, 374.5538, 372.5050],
        ...,
        [284.9590, 245.2186, 394.3567,  ..., 801.1650, 353.5975, 309.3864],
        [356.6096, 379.0543, 378.0798,  ..., 354.6365, 750.3665, 343.2893],
        [435.0036, 403.7472, 372.4563,  ..., 307.5289, 340.9539, 790.8931]])

In [None]:
# NOTE(review): looks like a leftover scratch cell — it has no execution count
# and nothing else in the notebook depends on it; consider deleting it.
print("hello")