# Objective

This experiment checks the following for a simple convnet on MNIST:
1. Verify the AGOP and NFM relations for the conv layers.
2. Run RFM to construct matrices similar to the above (TBD).

The model is taken from MNIST/model2

# Setup

In [1]:
import os

# Root of the project tree. Defaults to the original location, but can be
# overridden with the DLR_ROOT environment variable so the notebook is not
# tied to a single machine's filesystem layout.
DLR_ROOT = os.environ.get('DLR_ROOT', '/work/DLR')

# Directory holding the saved checkpoints for MNIST/model2. The trailing slash
# on 'nn_models/' is kept on purpose: a later cell builds the checkpoint path
# with `model_dir + '<file name>'`.
model_dir = os.path.join(DLR_ROOT, 'trained_models', 'MNIST', 'model2', 'nn_models/')


In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from utils import agop_conv as agc
from torch.utils.data import Dataset
import random
import torch.backends.cudnn as cudnn
import rfm
import numpy as np
from trained_models.MNIST.model2 import trainer as t
import numpy as np
from sklearn.model_selection import train_test_split
from torch.linalg import norm
from torchvision import models
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from copy import deepcopy

Setting up a new session...
Without the incoming socket you cannot receive events from the server or register event handlers to your Visdom client.


In [3]:
# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
# To force CPU execution, uncomment the next line:
#device='cpu'
print(f"Using device: {device}")

Using device: cuda:0


In [4]:
# Release unoccupied GPU memory held by PyTorch's caching allocator before
# loading the model and running the gradient computations below.
torch.cuda.empty_cache()

In [7]:
import os

# Data loaders and a freshly initialised network from the model2 trainer module.
trainloader, valloader, testloader = t.get_loaders()
net = t.get_untrained_net()
# Independent copy of the untrained weights, kept for the
# "initial vs. trained" comparisons in the cells below.
init_net = deepcopy(net)

# Build the checkpoint path once. os.path.join gives the same result whether
# or not model_dir already ends with a path separator.
checkpoint_path = os.path.join(model_dir, 'mnist_conv_trained_nn.pth')

if not os.path.exists(checkpoint_path):
    # Fail fast: continuing would leave `net` untrained, and every later
    # "trained" correlation in this notebook would silently be meaningless.
    raise FileNotFoundError(
        f"Train the network first: no checkpoint found at {checkpoint_path}"
    )

# `device` is passed straight to map_location (no need to re-wrap it in
# torch.device); weights_only=True restricts unpickling to tensor data.
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
# The saved file is a dictionary; the model weights live under 'state_dict'.
net.load_state_dict(checkpoint['state_dict'])
print("Model weights loaded successfully.")

Model weights loaded successfully.


# Verify NFA for conv layers:

In [8]:
# Run the NFA check on the first conv layer (layer_idx=0), passing the trained
# network, its untrained copy and the training loader; both networks are moved
# to `device` first.
# NOTE(review): G presumably holds the AGOP matrix for this layer — confirm
# against utils/agop_conv. It is overwritten by the layer_idx=1 cell below.
G = agc.verify_NFA(net.to(device), init_net.to(device), trainloader, layer_idx=0)

Correlation between Initial and Trained CNFM:  tensor(0.7726, device='cuda:0', grad_fn=<DivBackward0>)
Computing GOP for sample 0 out of 10
Computing GOP for sample 1 out of 10
Computing GOP for sample 2 out of 10
Computing GOP for sample 3 out of 10
Computing GOP for sample 4 out of 10
Computing GOP for sample 5 out of 10
Computing GOP for sample 6 out of 10
Computing GOP for sample 7 out of 10
Computing GOP for sample 8 out of 10
Computing GOP for sample 9 out of 10
Computing GOP for sample 10 out of 10
Shpae after gradients:  torch.Size([9, 9])
Correlation between Trained CNFM and AGOP:  tensor(0.9851, device='cuda:0')
Final:  tensor(0.7726, device='cuda:0', grad_fn=<DivBackward0>) tensor(0.9851, device='cuda:0')


In [5]:
# layer_idx is zero-based and counts conv layers only, so layer_idx=1 selects
# the second conv layer of the network.
# NOTE(review): this reassigns G, replacing the value from the layer_idx=0 cell;
# the visualisation cell further down uses whichever G was computed last.
G= agc.verify_NFA(net.to(device), init_net.to(device), trainloader, layer_idx=1)

Correlation between Initial and Trained CNFM:  tensor(0.2294, device='cuda:0', grad_fn=<DivBackward0>)
Computing GOP for sample 0 out of 10
Computing GOP for sample 1 out of 10
Computing GOP for sample 2 out of 10
Computing GOP for sample 3 out of 10
Computing GOP for sample 4 out of 10
Computing GOP for sample 5 out of 10
Computing GOP for sample 6 out of 10
Computing GOP for sample 7 out of 10
Computing GOP for sample 8 out of 10
Computing GOP for sample 9 out of 10
Computing GOP for sample 10 out of 10
Shpae after gradients:  torch.Size([288, 288])
Correlation between Trained CNFM and AGOP:  tensor(0.9106, device='cuda:0')
Final:  tensor(0.2294, device='cuda:0', grad_fn=<DivBackward0>) tensor(0.9106, device='cuda:0')


In [21]:
# `imgs` is not defined by any earlier cell, so on a fresh kernel this cell
# would fail with NameError. Take one batch from the training loader instead.
# NOTE(review): assumes each batch is an indexable (images, labels) pair with
# the image tensor first — confirm against t.get_loaders().
first_batch = next(iter(trainloader))
imgs = first_batch[0]

# Visualise the first image of the batch at the second conv layer (layer_idx=1):
# once with no matrix (None) and once with the matrix G computed above.
agc.vis_transform_image(net, imgs[0], None, layer_idx=1)
agc.vis_transform_image(net, imgs[0], G, layer_idx=1)

torch.Size([64, 32, 3, 3])
Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 0
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 1
torch.Size([64, 14, 14, 32, 3, 3])
torch.Size([64, 32, 3, 3])
Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 0
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 1
torch.Size([64, 14, 14, 32, 3, 3])


# Init AGOP vs Init CNFM

In [13]:
import torch.nn.init as init

print(init_net)

# Overwrite the weights of both conv layers of init_net with uniform random
# values. Each entry is (index into init_net.features, lower bound, upper bound);
# indices 0 and 3 are the two Conv2d modules shown in the printed architecture.
for feature_idx, low, high in ((0, 100.0, 1000.0), (3, 300.0, 500.0)):
    conv_layer = init_net.features[feature_idx]
    # Fill a fresh tensor of the same shape as the existing weight, then
    # install it as the layer's new trainable parameter.
    new_weight = torch.empty(conv_layer.weight.shape)
    init.uniform_(new_weight, a=low, b=high)
    conv_layer.weight = nn.Parameter(new_weight)

ConvNet(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [None]:
# Repeat the NFA check for the first conv layer (layer_idx=0), now that the
# conv weights of init_net have been overwritten with uniform random values.
# The return value is not stored here, so G from the earlier cells is unchanged.
agc.verify_NFA(net.to(device), init_net.to(device), trainloader, layer_idx=0)

In [None]:
# Same check for the second conv layer (layer_idx=1), again using the
# re-initialised init_net; the return value is not stored.
agc.verify_NFA(net.to(device), init_net.to(device), trainloader, layer_idx=1)