<a href="https://colab.research.google.com/github/jkordonis/TropicalML/blob/main/SVHN_VGG_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Model modified from:** <https://github.com/randomaccess2023/MG2023/blob/main/Video%2028/VGG16.ipynb>

In [None]:
!pip install torchinfo
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.utils.data import Dataset

import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

from torchinfo import summary

import numpy as np
import matplotlib.pyplot as plt



In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ToTensor converts each image to a float tensor; Normalize then standardises
# every channel with the given mean/std.
# NOTE(review): these look like the commonly used ImageNet channel statistics
# rather than SVHN-specific ones - confirm this is intended.
transform = transforms.Compose([ transforms.ToTensor(),
                               transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# SVHN train/test splits, downloaded to ./data_src on first use.
training_dataset = datasets.SVHN('./data_src', split='train', download=True, transform=transform)
test_dataset = datasets.SVHN('./data_src', split='test', download=True, transform=transform)

# drop_last=True keeps every training batch at exactly 32 samples.
training_dataloader = DataLoader(training_dataset, batch_size=32, shuffle=True, drop_last=True)
# Evaluation must see every test image, so the final partial batch is kept
# (drop_last=True here would silently exclude it from the reported metrics).
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, drop_last=False)


Downloading http://ufldl.stanford.edu/housenumbers/train_32x32.mat to ./data_src/train_32x32.mat


100%|██████████| 182040794/182040794 [00:21<00:00, 8666279.35it/s] 


Downloading http://ufldl.stanford.edu/housenumbers/test_32x32.mat to ./data_src/test_32x32.mat


100%|██████████| 64275384/64275384 [00:13<00:00, 4754320.14it/s] 


In [None]:
class VGG_like_Net(nn.Module):
    """Compact VGG-style CNN: six 3x3 convolution stages and a two-layer classifier.

    Each stage is Conv2d(kernel 3, stride 1, padding 1) -> BatchNorm2d -> ReLU.
    Stages 2, 4 and 6 additionally end with a 2x2 max-pool, so the spatial size is
    halved three times. The classifier's first linear layer expects 4*4*128
    flattened features, which corresponds to 32x32 inputs.

    Args:
        num_classes: number of logits produced by the final linear layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool):
        """Return one conv/batch-norm/ReLU stage, optionally closed by a 2x2 max-pool."""
        modules = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*modules)

    def __init__(self, num_classes=10):
        super().__init__()

        # Attribute names and construction order are kept as-is so that
        # state_dict keys (layer1.0.weight, ...) stay the same.
        self.layer1 = self._conv_stage(3, 32, pool=False)
        self.layer2 = self._conv_stage(32, 32, pool=True)
        self.layer3 = self._conv_stage(32, 64, pool=False)
        self.layer4 = self._conv_stage(64, 64, pool=True)
        self.layer5 = self._conv_stage(64, 128, pool=False)
        self.layer6 = self._conv_stage(128, 128, pool=True)

        # Classifier: dropout -> 2048x1024 linear -> ReLU, then the logit layer.
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4 * 4 * 128, 1024),
            nn.ReLU(),
        )
        self.fc_out = nn.Sequential(nn.Linear(1024, num_classes))

    def forward(self, x):
        """Map a batch of images to a (batch, num_classes) tensor of logits."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5, self.layer6):
            features = stage(features)
        # Flatten everything except the batch dimension before the classifier.
        flat = features.reshape(features.size(0), -1)
        return self.fc_out(self.fc(flat))

# Build the network with its default 10 output classes, then move it to `device`.
model = VGG_like_Net(num_classes=10)
model = model.to(device)

In [None]:
def training_batch(data, labels, model, criterion, optimizer):
    """Run one optimisation step on a single mini-batch.

    Args:
        data: input batch accepted by ``model``.
        labels: targets accepted by ``criterion``.
        model: the ``nn.Module`` being trained; it is switched to train mode.
        criterion: callable ``criterion(output, labels)`` returning a scalar loss.
        optimizer: optimizer holding ``model``'s parameters.

    Returns:
        The batch loss as a detached 0-dim tensor (use ``.item()`` for a float).
    """
    model.train()

    # Follow the model's own device instead of a notebook-level `device`
    # variable, so the helper keeps working when that variable is rebound or
    # does not match where the model actually lives.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        data = data.to(first_param.device)
        labels = labels.to(first_param.device)

    output = model(data)
    loss = criterion(output, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Detach so the caller does not keep this batch's autograd graph alive.
    return loss.detach()

In [None]:
with torch.inference_mode():
    def test_batch(data, labels, model, criterion):
        model.eval()
        data = data.to(device)
        labels = labels.to(device)
        output = model(data)
        loss = criterion(output, labels)
        return loss

In [None]:
def accuracy(data, labels, model):
    """Return per-sample correctness of ``model``'s top-1 prediction on one batch.

    The model is switched to eval mode. The forward pass runs inside
    ``torch.inference_mode()``; the context manager has to be entered inside
    the function body, because wrapping only the ``def`` statement has no
    effect on later calls.

    Args:
        data: input batch accepted by ``model``.
        labels: integer class labels, one per sample.
        model: the ``nn.Module`` to evaluate; its output's last dimension is
            treated as the class scores.

    Returns:
        A Python list of bools, ``True`` where the arg-max class equals the label.
    """
    model.eval()

    # Follow the model's own device rather than a notebook-level `device` variable.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        data = data.to(first_param.device)
        labels = labels.to(first_param.device)

    with torch.inference_mode():
        output = model(data)
        _, pred_labels = output.max(-1)
        correct = (pred_labels == labels)
    return correct.cpu().tolist()

In [None]:
# ---- Train the VGG-style classifier and track per-epoch train/test metrics ----
model_to_train = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_to_train.parameters(), lr=0.001)

# Fix the NumPy and PyTorch seeds before the first batch is drawn.
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

n_epochs = 15

# One entry per epoch.
training_loss, test_loss = [], []
training_accuracy, test_accuracy = [], []

for epoch in range(n_epochs):
    # Per-batch losses and per-sample correctness flags for this epoch.
    training_losses, test_losses = [], []
    training_accuracies, test_accuracies = [], []

    # Optimisation pass. Accuracy is measured by a second forward pass on the
    # same batch, right after the weights were updated.
    for data, labels in training_dataloader:
        trng_batch_loss = training_batch(data, labels, model_to_train, criterion, optimizer)
        training_losses.append(trng_batch_loss.item())
        trng_batch_correct = accuracy(data, labels, model_to_train)
        training_accuracies.extend(trng_batch_correct)
    training_per_epoch_loss = np.mean(training_losses)
    training_per_epoch_accuracy = np.mean(training_accuracies)

    # Evaluation pass over the test loader.
    for data, labels in test_dataloader:
        tst_batch_loss = test_batch(data, labels, model_to_train, criterion)
        test_losses.append(tst_batch_loss.item())
        tst_batch_correct = accuracy(data, labels, model_to_train)
        test_accuracies.extend(tst_batch_correct)
    test_per_epoch_loss = np.mean(test_losses)
    test_per_epoch_accuracy = np.mean(test_accuracies)

    # Append this epoch's summary to the histories.
    training_loss.append(training_per_epoch_loss)
    test_loss.append(test_per_epoch_loss)
    training_accuracy.append(training_per_epoch_accuracy)
    test_accuracy.append(test_per_epoch_accuracy)

    # Single-line progress report.
    print(
        f'Epoch: {epoch+1}/{n_epochs}\t| Training loss: {training_per_epoch_loss:.4f} | '
        f'Training accuracy: {training_per_epoch_accuracy:.4f} | Test loss: {test_per_epoch_loss:.4f} | '
        f'Test accuracy: {test_per_epoch_accuracy:.4f}'
    )

Epoch: 1/15	| Training loss: 0.3946 | Training accuracy: 0.9628 | Test loss: 0.3088 | Test accuracy: 0.9073
Epoch: 2/15	| Training loss: 0.2947 | Training accuracy: 0.9761 | Test loss: 0.2472 | Test accuracy: 0.9293
Epoch: 3/15	| Training loss: 0.2411 | Training accuracy: 0.9821 | Test loss: 0.2299 | Test accuracy: 0.9343
Epoch: 4/15	| Training loss: 0.2120 | Training accuracy: 0.9868 | Test loss: 0.2500 | Test accuracy: 0.9286
Epoch: 5/15	| Training loss: 0.1895 | Training accuracy: 0.9883 | Test loss: 0.2227 | Test accuracy: 0.9378
Epoch: 6/15	| Training loss: 0.1701 | Training accuracy: 0.9912 | Test loss: 0.2012 | Test accuracy: 0.9442
Epoch: 7/15	| Training loss: 0.1567 | Training accuracy: 0.9922 | Test loss: 0.2159 | Test accuracy: 0.9388
Epoch: 8/15	| Training loss: 0.1429 | Training accuracy: 0.9939 | Test loss: 0.2134 | Test accuracy: 0.9396
Epoch: 9/15	| Training loss: 0.1299 | Training accuracy: 0.9950 | Test loss: 0.1973 | Test accuracy: 0.9461
Epoch: 10/15	| Training loss

tensor(True, device='cuda:0')

In [None]:
from google.colab import drive

# Mount Google Drive, then store only the trained weights (the state_dict) there.
drive.mount('/content/drive')
model_path = '/content/drive/My Drive/SVHN_VGG.pth'
torch.save(obj=model.state_dict(), f=model_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Number of tropical terms used by the replacement head.
K = 10

# Random candidate pool, uniform in [0, 1): 30 weight rows of length
# 4*4*128 = 2048 and one bias per row.
Vectors_to_choose = np.random.rand(30, 4 * 4 * 128)
Corresp_bias = np.random.rand(30)

In [None]:
# First 2*K candidate rows and biases, converted to float32 tensors; they
# initialise the first linear layer of the replacement head.
W_form_Division = torch.from_numpy(Vectors_to_choose[:2*K]).float()
biass_from_Division = torch.from_numpy(Corresp_bias[:2*K]).float()

# K x 2K selection matrices: row i of A_mat1 picks component 2*i and row i of
# A_mat2 picks component 2*i + 1 of a 2K-vector.
A_mat1 = torch.zeros(K, 2 * K, dtype=torch.float32)
A_mat2 = torch.zeros(K, 2 * K, dtype=torch.float32)

rows = torch.arange(K)
A_mat1[rows, 2 * rows] = 1
A_mat2[rows, 2 * rows + 1] = 1

class small_nn_head(nn.Module):
    """Replacement classifier head with an element-wise max ("tropical") branch.

    Forward pass, with n = ``number_of_tropical_terms``:
        flatten -> Layer1 (2n features) -> Lin_part1 / Lin_part2 (n each)
        -> element-wise max of the two -> concat [Layer1 output, max] (3n)
        -> Layer2 (10 logits).

    Initial values are read from the module-level tensors ``W_form_Division``,
    ``biass_from_Division``, ``A_mat1`` and ``A_mat2``. They are *copied* into
    the parameters, so training this head never mutates those tensors and two
    instances never share storage. The copy also raises if the shapes do not
    match, which happens when ``number_of_tropical_terms`` differs from the
    ``K`` those tensors were built with.

    Args:
        number_of_tropical_terms: n above; defaults to the module-level ``K``.
    """

    def __init__(self, number_of_tropical_terms=K):
        super(small_nn_head, self).__init__()

        # Define layers
        self.Layer1 = nn.Linear(4 * 4 * 128, 2 * number_of_tropical_terms)
        self.Lin_part1 = nn.Linear(2*number_of_tropical_terms, number_of_tropical_terms)
        self.Lin_part2 = nn.Linear(2*number_of_tropical_terms, number_of_tropical_terms)
        self.Layer2 = nn.Linear(3*number_of_tropical_terms,10)

        # Overwrite the default initialisation in place. copy_ keeps each
        # parameter's own storage and checks the shape; assigning `.data`
        # would alias the global tensors and accept any shape.
        with torch.no_grad():
            self.Lin_part1.weight.copy_(A_mat1)
            self.Lin_part1.bias.zero_()
            self.Lin_part2.weight.copy_(A_mat2)
            self.Lin_part2.bias.zero_()
            self.Layer1.weight.copy_(W_form_Division)
            self.Layer1.bias.copy_(biass_from_Division)

    def forward(self, x):
        """Return (batch, 10) logits for a batch of feature maps or flat features."""
        x = x.reshape(x.size(0), -1)
        out = self.Layer1(x)
        out1 = self.Lin_part1(out)
        out2 = self.Lin_part2(out)
        # Element-wise maximum of the two linear branches.
        max_12 = torch.max(out1, out2)
        out = torch.cat((out, max_12), dim=1)
        out = self.Layer2(out)
        return out

# Re-select the compute device (this time as a torch.device object), build the
# replacement head with K tropical terms and move it onto that device.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model_substitute = small_nn_head(K)
model_substitute.to(device)

small_nn_head(
  (Layer1): Linear(in_features=2048, out_features=20, bias=True)
  (Lin_part1): Linear(in_features=20, out_features=10, bias=True)
  (Lin_part2): Linear(in_features=20, out_features=10, bias=True)
  (Layer2): Linear(in_features=30, out_features=10, bias=True)
)

In [None]:
# Smoke test: push one random 2048-dimensional vector through the head and
# print the resulting logits.
x = torch.randn(1, 2048).to(device)
print(model_substitute(x))

In [None]:
intermediate_output = None
batch_size=32
def hook_fn(module, input, output):
    """Forward hook: remember the most recent output of the hooked module."""
    global intermediate_output
    intermediate_output = output

# Collect flattened layer6 activations for a few training batches and keep the
# first 200 rows as X_sample (float64, shape (200, 4*4*128)).
hook_handle = model.layer6.register_forward_hook(hook_fn)

# Eval mode keeps these forward passes from updating BatchNorm running
# statistics; no_grad avoids building autograd graphs that are never used.
model.eval()

# The empty float64 seed row keeps the concatenated result's dtype at float64.
feature_batches = [np.zeros([0,4*4*128])]

cnt=0
try:
  with torch.no_grad():
    for i in training_dataloader:
      inputs = i[0].to(device)
      output = model(inputs)
      # Flatten using the actual batch size rather than assuming `batch_size`.
      X_to_add = intermediate_output.reshape(inputs.size(0), -1).detach().to('cpu').numpy()
      feature_batches.append(X_to_add)
      cnt+=1
      if cnt>=7:
        break
finally:
  # Always detach the hook so it does not keep firing on later forward passes.
  hook_handle.remove()

# Concatenate once, after the loop, and keep the first 200 samples.
X_sample = np.concatenate(feature_batches,axis=0)
X_sample = X_sample[:200]




In [None]:
# Create Dataset for compressed Layer
# Latest forward-hook outputs, keyed by the name passed to getActivation.
activation = {}

def getActivation(name):
  """Build a forward hook that stores the hooked module's detached output in ``activation[name]``."""
  def _record(_module, _inputs, result):
    # Detach so the stored tensor carries no autograd history.
    activation[name] = result.detach()
  return _record

def set_hooks(INP_Layer):
  """Record INP_Layer's forward output under the key 'INP_to_Compressed_Layer'.

  Args:
    INP_Layer: module on which the forward hook is registered.

  Returns:
    The handle returned by ``register_forward_hook``; call ``.remove()`` on it
    to detach the hook again. Callers that ignore the return value behave as
    before.
  """
  hook_INP_to_Compressed_Layer = INP_Layer.register_forward_hook(getActivation('INP_to_Compressed_Layer'))
  return hook_INP_to_Compressed_Layer

class LayerComputeDataset(Dataset):
    """Dataset of intermediate activations of a fixed model over the training images.

    For every index, the corresponding training image is pushed through
    ``Init_model`` and the activation captured by a forward hook is returned
    together with the image's label. The hook must already be registered and
    must write to ``activation[from_layer]``.

    Args:
        Init_model: the network used as a frozen feature extractor; it is
            switched to eval mode.
        from_layer: key in the module-level ``activation`` dict under which
            the hooked layer's output is stored.
    """

    def __init__(self, Init_model,from_layer):
        self.from_layer= from_layer
        self.Init_model=Init_model
        # The extractor is only used for inference; eval mode keeps BatchNorm
        # from using (and updating) per-sample batch statistics.
        self.Init_model.eval()
        self.num_samples = len(training_dataset)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        # Index the dataset itself (not its raw `.data` array) so that the
        # same `transform` used to train Init_model is applied. Feeding the
        # raw pixel array cast to float bypassed ToTensor/Normalize, so the
        # network saw inputs on a different scale than it was trained on.
        image, labels = training_dataset[idx]
        with torch.no_grad():
          self.Init_model(image.unsqueeze(0).to(device))

        # Read the activation under the configured key.
        input_data = activation[self.from_layer]

        # Drop only the batch dimension added above.
        return input_data.squeeze(0), labels


In [None]:
# Hook layer6 of the trained network, then expose its activations as a dataset
# and wrap that in a shuffled loader with batches of 32.
set_hooks(model.layer6)
DataSetForCompressLayer = LayerComputeDataset(Init_model=model, from_layer='INP_to_Compressed_Layer')
DataLoaderForCompressLayer = DataLoader(
    dataset=DataSetForCompressLayer,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)

In [None]:
# ---- Train the replacement head on the hooked activations ----
model_to_train=model_substitute
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_to_train.parameters(), lr=0.001)

n_epochs = 15

for Epoch in range(n_epochs):
    # Per-batch losses and per-sample correctness flags for this epoch.
    training_losses, training_accuracies = [], []
    for data, labels in DataLoaderForCompressLayer:
        trng_batch_loss = training_batch(data, labels, model_to_train, criterion, optimizer)
        training_losses.append(trng_batch_loss.item())
        trng_batch_correct = accuracy(data, labels, model_to_train)
        training_accuracies.extend(trng_batch_correct)
    training_per_epoch_loss = np.array(training_losses).mean()
    training_per_epoch_accuracy = np.mean(training_accuracies)

    # Report loss AND accuracy and finish the line, so each epoch gets its own
    # row instead of all epochs running together on one unterminated line.
    print(f'Epoch: {Epoch+1}/{n_epochs}\t| Training loss: {training_per_epoch_loss:.4f} | '
          f'Training accuracy: {training_per_epoch_accuracy:.4f}')

    # Once the loss is NaN/inf the weights are no longer usable; further
    # epochs would only burn time.
    if not np.isfinite(training_per_epoch_loss):
        print('Stopping early: training loss is no longer finite.')
        break

Epoch: 1/15	| Training loss: nan | 

KeyboardInterrupt: 

In [None]:
# Display the value most recently assigned to training_per_epoch_accuracy by a
# training loop (the mean of the per-sample correctness flags of one epoch).
training_per_epoch_accuracy

0.0675430334302524