In [2]:

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

from torch.nn import functional as F

import time
import math


In [3]:
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [4]:
def create_data_loader(batch_size):
  # Split the provided CIFAR-10 train set (50,000 images) into your train and val sets
  # Use the first 40,000 images as your train set and the remaining 10,000 images as val set
  # Use all 10,000 images in the provided test set as your test set

  transform = transforms.Compose([
    transforms.ToTensor(),
      # This is the mean and the average of the cifar db because we are normalizing it.
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)), 
  ])

  # load cifar
  train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
  test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

  # split train set into train and val
  train_set, val_set = torch.utils.data.random_split(train_set, [40000, 10000])

  # create data loaders
  train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True,num_workers=4)
  val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True,num_workers=4)
  test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True,num_workers=4)

  train_loader = [(inputs.to(device), labels.to(device)) for inputs, labels in train_loader]
  val_loader = [(inputs.to(device), labels.to(device)) for inputs, labels in val_loader]
  test_loader = [(inputs.to(device), labels.to(device)) for inputs, labels in test_loader]




  return train_loader, val_loader, test_loader

In [94]:

class QuasiPolySynapse(nn.Module):
    def __init__(self):
        super().__init__() 
        self.main = nn.ParameterList([torch.randn(1),torch.tensor([1.],requires_grad=False)])
        # self.main = nn.ParameterList([torch.randn(1)])
        self.terms = nn.ParameterList()

        
    def forward(self, x):
        if  len(self.terms) < math.floor(self.main[1]) - 1:
            self.add_term()
        y = self.main[0] * x ** self.main[1]
        for idx,coeff in enumerate(self.terms):
             y = y + coeff * x**(idx+1)
        return y

    def add_term(self):
        print("Adding term")
        # Add new term, init coeff to 1. non-trainable exponent to floor of main exponent
        self.terms.append(nn.Parameter(torch.tensor([1.])))

    def __str__(self):
        s = ""
        s += f"Main: {self.main[0]}x^{self.main[1]} + "
        for idx,term in enumerate(self.terms):
            s += f"{term}x^{idx+1} + "
        return s[:-3]



class QuasiPolyLayer(nn.Module):
  def __init__(self, in_features, out_features, product=False):
    super().__init__()
    self.out_features = out_features
    self.in_features = in_features
    # a 2d module list of quasipoly synapses sizes in_features x out_features
    self.synapses = nn.ModuleList([
        nn.ModuleList([QuasiPolySynapse() for _ in range(self.in_features)])
        for _ in range(self.out_features)
    ])
    self.bias = nn.Parameter(torch.randn(out_features))
    self.product = product
  
  def forward(self, x):
    batch_size = x.shape[0]
    y = torch.zeros(batch_size, self.out_features, dtype=torch.float32)

    for i in range(self.out_features):
        if self.product:
            log_sum = torch.zeros(batch_size, dtype=torch.float32)
        else:
            y_i = torch.zeros(batch_size, dtype=torch.float32)

        for j in range(self.in_features):
            x_j = x[:, j]
            synapse_output = self.synapses[i][j](x_j)
            # print(f"syanpse_output {synapse_output}")
            if self.product:
                # Assuming you have a tensor named 'tensor' and a threshold value
                threshold = 1e-4
                synapse_output = torch.where(synapse_output < threshold, threshold, synapse_output)
                log_sum = log_sum + torch.log(synapse_output) 
            else:
                y_i = y_i + synapse_output

        if self.product:
            log_sum_max, _ = torch.max(log_sum, dim=0, keepdim=True)
            # print(f"log_sum {log_sum} log_sum_max {log_sum_max}")
            y[:, i] = torch.exp(log_sum - log_sum_max)
        else:
            y[:, i] = y_i

    y = y + self.bias

    return y
  


class AddEpsilon(nn.Module):
    def __init__(self, epsilon=1e-10): # 1e-10 is smallest possible float in pytorch
        super().__init__()
        self.epsilon = epsilon

    def forward(self, x):
        return x + self.epsilon

    

class LeNet5(nn.Module):

    def __init__(self, num_classes, grayscale=False):
        super(LeNet5, self).__init__()
        
        self.grayscale = grayscale
        self.num_classes = num_classes

        in_channels = 3 # because cifar10 is colored

        self.features = nn.Sequential(
            
            nn.Conv2d(in_channels, 6*in_channels, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6*in_channels, 16*in_channels, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2)
        )

        self.classifier = nn.Sequential(
            nn.Linear(16*5*5*in_channels, 84*in_channels),  
            nn.ReLU(),
            # nn.Linear(120*in_channels, 84*in_channels),
            # QuasiPolyLayer(60*in_channels, 21*in_channels),
            nn.ReLU(),
            # nn.Linear(84*in_channels, num_classes)
            AddEpsilon(),
            QuasiPolyLayer(84*in_channels, num_classes, product=True)
        )


    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        return logits



In [95]:
def init_train_var(model):
  
  criterion = nn.CrossEntropyLoss()
  # optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.2,weight_decay=0.001) # set momentum to 0.2 after trial and error
  # optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.00001)
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

  return criterion, optimizer

In [96]:
def test(model_path, test_loader):
  # use the provided test loader
  # return the test accuracy
  model = torch.load(model_path)
  model.eval()
  with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
      outputs = model(images)
      _, predicted = torch.max(outputs, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
    test_acc = correct / total
  return test_acc


In [97]:

BATCH_SIZE = 1024
train_loader, val_loader, test_loader = create_data_loader(batch_size=BATCH_SIZE)

EPOCH_COUNT = 30

model = LeNet5(10)
model = model.to(device)
model = nn.DataParallel(model)
criterion, optimizer = init_train_var(model)


Files already downloaded and verified
Files already downloaded and verified


In [98]:
%%time

for epoch in range(EPOCH_COUNT):
    model.train()
    train_loss = 0
    train_correct = 0
    # for each batch
    for i, (inputs, labels) in enumerate(train_loader):
        # count the number of weights with very small magnitute < 1e-3
        nans_count = 0
        for param in model.parameters():
            if param.grad is not None:
                nans_count += torch.sum(torch.isnan(param.grad)).item()
        
        print(f"nans count: {nans_count}")
        
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        # print(f"outputs {outputs}")
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print all gradients of the first synapse
        # print(f"synapse grads : {model.classifier[-1].synapses[0][0].main[0].grad}")
        # print(f"weights of first layer {model.classifier[-1].synapses[0][0]}")
        train_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        train_correct += (predicted == labels).sum().item()


    print(f'Epoch {epoch+1}/{EPOCH_COUNT}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_correct/40000:.4f}')
    # validate
    model.eval()
    val_loss = 0
    val_correct = 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_loader):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == labels).sum().item()
    val_acc = val_correct/10000
    print(f'Epoch {epoch+1}/{EPOCH_COUNT}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')



nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
Epoch 1/30, Train Loss: 101.5030, Train Accuracy: 0.0994
Epoch 1/30, Val Loss: 25.1924, Val Accuracy: 0.1025
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans count: 0
nans coun

In [None]:
# print weights of quasi poly layer

quasi_poly_layer = model.module.classifier[-1]
for i in range(quasi_poly_layer.out_features):
    for j in range(quasi_poly_layer.in_features):
        synapse = quasi_poly_layer.synapses[i][j]
        print(f"Weights of synapse ({i}, {j}): {synapse}")



Weights of synapse (0, 0): Main: Parameter containing:
tensor([0.9256], requires_grad=True)x^Parameter containing:
tensor([1.0002], requires_grad=True)
Weights of synapse (0, 1): Main: Parameter containing:
tensor([-1.6803], requires_grad=True)x^Parameter containing:
tensor([0.9974], requires_grad=True)
Weights of synapse (0, 2): Main: Parameter containing:
tensor([-1.7774], requires_grad=True)x^Parameter containing:
tensor([0.9984], requires_grad=True)
Weights of synapse (0, 3): Main: Parameter containing:
tensor([1.3900], requires_grad=True)x^Parameter containing:
tensor([1.0010], requires_grad=True)
Weights of synapse (0, 4): Main: Parameter containing:
tensor([-1.5643], requires_grad=True)x^Parameter containing:
tensor([0.9996], requires_grad=True)
Weights of synapse (0, 5): Main: Parameter containing:
tensor([-0.4038], requires_grad=True)x^Parameter containing:
tensor([1.0000], requires_grad=True)
Weights of synapse (0, 6): Main: Parameter containing:
tensor([0.6651], requires_gra

In [22]:
# TODO: report test performance of best model

test_acc = test('best_model.pth', test_loader)
print(test_acc)


FileNotFoundError: [Errno 2] No such file or directory: 'best_model.pth'