# ECS695P - Neural Networks and Deep Learning
## Coursework: Darrell Hoffman, 2022-04-22

The purpose of this assignment was to build the specific neural-network architecture described by our professor and then tune its hyperparameters to reach a validation accuracy of 90% on the Fashion-MNIST dataset. I was one of the 5% of students to receive a 100% grade on the assignment.

In [None]:
#Connect to Drive
import sys

from google.colab import drive

# Mount first, then extend the import path; my_utils_edit is imported further
# down and is presumably located in this Drive folder.
drive.mount('/content/gdrive', force_remount=True)
sys.path.append('/content/gdrive/MyDrive/Colab Notebooks')

#Import Libraries
# my_utils_edit has to be imported after the sys.path change above
import my_utils_edit as mu
import torch
from torch import nn
from IPython import display

### Create Dataloader and Load Dataset 

In [None]:
#Load Dataset
# Mini-batch size handed to the data loader helper
batch_size = 32
# The helper returns two iterators; both are later passed to mu.train_ch3
train_iter, test_iter = mu.load_data_fashion_mnist(batch_size) # function defined in my_utils_edit.py edited from my_utils.py provided in week 3

### Create the Model

In [None]:
# Augmentation transform used inside Net.forward, where it is applied to each image of a batch separately
from torchvision import transforms
# Flips its input horizontally with probability 0.5 and returns it unchanged otherwise
random_flip = transforms.RandomHorizontalFlip(p=0.5)

In [None]:
#Define Model
class Net(torch.nn.Module):
    """Patch-based MLP image classifier: stem, two backbone blocks, classifier.

    The stem cuts each image into non-overlapping 7x7 patches and maps every
    flattened patch to a ``num_hidden``-dimensional feature vector. Each
    backbone block applies one two-layer MLP across the patch axis followed by
    one two-layer MLP across the feature axis. The classifier averages the
    features over all patches and maps the mean to ``num_outputs`` scores.

    Args:
        num_inputs: Length of one flattened patch. The patch size is fixed at
            7x7 in ``forward``, so this has to be 49.
        num_hidden: Feature size computed for each patch.
        num_patches: Number of patches per image (16 for a 28x28 image).
        num_outputs: Number of output scores.
    """

    def __init__(self, num_inputs, num_hidden, num_patches, num_outputs):
        super().__init__()
        self.num_inputs = num_inputs
        self.num_hidden = num_hidden
        self.num_patches = num_patches
        self.num_outputs = num_outputs

        #Stem Linear Layers
        self.LS1 = nn.Linear(num_inputs, num_hidden)
        self.LS2 = nn.Linear(num_hidden, num_hidden)

        #Backbone Layers, block 1 (LB1/LB2 mix across patches, LB3/LB4 across features)
        self.LB1 = nn.Linear(num_patches, num_patches)
        self.LB2 = nn.Linear(num_patches, num_patches)
        self.LB3 = nn.Linear(num_hidden, num_hidden)
        self.LB4 = nn.Linear(num_hidden, num_hidden)

        #Backbone Layers, block 2 (same layout as block 1)
        self.LB5 = nn.Linear(num_patches, num_patches)
        self.LB6 = nn.Linear(num_patches, num_patches)
        self.LB7 = nn.Linear(num_hidden, num_hidden)
        self.LB8 = nn.Linear(num_hidden, num_hidden)

        #Classifier Layers
        self.LC1 = nn.Linear(num_hidden, num_hidden)
        self.LC2 = nn.Linear(num_hidden, num_outputs)

        #Activation Function
        self.relu = nn.LeakyReLU(0.1)

        #Dropout Function
        self.dropout = nn.Dropout(0.1)

    def _transposed_mlp(self, x, first, second):
        """Swap axes 1 and 2 of ``x``, then apply first -> dropout -> activation -> second."""
        hidden = first(torch.transpose(x, 1, 2))
        hidden = self.relu(self.dropout(hidden))
        return second(hidden)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Image batch, written for shape ``(batch, 1, H, W)`` where the
                image splits into ``num_patches`` patches of 7x7 pixels
                (e.g. ``(batch, 1, 28, 28)`` with ``num_patches == 16``).

        Returns:
            Tensor of shape ``(batch, num_outputs)`` with unnormalised scores.
        """
        #STEM
        # The random flip is data augmentation: apply it per image, and only in
        # training mode so that evaluation is deterministic.
        if self.training:
            x = torch.stack([random_flip(img) for img in x])

        #Divide batch into patches
        Img = torch.flatten(x, 1, 2)               # merge channel and height axes
        X_pij = Img.unfold(1, 7, 7).unfold(2, 7, 7)  # 7x7 windows with stride 7
        X_pij = torch.flatten(X_pij, 1, 2)         # one axis enumerating the patches

        #Vectorize Patches
        X_pij = torch.flatten(X_pij, 2, 3)

        #Transform to Feature Vector with MLP
        X_xij = self.LS2(self.relu(self.LS1(X_pij)))

        # Every backbone MLP starts by transposing its input, so the layers
        # alternate between acting on the patch axis and on the feature axis.
        #Block 1
        O1 = self._transposed_mlp(X_xij, self.LB1, self.LB2)
        O2 = self._transposed_mlp(O1, self.LB3, self.LB4)

        #Block 2
        O3 = self._transposed_mlp(O2, self.LB5, self.LB6)
        # Feed O3 (not O1) into the last MLP; using O1 here would discard the
        # work of LB3-LB6 and leave them without gradients.
        O4 = self._transposed_mlp(O3, self.LB7, self.LB8)

        #CLASSIFIER
        #Compute Mean Feature
        O4_mean = torch.mean(O4, 1, keepdim=False)

        #Classifier MLP
        out = self.LC1(O4_mean)
        out = self.relu(out)
        out = self.LC2(out)
        return out

In [None]:
#Model Initialization edited from week 6 lab solutions
def init_weights(m):
    """Initialise a linear layer in place; leave every other module untouched.

    Intended to be passed to ``Module.apply``, which calls it once per
    submodule. Weights of ``nn.Linear`` layers are drawn with
    ``kaiming_normal_`` and their bias, when the layer has one, is set to zero.

    Args:
        m: The module to (possibly) initialise.
    """
    if isinstance(m, nn.Linear): # by checking type we can init different layers in different ways
        torch.nn.init.kaiming_normal_(m.weight) #changed from normal_ to kaiming_normal_
        if m.bias is not None: # nn.Linear(..., bias=False) has no bias tensor to reset
            torch.nn.init.zeros_(m.bias)

In [None]:
#Create Model:
num_inputs = 49    # length of one flattened patch (Net.forward cuts 7x7 patches)
num_hidden = 128   # feature size per patch
num_patches = 16   # patches per image; assumes 28x28 inputs (4x4 grid of 7x7 patches) - TODO confirm
num_outputs = 10   # number of scores produced by the classifier
net = Net(
    num_inputs=num_inputs,
    num_hidden=num_hidden,
    num_patches=num_patches,
    num_outputs=num_outputs,
)
# Run init_weights on every submodule of the network
net.apply(init_weights)

### Create Loss and Optimizer

In [None]:
#Cross Entropy Loss:
loss = nn.CrossEntropyLoss()

#learning rate and weight decay
lr = 0.1
wd = 0.0005

#Averaged SGD (torch.optim.ASGD) over all parameters of the model
optimizer = torch.optim.ASGD(
    net.parameters(),
    lr=lr,
    weight_decay=wd,
    lambd=0.0001,
    alpha=0.75,
    t0=1000000.0,
)

### Train Model

In [None]:
#Training Script function mu.train_ch3 defined in my_utils_edit.py
# Epoch count handed to mu.train_ch3
num_epochs = 100
# Runs training with the model, both data iterators, the loss and the optimizer created above
mu.train_ch3(net, train_iter, test_iter, loss, num_epochs, optimizer) #edited from my_utils.py file provided in week 3