---
# Homework 1: Multilayer Perceptron (MLP)   (100 points)

In this homework, we're going to build a multilayer perceptron (MLP) model and then train it on an image classification dataset.

In [1]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time
import copy
torch.set_num_threads(4)
torch.set_num_interop_threads(4)

# Loading Data (0 points)

This is an example of building a torch data loader for a dataset. Here we have used it to load the Fashion MNIST image dataset. No code needed here.

The raw data is downloaded from: [Kaggle - Fashion MNIST Dataset](https://www.kaggle.com/datasets/zalando-research/fashionmnist?resource=download)

We have already built training, validation, and test datasets for this homework. 

In [2]:
train_csv=pd.read_csv('../../assets/fashion_mnist/train.csv')
test_csv=pd.read_csv('../../assets/fashion_mnist/test.csv')
valid_csv=pd.read_csv('../../assets/fashion_mnist/val.csv')

In [3]:
valid_csv.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,8,0,0,0,0,0,0,0,0,0,...,187,190,196,201,209,221,218,211,102,0
2,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
class FashionDataset(Dataset):
    """User defined class to build a datset using Pytorch class Dataset."""
    
    def __init__(self, data, transform = None):
        """Method to initilaize variables.""" 
        self.fashion_MNIST = list(data.values)
        self.transform = transform
        
        label = []
        image = []
        
        for i in self.fashion_MNIST:
             # first column is of labels.
            label.append(i[0])
            image.append(i[1:])
        self.labels = np.asarray(label)
        # Dimension of Images = 28 * 28 * 1. where height = width = 28 and color_channels = 1.
        self.images = np.asarray(image).reshape(-1, 28, 28, 1).astype('float32')
        self.images = self.images/256

    def __getitem__(self, index):
        label = self.labels[index]
        image = self.images[index]
        
        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.images)

In [5]:
batch_size=256

train_set = FashionDataset(train_csv, transform=transforms.Compose([transforms.ToTensor()]))
val_set = FashionDataset(valid_csv, transform=transforms.Compose([transforms.ToTensor()]))
test_set = FashionDataset(test_csv, transform=transforms.Compose([transforms.ToTensor()]))

train_loader = DataLoader(train_set, batch_size=batch_size)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

# Question 1: Build a MLP (50 pts)
In this part, you will build a multilayer perceptron (MLP) neural network.

The input to the model initialization should include the number of inputs, the number of outputs and the size of the hidden layer. 

In the image classification dataset we use (Fashion MNIST), all images are 28*28=784, so the number of inputs should always be 784. The dataset has 10 labels (classes), so the number of outputs should be always 10. The size of a hidden layer is a tunable hypeerparameter.

For a fair comparison, we ask you to implement exactly two perceptron layers in the MLP. Please ensure that the hidden layer size is consistent to connect the two perceptron layers. Use ReLU as the activation function. 

A simple trick here is the initialization. A good initialization is always helpful to the model performance.

In [6]:
class MLP(nn.Module):
    def __init__(self,num_inputs=784,num_outputs=10,num_hiddens=256):
        super(MLP, self).__init__()
        self.hidden = nn.Linear(num_inputs,num_hiddens)
        self.relu = nn.ReLU()
        self.output = nn.Linear(num_hiddens,num_outputs)

    def forward(self, X):
        X = X.view(X.size(0), -1)  # Flatten the input tensor
        X = self.relu(self.hidden(X))
        X = self.output(X)  # Now correctly using the output from the hidden layer
        return X

In [7]:
# hyperparameter values to consider.
hiddens=[256,512,1024]
lrs=[5e-2, 1e-1, 5e-1]
num_inputs=784
num_outputs=10
loss_function = nn.CrossEntropyLoss()

In [8]:
# function for evaluating the model performance
def eval_model(model,data_loader):
    model.eval()
    y_true_list=[]
    y_pred_list=[]
    model.eval()
    for x,y in data_loader:
        outputs=model(x)
        _, y_pred = torch.max(outputs, 1)
        y_pred_list.extend(y_pred.clone().detach().tolist())
        y_true_list.extend(y.clone().detach().tolist())
    acc=classification_report(y_true_list, y_pred_list,output_dict=True)['accuracy']
    return acc

In [9]:
# Hidden tests in this cell

# Question 2: Train the MLP (50 pts)
In this part, you will write a few lines to train the multilayer perceptron neural network you just built.

Here's the list of things that you need to implement. All of them can (should) be done using one line of code.

    Initialize the model with a set of hyperparameters
    Initialize the optimizer with the model's trainable parameters
    Set the model into the training mode
    For every batch of data: 
        zero the gradient in the optimizer
        feed the input into the model
        compute the loss
        back propagate the loss
        update the optimizer
        

In [10]:
random_seed = 3407
torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [11]:
start_time=time.time()
current_best=0.0
best_model=None
for h in hiddens:
    for lr in lrs:
        ## TODO: initialize a MLP model with a set of hyperparameters
        ## TODO: initialize a optimizer for the model
        model = MLP(num_inputs = num_inputs,num_outputs = num_outputs, num_hiddens = h)
        optimizer = torch.optim.SGD(model.parameters(),lr = lr)
        for i in range(30):
            model.train()
            for X, y in train_loader:
                optimizer.zero_grad()
                outputs = model(X)
                loss = loss_function(outputs, y)
                loss.backward()
                optimizer.step()
            if (i+5) % 5 == 0:
                accuracy = eval_model(model, val_loader)
                print(f"Epoch [{i+1}/30], Loss: {loss.item():.4f}, Validation Accuracy: {accuracy:.4f}")
                if accuracy > current_best:
                    current_best = accuracy
                    best_model = copy.deepcopy(model)
                

Epoch [1/30], Loss: 0.7484, Validation Accuracy: 0.7385
Epoch [6/30], Loss: 0.4982, Validation Accuracy: 0.8268
Epoch [11/30], Loss: 0.4007, Validation Accuracy: 0.8403
Epoch [16/30], Loss: 0.3497, Validation Accuracy: 0.8452
Epoch [21/30], Loss: 0.3182, Validation Accuracy: 0.8490
Epoch [26/30], Loss: 0.2923, Validation Accuracy: 0.8533
Epoch [1/30], Loss: 0.7358, Validation Accuracy: 0.7664
Epoch [6/30], Loss: 0.4106, Validation Accuracy: 0.8340
Epoch [11/30], Loss: 0.3377, Validation Accuracy: 0.8466
Epoch [16/30], Loss: 0.2959, Validation Accuracy: 0.8524
Epoch [21/30], Loss: 0.2639, Validation Accuracy: 0.8562
Epoch [26/30], Loss: 0.2313, Validation Accuracy: 0.8588
Epoch [1/30], Loss: 0.5750, Validation Accuracy: 0.7867
Epoch [6/30], Loss: 0.3237, Validation Accuracy: 0.8238
Epoch [11/30], Loss: 0.2020, Validation Accuracy: 0.8417
Epoch [16/30], Loss: 0.1577, Validation Accuracy: 0.8534
Epoch [21/30], Loss: 0.1375, Validation Accuracy: 0.8640
Epoch [26/30], Loss: 0.0935, Validati

In [12]:
# Hidden tests in this cell

In [13]:
# Hidden tests in this cell