## ELEC4840 Assignment 1

Name:

Student ID:

- Problem 1 (40%)

- Problem 2 (60%)

In [None]:
# import packages
import torch
from torch import nn
from torchvision import datasets, transforms
from tqdm import tqdm

## HINT
You should run training on the __training dataset__ and validate the model's performance during training using the __validation dataset__. After training finishes, select the model that performs best on the validation dataset, then report that model's performance on the __test dataset__.

In [None]:
# load MNIST dataset
# Normalize(mean=0.5, std=0.5) computes (x - 0.5) / 0.5 on the tensor
# produced by ToTensor. The same `transform` is reused by the EMNIST cell.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=0.5,
        std=0.5
    )
])

data_train = datasets.MNIST(
    root="./data/",
    transform=transform,
    train=True,
    download=True
)

data_test = datasets.MNIST(
    root="./data/",
    transform=transform,
    train=False
)

# Split the official training data: 80% for training, 20% for validation.
# The official test split (`data_test`) is kept untouched for the final report.
split_train_size = int(0.8*(len(data_train)))  # 80% of the training data
split_valid_size = len(data_train) - split_train_size  # remaining 20%

# A fixed generator seed makes the train/validation partition identical on
# every "Restart & Run All", so validation-based model selection is comparable
# between runs.
train_set, valid_set = torch.utils.data.random_split(
    data_train, [split_train_size, split_valid_size],
    generator=torch.Generator().manual_seed(0))

loader_train = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=64,
    shuffle=True
)

# Evaluation loaders do not need shuffling.
loader_valid = torch.utils.data.DataLoader(
    dataset=valid_set,
    batch_size=64,
    shuffle=False
)

loader_test = torch.utils.data.DataLoader(
    dataset=data_test,
    batch_size=64,
    shuffle=False
)

In [None]:
# load EMNIST dataset
# All names in this cell carry an `emnist_` prefix so that they do not
# overwrite the MNIST variables (`train_set`, `split_train_size`, ...) that
# the Problem 1 training loop still relies on.
emnist_train_full = datasets.EMNIST(
    root="data",
    split="balanced",
    download=True,  # boolean flag (was the string "True")
    train=True,
    transform=transform
)

emnist_test_set = datasets.EMNIST(
    root="data",
    split="balanced",
    download=True,
    train=False,
    transform=transform
)

# The dataset length is available directly; no DataLoader is needed for it.
emnist_train_size = int(0.8*(len(emnist_train_full)))  # use 80% as train set
emnist_val_size = len(emnist_train_full) - emnist_train_size  # use 20% as validation set

# Fixed seed so the train/validation partition is reproducible across runs.
emnist_train_set, emnist_val_set = torch.utils.data.random_split(
    emnist_train_full, [emnist_train_size, emnist_val_size],
    generator=torch.Generator().manual_seed(0))

print(f'train set size: {emnist_train_size}, validation set size: {emnist_val_size}')
# EMNIST loader
loader_train_emnist = torch.utils.data.DataLoader(
    dataset=emnist_train_set,
    batch_size=256, # Can be modified
    shuffle=True
)

# Evaluation loaders are not shuffled, matching the MNIST loaders.
loader_val_emnist = torch.utils.data.DataLoader(
    dataset=emnist_val_set,
    batch_size=256, # Can be modified
    shuffle=False
)

loader_test_emnist = torch.utils.data.DataLoader(
    dataset=emnist_test_set,
    batch_size=256, # Can be modified
    shuffle=False
)

### Problem 1. Train an MLP network on the MNIST dataset (40%)

Train a four-layer MLP (four linear layers, with dropout as listed below) on the MNIST dataset.

| Layer |  Type  |       Input       |      Output       | Activation |
| :---: | :----: | :---------------: | :---------------: | :--------: |
|   1   | Linear |      28 * 28      |         128         |    ReLU    |
|   2   | Linear |         128         |        64         |    ReLU    |
|  3  | Dropout | 64 | 64 | - |
|   4   | Linear |        64         |        32         |    ReLU    |
|  5   | Dropout | 32 | 32 | - |
|   6   | Linear |       32         |        10         |     -      |


a.) Implement the methods `__init__` and `forward` in the class `Model` (15%);

b.) Implement the training code (15%).

c.) Plot the accuracy and loss curves (10%). Compare and discuss the difference in results between the model with and without the dropout operation (5%).

In [None]:
class Model(nn.Module):
    """MLP classifier skeleton for Problem 1 (to be completed by the student).

    Per the layer table in the Problem 1 description, the intended stack is:
    Linear(28*28 -> 128) + ReLU, Linear(128 -> 64) + ReLU, Dropout,
    Linear(64 -> 32) + ReLU, Dropout, Linear(32 -> 10) with no activation.
    None of these layers are defined yet.
    """
    def __init__(self):
        """Initialise the ``nn.Module`` base class; layer definitions go here."""
        super(Model, self).__init__()
        ## ----- write your code here

    def forward(self, x):
        """Compute the network output for the input batch ``x``.

        Not implemented yet: as written, the body is only ``pass``, so the
        method returns ``None``.
        NOTE(review): ``x`` is presumably a batch of MNIST images from the
        loaders above and will need flattening to 28*28 features -- confirm.
        """
        ## ----- write your code here
        ## Remember to return the output and delete the 'pass' command below
        pass

In [None]:
max_epoch = 10
use_cuda = True

# model initialization
model = Model()
if use_cuda:
    model = model.cuda()

# loss function
criterion = # -- write your code here --

# optimizer
optimizer = # -- write your code here --

loss_list = []
acc_train_list = []
acc_valid_list = []

best_valid_loss = float('inf')

for epoch in range(max_epoch):
    running_loss = 0.0
    running_correct = 0
    valid_loss = 0.0
    valid_correct = 0
    print(" -- Epoch {}/{}".format(epoch + 1, max_epoch))

    # training
    # call a function to control the dropout behaviour 
    # your code here

    for data in tqdm(loader_train):
        # set all gradients to zero
        optimizer.zero_grad()

        # fetch data
        images, labels = data
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        # model forward
        outputs = # -- write your code here --

        # calculate loss
        loss = # -- write your code here --

        loss.backward()
        optimizer.step()

        pred = torch.argmax(outputs, dim=1)
        running_loss += loss.item()
        running_correct += torch.sum(pred == labels)




    # record loss, accuracy
    loss = running_loss / len(train_set)
    loss_list.append(loss)
    acc_train = running_correct / len(train_set)
    acc_train_list.append(acc_train.item())

    # testing

    # call a function to control the dropout behaviour 
    # your code here

    valid_correct = 0
    for data in loader_valid:
        # fetch data
        # model forward
        # calculate loss

    # Save best checkpoint
    if # -- write your code here --:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'best_checkpoint_mnist.pt')

    acc_valid = valid_correct / len(valid_set)
    acc_valid_list.append(acc_valid.item())
    print("Loss {:.4f}, Train Accuracy {:.4f}%, Validation Accuracy {:.4f}%".format(
        loss,
        acc_train * 100,
        acc_valid * 100
    ))


# Reload the best checkpoint saved during training. Mapping the stored tensors
# to the CPU first keeps this working when the checkpoint was written from a
# GPU session but is reloaded on a CPU-only machine; `load_state_dict` then
# copies the values into the model's parameters on whatever device they live.
model.load_state_dict(torch.load('best_checkpoint_mnist.pt', map_location='cpu'))

# call a function to control the dropout behaviour
# your code here

test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for images, labels in loader_test:
        # fetch data
        # model forward
        # calculate accuracy on test set
        # (`correct` and `total` must be updated here; `total` is the divisor below)
        pass
acc_test=correct/total
print("Best model on test set: Test Accuracy {:.4f}%".format(
        acc_test * 100
    ))

c.) Plot the loss and accuracy curves

In the previous cell, you recorded the training loss and the training/validation accuracy in `loss_list`, `acc_train_list`, and `acc_valid_list`, respectively. In this problem, you are required to plot two figures: 1.) the training loss curve (5%); 2.) the training and validation accuracy curves in the same figure (5%).

In [None]:
import numpy as np
from matplotlib import pyplot as plt

## ----- write your code here

### Problem 2. Train a LeNet network on the EMNIST dataset (60%)

1. Implement the model and training code as instructed in the Jupyter notebook (30%);

2. In the summary report, plot the loss and accuracy curve (10%);

3. Try different choices of batch sizes, learning rates, or optimizers in your experiments. In the summary report, report the accuracy of your three choices. You may discuss the findings, their explanation, and possible solutions in a short paragraph following your results (30%).

In [None]:
## ----- write your code here.