# Homework 6 - Experiments on MNIST for 10-class Classification

Please implement the following three functions:
- MnistMLP() - Design a 2-layer MLP
- MnistCNN() - Design a 2-layer CNN 

Please train the 2-layer MLP and CNN models on the Mnist dataset and print the training results for each epoch.

In [1]:
from torchvision.datasets import MNIST

In [2]:
from torchvision.datasets import MNIST
from torchvision.transforms import Compose,ToTensor,Normalize
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import os
import torch
import numpy as np

BATCH_SIZE = 128
TEST_BATCH_SIZE = 1000
device = "cuda" if torch.cuda.is_available() else "cpu"

# dataloader for the dataset
def get_dataloader(train,batch_size=BATCH_SIZE):
    transform_fn = Compose([
        ToTensor(),
        Normalize(mean = (0.1307,),std = (0.3081,))
        ]) 
    dataset = MNIST(root = './data',train = train,transform = transform_fn, download = True)
    data_loader = DataLoader(dataset,batch_size = batch_size,shuffle = True)
    return data_loader

In [5]:
# 2-lyer MLP 
class MnistMLP(nn.Module):
    # Tip: write `def __init__(self)` and `def forward(self,input)`
    # pass
    def __init__(self):
        super(MnistMLP, self).__init__()
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
# 2-lyer CNN
class MnistCNN(nn.Module):
    # Tip: write `def __init__(self)` and `def forward(self,input)`
    # pass
    def __init__(self):
        super(MnistCNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64*7*7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 64*7*7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Train the MLP model

In [7]:
model = MnistMLP().to(device)
optimizer = Adam(model.parameters(), lr=0.001)

In [9]:
def train(epoch, num_epochs):
    data_loader = get_dataloader(True)
    total_step = len(data_loader)
    for idx, (input, target) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(input.to(device))
        loss = F.nll_loss(output, target.to(device))
        loss.backward()
        optimizer.step()
        if (idx+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, idx+1, total_step, loss.item()))

In [10]:
def test():
    loss_list = []
    acc_list = []
    test_dataloader = get_dataloader(train = False,batch_size=TEST_BATCH_SIZE)
    for idx,(input,target) in enumerate(test_dataloader):
        with torch.no_grad():
            output = model(input.to(device))
            target = target.to(device)
            cur_loss = F.nll_loss(output, target)
            loss_list.append(cur_loss.cpu())
            pred = output.max(dim = -1)[-1]
            cur_acc = pred.eq(target).float().mean()
            acc_list.append(cur_acc.cpu())
    print("Mean accuracy: ", np.mean(acc_list), "Mean loss: ", np.mean(loss_list))

In [11]:
test()
num_epochs = 3
for i in range(num_epochs):
    train(i, num_epochs)
test()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Mean accuracy:  0.0802 Mean loss:  0.00712392
Epoch [1/3], Step [100/469], Loss: -1312.1116
Epoch [1/3], Step [200/469], Loss: -6817.9761
Epoch [1/3], Step [300/469], Loss: -16425.3398
Epoch [1/3], Step [400/469], Loss: -30878.6719
Epoch [2/3], Step [100/469], Loss: -63305.4766
Epoch [2/3], Step [200/469], Loss: -92035.7656
Epoch [2/3], Step [300/469], Loss: -120774.4141
Epoch [2/3], Step [400/469], Loss: -148456.4688
Epoch [3/3], Step [100/469], Loss: -210554.5625
Epoch [3/3], Step [200/469], Loss: -253836.4688
Epoch [3/3], Step [300/469], Loss: -291576.0000
Epoch [3/3], Step [400/469], Loss: -358135.5938
Mean accuracy:  0.0974 Mean loss:  -390300.1


## Train the CNN model

In [12]:
model = MnistCNN().to(device)
optimizer = Adam(model.parameters(), lr=0.001)

In [13]:
test()
num_epochs = 3
for i in range(num_epochs):
    train(i, num_epochs)
test()

Mean accuracy:  0.0766 Mean loss:  -0.0023528824
Epoch [1/3], Step [100/469], Loss: -206188.0000
Epoch [1/3], Step [200/469], Loss: -6746471.0000
Epoch [1/3], Step [300/469], Loss: -46076316.0000
Epoch [1/3], Step [400/469], Loss: -169745536.0000
Epoch [2/3], Step [100/469], Loss: -783035840.0000
Epoch [2/3], Step [200/469], Loss: -1505780224.0000
Epoch [2/3], Step [300/469], Loss: -2828993536.0000
Epoch [2/3], Step [400/469], Loss: -4365740544.0000
Epoch [3/3], Step [100/469], Loss: -8771931136.0000
Epoch [3/3], Step [200/469], Loss: -12989839360.0000
Epoch [3/3], Step [300/469], Loss: -18487248896.0000
Epoch [3/3], Step [400/469], Loss: -22285948928.0000
Mean accuracy:  0.113500014 Mean loss:  -28374931000.0
