In [2]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
import os
from torchvision import datasets, transforms
from torch import optim, nn, unsqueeze
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import ToTensor, Lambda, Compose
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [3]:
torch.manual_seed(77)

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,))])

train_data = datasets.MNIST(
    root = 'data',
    train = True,                         
    transform = transform, 
    download = True,            
)
test_data = datasets.MNIST(
    root = 'data', 
    train = False, 
    transform = transform,
    download = True
)
# Split the training data into 50 000 training instances and 10 000 validation instances
traindata, valdata = train_test_split(train_data, test_size=10000, random_state=42)
batch_size = 16
trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch_size, shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(valdata, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=2)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [4]:
# Define the model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1,16,3,1,1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16,32,3,1,1),
            nn.ReLU(),
            nn.MaxPool2d(2),)
        self.conv3 = nn.Sequential(
            nn.Conv2d(32,64,3,1,1),
            nn.ReLU(),
            nn.MaxPool2d(2),)
        self.out = nn.Linear(64*3*3, 10)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1) # Flatten , same as x = torch.flatten(x, 1)
        output = self.out(x)
        return output

model = CNN()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Use cross-entropy as the loss function, and Adam as the optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(dataloader, model, loss_func, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        y_hot = F.one_hot(y, 10)
        y_hot = y_hot.float()
        # y_hot = torch.zeros(batch_size, 10)
        # y_hot[range(y_hot.shape[0]), y]=1      


        X, y_hot = X.to(device), y_hot.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_func(pred, y_hot)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            # print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    return loss

def test(dataloader, model, loss_func):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    for batch, (X, y) in enumerate(dataloader):
        y_hot = F.one_hot(y, 10)
        y_hot = y_hot.float()
        # y_hot = torch.zeros(batch_size, 10)
        # y_hot[range(y_hot.shape[0]), y]=1     
        X, y_hot = X.to(device), y_hot.to(device) 
        # Compute prediction error
        pred = model(X)
        test_loss += loss_func(pred, y_hot).item()
        correct += (pred.argmax(axis=1) == y_hot.argmax(axis=1)).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Average loss: {test_loss:>8f} \n")
    return test_loss, correct*100

In [5]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 10
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

Epoch 1
-------------------------------


KeyboardInterrupt: ignored

In [None]:
# Baseline
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

In [None]:
plt.plot(range(5), [0.0031650410192771232, 0.0028633269980142357, 0.002594966785109864, 0.0022605173577328968, 0.002502553227165231]
 , '-o', label='lr=1e-03')
plt.plot(range(5), [0.007731382175777981, 0.015254152096401902, 0.006859211771761148, 0.007565116742734852, 0.005825696614252957], '-o',label='lr=1e-02')
plt.plot(range(5), [0.004654977587856024, 0.004453237637505697, 0.004261726191478602, 0.004093769083383881, 0.00405370544852264], '-o',label='lr=1e-04')
plt.plot(range(5), [0.00404618132344301, 0.004039355023919262, 0.004034300562935369, 0.00402999843546371, 0.004025004300348815], '-o',label='lr=1e-05')

plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title('Loss over time for validation set for different learning rates')
plt.legend()
plt.savefig('loss_lr.png', dpi=300)

In [None]:
# different batch sizes
# batch_size = 8

epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

In [None]:
# batch_size = 4

epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(valloader, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

In [None]:
plt.plot(range(5), [0.0031650410192771232, 0.0028633269980142357, 0.002594966785109864, 0.0022605173577328968, 0.002502553227165231] , '-o', label='batch_size=16')
plt.plot(range(5), [0.01308384236522793, 0.012966860896266414, 0.009789905387962861, 0.010797198192722405, 0.009511369838027452], '-o',label='batch_size=4')
plt.plot(range(5), [0.006741597526940677, 0.008251433646622024, 0.005145266192427562, 0.0047450756808123115, 0.006215380649445582], '-o',label='batch_size=8')

plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title('Loss over time for validation set for different batch sizes')
plt.legend()
plt.savefig('loss_batch_size_val.png', dpi=300)

In [15]:
# test set with batch_size = 16, learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 10
test_loss_list = []
test_acc = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_func, optimizer)
    test_loss, acc = test(testloader, model, loss_func)
    test_loss_list.append(test_loss)
    test_acc.append(acc)
print("Test loss: ", test_loss_list)
print("Test accuracy: ", test_acc)


Epoch 1
-------------------------------
Test Error: 
 Accuracy: 99.0%, Average loss: 0.004543 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 99.2%, Average loss: 0.003270 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 99.0%, Average loss: 0.004157 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 99.0%, Average loss: 0.004406 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 98.8%, Average loss: 0.005130 

Epoch 6
-------------------------------
Test Error: 
 Accuracy: 99.2%, Average loss: 0.003669 

Epoch 7
-------------------------------
Test Error: 
 Accuracy: 99.2%, Average loss: 0.004128 

Epoch 8
-------------------------------
Test Error: 
 Accuracy: 99.1%, Average loss: 0.004014 

Epoch 9
-------------------------------
Test Error: 
 Accuracy: 99.1%, Average loss: 0.003809 

Epoch 10
-------------------------------
Test Error: 
 Accuracy: 99.2%, Average loss: 0.003740 

Test loss:  [0.004543057866915849, 0.00

In [10]:
# Question 9
# Define a transform to normalize the data 
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,))])

# data augmentation for training data
train_transform = transforms.Compose([
                                    # transforms.ToPILImage(),
                                    # transforms.CenterCrop(21),
                                    # transforms.RandomRotation(30),
                                    # transforms.GaussianBlur(1),
                                    transforms.RandomAdjustSharpness(17),
                                    transforms.RandomAutocontrast(),
                                    # transforms.ColorJitter(brightness=0.2, contrast=0.2),
                                    # transforms.RandomAffine(degrees=20, translate=(0.1,0.1), scale=(0.9, 1.1)),
                                    # transforms.AugMix(),
                                    # transforms.RandomHorizontalFlip(),
                                    # transforms.RandomCrop(32, 4),
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,)),
                                    ])

# Download and load the training data
trainset0 = datasets.MNIST('data_augm', download=True, train=True, transform=train_transform)
testset = datasets.MNIST('data_augm', download=True, train=False, transform=transform)

# Also create a validation set 
trainset_augm, valset_augm = random_split(trainset0, [50000, 10000], generator=torch.Generator().manual_seed(42))
# trainset, valset = train_test_split(trainset0, test_size=10000, random_state=42)

batch_size = 16

trainloader_augm = torch.utils.data.DataLoader(trainset_augm, batch_size=batch_size, shuffle=True, num_workers=2)
valloader_augm = torch.utils.data.DataLoader(valset_augm, batch_size=batch_size, shuffle=True, num_workers=2)
testloader_augm = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True, num_workers=2)

In [11]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 5
test_loss_list = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader_augm, model, loss_func, optimizer)
    test_loss, acc = test(valloader_augm, model, loss_func)
    test_loss_list.append(test_loss)
print("Val loss: ", test_loss_list)

Epoch 1
-------------------------------
Test Error: 
 Accuracy: 98.8%, Average loss: 0.002985 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 99.1%, Average loss: 0.002687 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 99.0%, Average loss: 0.002870 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 99.1%, Average loss: 0.002806 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 99.1%, Average loss: 0.002801 

Val loss:  [0.0029850508381238923, 0.002687005309452158, 0.002869515186538854, 0.002805865121220016, 0.002801040686118075]


In [13]:
# test set with data augmentation
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 10
test_loss_list = []
test_acc = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader_augm, model, loss_func, optimizer)
    test_loss, acc = test(testloader_augm, model, loss_func)
    test_loss_list.append(test_loss)
    test_acc.append(acc)
print("Test loss: ", test_loss_list)
print("Test accuracy: ", test_acc)

Epoch 1
-------------------------------


KeyboardInterrupt: ignored