# Statistical Machine Learning - HW2
Adam Ilyas 1002010

## Convolutional Neural Network (CNN)

We will use PyTorch to train a Convolutional Neural Network (CNN) to improve classification
accuracy on the Fashion MNIST dataset. This dataset comprises 60,000 training examples and
10,000 test examples of 28x28-pixel monochrome images of various clothing items. Let us begin by
importing the libraries:

In [1]:
import numpy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import matplotlib.pyplot as plt
%matplotlib inline

There are a total of 10 classes enumerated in the following way:

In [2]:
# Maps the integer class index used by the dataset targets to a
# human-readable name (used for plot titles).
labels = {
    0: 'T-shirt',
    1: 'Trouser',
    2: 'Pullover',
    3: 'Dress',
    4: 'Coat',
    5: 'Sandal',
    6: 'Shirt',
    7: 'Sneaker',
    8: 'Bag',
    9: 'Ankle boot',
}

In [3]:
import os
# Directory where the Fashion-MNIST files are downloaded to / read from.
data_path = './data'
# makedirs(exist_ok=True) creates the directory only when it is missing,
# without the check-then-create race of isdir() + mkdir(), and also
# creates intermediate directories if data_path is ever made nested.
os.makedirs(data_path, exist_ok=True)

# Training and test splits; every image is converted by ToTensor() on access.
train_dataset = datasets.FashionMNIST(data_path, train=True,
                                      download=True, transform=transforms.ToTensor())
test_dataset = datasets.FashionMNIST(data_path, train=False,
                                     download=True, transform=transforms.ToTensor())

In [4]:
# Print the summary of the training split followed by the test split.
print(train_dataset, test_dataset, sep='\n')

Dataset FashionMNIST
    Number of datapoints: 60000
    Split: train
    Root Location: ./data
    Transforms (if any): ToTensor()
    Target Transforms (if any): None
Dataset FashionMNIST
    Number of datapoints: 10000
    Split: test
    Root Location: ./data
    Transforms (if any): ToTensor()
    Target Transforms (if any): None


In [None]:
# Mini-batch size shared by the training and the test loader.
batch_size = 100

# Training batches are reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

# Test batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Show a 7x7 grid of randomly chosen training images, each titled with
# its class name.
fig = plt.figure(figsize=(12, 12))
columns = 7
rows = 7
for i in range(1, columns * rows + 1):
    fig.add_subplot(rows, columns, i)

    img_no = numpy.random.randint(len(train_dataset))
    # Fetch the sample once instead of indexing the dataset twice.
    sample_img, sample_target = train_dataset[img_no]
    img = sample_img[0]  # first (only) channel of the image tensor
    # The target may be a plain int or a 0-d tensor depending on the
    # torchvision version; int() handles both, whereas .item() only
    # exists on tensors.
    img_class = int(sample_target)
    label = labels[img_class]
    plt.title(label)
    plt.axis('off')
    plt.imshow(img)
plt.show()

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
class CNN(nn.Module):
    """Small CNN: two convolutional blocks followed by one linear layer.

    Each block is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU
    -> MaxPool2d(2x2, stride 2). The classifier expects 7*7*32 flattened
    features and produces 10 scores, one per class.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 16 feature maps.
        self.layer1 = self._conv_block(1, 16)
        # 16 -> 32 feature maps.
        self.layer2 = self._conv_block(16, 32)
        # The padded 3x3 convolutions keep the spatial size and each pooling
        # step halves it, so a 28x28 input ends up as 32 maps of 7x7.
        self.fc = nn.Linear(7 * 7 * 32, 10)

    @staticmethod
    def _conv_block(n_in, n_out):
        """Build one Conv -> BatchNorm -> ReLU -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(n_out),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Return the unnormalized class scores for a batch of images ``x``."""
        features = self.layer2(self.layer1(x))
        # Collapse everything except the batch dimension for the linear layer.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network to optimize; switched to training mode.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that owns ``model``'s parameters; it is used
            for every update of this epoch.
        epoch: epoch number, only used in the progress printout.
    """
    model.train()
    # Built once per call: it does not need to be recreated for every batch.
    criterion = nn.CrossEntropyLoss()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, target)

        # Backward pass and parameter update. The caller's optimizer is used
        # as-is: constructing a new optimizer per batch would ignore the
        # caller's choice and throw away any running state it keeps
        # between steps.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of every 100th batch.
        if batch_idx % 100 == 0:
            print('Epoch:',epoch,',loss:',loss.item())
                     
def test(model, device, test_loader, plot=False):
    """Evaluate ``model`` on ``test_loader`` and print its accuracy.

    Args:
        model: network to evaluate; switched to evaluation mode.
        device: device every batch is moved to before the forward pass.
        test_loader: iterable yielding ``(data, target)`` batches; its
            ``dataset`` attribute supplies the total sample count.
        plot: when true, also show up to 10 images of the first batch,
            each titled with the predicted class name.
    """
    max_examples = 10
    model.eval()
    correct = 0
    example_data = None
    example_pred = None

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            outputs = model(data)
            # Index of the largest score is the predicted class.
            _, pred = torch.max(outputs, 1)
            # Accumulate a Python int so the accuracy below is computed with
            # ordinary float arithmetic rather than integer-tensor division.
            correct += (pred == target).sum().item()

            # Keep a few samples of the first batch for plotting. Slicing
            # (rather than indexing 0..9) tolerates batches smaller than 10.
            if example_data is None:
                example_data = data[:max_examples, 0].cpu().numpy()
                example_pred = pred[:max_examples].cpu().tolist()

    total = len(test_loader.dataset)
    # Guard against an empty dataset instead of dividing by zero.
    set_accuracy = 100.0 * correct / total if total else 0.0
    print(f'Test set accuracy: {set_accuracy}%')

    if plot and example_data is not None:
        plt.figure(figsize=(12, 6))
        for i, (image, pred_class) in enumerate(zip(example_data, example_pred)):
            plt.subplot(2, 5, i + 1)
            plt.imshow(image, cmap='gray', interpolation='none')
            plt.title(labels[pred_class])
            plt.axis('off')
        # Must be called (with parentheses) for the figure to be shown.
        plt.show()

In [None]:
# Training hyper-parameters: number of passes over the training set and
# the optimizer step size.
NUM_EPOCHS, LRATE = 10, 0.001

# Instantiate the network on the selected device and create an SGD
# optimizer over its parameters.
model = CNN().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=LRATE)

In [None]:
# Bare expression: the notebook displays the model's repr as the cell output.
model

### Xavier initialization
Initialize the weights by sampling from a zero-mean normal distribution
$$w^{(l)} \sim \mathcal{N}\left(0, \sqrt{\frac{2}{n^{(l)}+n^{(l-1)}}}\right)$$
where the second argument is the standard deviation and $n^{(l)}$ is the number of neurons in layer $l$.

This makes the variance of the activations in each layer similar to
one another.

In [None]:
def xavier_init_to_linear(m):
    """Apply Xavier (Glorot) normal initialization to a linear layer.

    Intended to be passed to ``Module.apply``. Modules that are not
    ``nn.Linear`` are left untouched, and so is the bias of linear layers.

    Args:
        m: the module to (possibly) re-initialize in place.
    """
    # isinstance also covers subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # Zero-mean normal with std = sqrt(2 / (fan_in + fan_out)); a plain
        # uniform_() would instead draw every weight from U(0, 1).
        nn.init.xavier_normal_(m.weight)
        
# Run the initializer over the model; the function only acts on nn.Linear
# modules (in CNN that is the `fc` layer).
model.apply(xavier_init_to_linear)

In [None]:
# Baseline: accuracy of the model before any training.
test(model, device, test_loader)
for epoch in range(1, NUM_EPOCHS + 1):
    train(model, device, train_loader, optimizer, epoch)
    # Evaluate after each epoch so that the last reported accuracy belongs
    # to the fully trained model.
    test(model, device, test_loader)