<a href="https://colab.research.google.com/github/abialbon/pytorch-udacity-scholarship/blob/master/PyTorch/MNIST_MLP_Exercise_Revision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST MLP Exercise Revision

Things to do:
1. Get data from the MNIST dataset (I will use this dataset so that training takes less time)
2. Split the data into train, validation and test sets
3. Create a neural network with 3 hidden layers
4. Train the network
5. Calculate the training loss, the validation loss, and the accuracy on the test set
6. Save the model that performs best

## Installing PyTorch

In [1]:
# Install PyTorch 0.4.1 and torchvision from a wheel chosen to match this
# runtime's Python build and accelerator. See http://pytorch.org/
from os.path import exists
# NOTE(review): `wheel.pep425tags` may not be available in newer `wheel`
# releases -- confirm this import still works in the target environment.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel tag built as '<impl><version>-<abi>' from the three helpers above.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# List the shared-library cache, keep the cudart.so entries, and rewrite each
# line ending in '.<digits>.<digits>' into 'cu<digits><digits>'.
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# Use the first CUDA tag when the /dev/nvidia0 device node exists, else 'cpu'.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# NOTE(review): the wheel is fetched over plain http and torchvision is not
# version-pinned -- consider https and an explicit torchvision version.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision

tcmalloc: large alloc 1073750016 bytes == 0x58906000 @  0x7fdd7fa4c2a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070


## Importing the datasets

In [0]:
import torch
import numpy as np
from torchvision import datasets, transforms

# Fraction of the training set held out for validation.
n_val = 0.2
# Seed used only for the train/validation split, so the split is reproducible
# across kernel restarts without touching NumPy's global random state.
split_seed = 0

# MNIST images have a single channel, so Normalize must receive exactly one
# mean and one std. (x - 0.5) / 0.5 rescales ToTensor's [0, 1] output to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

trainset = datasets.MNIST('data', train=True, download=True, transform=transform)
testset = datasets.MNIST('data', train=False, download=True, transform=transform)

# Shuffle all training indices once, then carve off the first `split` of them
# for validation; the remainder is used for training.
total_to_train = len(trainset)
idx = np.arange(total_to_train)
np.random.RandomState(split_seed).shuffle(idx)
split = int(np.floor(n_val * total_to_train))
val_idx, train_idx = idx[:split], idx[split:]

from torch.utils.data import SubsetRandomSampler
# Both samplers draw from `trainset`, each restricted to its own index subset.
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = torch.utils.data.DataLoader(trainset, batch_size=32, sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(trainset, batch_size=32, sampler=val_sampler)
# Evaluation does not benefit from shuffling; a fixed order keeps it deterministic.
test_loader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

## Creating the neural network

In [17]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

from torch import nn
import torch.nn.functional as F
from torch import optim

class Classifier(nn.Module):
    """Fully connected classifier with three hidden layers and log-softmax output.

    The defaults reproduce the original fixed architecture
    (784 -> 256 -> 128 -> 32 -> 10, dropout 0.2), so ``Classifier()`` builds
    the same network and the same ``state_dict`` keys as before.

    Args:
        n_input: number of features per flattened sample.
        n_hidden: sizes of the three hidden layers, in order.
        n_output: number of output classes.
        drop_p: dropout probability applied after the first hidden layer.

    Raises:
        ValueError: if ``n_hidden`` does not contain exactly three sizes.
    """

    def __init__(self, n_input=784, n_hidden=(256, 128, 32), n_output=10, drop_p=0.2):
        super().__init__()
        if len(n_hidden) != 3:
            raise ValueError(
                'n_hidden must contain exactly 3 layer sizes, got {}'.format(len(n_hidden)))
        h1, h2, h3 = n_hidden
        # Attribute names (and their order) are kept so saved checkpoints still load.
        self.fc1 = nn.Linear(n_input, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, n_output)
        self.dropout = nn.Dropout(drop_p)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs.

        The input is flattened to ``(-1, n_input)``, so image-shaped batches
        are accepted as long as each sample holds ``n_input`` values.
        """
        # Flatten using the layer's own input size instead of a duplicated constant.
        x = x.view(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        # Dropout is applied only after the first hidden layer.
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # log_softmax pairs with nn.NLLLoss, which expects log-probabilities.
        x = F.log_softmax(self.fc4(x), dim=1)
        return x
    

# Build the network and move its parameters to the selected device
# (nn.Module.to returns the module itself, so the calls can be chained).
model = Classifier().to(device)
print(model)

# Negative log-likelihood loss, matching the network's log_softmax output.
criterion = nn.NLLLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.003)

Classifier(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


## Training & validating the model

In [22]:
epochs = 30
# Lowest summed validation loss seen so far. np.inf is used because the
# np.Inf alias was removed in NumPy 2.0.
min_val_loss = np.inf

for e in range(epochs):
    # ---- Training pass (dropout active) ----
    model.train()
    r_train_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        # Clear gradients accumulated by the previous step
        optimizer.zero_grad()
        # Call the module rather than .forward() so registered hooks run
        output = model(images)
        loss = criterion(output, labels)
        r_train_loss += loss.item()
        loss.backward()
        optimizer.step()

    # ---- Validation pass (dropout disabled, no gradient tracking) ----
    model.eval()
    r_val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            loss = criterion(output, labels)
            r_val_loss += loss.item()

    # Reported losses are the running sums averaged over the number of batches.
    print('Epoch: {} ---- Training loss: {:.3f} ---- Val loss: {:.3f}'.format(e+1, r_train_loss/len(train_loader), r_val_loss/len(val_loader)))

    # Checkpoint whenever the validation loss matches or beats the best so far.
    if r_val_loss <= min_val_loss:
        min_val_loss = r_val_loss
        # Store the layer sizes alongside the weights so the network can be rebuilt.
        model_dict = {
            'n_input': 784,
            'n_hidden': [256, 128, 32],
            'n_output': 10,
            'state_dict': model.state_dict()
        }
        torch.save(model_dict, 'model.pt')
        print('The model parameters have been saved')

Epoch: 1 ---- Training loss: 0.204 ---- Val loss: 0.186
The model parameters have been saved
Epoch: 2 ---- Training loss: 0.195 ---- Val loss: 0.150
The model parameters have been saved
Epoch: 3 ---- Training loss: 0.180 ---- Val loss: 0.135
The model parameters have been saved
Epoch: 4 ---- Training loss: 0.180 ---- Val loss: 0.141
Epoch: 5 ---- Training loss: 0.162 ---- Val loss: 0.122
The model parameters have been saved
Epoch: 6 ---- Training loss: 0.165 ---- Val loss: 0.126
Epoch: 7 ---- Training loss: 0.159 ---- Val loss: 0.122
The model parameters have been saved
Epoch: 8 ---- Training loss: 0.154 ---- Val loss: 0.142
Epoch: 9 ---- Training loss: 0.149 ---- Val loss: 0.125
Epoch: 10 ---- Training loss: 0.148 ---- Val loss: 0.180
Epoch: 11 ---- Training loss: 0.144 ---- Val loss: 0.126
Epoch: 12 ---- Training loss: 0.138 ---- Val loss: 0.122
The model parameters have been saved
Epoch: 13 ---- Training loss: 0.140 ---- Val loss: 0.131
Epoch: 14 ---- Training loss: 0.138 ---- Val l

In [26]:
# Calculating the accuracy on the test set.
# NOTE: this evaluates the weights currently held by `model` (the state after
# the last training epoch), not the checkpoint written to 'model.pt'.
model.eval()
n_correct = 0
n_seen = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        output = model(images)
        # argmax of the log-probabilities equals argmax of the probabilities,
        # so exponentiating first is unnecessary.
        _, y_pred = torch.max(output, dim=1)
        n_correct += (y_pred == labels.view(*y_pred.shape)).sum().item()
        n_seen += labels.size(0)

# Count correct predictions over all samples instead of averaging per-batch
# means, which would over-weight a smaller final batch.
acc = n_correct / n_seen
print('The accuracy: {:.2f}%'.format(acc * 100))

The accuracy: 96.91%
