# Importing project related libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torch import optim
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#initializing network related variables
VALIDATION_SIZE = 0.2 #20 % of the train set
BATCH_SIZE = 20
NUM_WORKERS = 0
NUM_EPOCHS = 10

In [3]:
# convert data to torch.FloatTensor
transform = transforms.ToTensor()

In [6]:
#Loading the data
train_data = datasets.MNIST('../data/', download=True, train=True, transform=transform)
test_data = datasets.MNIST('../data/', download=True, train=False, transform=transform)

# Splitting the data into train & val using torch's SubsetRandomSampler

In [7]:
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(VALIDATION_SIZE * num_train))
train_index, val_index = indices[split:], indices[:split] 

In [8]:
train_sampler = SubsetRandomSampler(train_index)
val_sampler = SubsetRandomSampler(val_index)

In [9]:
train_set = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
val_set = DataLoader(train_data, sampler=val_sampler, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
test_set = DataLoader(test_data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

In [10]:
#Get the shape of images & Labels for trainset
images, labels = next(iter(train_set))
print(f"images shape: {images.shape}\nlabels shape: {labels.shape}")

images shape: torch.Size([20, 1, 28, 28])
labels shape: torch.Size([20])


In [11]:
#Get the sample numbers fort train,val & test set
print(f"shape of train_set : {len(train_set)}\nshape of val_set : {len(val_set)}\nshape of test_set : {len(test_set)}")

shape of train_set : 2400
shape of val_set : 600
shape of test_set : 500


# Defining Network architectures

<!-- As we are dealing with MNIST claasification problems objects are more or less of similar size and hence
1.Multilayer Perceptron architecture will do better
2.We'll also see more complex cases were MLP will not be good choice and we'll start dealing with CNNs -->

In [17]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        
        #defining the fuly connected layers
        self.fc1 = nn.Linear(28*28, 512)
        self.fc2 = nn.Linear(512, 264)
        self.fc3 = nn.Linear(264, 128)
        self.fc4 = nn.Linear(128, 10)

        #dropout
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        out = self.fc4(x)
        return out

    
#initialize the NN
network = Net()
print(network)

Net(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=264, bias=True)
  (fc3): Linear(in_features=264, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


# Defining Loss & Optimizer

In [13]:
criterian = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=network.parameters(), lr=0.01)

# Trainig the architecture

In [19]:
# Nevere ever forget to initialize the "optimizer.zero_grad()" before prediction 

In [14]:
for i in range(NUM_EPOCHS):
    #monitoring training loss
    train_loss =0
    
    for images, labels in train_set:
        optimizer.zero_grad()
        out = network.forward(images)
        loss = criterian(out, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        
    print(f"Number of Epochs done: {i}/{NUM_EPOCHS}, Train loss: {train_loss/len(train_set)}")

Number of Epochs done: 0/10, Train loss: 1.4043731387332081
Number of Epochs done: 1/10, Train loss: 0.438463756494845
Number of Epochs done: 2/10, Train loss: 0.3099811168791105
Number of Epochs done: 3/10, Train loss: 0.23990778061018014
Number of Epochs done: 4/10, Train loss: 0.1921890571443752
Number of Epochs done: 5/10, Train loss: 0.1625956611026777
Number of Epochs done: 6/10, Train loss: 0.14017074929705511
Number of Epochs done: 7/10, Train loss: 0.12267133021348854
Number of Epochs done: 8/10, Train loss: 0.10998310694926962
Number of Epochs done: 9/10, Train loss: 0.09671217351948144


# Running model against the validation set

In [15]:
#running model against the validation set
with torch.no_grad():
    val_loss = 0
    for images, labels in val_set:
        out = network.forward(images)
        loss = criterian(out, labels)
        val_loss += loss.item()
    print(f"validation loss: {val_loss/len(val_set)}")

validation loss: 0.12192165999673307


In [16]:
images, labels = next(iter(test_set))
_, pred = torch.max(network.forward(images[0]), 1)
print(f"Predicted label : {pred.item()}\nActual label: {labels[0]}")

Predicted label : 7
Actual label: 7
