In [1]:
# Setting up the environment
import torch  
import matplotlib.pyplot as plt  
import numpy as np  
import torch.nn.functional as func  
from torch import nn  
from torchvision import datasets,transforms  

In [2]:
# Preprocessing shared by the training and validation sets: resize to 28x28,
# convert to a tensor, then normalize with mean 0.5 and std 0.5.
transform1 = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST splits (downloaded into ./data if not already present).
training_dataset = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform1
)
validation_dataset = datasets.MNIST(
    root='./data', train=False, download=True, transform=transform1
)

# Mini-batches of 100 samples; only the training data is shuffled.
training_loader = torch.utils.data.DataLoader(
    dataset=training_dataset, batch_size=100, shuffle=True
)
validation_loader = torch.utils.data.DataLoader(
    dataset=validation_dataset, batch_size=100, shuffle=False
)

In [3]:
# Defining a class for our neural network with two hidden layers; note that ReLU is used as the activation function
class classification1(nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    The final linear layer's output is returned as-is (no activation is
    applied to it in ``forward``).
    """

    def __init__(self, input_layer, hidden_layer1, hidden_layer2, output_layer):
        """Create the three linear layers.

        Args:
            input_layer: Number of input features.
            hidden_layer1: Width of the first hidden layer.
            hidden_layer2: Width of the second hidden layer.
            output_layer: Number of output units.
        """
        super().__init__()
        # Layers are created in input -> output order.
        self.linear1 = nn.Linear(input_layer, hidden_layer1)
        self.linear2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.linear3 = nn.Linear(hidden_layer2, output_layer)

    def forward(self, x):
        """Run ``x`` through both hidden layers (with ReLU) and the output layer."""
        for hidden in (self.linear1, self.linear2):
            x = func.relu(hidden(x))
        return self.linear3(x)

In [4]:
# Small network: 784 inputs, two hidden layers of 10 neurons each, 10 outputs.
model = classification1(
    input_layer=784,
    hidden_layer1=10,
    hidden_layer2=10,
    output_layer=10,
)

# Cross-entropy loss (a combination of log_softmax and NLLLoss).
criteron = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters with a learning rate of 1e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Number of passes over the training data.
epochs = 10

In [6]:
# Train the model for `epochs` epochs. After each epoch, print the mean
# per-batch loss followed by the training accuracy in percent.
for e in range(epochs):
    running_loss = 0.0
    running_correct = 0   # correctly classified samples in this epoch
    samples_seen = 0      # samples processed in this epoch
    # `images` (not `input`) so the builtin input() is not shadowed.
    for images, labels in training_loader:
        # Flatten every image in the batch into a single feature vector.
        inputs = images.view(images.shape[0], -1)
        outputs = model(inputs)
        loss1 = criteron(outputs, labels)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Predicted class = index of the largest output per sample.
        _, preds = torch.max(outputs, 1)
        running_loss += loss1.item()
        running_correct += (preds == labels).sum().item()
        samples_seen += labels.size(0)

    epoch_loss = running_loss / len(training_loader)
    # Accuracy is computed per sample, so it stays a true percentage for any
    # batch size (dividing by the number of batches only worked for 100).
    epoch_acc = 100.0 * running_correct / samples_seen
    print('training_loss:{:.4f},{:.4f}'.format(epoch_loss, epoch_acc))

training_loss:1.1629,68.4067
training_loss:0.8244,77.1867
training_loss:0.6572,81.6017
training_loss:0.5683,83.8167
training_loss:0.5151,85.1167
training_loss:0.4790,86.1467
training_loss:0.4529,86.8067
training_loss:0.4334,87.3683
training_loss:0.4173,87.9150
training_loss:0.4051,88.2517


In [7]:
# Note that increasing the number of neurons increases the number of tunable parameters, making the model
# more sensitive to (i.e., better able to fit) the training data. However, this comes at a higher computational cost and may also result in overfitting.

In [8]:
# Building a model with 150 neurons in each hidden layer and training it for
# `epochs` epochs. After each epoch, print the mean per-batch loss followed
# by the training accuracy in percent.
model = classification1(784, 150, 150, 10)
criteron = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
epochs = 10
for e in range(epochs):
    running_loss = 0.0
    running_correct = 0   # correctly classified samples in this epoch
    samples_seen = 0      # samples processed in this epoch
    # `images` (not `input`) so the builtin input() is not shadowed.
    for images, labels in training_loader:
        # Flatten every image in the batch into a single feature vector.
        inputs = images.view(images.shape[0], -1)
        outputs = model(inputs)
        loss1 = criteron(outputs, labels)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Predicted class = index of the largest output per sample.
        _, preds = torch.max(outputs, 1)
        running_loss += loss1.item()
        running_correct += (preds == labels).sum().item()
        samples_seen += labels.size(0)

    epoch_loss = running_loss / len(training_loader)
    # Accuracy is computed per sample, so it stays a true percentage for any
    # batch size (dividing by the number of batches only worked for 100).
    epoch_acc = 100.0 * running_correct / samples_seen
    print('training_loss:{:.4f},{:.4f}'.format(epoch_loss, epoch_acc))

training_loss:0.8391,80.3667
training_loss:0.3421,90.1717
training_loss:0.2863,91.6767
training_loss:0.2494,92.7517
training_loss:0.2206,93.6467
training_loss:0.1976,94.2583
training_loss:0.1792,94.8150
training_loss:0.1624,95.3050
training_loss:0.1486,95.6183
training_loss:0.1367,95.9883


In [9]:
# We got a training accuracy of about 96 percent here, as opposed to about 88 percent for the simpler network.
