In [4]:
import numpy as np
from torch import utils
from torchvision import datasets, transforms

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
# Load the MNIST dataset (downloaded into ./data on first use).
# transforms.ToTensor() converts each PIL image (height x width x channels, pixel values 0-255)
# into a float tensor laid out as (channels, height, width) with values scaled to [0.0, 1.0],
# which is the layout the network consumes. You can check it with trainData[0][0].shape.
trainData = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())
testData = datasets.MNIST('data', train=False, download=True, transform=transforms.ToTensor())

# Create data loaders for training and test sets
batchSize = 500
# Training batches are reshuffled every epoch so SGD sees the samples in a different order each time.
trainLoader = torch.utils.data.DataLoader(trainData, batch_size=batchSize, shuffle=True)
# Evaluation only aggregates over the whole test set, so shuffling adds nothing;
# a fixed order keeps per-batch results reproducible.
testLoader = torch.utils.data.DataLoader(testData, batch_size=batchSize, shuffle=False)

In [None]:
# Preview the first ten training samples side by side, each titled with its label.
fig, ax = plt.subplots(1, 10, figsize=(20, 10))
for i, panel in enumerate(ax):
    image, label = trainData[i]
    # squeeze() drops the size-1 channel dimension so imshow receives a 2-D array
    pixels = image.squeeze().numpy()
    panel.imshow(pixels, cmap='gray')
    panel.set_title(label)



In [15]:
# Define neural network architecture
class SimpleNet(nn.Module):
    """Fully connected classifier for 28x28 images: 784 -> 128 -> 64 -> 10.

    A ReLU follows each of the two hidden layers. The last layer's output is
    returned as-is (no softmax), i.e. as one raw score per class.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return the class scores.

        Args:
            x: tensor whose total number of elements is a multiple of 28*28,
               e.g. a batch of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding the unnormalized class scores.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed/permuted batches; it only copies when a view is impossible.
        x = x.reshape(-1, self.fc1.in_features)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [16]:
net = SimpleNet() # Create an instance of the network
# CrossEntropyLoss applies log-softmax to the raw network outputs and then computes the
# negative log-likelihood of the target class, so the network must output unnormalized scores.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5) # SGD (Stochastic Gradient Descent) with learning rate 0.01 and momentum 0.5

# Train the network, evaluating on the test set after every epoch
numEpochs = 50
for epoch in range(numEpochs):
    net.train() # training mode; the evaluation pass below switches the module to eval mode
    runningLoss = 0.0
    for i, data in enumerate(trainLoader, 0):
        inputs, labels = data
        optimizer.zero_grad() # clear gradients from the previous batch before computing gradients for the current batch
        outputs = net(inputs) # forward pass to compute the output of the network
        loss = criterion(outputs, labels)
        loss.backward() # backward pass to compute gradients
        optimizer.step() # update the weights

        runningLoss += loss.item()
        if i % 10 == 9: # print the average loss every 10 iterations during the training process
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, runningLoss/10))
            runningLoss = 0.0

    # Evaluate the network on test data after each epoch of training
    net.eval() # evaluation mode (no effect on these Linear/ReLU layers, but correct if dropout/batch-norm is ever added)
    correct = 0
    total = 0
    with torch.no_grad(): # stop PyTorch from tracking gradients during the evaluation forward passes
        for data in testLoader: # iterate over the test data
            images, labels = data # get the inputs and labels
            outputs = net(images) # forward pass to compute the output of the network
            # index of the highest score in each row = predicted class;
            # no need for the legacy `.data` attribute inside no_grad
            predicted = outputs.argmax(dim=1)
            total += labels.size(0) # update the total number of images
            correct += (predicted == labels).sum().item() # update the total number of correctly classified images

    print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

[1,    10] loss: 2.303
[1,    20] loss: 2.295
[1,    30] loss: 2.288
[1,    40] loss: 2.281
[1,    50] loss: 2.271
[1,    60] loss: 2.264
[1,    70] loss: 2.255
[1,    80] loss: 2.244
[1,    90] loss: 2.234
[1,   100] loss: 2.222
[1,   110] loss: 2.205
[1,   120] loss: 2.191
Accuracy of the network on the 10000 test images: 39 %
[2,    10] loss: 2.177
[2,    20] loss: 2.152
[2,    30] loss: 2.132
[2,    40] loss: 2.113
[2,    50] loss: 2.081
[2,    60] loss: 2.050
[2,    70] loss: 2.022
[2,    80] loss: 1.980
[2,    90] loss: 1.947
[2,   100] loss: 1.902
[2,   110] loss: 1.856
[2,   120] loss: 1.823
Accuracy of the network on the 10000 test images: 62 %
[3,    10] loss: 1.757
[3,    20] loss: 1.711
[3,    30] loss: 1.659
[3,    40] loss: 1.598
[3,    50] loss: 1.556
[3,    60] loss: 1.490
[3,    70] loss: 1.434
[3,    80] loss: 1.398
[3,    90] loss: 1.332
[3,   100] loss: 1.289
[3,   110] loss: 1.260
[3,   120] loss: 1.203
Accuracy of the network on the 10000 test images: 75 %
[4,    