In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Constructing the layers of the network:

        # input: (1, 28, 28), output: (32, 28, 28), RF: 3x3
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)

        # input: (32, 28, 28), output: (64, 28, 28), RF: 5x5
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)

        # input: (64, 28, 28), output: (64, 14, 14), RF: 10x10
        self.pool1 = nn.MaxPool2d(2, 2)

        # input: (64, 14, 14), output: (128, 14, 14), RF: 12x12
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)

        # input: (128, 14, 14), output: (256, 14, 14), RF: 14x14
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)

        # input: (256, 14, 14), output: (256, 7, 7), RF: 28x28
        self.pool2 = nn.MaxPool2d(2, 2)

        # input: (256, 7, 7), output: (512, 5, 5), RF: 30x30
        self.conv5 = nn.Conv2d(256, 512, 3)

        # input: (512, 5, 5), output: (1024, 3, 3), RF: 32x32
        self.conv6 = nn.Conv2d(512, 1024, 3)

        # input: (1024, 3, 3), output: (10, 1, 1), RF: 34x34
        self.conv7 = nn.Conv2d(1024, 10, 3)

    def forward(self, x):
        """
        Defines operations to be carried out during forward prop.

        Args:
            x (Tensor): The input tensor to the model
        
        Returns:
            Tensor: a tensor of size (B, 10)
        """
        # initial shape of x: (B, 1, 28, 28)
        # shape of x: (B, 64, 14, 14)
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))

        # shape of x: (B, 256, 7, 7)
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))

        # shape of x: (B, 1024, 3, 3)
        x = F.relu(self.conv6(F.relu(self.conv5(x))))

        # shape of x: (B, 10, 1, 1)
        x = F.relu(self.conv7(x))

        # shape of x: (B, 10)
        x = x.view(-1, 10)
        return F.log_softmax(x)

In [3]:
! pip install torchsummary
from torchsummary import summary

# checking if cuda is available for use
use_cuda = torch.cuda.is_available()

# if it is, setting the device to "cuda". else setting the device to be "cpu"
device = torch.device("cuda" if use_cuda else "cpu")

# instantiating the model, moving it to the device
model = Net().to(device)

# summarizing the model
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------

In [4]:
# setting the seed for the random number generator used by pytorch
torch.manual_seed(1)

# declaring batch size
batch_size = 128
# declaring additional arguments to be used if cuda is being used.
# num_workers: number of subprocesses needed to load data to memory. if 0, main process loads 
# data to memory.
# pin_memory: decides whether to use pinned memory or not. pinned memory is a non-paged staging area
# for data transfers between CPU and GPU, and setting this argument to True can lead to speed up.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# creating the dataset
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data', # the path to store the MNIST data
        train=True, # defining that the data is training data
        download=True, # defines that the data should be downloaded
        transform=transforms.Compose([ # defining preprocessing operations that have to be performed on the dataset
            transforms.ToTensor(), # first operation: Convert PIL image (downloaded image) to pytorch tensor
            transforms.Normalize((0.1307,), (0.3081,)) # second operation: normalize the image with mean 0.1307 and std 0.3081
        ]),
    ),
    batch_size=batch_size, # defining the batch size to be used
    shuffle=True, # defining that the data should be shuffled prior to selecting batches
    **kwargs # passing additional arguments
)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data', # the path to store the MNIST data
        train=False,  # defining that the data is NOT training data
        transform=transforms.Compose([ # defining preprocessing operations that have to be performed on the dataset
            transforms.ToTensor(), # first operation: Convert PIL image (downloaded image) to pytorch tensor
            transforms.Normalize((0.1307,), (0.3081,)) # second operation: normalize the image with mean 0.1307 and std 0.3081
        ]),
    ),
    batch_size=batch_size, # defining the batch size to be used
    shuffle=True, # defining that the data should be shuffled prior to selecting batches
    **kwargs # passing additional arguments
)


In [5]:
from tqdm import tqdm

def train(model, device, train_loader, optimizer, epoch):
    """
    Performs a single train step on the model.

    Args:
        model (nn.Module): The model to train
        device (str): The device the data should be moved to
        train_loader (torch.utils.data.DataLoader): The data loader for the training data
        optimizer (torch.optim.Optimizer): The optimizer to be used
        epochs (int): The current epoch
    """

    # setting the model in train mode
    model.train()

    # creating a progress bar
    pbar = tqdm(train_loader)

    # for each batch in the dataset,
    for batch_idx, (data, target) in enumerate(pbar):
        # move the data to the device
        data, target = data.to(device), target.to(device)

        # zero out the optimizer since pytorch accumulates the gradients on subsequent backward passes
        optimizer.zero_grad()

        # perform forward pass
        output = model(data)

        # calculate the loss (loss function being used is negative log likelihood)
        loss = F.nll_loss(output, target)

        # calculate the gradients (mathematical gradients)
        loss.backward()

        # step the optimizer (updates the weights of the model)
        optimizer.step()

        # update progress bar
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    """
    Performs a single test step on the model.

    Args:
        model (nn.Module): The model to train
        device (str): The device the data should be moved to
        test_loader (torch.utils.data.DataLoader): The data loader for the test data
    """

    # setting the model in evaluation mode
    model.eval()

    # defining variables to store the test loss and the number 
    # of correctly classified images
    test_loss = 0
    correct = 0
 
    # defining a "no gradient calculation" context
    with torch.no_grad():
        # for every batch in the dataset,
        for data, target in test_loader:
            # move the data to the device
            data, target = data.to(device), target.to(device)

            # perform the forward pass
            output = model(data)
            
            # calculating test loss (summing up the losses in a batch instead of averaging them)
            test_loss += F.nll_loss(output, target, reduction='sum')

            # getting index of the element in the output tensor which has the largest value
            # the index corresponds to the number the model thinks the image is. 
            # the output tensor has a shape of (B, 10), and pred will have the shape
            # (B, 1)
            pred = output.argmax(dim=1, keepdim=True)
            
            # checking if the prediction was correct or not
            correct += pred.eq(target.view_as(pred)).sum().item()

    # dividing the test loss by the size of the dataset
    test_loss /= len(test_loader.dataset)

    # printing results
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [6]:
# defining the optimizer that is to be used
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# running training loop for 1 epoch
for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

loss=0.47023406624794006 batch_id=468: 100%|██████████| 469/469 [00:19<00:00, 23.66it/s]

Test set: Average loss: 0.5663, Accuracy: 8615/10000 (86%)

