In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

importing necessary libraries



Constructing a CNN model with two convolutional layer and two fully connected layers

In [14]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

running tensors on gpu (cuda)



In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Preprocessing data with torch's transforms function


In [None]:
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])

downloading train and test Mnist dataset using torch's dataset function


In [5]:
dataset1 = datasets.MNIST('../data', train=True, download=True,transform=transform)
dataset2 = datasets.MNIST('../data', train=False,transform=transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



using dataloader loading the downloaded train dataset and test dataset with a batch size of 64 and 32


In [6]:
train_loader = torch.utils.data.DataLoader(dataset1,batch_size=64)
test_loader = torch.utils.data.DataLoader(dataset2, batch_size=32)

loading model to gpu (cuda)

using adamdelta optimizer with a learning rate of 0.1

using scheduler to increase learning rate with the gamma rate 0.7

In [9]:
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

with epochs as 10
looping 10 times

In [15]:
epochs = 10
for i in range(epochs):
  #entering into train mode so it can track gradients
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    #loading data values to gpu (cuda)
    data, target = data.to(device), target.to(device)
    #setting optimizer with zero value
    optimizer.zero_grad()
    #passing the inputs to the model 
    output = model(data)
    #measuring loss with negative log likelihood function for real target and predicted one
    loss = F.nll_loss(output, target)
    #doing backward propogation
    loss.backward()
    #Updating weights by these gradients with respect to the learning rate
    optimizer.step()
  print(f'The loss for the epoch {i} is {loss.item()}')
  #tuning the model to into evaluation mode to turn off gradient tracking
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      test_loss += F.nll_loss(output, target, reduction='sum').item()
      #predicting evaluation loss
      pred = output.argmax(dim=1, keepdim=True)
      correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'The test Accuracy is {100. * correct / len(test_loader.dataset)}')
    print(f'Correctly classified {correct} / {len(test_loader.dataset)}')
  scheduler.step()

The loss for the epoch 0 is 0.004306136630475521
The test Accuracy is 98.66
Correctly classified 9866 / 10000
The loss for the epoch 1 is 0.005632137414067984
The test Accuracy is 98.69
Correctly classified 9869 / 10000
The loss for the epoch 2 is 0.006178741808980703
The test Accuracy is 98.7
Correctly classified 9870 / 10000
The loss for the epoch 3 is 0.010759001597762108
The test Accuracy is 98.75
Correctly classified 9875 / 10000
The loss for the epoch 4 is 0.009433471597731113
The test Accuracy is 98.79
Correctly classified 9879 / 10000
The loss for the epoch 5 is 0.0025148435961455107
The test Accuracy is 98.77
Correctly classified 9877 / 10000
The loss for the epoch 6 is 0.0028364232275635004
The test Accuracy is 98.78
Correctly classified 9878 / 10000
The loss for the epoch 7 is 0.006457797717303038
The test Accuracy is 98.78
Correctly classified 9878 / 10000
The loss for the epoch 8 is 0.022438902407884598
The test Accuracy is 98.8
Correctly classified 9880 / 10000
The loss f

saving the model

In [16]:
torch.save(model.state_dict(), "mnist_cnn.pt")