In [1]:
import torch
import torch.nn as nn
import torchvision
import torch.cuda as cuda

import torchvision.transforms as transforms
import torchvision.datasets as datasets

                the kernel may be left running.  Please let us know
                about your system (bitness, Python, etc.) at
                ipython-dev@scipy.org
  ipython-dev@scipy.org""")


## Importing the dataset and creating the DataLoaders

In [2]:
# MNIST training split, stored under ./data (downloaded on first use);
# every image is converted to a tensor on access.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# The held-out split (train=False) is used as the validation set in this notebook.
valid_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [3]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Training batches are reshuffled every epoch so SGD sees a different ordering each time.
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
# Validation only accumulates loss/accuracy over the whole split, so sample order is
# irrelevant; keeping it fixed avoids needless RNG use and makes evaluation repeatable.
valid_loader = torch.utils.data.DataLoader(valid_dataset, shuffle=False, batch_size=BATCH_SIZE)

## CNN Architecture

In [4]:
class MNIST_Net(nn.Module):
    """Small CNN classifier for single-channel 28x28 images with 10 output classes.

    Layout: two 3x3 conv layers (32 channels) -> 2x2 max-pool -> two 3x3 conv
    layers (64 channels) -> 2x2 max-pool -> dense(3136 -> 500) -> dense(500 -> 10).
    All conv layers use padding=1, so only the pooling steps shrink the feature map.
    `forward` returns raw logits (no softmax), as expected by `nn.CrossEntropyLoss`.
    """

    def __init__(self):
        super().__init__()
        # Parameter-free layers are shared between stages.
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(2)

        # Every conv kernel is Xavier-uniform initialised (biases keep PyTorch's default).
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        torch.nn.init.xavier_uniform_(self.conv1.weight)
        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
        torch.nn.init.xavier_uniform_(self.conv2.weight)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        torch.nn.init.xavier_uniform_(self.conv3.weight)
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        # conv4 was previously the only conv layer left on the default initialisation.
        torch.nn.init.xavier_uniform_(self.conv4.weight)

        # 3136 = 64 channels * 7 * 7 spatial positions after the second pooling step.
        self.dense1 = nn.Linear(3136, 500)
        self.dense2 = nn.Linear(500, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: image tensor whose trailing dimensions are (1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.

        Raises:
            ValueError: if the conv stack does not yield exactly
                `dense1.in_features` values per sample (i.e. wrong image size).
        """
        # 1st conv layer
        x = self.relu(self.conv1(x))  # 32x28x28 output

        # 2nd conv layer
        x = self.relu(self.conv2(x))  # 32x28x28 output
        x = self.maxpool(x)           # 32x14x14 output

        # 3rd conv layer
        x = self.relu(self.conv3(x))  # 64x14x14 output

        # 4th conv layer
        x = self.relu(self.conv4(x))  # 64x14x14 output
        x = self.maxpool(x)           # 64x7x7 output // 64,992 parameters up to this point

        # Flatten each sample. A bare view(-1, 3136) would silently fold a wrongly sized
        # feature map into extra batch rows, so verify the per-sample size first.
        features_per_sample = x.shape[-3:].numel()
        if features_per_sample != self.dense1.in_features:
            raise ValueError(
                'expected {} features per sample after the conv stack, got {}; '
                'inputs must be 1x28x28 images'.format(
                    self.dense1.in_features, features_per_sample))
        x = x.view(-1, features_per_sample)

        # 1st fully connected layer
        x = self.relu(self.dense1(x))

        # 2nd fully connected layer (logits)
        return self.dense2(x)
        

## Creating the model, loss function and optimizer

In [5]:
mnist_net = MNIST_Net()

# `cuda.is_available` has to be *called*: the bare function object is always truthy,
# which previously forced a `.cuda()` attempt on every machine and relied on a
# catch-all `except` to hide the resulting error on CPU-only hosts.
if cuda.is_available():
    mnist_net.cuda()
else:
    print('cuda unavailable')

# Cross-entropy over the raw logits returned by the network; plain SGD with a fixed step size.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(mnist_net.parameters(), lr = 0.08)

cuda unavailable


## Training loop

In [6]:
# Number of full passes over the training set.
NUM_EPOCHS = 10

# Per-epoch history. Losses are the *sum* of the per-batch mean losses, so the train
# and validation values are not directly comparable (the loaders have different
# numbers of batches). Accuracies are percentages of the whole split.
train_loss = []
train_accuracy = []

valid_loss = []
valid_accuracy = []

# Send every batch to wherever the model's parameters actually live, instead of
# re-querying CUDA availability per batch and swallowing any failure.
device = next(mnist_net.parameters()).device

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    mnist_net.train()
    epoch_loss = 0           #keeps the count of the total loss per epoch
    accurate_predictions = 0 #keeps the count of the number of accurate predictions in the current epoch

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = mnist_net(inputs)
        loss = criterion(outputs, targets)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        #Evaluation of accurate predictions
        _, prediction = torch.max(outputs.detach(), 1)
        # .item() works on any device; .numpy() raises for CUDA tensors.
        accurate_predictions += (prediction == targets).sum().item()

    train_loss.append(epoch_loss)
    train_accuracy.append(100.0 * accurate_predictions/len(train_loader.dataset))

    # ---- validation pass ----
    mnist_net.eval()
    epoch_loss = 0
    accurate_predictions = 0

    # No gradients are needed for evaluation; skipping autograd saves memory and time.
    with torch.no_grad():
        for inputs, targets in valid_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            #evaluation of the loss function
            outputs = mnist_net(inputs)
            loss = criterion(outputs, targets)
            epoch_loss += loss.item()

            #evaluation of accurate predictions
            _, prediction = torch.max(outputs, 1)
            accurate_predictions += (prediction == targets).sum().item()

    valid_loss.append(epoch_loss)
    valid_accuracy.append(100 * accurate_predictions/len(valid_loader.dataset))

    print('Epoch: ', epoch+1, 'train_accuracy: ', train_accuracy[-1], 'train_loss: ', train_loss[-1])
    print('Epoch: ', epoch+1, 'valid_accuracy: ', valid_accuracy[-1], 'valid_loss: ', valid_loss[-1])
        
        
        
    

Epoch:  1 train_accuracy:  91.705 train_loss:  461.5371407456696
Epoch:  1 valid_accuracy:  98.26 valid_loss:  17.495558477938175
Epoch:  2 train_accuracy:  98.54666666666667 train_loss:  86.34943240135908
Epoch:  2 valid_accuracy:  99.0 valid_loss:  10.293900817632675
Epoch:  3 train_accuracy:  99.08833333333334 train_loss:  53.98476525768638
Epoch:  3 valid_accuracy:  98.91 valid_loss:  9.849783957004547
Epoch:  4 train_accuracy:  99.32666666666667 train_loss:  40.22226344048977
Epoch:  4 valid_accuracy:  98.88 valid_loss:  10.245033904910088
Epoch:  5 train_accuracy:  99.46666666666667 train_loss:  28.627984367311
Epoch:  5 valid_accuracy:  99.3 valid_loss:  7.006854213774204
Epoch:  6 train_accuracy:  99.62166666666667 train_loss:  22.11600709706545
Epoch:  6 valid_accuracy:  99.21 valid_loss:  7.280959911644459
Epoch:  7 train_accuracy:  99.75333333333333 train_loss:  15.469483450055122
Epoch:  7 valid_accuracy:  99.07 valid_loss:  8.173068568110466
Epoch:  8 train_accuracy:  99.7

array([0., 1.])