In [None]:
import torch
import torchvision
import numpy as np
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
import timeit

In [None]:
# MNIST training split. Each image is converted to a tensor and then
# standardised with the (mean, std) constants handed to Normalize
# (presumably the MNIST training-set statistics -- TODO confirm).
train = torchvision.datasets.MNIST(
    './data',
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

# Shuffled mini-batches of 50 samples drawn from the training split.
train_loader = torch.utils.data.DataLoader(
    train,
    batch_size=50,
    shuffle=True,
)
print(type(train_loader))

In [None]:
# MNIST held-out split (train=False), preprocessed with the same
# ToTensor + Normalize pipeline that is applied to the training split.
test = torchvision.datasets.MNIST(
    './data',
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

# Shuffled mini-batches of 50 samples drawn from the held-out split.
test_loader = torch.utils.data.DataLoader(
    test,
    batch_size=50,
    shuffle=True,
)

## 1. Create a CNN model:

In [None]:
class Cnn_model(nn.Module):
    """Small two-stage convolutional classifier with a 10-way linear readout.

    Layers, in the order ``forward`` applies them::

        conv 1->16 (5x5) -> ReLU -> max-pool 2x2, stride 1
        conv 16->32 (5x5) -> ReLU -> max-pool 2x2, stride 2
        flatten -> linear 2592->50 -> ReLU -> linear 50->10

    ``fc1`` takes 2592 flattened features, which is what the stack above
    produces for a 1x28x28 input (32 channels x 9 x 9). The output is the raw
    result of ``fc2``; no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 5x5 convolution (unit stride, no padding, no dilation).
        self.cnn1 = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
            dilation=(1, 1),
        )
        self.relu1 = nn.ReLU()

        # 2x2 pooling window moved one pixel at a time (stride 1).
        self.maxpool1 = nn.MaxPool2d(
            kernel_size=(2, 2),
            stride=(1, 1),
            padding=(0, 0),
            dilation=(1, 1),
        )

        # Stage 2: 5x5 convolution widening 16 channels to 32.
        self.cnn2 = nn.Conv2d(
            in_channels=16,
            out_channels=32,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
            dilation=(1, 1),
        )
        self.relu2 = nn.ReLU()

        # 2x2 pooling window with stride 2.
        self.maxpool2 = nn.MaxPool2d(
            kernel_size=(2, 2),
            stride=(2, 2),
            padding=(0, 0),
            dilation=(1, 1),
        )

        # Classifier head: hidden layer of 50 units, then 10 output scores.
        self.fc1 = nn.Linear(in_features=2592, out_features=50)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Run the network on a batch and return one row of 10 scores per sample.

        ``x`` is fed straight into ``cnn1``, so it needs a single channel and a
        spatial size that flattens to 2592 features after the second pooling
        layer (e.g. ``(batch, 1, 28, 28)``).
        """
        # Convolutional feature extractor: (conv -> ReLU -> pool) twice.
        features = self.maxpool1(self.relu1(self.cnn1(x)))
        features = self.maxpool2(self.relu2(self.cnn2(features)))

        # Collapse everything except the batch dimension for the linear layers.
        flat = features.view(features.size(0), -1)

        # Hidden layer followed by the readout layer.
        hidden = self.relu3(self.fc1(flat))
        return self.fc2(hidden)

## 2. Train the CNN model: 

In [None]:
# Number of full passes over the training loader.
epochs = 10

# Loss: cross entropy between the model's raw scores and the integer labels.
error = nn.CrossEntropyLoss()

# Freshly initialised network to be trained.
model = Cnn_model()

# Plain stochastic gradient descent over all model parameters, step size 0.1.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# CNN model training
#
# Runs `epochs` passes of mini-batch SGD over `train_loader`. Every 2000
# optimizer steps the model is scored on the whole test loader, and the
# current mini-batch loss, the step count and the test accuracy are recorded
# for plotting.
count = 0            # optimizer steps taken so far
loss_list = []       # mini-batch training loss (Python float) per evaluation
iteration_list = []  # step count per evaluation
accuracy_list = []   # test accuracy in percent (Python float) per evaluation

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward propagation
        outputs = model(images)

        # Calculate softmax and cross entropy loss
        loss = error(outputs, labels)

        # Calculating gradients
        loss.backward()

        # Update parameters
        optimizer.step()
        count += 1

        if count % 2000 == 0:
            # Calculate accuracy on the test set. Gradients are not needed
            # here, so autograd tracking is switched off. The evaluation loop
            # uses its own variable names so it does not rebind the training
            # batch (`images`, `labels`).
            correct = 0
            total = 0
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    # Predicted class = index of the largest output score
                    predicted = torch.max(model(test_images), 1)[1]
                    total += len(test_labels)
                    # .item() keeps the running count a plain Python int
                    correct += (predicted == test_labels).sum().item()

            accuracy = 100 * correct / float(total)
            loss_value = loss.item()

            # Store plain numbers (not tensors) so the lists plot and print cleanly
            loss_list.append(loss_value)
            iteration_list.append(count)
            accuracy_list.append(accuracy)

            print('Iteration: {}  Loss: {}  Accuracy: {} %'.format(count, loss_value, accuracy))

In [None]:
# Visualise the recorded training history: one figure per metric, each plotted
# against the iteration count.
# Every entry is (y-values, y-axis label, figure title, extra plot() keywords).
for values, y_label, figure_title, line_options in (
    (loss_list, "Loss", "CNN: Loss vs Number of iteration", {}),
    (accuracy_list, "Accuracy", "CNN: Accuracy vs Number of iteration", {"color": "red"}),
):
    plt.plot(iteration_list, values, **line_options)
    plt.xlabel("Number of iteration")
    plt.ylabel(y_label)
    plt.title(figure_title)
    plt.show()

### 3. Train the CNN model by changing the optimization algorithm:

In [None]:
# Train a freshly initialised CNN once per optimizer and plot its loss and
# test-accuracy history.
#
# Each entry pairs the heading printed above the plots with a factory that
# builds the optimizer, so the heading always names the optimizer that
# actually produced the curves.
# NOTE(review): lr=0.1 is kept from the original experiment for all three
# optimizers; it is well above PyTorch's default step sizes for Adam (1e-3)
# and RMSprop (1e-2), so their curves may reflect divergence rather than the
# optimizer itself -- confirm this is intended.
optimizer_setups = [
    ("CNN: GD with momentum: ", lambda params: torch.optim.SGD(params, lr=0.1, momentum=0.5)),
    ("CNN: Adam optimizers: ", lambda params: torch.optim.Adam(params, lr=0.1)),
    ("CNN: RMSProp: ", lambda params: torch.optim.RMSprop(params, lr=0.1)),
]

for heading, make_optimizer in optimizer_setups:
    model_for_optimizers = Cnn_model()
    new_optimizer = make_optimizer(model_for_optimizers.parameters())

    count = 0            # optimizer steps taken so far
    loss_list = []       # mini-batch training loss (Python float) per evaluation
    iteration_list = []  # step count per evaluation
    accuracy_list = []   # test accuracy in percent (Python float) per evaluation
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(train_loader):
            new_optimizer.zero_grad()
            outputs = model_for_optimizers(images)
            loss = error(outputs, labels)
            loss.backward()
            new_optimizer.step()
            count += 1
            if count % 2000 == 0:
                # Score the model on the test loader. Gradients are not
                # needed, so autograd tracking is switched off, and separate
                # names keep the training batch from being rebound.
                correct = 0
                total = 0
                with torch.no_grad():
                    for test_images, test_labels in test_loader:
                        predicted = torch.max(model_for_optimizers(test_images), 1)[1]
                        total += len(test_labels)
                        correct += (predicted == test_labels).sum().item()
                accuracy = 100 * correct / float(total)
                # Store plain numbers rather than tensors for plotting
                loss_list.append(loss.item())
                iteration_list.append(count)
                accuracy_list.append(accuracy)

    print(heading)

    plt.plot(iteration_list, loss_list)
    plt.xlabel("Number of iteration")
    plt.ylabel("Loss")
    plt.title("CNN: Loss vs Number of iteration")
    plt.show()

    plt.plot(iteration_list, accuracy_list, color="red")
    plt.xlabel("Number of iteration")
    plt.ylabel("Accuracy")
    plt.title("CNN: Accuracy vs Number of iteration")
    plt.show()

The best optimizer for this problem is SGD with momentum = 0.5

### 4. Train the CNN model by changing minibatch size:

In [None]:
# Train a freshly initialised CNN once per mini-batch size, report the average
# wall-clock time per epoch and plot the loss / test-accuracy history.
batch_sizes = [1,100,1000,10000]

# Loop-invariant settings shared by every run.
epochs = 10
error = nn.CrossEntropyLoss()

for batch in batch_sizes:
    train_var_batch = torch.utils.data.DataLoader(train, batch_size = batch, shuffle = True)
    test_var_batch = torch.utils.data.DataLoader(test, batch_size = batch, shuffle = True)

    model_for_batch = Cnn_model()
    optimizer = torch.optim.SGD(model_for_batch.parameters(), lr=0.1, momentum= 0.5)

    # Evaluate every 2000 steps, but never less often than once per epoch.
    # With large batches an entire run has fewer than 2000 steps, so a fixed
    # interval of 2000 would never trigger and the plots would stay empty.
    eval_every = min(2000, len(train_var_batch))

    count = 0            # optimizer steps taken so far
    loss_list = []       # mini-batch training loss (Python float) per evaluation
    iteration_list = []  # step count per evaluation
    accuracy_list = []   # test accuracy in percent (Python float) per evaluation
    total_time = 0.0     # summed wall-clock seconds over all epochs
    for epoch in range(epochs):
        # default_timer() reads a clock. timeit.timeit() with no arguments
        # instead benchmarks a `pass` statement and returns that duration,
        # which is not a timestamp.
        start = timeit.default_timer()
        for i, (images, labels) in enumerate(train_var_batch):
            optimizer.zero_grad()
            outputs = model_for_batch(images)
            loss = error(outputs, labels)
            loss.backward()
            optimizer.step()
            count += 1
            if count % eval_every == 0:
                # Score the model on the test loader without autograd
                # tracking. Separate names keep the training batch intact.
                # This evaluation happens inside the timed region, so it is
                # included in the reported epoch time.
                correct = 0
                total = 0
                with torch.no_grad():
                    for test_images, test_labels in test_var_batch:
                        predicted = torch.max(model_for_batch(test_images), 1)[1]
                        total += len(test_labels)
                        correct += (predicted == test_labels).sum().item()
                accuracy = 100 * correct / float(total)
                # Store plain numbers rather than tensors for plotting
                loss_list.append(loss.item())
                iteration_list.append(count)
                accuracy_list.append(accuracy)
        total_time += timeit.default_timer() - start

    # Average over the number of epochs actually run
    print("Average time taken per epoch for batch size ",batch," is: ", total_time/epochs)
    print("\nBatch Size: ",batch)
    plt.plot(iteration_list,loss_list)
    plt.xlabel("Number of iteration")
    plt.ylabel("Loss")
    plt.title("CNN: Loss vs Number of iteration")
    plt.show()

    plt.plot(iteration_list,accuracy_list,color = "red")
    plt.xlabel("Number of iteration")
    plt.ylabel("Accuracy")
    plt.title("CNN: Accuracy vs Number of iteration")
    plt.show()

Larger minibatch sizes run faster per epoch than smaller ones.

### 5. Train the CNN model by changing learning rates:

In [None]:
# Train a freshly initialised CNN once per learning rate (SGD, momentum 0.5)
# and plot its loss / test-accuracy history.
learning_rates = [0.1,0.01,0.001,0.0001]
for rate in learning_rates:
    model_for_learning_rate = Cnn_model()
    optimizer_var_lr = torch.optim.SGD(model_for_learning_rate.parameters(), lr=rate, momentum = 0.5)
    count = 0            # optimizer steps taken so far
    loss_list = []       # mini-batch training loss (Python float) per evaluation
    iteration_list = []  # step count per evaluation
    accuracy_list = []   # test accuracy in percent (Python float) per evaluation

    for epoch in range(epochs):
        for i, (images, labels) in enumerate(train_loader):
            optimizer_var_lr.zero_grad()
            outputs = model_for_learning_rate(images)
            loss = error(outputs, labels)
            loss.backward()
            optimizer_var_lr.step()
            count += 1
            if count % 2000 == 0:
                # Score the model on the test loader. Gradients are not
                # needed, so autograd tracking is switched off, and separate
                # names keep the training batch from being rebound.
                correct = 0
                total = 0
                with torch.no_grad():
                    for test_images, test_labels in test_loader:
                        predicted = torch.max(model_for_learning_rate(test_images), 1)[1]
                        total += len(test_labels)
                        correct += (predicted == test_labels).sum().item()
                accuracy = 100 * correct / float(total)
                # Store plain numbers rather than tensors for plotting
                loss_list.append(loss.item())
                iteration_list.append(count)
                accuracy_list.append(accuracy)
    print("Learning Rate: ",rate)
    plt.plot(iteration_list,loss_list)
    plt.xlabel("Number of iteration")
    plt.ylabel("Loss")
    plt.title("CNN: Loss vs Number of iteration")
    plt.show()

    plt.plot(iteration_list,accuracy_list,color = "red")
    plt.xlabel("Number of iteration")
    plt.ylabel("Accuracy")
    plt.title("CNN: Accuracy vs Number of iteration")
    plt.show()

The best learning rate in terms of convergence is 0.1, since it reaches the maximum accuracy fastest and does not deviate much after that.