## Model: Convolutional Neural Network
### Dataset - MNIST

Models included: 

| Model A | Model B | Model C |
| ----- | ----- | ----- |
| ReLU | ReLU | ReLU |
| 2 Convolution Layers | 2 Convolution Layers | 3 Convolution Layers |
| 2 Hidden Layers | 1 Hidden Layer | 2 Hidden Layers |
| 91.58% | 95.76% | 96.13% |

### Step 0. Necessary Imports

In [9]:
#Necessary imports
import torch
import numpy as np
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

### Step 1. Load Dataset

In [10]:
# Both MNIST splits live in the same directory and are converted to tensors
# by the same ToTensor transform.
mnist_root = './MNISTdata'
to_tensor = transforms.ToTensor()

# Training split; download=True asks torchvision to fetch the files if needed.
train_dataset = dsets.MNIST(root=mnist_root, train=True, transform=to_tensor, download=True)

# Test split; no download is requested here, so it reads what is already in
# mnist_root.
test_dataset = dsets.MNIST(root=mnist_root, train=False, transform=to_tensor)

### Step 2. Make Dataset Iterable

In [11]:
# Training budget is expressed in optimizer iterations (mini-batches).
batch_size = 100
no_iters = 12000

# One epoch is len(train_dataset) / batch_size iterations, so the iteration
# budget is converted to a whole number of epochs (truncated towards zero).
no_epochs = int(no_iters / (len(train_dataset) / batch_size))

# Evaluation loader: fixed order, same batch size as training.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

### Model A: 2-Layer CNN with Max-Pool Downscaling and Batch Normalization + 2 Fully Connected Layers
#### Optimizer - SGD + Nesterov Momentum

In [12]:
#### 3. Create Model Class
class CNNModel(nn.Module):
    """Model A: two convolution stages followed by two fully connected layers.

    Each stage applies Conv2d (5x5, stride 1, padding 2) -> ReLU ->
    BatchNorm2d -> MaxPool2d(2). The first linear layer expects 64*7*7
    flattened features, i.e. a 64-channel 7x7 map after the second pool, and
    the last linear layer produces 10 class scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels; padding 2 keeps the spatial size.
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.norm1 = nn.BatchNorm2d(32)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 32 -> 64 channels, same kernel geometry as stage 1.
        self.cnn2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.norm2 = nn.BatchNorm2d(64)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Classifier head: one square hidden layer, then the 10-way read-out.
        self.fc1 = nn.Linear(64*7*7, 64*7*7)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(64*7*7, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        # Stage 1: convolution -> ReLU -> batch norm -> 2x2 max pool.
        features = self.maxpool1(self.norm1(self.relu1(self.cnn1(x))))

        # Stage 2: same sequence with the second set of layers.
        features = self.maxpool2(self.norm2(self.relu2(self.cnn2(features))))

        # Flatten every sample: (N, 64, 7, 7) -> (N, 64*7*7).
        flat = features.view(features.shape[0], -1)

        # Hidden fully connected layer, then the linear read-out.
        hidden = self.relu3(self.fc1(flat))
        return self.fc2(hidden)
    
#### 4. Instantiate Model Class
# Build the network on the CPU, then move it to the GPU when one is available.
model = CNNModel()
if torch.cuda.is_available():
    model = model.cuda()

#### 5. Instantiate Loss Class
# CrossEntropyLoss takes the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()

#### 6. Instantiate Optimizer Class
# Plain SGD with Nesterov momentum over every model parameter.
learning_rate = 0.01
moment = 0.9
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=moment,
    nesterov=True,
)

#### 7. Train Model
def _accuracy_percent(net, loader, device):
    """Return the classification accuracy of ``net`` over ``loader`` in percent.

    The network is put in evaluation mode for the duration of the call so
    BatchNorm uses its running statistics and does not update them from the
    evaluation data; its previous mode is restored afterwards. Gradients are
    disabled so no autograd graph is built.

    Args:
        net: the model to evaluate.
        loader: iterable yielding ``(images, labels)`` batches.
        device: device the model lives on; batches are moved there.

    Returns:
        100 * correct / total as a float.
    """
    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in loader:
                images = images.to(device)
                labels = labels.to(device)

                # Predicted class = index of the largest score per sample.
                predicted = net(images).argmax(dim=1)

                correct += (predicted == labels).sum().item()
                total += labels.size(0)
    finally:
        net.train(was_training)
    return 100 * correct / total


# Evaluate after every `eval_every` parameter updates.
eval_every = 2000

# Use whatever device the model was placed on, so the loop (including the
# accuracy counting) behaves the same with or without a GPU.
device = next(model.parameters()).device

# The loader reshuffles on every pass, so it only needs to be built once.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Number of completed parameter updates.
iteration = 0
for epoch in range(no_epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Outputs (raw class scores)
        outputs = model(images)

        # Calculate loss: softmax -> cross entropy loss
        loss = criterion(outputs, labels)

        # Generate gradients w.r.t. parameters
        loss.backward()

        # Update parameters
        optimizer.step()

        iteration += 1
        if iteration % eval_every == 0:
            train_accuracy = _accuracy_percent(model, train_loader, device)
            test_accuracy = _accuracy_percent(model, test_loader, device)

            print('Iteration: {}, Loss: {}, Train Accuracy: {}, Test Accuracy: {}'.format(iteration, loss.item(), train_accuracy, test_accuracy))

Iteration: 2000, Loss: 0.03746171295642853, Train Accuracy: 99.855, Test Accuracy: 99.2
Iteration: 4000, Loss: 0.00221843714825809, Train Accuracy: 99.99666666666667, Test Accuracy: 99.26
Iteration: 6000, Loss: 0.0008015346247702837, Train Accuracy: 100.0, Test Accuracy: 99.32
Iteration: 8000, Loss: 0.00014590263890568167, Train Accuracy: 100.0, Test Accuracy: 99.33
Iteration: 10000, Loss: 0.00012578963651321828, Train Accuracy: 100.0, Test Accuracy: 99.34
Iteration: 12000, Loss: 9.32502734940499e-05, Train Accuracy: 100.0, Test Accuracy: 99.32


### Model B: 2-Layer CNN with Max-Pool Downscaling and Batch Normalization + 1 Fully Connected Layer
#### Optimizer - SGD + Nesterov Momentum

In [13]:
#### 3. Create Model Class
class CNNModel(nn.Module):
    """Model B: two convolution stages followed by a single linear classifier.

    Each stage applies Conv2d (5x5, stride 1, padding 2) -> ReLU ->
    BatchNorm2d -> MaxPool2d(2). The linear layer expects 64*7*7 flattened
    features, i.e. a 64-channel 7x7 map after the second pool, and produces
    10 class scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels; padding 2 keeps the spatial size.
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.norm1 = nn.BatchNorm2d(32)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 32 -> 64 channels, same kernel geometry as stage 1.
        self.cnn2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.norm2 = nn.BatchNorm2d(64)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Classifier: flattened features straight to the 10-way read-out.
        self.fc = nn.Linear(64*7*7, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        # Stage 1: convolution -> ReLU -> batch norm -> 2x2 max pool.
        features = self.maxpool1(self.norm1(self.relu1(self.cnn1(x))))

        # Stage 2: same sequence with the second set of layers.
        features = self.maxpool2(self.norm2(self.relu2(self.cnn2(features))))

        # Flatten every sample: (N, 64, 7, 7) -> (N, 64*7*7).
        flat = features.view(features.shape[0], -1)

        # Linear read-out.
        return self.fc(flat)
    
#### 4. Instantiate Model Class
# Build the network on the CPU, then move it to the GPU when one is available.
model = CNNModel()
if torch.cuda.is_available():
    model = model.cuda()

#### 5. Instantiate Loss Class
# CrossEntropyLoss takes the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()

#### 6. Instantiate Optimizer Class
# Plain SGD with Nesterov momentum over every model parameter.
learning_rate = 0.01
moment = 0.9
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=moment,
    nesterov=True,
)

#### 7. Train Model
def _accuracy_percent(net, loader, device):
    """Return the classification accuracy of ``net`` over ``loader`` in percent.

    The network is put in evaluation mode for the duration of the call so
    BatchNorm uses its running statistics and does not update them from the
    evaluation data; its previous mode is restored afterwards. Gradients are
    disabled so no autograd graph is built.

    Args:
        net: the model to evaluate.
        loader: iterable yielding ``(images, labels)`` batches.
        device: device the model lives on; batches are moved there.

    Returns:
        100 * correct / total as a float.
    """
    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in loader:
                images = images.to(device)
                labels = labels.to(device)

                # Predicted class = index of the largest score per sample.
                predicted = net(images).argmax(dim=1)

                correct += (predicted == labels).sum().item()
                total += labels.size(0)
    finally:
        net.train(was_training)
    return 100 * correct / total


# Evaluate after every `eval_every` parameter updates.
eval_every = 2000

# Use whatever device the model was placed on, so the loop (including the
# accuracy counting) behaves the same with or without a GPU.
device = next(model.parameters()).device

# The loader reshuffles on every pass, so it only needs to be built once.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Number of completed parameter updates.
iteration = 0
for epoch in range(no_epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Outputs (raw class scores)
        outputs = model(images)

        # Calculate loss: softmax -> cross entropy loss
        loss = criterion(outputs, labels)

        # Generate gradients w.r.t. parameters
        loss.backward()

        # Update parameters
        optimizer.step()

        iteration += 1
        if iteration % eval_every == 0:
            train_accuracy = _accuracy_percent(model, train_loader, device)
            test_accuracy = _accuracy_percent(model, test_loader, device)

            print('Iteration: {}, Loss: {}, Train Accuracy: {}, Test Accuracy: {}'.format(iteration, loss.item(), train_accuracy, test_accuracy))

Iteration: 2000, Loss: 0.013690710067749023, Train Accuracy: 99.70166666666667, Test Accuracy: 99.05
Iteration: 4000, Loss: 0.0013229942414909601, Train Accuracy: 99.97833333333334, Test Accuracy: 99.13
Iteration: 6000, Loss: 0.000182514195330441, Train Accuracy: 100.0, Test Accuracy: 99.21
Iteration: 8000, Loss: 0.0003331089101266116, Train Accuracy: 100.0, Test Accuracy: 99.23
Iteration: 10000, Loss: 0.00012549399980343878, Train Accuracy: 100.0, Test Accuracy: 99.22
Iteration: 12000, Loss: 0.00011435509077273309, Train Accuracy: 100.0, Test Accuracy: 99.23


### Model C: 3-Layer CNN with Max-Pool Downscaling and Batch Normalization + 2 Fully Connected Layers
#### Optimizer - SGD + Nesterov Momentum

In [14]:
#### 3. Create Model Class
class CNNModel(nn.Module):
    """Model C: three convolution stages followed by two fully connected layers.

    Each stage applies Conv2d (3x3, stride 1, padding 1) -> ReLU ->
    BatchNorm2d -> MaxPool2d. The first pool uses stride 1 (it shrinks each
    spatial side by one instead of halving it); the other two use the default
    stride of 2. The first linear layer expects 64*6*6 flattened features,
    i.e. a 64-channel 6x6 map after the third pool, and the last linear layer
    produces 10 class scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels, followed by a stride-1 pool.
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.norm1 = nn.BatchNorm2d(16)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2,stride=1)

        # Stage 2: 16 -> 32 channels, followed by a halving pool.
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.norm2 = nn.BatchNorm2d(32)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Stage 3: 32 -> 64 channels, followed by a halving pool.
        self.cnn3 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3,stride=1,padding=1)
        self.relu3 = nn.ReLU()
        self.norm3 = nn.BatchNorm2d(64)
        self.maxpool3=nn.MaxPool2d(kernel_size=2)

        # Classifier head: one square hidden layer, then the 10-way read-out.
        self.fc1 = nn.Linear(64*6*6, 64*6*6)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(64*6*6, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        # Stage 1: convolution -> ReLU -> batch norm -> max pool.
        features = self.maxpool1(self.norm1(self.relu1(self.cnn1(x))))

        # Stage 2: same sequence with the second set of layers.
        features = self.maxpool2(self.norm2(self.relu2(self.cnn2(features))))

        # Stage 3: same sequence with the third set of layers.
        features = self.maxpool3(self.norm3(self.relu3(self.cnn3(features))))

        # Flatten every sample: (N, 64, 6, 6) -> (N, 64*6*6).
        flat = features.view(features.shape[0], -1)

        # Hidden fully connected layer, then the linear read-out.
        hidden = self.relu4(self.fc1(flat))
        return self.fc2(hidden)
    
#### 4. Instantiate Model Class
# Build the network on the CPU, then move it to the GPU when one is available.
model = CNNModel()
if torch.cuda.is_available():
    model = model.cuda()

#### 5. Instantiate Loss Class
# CrossEntropyLoss takes the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()

#### 6. Instantiate Optimizer Class
# Plain SGD with Nesterov momentum over every model parameter.
learning_rate = 0.01
moment = 0.9
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=moment,
    nesterov=True,
)

#### 7. Train Model
def _accuracy_percent(net, loader, device):
    """Return the classification accuracy of ``net`` over ``loader`` in percent.

    The network is put in evaluation mode for the duration of the call so
    BatchNorm uses its running statistics and does not update them from the
    evaluation data; its previous mode is restored afterwards. Gradients are
    disabled so no autograd graph is built.

    Args:
        net: the model to evaluate.
        loader: iterable yielding ``(images, labels)`` batches.
        device: device the model lives on; batches are moved there.

    Returns:
        100 * correct / total as a float.
    """
    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in loader:
                images = images.to(device)
                labels = labels.to(device)

                # Predicted class = index of the largest score per sample.
                predicted = net(images).argmax(dim=1)

                correct += (predicted == labels).sum().item()
                total += labels.size(0)
    finally:
        net.train(was_training)
    return 100 * correct / total


# Evaluate after every `eval_every` parameter updates.
eval_every = 2000

# Use whatever device the model was placed on, so the loop (including the
# accuracy counting) behaves the same with or without a GPU.
device = next(model.parameters()).device

# The loader reshuffles on every pass, so it only needs to be built once.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Number of completed parameter updates.
iteration = 0
for epoch in range(no_epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Outputs (raw class scores)
        outputs = model(images)

        # Calculate loss: softmax -> cross entropy loss
        loss = criterion(outputs, labels)

        # Generate gradients w.r.t. parameters
        loss.backward()

        # Update parameters
        optimizer.step()

        iteration += 1
        if iteration % eval_every == 0:
            train_accuracy = _accuracy_percent(model, train_loader, device)
            test_accuracy = _accuracy_percent(model, test_loader, device)

            print('Iteration: {}, Loss: {}, Train Accuracy: {}, Test Accuracy: {}'.format(iteration, loss.item(), train_accuracy, test_accuracy))

Iteration: 2000, Loss: 0.008390073664486408, Train Accuracy: 99.66, Test Accuracy: 99.25
Iteration: 4000, Loss: 0.003684034338220954, Train Accuracy: 99.94833333333334, Test Accuracy: 99.26
Iteration: 6000, Loss: 0.00016717911057639867, Train Accuracy: 99.99833333333333, Test Accuracy: 99.37
Iteration: 8000, Loss: 0.00014894484775140882, Train Accuracy: 100.0, Test Accuracy: 99.33
Iteration: 10000, Loss: 0.00033684729714877903, Train Accuracy: 100.0, Test Accuracy: 99.37
Iteration: 12000, Loss: 7.460594497388229e-05, Train Accuracy: 100.0, Test Accuracy: 99.35
