In [65]:
import torch.nn as nn
class CNN(nn.Module):
    """Small two-stage convolutional classifier producing 10 logits.

    Each stage is a 5x5 convolution (stride 1, padding 2) followed by a ReLU
    and a 2x2 max-pool. The linear head expects ``32 * 7 * 7`` features,
    which is what two halvings leave from a single-channel 28x28 input.
    """

    def __init__(self):
        super().__init__()
        # A 5x5 kernel with padding=2 and stride=1 keeps height/width intact,
        # so only the pooling layers shrink the feature maps.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        # The activation and pooling modules hold no weights and are shared
        # by both stages.
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.fc = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        stage1 = self.pool(self.relu(self.conv1(x)))
        stage2 = self.pool(self.relu(self.conv2(stage1)))
        # Collapse everything except the batch dimension for the linear head.
        flattened = stage2.view(stage2.size(0), -1)
        return self.fc(flattened)

In [66]:
model1 = CNN()
# Only tensors that still require gradients count as trainable weights.
trainable_tensors1 = [w for w in model1.parameters() if w.requires_grad]
pytorch_total_params1 = sum(w.numel() for w in trainable_tensors1)
pytorch_total_params1

28938

In [67]:
class MLP(nn.Module):
  """Single-hidden-layer perceptron: 784 inputs -> 37 hidden units -> 10 logits."""

  def __init__(self):
    super().__init__()
    self.lin1 = nn.Linear(784, 37)
    self.relu1 = nn.ReLU()
    self.lin2 = nn.Linear(37, 10)

  def forward(self, x):
    """Flatten each sample in ``x`` and return its 10 raw class scores."""
    # Keep the batch dimension, merge all remaining dimensions into one.
    flat = x.view(x.shape[0], -1)
    hidden = self.relu1(self.lin1(flat))
    return self.lin2(hidden)

In [68]:
model2 = MLP()
# Only tensors that still require gradients count as trainable weights.
trainable_tensors2 = [w for w in model2.parameters() if w.requires_grad]
pytorch_total_params2 = sum(w.numel() for w in trainable_tensors2)
pytorch_total_params2

29425

In [69]:
import torch
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [70]:
from torchvision import datasets
from torchvision.transforms import ToTensor
# Both splits live under the same root folder and are converted to tensors;
# only the training call requests a download.
train_data = datasets.MNIST(
    root='data',
    train=True,
    transform=ToTensor(),
    download=True,
)
test_data = datasets.MNIST(
    root='data',
    train=False,
    transform=ToTensor(),
)

In [71]:
# Show the summary of each split, one after the other.
print(train_data, test_data, sep='\n')

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [72]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch so each pass sees the samples
# in a different order.
train_loader = DataLoader(train_data,
                          batch_size=100,
                          shuffle=True,
                          num_workers=1)

# Evaluation only counts correct predictions, so batch order is irrelevant;
# leaving the test split unshuffled keeps the pass deterministic.
test_loader = DataLoader(test_data,
                         batch_size=100,
                         shuffle=False,
                         num_workers=1)

In [73]:
from torch import optim

# One shared objective for both networks.
loss_func = nn.CrossEntropyLoss()
# Each network gets its own Adam optimizer with the same learning rate;
# they are built in the order model1, model2.
optimizer1, optimizer2 = (
    optim.Adam(net.parameters(), lr = 0.01) for net in (model1, model2)
)

In [74]:
epochs = 1
for epoch in range(epochs):
  # `step` is 1-based so it can be printed directly as the iteration number.
  for step, (images, labels) in enumerate(train_loader, start=1):
    # Clear gradients left over from the previous batch before the new pass.
    optimizer1.zero_grad()
    output = model1(images)
    loss = loss_func(output, labels)
    loss.backward()
    optimizer1.step()
    # Report on the first batch and then every 100 batches after it.
    if step % 100 == 1:
      print('Epoch [{}/{}], iter {}, Loss: {:.4f}'.format(
          epoch + 1, epochs, step, loss.item()))


Epoch [1/1], iter 1, Loss: 2.3000
Epoch [1/1], iter 101, Loss: 0.1388
Epoch [1/1], iter 201, Loss: 0.0685
Epoch [1/1], iter 301, Loss: 0.0591
Epoch [1/1], iter 401, Loss: 0.0501
Epoch [1/1], iter 501, Loss: 0.0206


In [75]:
epochs = 1
for epoch in range(epochs):
  # `step` is 1-based so it can be printed directly as the iteration number.
  for step, (images, labels) in enumerate(train_loader, start=1):
    # Clear gradients left over from the previous batch before the new pass.
    optimizer2.zero_grad()
    output = model2(images)
    loss = loss_func(output, labels)
    loss.backward()
    optimizer2.step()
    # Report on the first batch and then every 100 batches after it.
    if step % 100 == 1:
      print('Epoch [{}/{}], iter {}, Loss: {:.4f}'.format(
          epoch + 1, epochs, step, loss.item()))


Epoch [1/1], iter 1, Loss: 2.3145
Epoch [1/1], iter 101, Loss: 0.5158
Epoch [1/1], iter 201, Loss: 0.2758
Epoch [1/1], iter 301, Loss: 0.1298
Epoch [1/1], iter 401, Loss: 0.3769
Epoch [1/1], iter 501, Loss: 0.1630


In [76]:
# Count how many test images each network classifies correctly.
correct_model1 = 0
correct_model2 = 0
# Put both networks in inference mode. Neither has dropout or batch-norm
# layers, so this does not change their outputs, but it is the standard
# guard before evaluation.
model1.eval()
model2.eval()
# No gradients are needed for scoring; disabling autograd avoids building a
# computation graph for every test batch.
with torch.no_grad():
  for images, labels in test_loader:
    # The predicted class is the index of the largest logit in each row.
    model1_pred = model1(images).argmax(dim=1)
    model2_pred = model2(images).argmax(dim=1)
    # .item() keeps the running totals as plain Python ints, not tensors.
    correct_model1 += (model1_pred == labels).sum().item()
    correct_model2 += (model2_pred == labels).sum().item()

In [77]:
# Turn the raw hit counts into percentages of the whole test split.
cnn_accuracy = correct_model1*100/len(test_data)
mlp_accuracy = correct_model2*100/len(test_data)
print(f'Accuracy of CNN is {cnn_accuracy:.2f}% \n Accuracy of MLP is {mlp_accuracy:.2f}% \n')

Accuracy of CNN is 98.56% 
 Accuracy of MLP is 94.49% 

