In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Hyper-parameters shared by the data loaders, the optimizer and the training loop.
num_epochs = 5          # full passes over the training set
batch_size = 4          # samples per mini-batch (used for both train and test loaders)
learning_rate = 0.001   # step size handed to the optimizer

In [3]:


# MNIST train / test splits, downloaded into ./data on first use and converted
# to tensors by ToTensor().
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Only the training loader shuffles; the test loader keeps the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
)

In [4]:
# implement CNN
class ConvNet(nn.Module):
    """LeNet-style CNN mapping 1-channel 28x28 images to 10 class logits.

    Shape trace for an input of shape (N, 1, 28, 28):
        conv1 (5x5, no padding) -> (N, 6, 24, 24) -> pool -> (N, 6, 12, 12)
        conv2 (5x5, no padding) -> (N, 16, 8, 8)  -> pool -> (N, 16, 4, 4)
        flatten -> (N, 256) -> fc1 -> (N, 120) -> fc2 -> (N, 84) -> fc3 -> (N, 10)

    The returned values are raw logits (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)   # in_channels, out_channels, kernel size
        self.pool = nn.MaxPool2d(2, 2)    # pooling window size, stride
        self.conv2 = nn.Conv2d(6, 16, 5)  # conv1 out_channels == conv2 in_channels
        # After the second pooling step a 28x28 input has become 16 feature
        # maps of 4x4, so the first linear layer takes 16*4*4 = 256 features.
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the network on a batch ``x`` of shape (N, 1, 28, 28).

        Returns a tensor of shape (N, 10) holding one logit per class.
        Raises ``RuntimeError`` (from ``fc1``) if the spatial size of ``x``
        does not reduce to 16x4x4 features.
        """
        # conv layers
        out = self.conv1(x)
        out = self.relu(out)
        out = self.pool(out)
        out = self.conv2(out)
        out = self.relu(out)
        out = self.pool(out)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16*4*4), this never folds extra features into the batch
        # axis, so a wrongly sized input fails loudly in fc1 instead of
        # silently producing a different number of rows.
        out = torch.flatten(out, 1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)

        return out
        

In [5]:
# Report whether a GPU is visible, then pick the matching device.
cuda_ok = torch.cuda.is_available()
print("CUDA Available: ", cuda_ok)
device = torch.device("cuda") if cuda_ok else torch.device("cpu")

# Build the network and place its parameters on the chosen device.
model = ConvNet().to(device)

# Cross-entropy loss on the network outputs, optimized with Adam over all
# model parameters at the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)




CUDA Available:  False


In [6]:
#training loop
# Make the mode explicit so re-running this cell after a later model.eval()
# call still trains in training mode.
model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The model was moved to `device`; the batch has to live there too,
        # otherwise the forward pass fails as soon as CUDA is available.
        images = images.to(device)
        labels = labels.to(device)

        #forward pass
        preds = model(images)

        #loss
        loss = criterion(preds, labels)

        # Clear gradients left over from the previous step, then backprop.
        optimizer.zero_grad()
        loss.backward()

        #update weights
        optimizer.step()

        # Progress report every 5000 mini-batches (loss of the current batch only).
        if (i+1)%5000 == 0:
            print(f'Epoch:  {epoch+1}/{num_epochs},  Step:  {i+1}/{len(train_loader)},  Loss:  {loss.item()}')

Epoch:  1/5,  Step:  5000/15000,  Loss:  0.21614986658096313
Epoch:  1/5,  Step:  10000/15000,  Loss:  0.30207422375679016
Epoch:  1/5,  Step:  15000/15000,  Loss:  0.029919056221842766
Epoch:  2/5,  Step:  5000/15000,  Loss:  0.07375909388065338
Epoch:  2/5,  Step:  10000/15000,  Loss:  0.0016093271551653743
Epoch:  2/5,  Step:  15000/15000,  Loss:  0.006552206818014383
Epoch:  3/5,  Step:  5000/15000,  Loss:  0.45278868079185486
Epoch:  3/5,  Step:  10000/15000,  Loss:  0.00463282223790884
Epoch:  3/5,  Step:  15000/15000,  Loss:  0.5200854539871216
Epoch:  4/5,  Step:  5000/15000,  Loss:  0.005285507533699274
Epoch:  4/5,  Step:  10000/15000,  Loss:  2.1903755623498e-05
Epoch:  4/5,  Step:  15000/15000,  Loss:  3.129231117782183e-06
Epoch:  5/5,  Step:  5000/15000,  Loss:  0.0020429722499102354
Epoch:  5/5,  Step:  10000/15000,  Loss:  0.0027493874076753855
Epoch:  5/5,  Step:  15000/15000,  Loss:  1.2665774193010293e-05


In [8]:
#test
# Evaluate on the test loader, collecting overall and per-class hit counts.
model.eval()  # inference mode for any layers that behave differently in training
with torch.no_grad():
    total_correct = 0
    total = 0
    class_correct = [0 for _ in range(10)]
    class_total = [0 for _ in range(10)]
    for images, labels in test_loader:
        # Keep the batch on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)

        # Index of the largest logit along dim 1 is the predicted class.
        predicted = torch.max(outputs, 1).indices
        total += labels.shape[0]
        total_correct += (predicted == labels).sum().item()

        # Iterate over the samples actually present in this batch rather than
        # range(batch_size): the final batch may be shorter, which would
        # otherwise raise IndexError.
        for label, guess in zip(labels.tolist(), predicted.tolist()):
            class_total[label] += 1
            if label == guess:
                class_correct[label] += 1

In [9]:
# Overall accuracy, as a percentage of all evaluated test samples.
acc = 100.0 * total_correct / total
print(f'Overall Accuracy:  {acc}')

# Per-class accuracy: correct predictions for a digit over the number of
# test samples carrying that digit as their label.
class_acc = []
for digit in range(10):
    class_acc.append(100 * class_correct[digit] / class_total[digit])
    print(f'{digit} accuracy:  {class_acc[digit]}')

Overall Accuracy:  98.79
0 accuracy:  99.59183673469387
1 accuracy:  99.20704845814979
2 accuracy:  98.54651162790698
3 accuracy:  99.20792079207921
4 accuracy:  98.87983706720978
5 accuracy:  98.87892376681614
6 accuracy:  98.12108559498957
7 accuracy:  98.5408560311284
8 accuracy:  98.870636550308
9 accuracy:  98.01783944499505


In [11]:
PATH = 'CNN_learn_pytorch_MNIST_v2.pth'

#save params
# Only the parameter tensors (state_dict) are written, not the module object.
torch.save(model.state_dict(), PATH)

# Rebuild the architecture and load the saved weights into it.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written from a CUDA model still loads on a CPU-only machine.
model = ConvNet().to(device)
model.load_state_dict(torch.load(PATH, map_location=device))
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module summary.
model.eval()



ConvNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (relu): ReLU()
)