# MNIST




In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets 
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [None]:
# Install/upgrade the TensorBoard profiler plugin.
# NOTE(review): a bare `pip` line presumably relies on IPython automagic; confirm it is enabled.
pip install -U tensorboard-plugin-profile
# Load the TensorBoard notebook extension (the %tensorboard magic is used in a later cell).
%load_ext tensorboard

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# MNIST training split, stored under dataset/ and converted to tensors on access.
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# MNIST test split with the same storage location and transform.
test_dataset = datasets.MNIST(
    root="dataset/",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Batches of 128 test samples, reshuffled on every pass over the loader.
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=True)

In [None]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Architecture: conv(3x3, 8 filters) -> ReLU -> 2x2 max-pool ->
    conv(3x3, 16 filters) -> ReLU -> 2x2 max-pool -> flatten -> linear.

    The padded 3x3 convolutions keep the spatial size and each pooling step
    halves it, so the linear layer's ``16 * 7 * 7`` input size requires
    28x28 input images (28 -> 14 -> 7).

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of output logits produced per sample.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(
            # Honour the constructor argument instead of a hard-coded 1.
            in_channels=in_channels,
            out_channels=8,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        # A single pooling module is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

        # Xavier/Glorot initialisation of the weights (biases keep their
        # default initialisation). The trailing-underscore variant is the
        # supported in-place API; the un-suffixed name is deprecated.
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)
        nn.init.xavier_uniform_(self.fc1.weight)

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        Args:
            x: Input batch; the first dimension is treated as the batch
                dimension and everything after the second pooling step is
                flattened per sample.

        Returns:
            Tensor with one row of ``num_classes`` logits per sample (no
            softmax is applied).
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x

In [None]:
# Fixed training configuration.
num_epoch = 8      # passes over the training set per hyperparameter combination
num_classes = 10   # number of target classes
in_channel = 1     # channel count of the input images

In [None]:
# Build the network with its default arguments, then move it to the selected device.
model = CNN()
model = model.to(device)

In [None]:
# Hyperparameter grid: candidate learning rates and mini-batch sizes.
l_rates = [0.1, 0.01, 0.001]
batch_sizes = [64, 128, 256]

In [None]:
# Grid search over batch size and learning rate. Every combination is trained
# for `num_epoch` epochs and logged to its own TensorBoard run directory.
criterion = nn.CrossEntropyLoss()

for batch in batch_sizes:
    for l_rate in l_rates:
        # Start every combination from freshly initialised weights; reusing
        # one model would let later runs inherit the training of earlier
        # ones and make the comparison meaningless. After the loop, `model`
        # is the network trained with the last combination.
        model = CNN().to(device)
        model.train()

        train_loader = DataLoader(dataset=train_dataset, batch_size=batch, shuffle=True)
        optimise = optim.Adam(model.parameters(), lr=l_rate)
        writer = SummaryWriter('runs/MNIST/ Batch size {}, L_rate {}'.format(batch, l_rate))
        step = 0  # global step counter across all epochs of this run

        for epoch in range(num_epoch):
            losses = []
            for batch_id, (data, target) in tqdm(enumerate(train_loader), total=len(train_loader),
                                                 position=0, leave=False):
                data = data.to(device)
                target = target.to(device)

                output = model(data)
                loss = criterion(output, target)

                optimise.zero_grad()
                loss.backward()
                optimise.step()

                # Record the loss exactly once per step.
                batch_loss = loss.item()
                losses.append(batch_loss)

                # Accuracy on the current mini-batch only.
                _, predicted = output.max(1)
                num_correct = (predicted == target).sum()
                running_train_acc = float(num_correct) / float(data.shape[0])

                # Write scalars and weight histograms for every forward pass,
                # tagged with the step so TensorBoard can show them over time.
                writer.add_histogram('layer1', model.conv1.weight, global_step=step)
                writer.add_histogram('layer2', model.conv2.weight, global_step=step)
                writer.add_histogram('layer3', model.fc1.weight, global_step=step)
                writer.add_scalar('training_loss', batch_loss, global_step=step)
                writer.add_scalar('training_acc', running_train_acc, global_step=step)
                step += 1

            # Helps in choosing the best combination of hyperparameters.
            # add_hparams takes the hyperparameters and the metrics as two
            # separate dicts; the metric is this epoch's mean training loss.
            writer.add_hparams(
                {'lr': l_rate, 'b_size': batch},
                {'loss': sum(losses) / len(losses)},
            )

        # Flush pending events and release the event file of this run.
        writer.close()

In [None]:
# Open TensorBoard inside the notebook on runs/, the directory the training cell's SummaryWriter logs under.
%tensorboard --logdir runs/

In [None]:
# Evaluate the current `model` on the test set: overall accuracy plus a
# confusion matrix whose rows are true labels and columns are predictions.
model.eval()
num_classes = 10
num_correct = 0
num_samples = 0
confusion_matrix = torch.zeros(num_classes, num_classes)
counter = 0  # number of batches evaluated
with torch.no_grad():
    for images, labels in test_loader:
        counter += 1
        images = images.to(device)
        labels = labels.to(device)

        logits = model(images)
        _, predictions = logits.max(1)

        # Accumulate plain Python ints so the final ratio is an ordinary
        # float rather than a float32 tensor.
        num_correct += (predictions == labels).sum().item()
        num_samples += predictions.size(0)

        # Vectorised confusion-matrix update: encode every (true, predicted)
        # pair as true * num_classes + predicted, count the codes, and
        # reshape the counts back to (num_classes, num_classes).
        pair_codes = labels.view(-1) * num_classes + predictions.view(-1)
        pair_counts = torch.bincount(pair_codes, minlength=num_classes * num_classes)
        confusion_matrix += pair_counts.view(num_classes, num_classes).to(
            device=confusion_matrix.device, dtype=confusion_matrix.dtype
        )

print('acc : {}'.format(num_correct / num_samples))

acc : 0.9827999472618103


In [None]:
# Confusion matrix: rows are indexed by the true label, columns by the predicted label.
print(confusion_matrix) # shape: num_classes x num_classes
# Diagonal divided by row sums = TP / (TP + FN), i.e. per-class recall.
# (Precision would divide by the column sums, confusion_matrix.sum(0), instead.)
print(confusion_matrix.diag()/confusion_matrix.sum(1)) # TP/(TP+FN) - recall for every class