# Assignment 1 - Part B


In [1]:
# torch and torchvision provide some very handy utilities for dataset loading
from torch.utils.data import DataLoader
import torchvision.datasets as tv_datasets
import torchvision.transforms as tv_transforms
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Experimental setup shared by the data-loading and training cells.
# Training is stochastic (random weight initialisation, shuffled training
# loader), so the global torch RNG is seeded here to make a
# Restart & Run All repeatable on the same hardware/software stack.
seed = 0
torch.manual_seed(seed);  # trailing ';' keeps the returned Generator out of the cell output

num_epochs = 2      # passes over the training set made by the training loop
batch_size = 32     # samples per mini-batch handed to each DataLoader
num_workers = 2     # worker processes used by each DataLoader
print_every = 300   # iterations between running-loss printouts during training

In [3]:
# Build the MNIST datasets and their DataLoaders, keyed by split name
# ("train" / "test"). The data is downloaded to ./data when missing.
dataset, loader = {}, {}
for data_type, is_train in (("train", True), ("test", False)):
    # Preprocessing pipeline for input images: convert to a tensor, then
    # normalise with mean 0.1307 and std 0.3081.
    preprocess = tv_transforms.Compose([
        tv_transforms.ToTensor(),
        tv_transforms.Normalize((0.1307,), (0.3081,)),
    ])
    dataset[data_type] = tv_datasets.MNIST(
        root="./data",
        train=is_train,
        download=True,
        transform=preprocess,
    )
    # Only the training split is shuffled; the test split keeps its order.
    loader[data_type] = DataLoader(
        dataset[data_type],
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=num_workers,
    )


In [None]:
def training(net, data_loader=None, epochs=None, log_every=None, return_history=False):
    """Re-initialise ``net`` and train it with SGD and cross-entropy loss.

    Every module of ``net`` that exposes ``reset_parameters`` is reset first,
    so each call starts from freshly initialised weights. The model is moved
    to ``cuda:0`` when CUDA is available, otherwise it stays on the CPU, and
    it is left in training mode when the function returns.

    Args:
        net: the ``nn.Module`` to train (modified in place).
        data_loader: iterable yielding ``(img, target)`` batches. Defaults to
            the notebook's ``loader["train"]``.
        epochs: number of passes over ``data_loader``. Defaults to the
            notebook's ``num_epochs``.
        log_every: the mean loss of the last ``log_every`` iterations is
            printed every ``log_every`` iterations. Defaults to the
            notebook's ``print_every``.
        return_history: when True, also return the per-epoch statistics.

    Returns:
        ``net`` when ``return_history`` is False (the default); otherwise a
        tuple ``(net, history)`` where ``history`` is a dict with the keys
        ``"loss"`` (mean batch loss per epoch) and ``"accuracy"`` (training
        accuracy per epoch, in percent).
    """
    # Fall back to the notebook-level configuration when no override is given.
    if data_loader is None:
        data_loader = loader["train"]
    if epochs is None:
        epochs = num_epochs
    if log_every is None:
        log_every = print_every

    optimizer_name = "SGD"
    optimizer_kwargs = dict(
        lr=1e-2,
        momentum=0.9,
        weight_decay=1e-5,
    )

    # Walk the whole module tree (not only direct children) so that layers
    # nested inside sub-containers are re-initialised as well.
    for module in net.modules():
        if hasattr(module, "reset_parameters"):
            module.reset_parameters()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    optimizer = getattr(optim, optimizer_name)(net.parameters(), **optimizer_kwargs)

    criterion = nn.CrossEntropyLoss()
    train_losses = []
    train_accuracies = []

    # training loop
    net.train()
    for epoch in range(epochs):
        running_loss = 0.0  # loss summed over the current logging window only
        epoch_loss = 0.0    # loss summed over the whole epoch
        num_batches = 0
        correct_train, total_train = 0, 0
        for i, (img, target) in enumerate(data_loader):
            img, target = img.to(device), target.to(device)

            pred = net(img)
            loss = criterion(pred, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # statistics
            batch_loss = loss.item()
            running_loss += batch_loss
            epoch_loss += batch_loss
            num_batches += 1
            total_train += len(target)
            correct_train += (torch.argmax(pred, dim=1) == target).sum().item()
            if i % log_every == log_every - 1:
                print(f"[epoch={epoch + 1:3d}, iter={i + 1:5d}] loss: {running_loss / log_every:.3f}")
                running_loss = 0.0

        # The epoch mean uses its own accumulator: running_loss is zeroed at
        # every printout and would only cover the tail of the epoch.
        # max(..., 1) keeps an empty loader from dividing by zero.
        train_losses.append(epoch_loss / max(num_batches, 1))
        train_accuracies.append(100 * correct_train / max(total_train, 1))

    print("Finished Training")
    if return_history:
        return net, {"loss": train_losses, "accuracy": train_accuracies}
    return net



## Model definition, evaluation, and training run

In [None]:
# Convolutional classifier; per the original note it is the same network that
# was trained on CIFAR-10, here taking 1-channel input.
# The 3x3 convolutions use padding=1 and each MaxPool2d(2) halves the spatial
# size, so the 512 * 3 * 3 flatten size corresponds to 28x28 images
# (28 -> 14 -> 7 -> 3 after the three poolings).
def _conv_stage(in_channels, out_channels):
    """Return [3x3 conv (padding=1), batch norm, in-place ReLU] as a list."""
    return [
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]


def _dense_stage(in_features, out_features):
    """Return [linear, batch norm, in-place ReLU, dropout(0.5)] as a list."""
    return [
        nn.Linear(in_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
    ]


# Layers are created in the same order as they appear in the final model.
_layers = []
_layers += _conv_stage(1, 128) + [nn.MaxPool2d(2), nn.Dropout(0.3)]
_layers += _conv_stage(128, 256) + [nn.MaxPool2d(2), nn.Dropout(0.3)]
_layers += _conv_stage(256, 512)
_layers += _conv_stage(512, 512) + [nn.MaxPool2d(2), nn.Dropout(0.4)]
_layers += [nn.Flatten()]
_layers += _dense_stage(512 * 3 * 3, 512)
_layers += _dense_stage(512, 256)
_layers += [nn.Linear(256, 10)]  # one output logit per class

net = nn.Sequential(*_layers)


In [9]:
def evaluate(net, data_loader=None):
    """Print the classification accuracy of ``net`` over a data loader.

    The model is switched to evaluation mode (and left in it) and gradients
    are disabled while the batches are scored.

    Args:
        net: the ``nn.Module`` to evaluate.
        data_loader: iterable yielding ``(img, target)`` batches. Defaults to
            the notebook's ``loader["test"]``.

    Returns:
        None. The accuracy (in percent) is printed.
    """
    if data_loader is None:
        data_loader = loader["test"]

    # Run on whatever device the model's parameters live on instead of
    # depending on a global `device` variable. Parameter-free models fall
    # back to the CPU.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for img, target in data_loader:
            img, target = img.to(device), target.to(device)

            # Predicted class = index of the largest output score.
            predicted = torch.argmax(net(img), dim=1)
            total += len(target)
            correct += (predicted == target).sum().item()

    # An empty loader is reported as 0% instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    print(f"Accuracy of the network on the {total} test images: {accuracy:.2f}%")


In [11]:
# Train the model (training() re-initialises its weights first and returns
# the same module object), then print its accuracy on the test loader.
trained_net = training(net)
evaluate(trained_net)

[epoch=  1, iter=  300] loss: 0.567
[epoch=  1, iter=  600] loss: 0.231
[epoch=  1, iter=  900] loss: 0.176
[epoch=  1, iter= 1200] loss: 0.138
[epoch=  1, iter= 1500] loss: 0.141
[epoch=  1, iter= 1800] loss: 0.125
[epoch=  2, iter=  300] loss: 0.117
[epoch=  2, iter=  600] loss: 0.116
[epoch=  2, iter=  900] loss: 0.106
[epoch=  2, iter= 1200] loss: 0.100
[epoch=  2, iter= 1500] loss: 0.095
[epoch=  2, iter= 1800] loss: 0.094
Finished Training
Accuracy of the network on the 10000 test images: 99.22%
