In [1]:
from __future__ import print_function
from collections import namedtuple
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR

Arg = namedtuple('Arg', ["model", # type of model: CNN or ANN
                         "batch_size", # size of training batch: 64, 128
                         "test_batch_size", # size of testing batch: 1000, 2000
                         "epochs", # how many epochs?
                         "lr", # learning rate?
                         "gamma", # if use adagrad, gamma = ?
                         "seed", # seed for random
                         "log_interval", 
                         "no_cuda", # use GPU or not: True or False
                         "dry_run", # run training 1 batch (to check error)? True or False
                         "save_model", # want to store model?
                         "checkpoint", # checkpoint path to store
                         "num_class", # output classes? 10 or 26
                         "restore_ck"]) # want to restore model from checkpoint?

In [2]:
class ANN(nn.Module):
    def __init__(self, hidden_sizes, num_class):
        super(ANN, self).__init__()
        # Complete the code (in the ...)
        if len(hidden_sizes) != 2:
            raise(AttributeError)

        self.model = nn.Sequential(
            nn.Linear(28*28, hidden_sizes[0]),
            nn.ReLU(),
            nn.Linear(hidden_sizes[0], hidden_sizes[1]),
            nn.ReLU(),
            nn.Linear(hidden_sizes[1], num_class),
        )
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.model(x)
        return x

In [3]:
class CNN(nn.Module):
    def __init__(self, num_class):
        super(CNN, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(10),
            nn.Dropout2d(p=0.2),
            nn.ReLU(),
            nn.Conv2d(10, 20, kernel_size=5),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(20),
            nn.Dropout2d(p=0.2),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(320, 160),
            nn.BatchNorm1d(160),
            nn.Dropout(),
            nn.ReLU(),
            nn.Linear(160, 50),
            nn.BatchNorm1d(50),
            nn.Dropout(),
            nn.ReLU(),
            nn.Linear(50, num_class),
        )

    def forward(self, x):
        x = self.model(x)
        return x

In [4]:
class CNN1(nn.Module):
    def __init__(self, num_class):
        super(CNN1, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),
            nn.MaxPool2d(2),
            nn.ReLU(),
            nn.Conv2d(10, 20, kernel_size=5),
            nn.MaxPool2d(2),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(320, 160),
            nn.ReLU(),
            nn.Linear(160, 50),
            nn.ReLU(),
            nn.Linear(50, num_class),
        )

    def forward(self, x):
        x = self.model(x)
        return x

In [5]:
class CNN2(nn.Module):
    def __init__(self, num_class):
        super(CNN2, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),
            nn.MaxPool2d(2),
            nn.Dropout2d(p=0.2),
            nn.ReLU(),
            nn.Conv2d(10, 20, kernel_size=5),
            nn.MaxPool2d(2),
            nn.Dropout2d(p=0.2),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(320, 160),
            nn.Dropout(),
            nn.ReLU(),
            nn.Linear(160, 50),
            nn.Dropout(),
            nn.ReLU(),
            nn.Linear(50, num_class),
        )

    def forward(self, x):
        x = self.model(x)
        return x

In [6]:
class CNN3(nn.Module):
    def __init__(self, num_class):
        super(CNN3, self).__init__()
        # Complete the code (in the ...)
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(10),
            nn.ReLU(),
            nn.Conv2d(10, 20, kernel_size=5),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(20),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(320, 160),
            nn.BatchNorm1d(160),
            nn.ReLU(),
            nn.Linear(160, 50),
            nn.BatchNorm1d(50),
            nn.ReLU(),
            nn.Linear(50, num_class),
        )

    def forward(self, x):
        x = self.model(x)
        return x

In [7]:
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        #labels = torch.eye(3)[labels].to(device)
        #target = torch.eye(10)[target]
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        #loss = F.nll_loss(output, target)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            #test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [19]:
from collections import OrderedDict

"""
Parameters:
-----------
model: torch model (CNN, ANN)
checkpoint: a file path to saved checkpoint
"""
def load_model(model, checkpoint):
    model.load_state_dict(torch.load(checkpoint))

In [20]:
import time 

def main(args, train_loader, test_loader=None, use_cuda=False):
    # Training settings
    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    if args.model == "ANN":
        # fill the size of hidden layers in ...
        model = ANN([256, 128], args.num_class).to(device)
    elif args.model == "CNN":
        model = CNN(args.num_class).to(device)
    elif args.model == "CNN1":
        model = CNN1(args.num_class).to(device)
    elif args.model == "CNN2":
        model = CNN1(args.num_class).to(device)
    elif args.model == "CNN3":
        model = CNN3(args.num_class).to(device)
    else:
        print(f"wrong model {args.model}")
        raise(AttributeError)

    if args.restore_ck:
        # if restore_ck is passed, load model from this checkpoint
        load_model(model, args.restore_ck)
    
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
    
    scheduler = StepLR(optimizer, step_size=10, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        st = time.time()
        train(args, model, device, train_loader, optimizer, epoch)
        if not test_loader is None:
            test(model, device, test_loader)
        scheduler.step()
        print("Time for running epoch %i, %.2f\n" % (epoch,time.time() - st))

    if args.save_model:
        torch.save({'model_state_dict': model.state_dict()}, args.checkpoint)

In [10]:
# run this code to download MNIST dataset. If errors occur, please create a folder named data & extract MNIST.zip to it.
from six.moves import urllib
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)

def load_mnist_data(batch_size, use_cuda=False, is_train=True):
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
    kwargs = {'batch_size': batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        kwargs.update(cuda_kwargs)
    data = datasets.MNIST('./data', train=is_train, download=True, transform=transform)
    loader = torch.utils.data.DataLoader(data,**kwargs)
    return loader

## Problem 1. ANN implimentation on mnist
- Horay! I got 98% Accuracy!

In [11]:
args = Arg(model="ANN",
           batch_size = 1000,
           test_batch_size = 1000,
           epochs = 10,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           checkpoint = "./mnist_ann.pt",
           num_class = 10,
           restore_ck="")
# use_cuda = not args.no_cuda and torch.cuda.is_available()
use_cuda = True
train_loader = load_mnist_data(args.batch_size, use_cuda, True)
test_loader = load_mnist_data(args.test_batch_size, use_cuda, False)
main(args, train_loader, test_loader, use_cuda)


Test set: Average loss: 0.2931, Accuracy: 9066/10000 (90.6600%)

Time for running epoch 1, 4.73


Test set: Average loss: 0.1485, Accuracy: 9553/10000 (95.5300%)

Time for running epoch 2, 4.51


Test set: Average loss: 0.1171, Accuracy: 9631/10000 (96.3100%)

Time for running epoch 3, 4.65


Test set: Average loss: 0.1920, Accuracy: 9373/10000 (93.7300%)

Time for running epoch 4, 4.69


Test set: Average loss: 0.0895, Accuracy: 9712/10000 (97.1200%)

Time for running epoch 5, 4.71


Test set: Average loss: 0.0785, Accuracy: 9756/10000 (97.5600%)

Time for running epoch 6, 4.80


Test set: Average loss: 0.0802, Accuracy: 9749/10000 (97.4900%)

Time for running epoch 7, 4.48


Test set: Average loss: 0.0725, Accuracy: 9785/10000 (97.8500%)

Time for running epoch 8, 4.56


Test set: Average loss: 0.0684, Accuracy: 9791/10000 (97.9100%)

Time for running epoch 9, 4.50


Test set: Average loss: 0.0640, Accuracy: 9808/10000 (98.0800%)

Time for running epoch 10, 4.62



## Problem 2. CNN implementation on minst
- Horay! I got about 99% accuracy!

In [12]:
args = Arg(model="CNN",
           batch_size = 1000,
           test_batch_size = 1000,
           epochs = 10,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           checkpoint = "./mnist_cnn.pt",
           num_class = 10,
           restore_ck="")
# use_cuda = not args.no_cuda and torch.cuda.is_available()
use_cuda = True
train_loader = load_mnist_data(args.batch_size, use_cuda, True)
test_loader = load_mnist_data(args.test_batch_size, use_cuda, False)
main(args, train_loader, test_loader, use_cuda)


Test set: Average loss: 0.1326, Accuracy: 9686/10000 (96.8600%)

Time for running epoch 1, 4.41


Test set: Average loss: 0.0651, Accuracy: 9814/10000 (98.1400%)

Time for running epoch 2, 4.66


Test set: Average loss: 0.0510, Accuracy: 9837/10000 (98.3700%)

Time for running epoch 3, 4.64


Test set: Average loss: 0.0483, Accuracy: 9856/10000 (98.5600%)

Time for running epoch 4, 4.59


Test set: Average loss: 0.0448, Accuracy: 9865/10000 (98.6500%)

Time for running epoch 5, 4.73


Test set: Average loss: 0.0424, Accuracy: 9871/10000 (98.7100%)

Time for running epoch 6, 4.47


Test set: Average loss: 0.0390, Accuracy: 9882/10000 (98.8200%)

Time for running epoch 7, 4.66


Test set: Average loss: 0.0381, Accuracy: 9880/10000 (98.8000%)

Time for running epoch 8, 4.75


Test set: Average loss: 0.0382, Accuracy: 9885/10000 (98.8500%)

Time for running epoch 9, 4.66


Test set: Average loss: 0.0342, Accuracy: 9892/10000 (98.9200%)

Time for running epoch 10, 4.57



## Problem 3. Different settings
- I removed some layers starting from the CNN network used in problem 2
- CNN1: removed batchnorm and dropout layer, 98.93 accuracy
- CNN2: removed batchnorm layer, 98.88 accuracy
- CNN3: removed dropout layer, 99.16% accuracy

In [13]:
args = Arg(model="CNN1",
           batch_size = 1000,
           test_batch_size = 1000,
           epochs = 10,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           checkpoint = "./mnist_cnn1.pt",
           num_class = 10,
           restore_ck="")
# use_cuda = not args.no_cuda and torch.cuda.is_available()
use_cuda = True
train_loader = load_mnist_data(args.batch_size, use_cuda, True)
test_loader = load_mnist_data(args.test_batch_size, use_cuda, False)
main(args, train_loader, test_loader, use_cuda)


Test set: Average loss: 0.2365, Accuracy: 9321/10000 (93.2100%)

Time for running epoch 1, 4.50


Test set: Average loss: 0.0951, Accuracy: 9709/10000 (97.0900%)

Time for running epoch 2, 4.52


Test set: Average loss: 0.0706, Accuracy: 9777/10000 (97.7700%)

Time for running epoch 3, 4.60


Test set: Average loss: 0.0931, Accuracy: 9729/10000 (97.2900%)

Time for running epoch 4, 4.50


Test set: Average loss: 0.0579, Accuracy: 9817/10000 (98.1700%)

Time for running epoch 5, 4.67


Test set: Average loss: 0.0440, Accuracy: 9847/10000 (98.4700%)

Time for running epoch 6, 4.58


Test set: Average loss: 0.0366, Accuracy: 9878/10000 (98.7800%)

Time for running epoch 7, 4.70


Test set: Average loss: 0.0331, Accuracy: 9888/10000 (98.8800%)

Time for running epoch 8, 4.68


Test set: Average loss: 0.0306, Accuracy: 9901/10000 (99.0100%)

Time for running epoch 9, 4.46


Test set: Average loss: 0.0337, Accuracy: 9893/10000 (98.9300%)

Time for running epoch 10, 4.58



In [15]:
args = Arg(model="CNN2",
           batch_size = 1000,
           test_batch_size = 1000,
           epochs = 10,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           checkpoint = "./mnist_cnn2.pt",
           num_class = 10,
           restore_ck="")
# use_cuda = not args.no_cuda and torch.cuda.is_available()
use_cuda = True
train_loader = load_mnist_data(args.batch_size, use_cuda, True)
test_loader = load_mnist_data(args.test_batch_size, use_cuda, False)
main(args, train_loader, test_loader, use_cuda)


Test set: Average loss: 0.2274, Accuracy: 9310/10000 (93.1000%)

Time for running epoch 1, 4.70


Test set: Average loss: 0.0977, Accuracy: 9681/10000 (96.8100%)

Time for running epoch 2, 4.66


Test set: Average loss: 0.0687, Accuracy: 9777/10000 (97.7700%)

Time for running epoch 3, 4.62


Test set: Average loss: 0.0613, Accuracy: 9820/10000 (98.2000%)

Time for running epoch 4, 4.54


Test set: Average loss: 0.0695, Accuracy: 9762/10000 (97.6200%)

Time for running epoch 5, 4.58


Test set: Average loss: 0.0426, Accuracy: 9867/10000 (98.6700%)

Time for running epoch 6, 4.60


Test set: Average loss: 0.0356, Accuracy: 9874/10000 (98.7400%)

Time for running epoch 7, 4.63


Test set: Average loss: 0.0314, Accuracy: 9894/10000 (98.9400%)

Time for running epoch 8, 4.56


Test set: Average loss: 0.0305, Accuracy: 9901/10000 (99.0100%)

Time for running epoch 9, 4.61


Test set: Average loss: 0.0352, Accuracy: 9888/10000 (98.8800%)

Time for running epoch 10, 4.70



In [16]:
args = Arg(model="CNN3",
           batch_size = 1000,
           test_batch_size = 1000,
           epochs = 10,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           checkpoint = "./mnist_cnn3.pt",
           num_class = 10,
           restore_ck="")
# use_cuda = not args.no_cuda and torch.cuda.is_available()
use_cuda = True
train_loader = load_mnist_data(args.batch_size, use_cuda, True)
test_loader = load_mnist_data(args.test_batch_size, use_cuda, False)
main(args, train_loader, test_loader, use_cuda)


Test set: Average loss: 0.0894, Accuracy: 9850/10000 (98.5000%)

Time for running epoch 1, 4.73


Test set: Average loss: 0.0445, Accuracy: 9880/10000 (98.8000%)

Time for running epoch 2, 4.43


Test set: Average loss: 0.0358, Accuracy: 9899/10000 (98.9900%)

Time for running epoch 3, 4.66


Test set: Average loss: 0.0315, Accuracy: 9904/10000 (99.0400%)

Time for running epoch 4, 4.56


Test set: Average loss: 0.0354, Accuracy: 9896/10000 (98.9600%)

Time for running epoch 5, 4.71


Test set: Average loss: 0.0309, Accuracy: 9903/10000 (99.0300%)

Time for running epoch 6, 4.48


Test set: Average loss: 0.0303, Accuracy: 9905/10000 (99.0500%)

Time for running epoch 7, 4.79


Test set: Average loss: 0.0300, Accuracy: 9909/10000 (99.0900%)

Time for running epoch 8, 4.62


Test set: Average loss: 0.0296, Accuracy: 9906/10000 (99.0600%)

Time for running epoch 9, 4.64


Test set: Average loss: 0.0280, Accuracy: 9916/10000 (99.1600%)

Time for running epoch 10, 4.67



## Problem 4. EMnist
- This time, I added both dropout and batchnorm layers
- Horay! I got 83.98% accuracy!

In [17]:
# create a data folder & unzip emnist dataset to it. Then, you should see two folder under data (emnist_balance, emnist_test)
args = Arg(model="CNN",
           batch_size = 3000,
           test_batch_size = 1000,
           epochs = 30,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           num_class = 26,
           checkpoint = "./emnist.pt",
           restore_ck="")

def loader(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return img

train_data = ImageFolder(root="./data/emnist_balance", transform=transforms.ToTensor(), loader=loader)
test_data = ImageFolder(root="./data/emnist_test", transform=transforms.ToTensor(), loader=loader)
train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=True, num_workers=2)
main(args, train_loader, test_loader, use_cuda=True)


Test set: Average loss: 1.7689, Accuracy: 13059/26000 (50.2269%)

Time for running epoch 1, 3.62


Test set: Average loss: 1.0512, Accuracy: 17988/26000 (69.1846%)

Time for running epoch 2, 3.65


Test set: Average loss: 0.8759, Accuracy: 18945/26000 (72.8654%)

Time for running epoch 3, 3.64


Test set: Average loss: 0.7778, Accuracy: 19667/26000 (75.6423%)

Time for running epoch 4, 3.65


Test set: Average loss: 0.7144, Accuracy: 20071/26000 (77.1962%)

Time for running epoch 5, 3.67


Test set: Average loss: 0.6769, Accuracy: 20475/26000 (78.7500%)

Time for running epoch 6, 3.65


Test set: Average loss: 0.6525, Accuracy: 20643/26000 (79.3962%)

Time for running epoch 7, 3.57


Test set: Average loss: 0.6224, Accuracy: 20882/26000 (80.3154%)

Time for running epoch 8, 3.70


Test set: Average loss: 0.6123, Accuracy: 21019/26000 (80.8423%)

Time for running epoch 9, 3.55


Test set: Average loss: 0.6085, Accuracy: 21043/26000 (80.9346%)

Time for running epoch 10, 3.75


Test set

## Problem 5. Transfer learning
- Mnist -> Eminst using checkpoint from the problem 2
- I don't think it's proper to use network that trained using easier problem

In [22]:
args = Arg(model="CNN",
           batch_size = 3000,
           test_batch_size = 1000,
           epochs = 5,
           lr = 1.0,
           gamma = 0.7,
           seed = 1234,
           log_interval = 10,
           no_cuda = False,
           dry_run = False,
           save_model = True,
           num_class = 26,
           checkpoint = "./emnist_transfer_from_mnist_cnn.pt",
           restore_ck="./mnist_cnn.pt")

def loader(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return img

train_data = ImageFolder(root="./data/emnist_balance", transform=transforms.ToTensor(), loader=loader)
test_data = ImageFolder(root="./data/emnist_test", transform=transforms.ToTensor(), loader=loader)
train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=True, num_workers=2)
main(args, train_loader, test_loader, use_cuda=True)

RuntimeError: Error(s) in loading state_dict for CNN:
	Missing key(s) in state_dict: "model.0.weight", "model.0.bias", "model.2.weight", "model.2.bias", "model.2.running_mean", "model.2.running_var", "model.5.weight", "model.5.bias", "model.7.weight", "model.7.bias", "model.7.running_mean", "model.7.running_var", "model.11.weight", "model.11.bias", "model.12.weight", "model.12.bias", "model.12.running_mean", "model.12.running_var", "model.15.weight", "model.15.bias", "model.16.weight", "model.16.bias", "model.16.running_mean", "model.16.running_var", "model.19.weight", "model.19.bias". 
	Unexpected key(s) in state_dict: "model_state_dict". 