# CNN from scratch

## Setup


In [1]:
import torch
import torch.nn as nn
from tensorboardX import SummaryWriter
from pa2_sample_code import get_datasets

# get_datasets() returns a (train, eval) pair of datasets.
train_data, eval_data = get_datasets()

# Carve an 80/20 holdout split out of the training set; the two pieces are
# used below for choosing hyper-parameters.
num_train_samples = len(train_data)
holdout_train_len = int(num_train_samples * 0.8)
# The remainder goes to the holdout-eval part, so the two lengths always
# add up to the full training set size.
holdout_eval_len = num_train_samples - holdout_train_len
holdout_split = torch.utils.data.random_split(
    train_data,
    [holdout_train_len, holdout_eval_len],
)
holdout_train_data, holdout_eval_data = holdout_split

## Define model

In [2]:
class CnnFromScratch(nn.Module):
    """Small CNN classifier: a convolutional encoder followed by an MLP head.

    The encoder takes single-channel images and must reduce each one to
    exactly 32 values (32 channels at 1x1 spatial size), because ``forward``
    flattens its output to ``(batch, 32)``; a 1x28x28 input satisfies this.
    The predictor maps those 32 features to 47 class logits through one
    hidden layer of ``n_hidden`` units.
    """

    def __init__(self, n_hidden):
        """Build the encoder, the predictor head and the loss function.

        Args:
            n_hidden: width of the hidden layer in the predictor head.
        """
        super(CnnFromScratch, self).__init__()

        # (in_channels, out_channels, stride) of the leading 3x3 conv layers,
        # each of which is followed by a ReLU.
        leading_convs = [(1, 4, 1), (4, 8, 2), (8, 16, 2)]
        encoder_layers = []
        for c_in, c_out, stride in leading_convs:
            encoder_layers.append(
                nn.Conv2d(in_channels=c_in, out_channels=c_out,
                          kernel_size=3, stride=stride, padding=0)
            )
            encoder_layers.append(nn.ReLU())
        encoder_layers.append(nn.MaxPool2d(kernel_size=3, stride=1, padding=0))
        encoder_layers.append(
            nn.Conv2d(in_channels=16, out_channels=32,
                      kernel_size=3, stride=1, padding=0)
        )
        encoder_layers.append(nn.Sigmoid())
        self.encoder = nn.Sequential(*encoder_layers)

        head_layers = [
            nn.Linear(in_features=32, out_features=n_hidden),
            nn.ReLU(),
            nn.Linear(in_features=n_hidden, out_features=47),
        ]
        self.predictor = nn.Sequential(*head_layers)

        # Summed here and divided by the batch size in ``loss``.
        self.loss_func = nn.CrossEntropyLoss(reduction='sum')

    def forward(self, in_data):
        """Return class logits of shape (batch, 47) for a batch of images."""
        batch_size = in_data.size(0)
        encoded = self.encoder(in_data)
        # Flatten to one 32-dim feature vector per sample.
        img_features = encoded.view(batch_size, 32)
        return self.predictor(img_features)

    def loss(self, logits, labels):
        """Return the cross-entropy loss averaged over the batch."""
        summed = self.loss_func(logits, labels)
        return summed / logits.size(0)

    def top_k_acc(self, logits, labels, k=1):
        """Return the fraction of samples whose label is among the top-k logits.

        Args:
            logits: tensor of shape (n, num_classes).
            labels: tensor of shape (n) holding class indices.
            k: number of highest-scoring classes counted as a hit.

        Returns:
            A Python float: number of hits divided by n.
        """
        top_indices = torch.topk(logits, k=k, dim=1)[1]  # shape (n, k)
        # (n, 1) labels broadcast against the (n, k) predictions; each row has
        # at most one match because top-k indices are distinct.
        hits = top_indices.eq(labels.unsqueeze(dim=1))
        num_correct = hits.sum().float().item()
        return num_correct / labels.size(0)
        

## Refactored runner function

In [3]:
def run(model, loaders, optimizer, writer, num_epoch=10, device='cpu'):
    """Train and evaluate ``model`` for ``num_epoch`` epochs.

    Every epoch makes one pass over ``loaders['train']`` with optimizer
    updates, then one pass over ``loaders['eval']`` without updates. For each
    pass the loss, top-1 and top-3 accuracy (averaged over batches) are
    printed and, when ``writer`` is given, logged through
    ``writer.add_scalars``.

    Args:
        model: an ``nn.Module`` that also provides ``loss(logits, labels)``
            and ``top_k_acc(logits, labels, k)``.
        loaders: mapping with 'train' and 'eval' keys; each value is iterated
            to yield ``(in_data, labels)`` batches.
        optimizer: optimizer over the model's parameters.
        writer: object with an ``add_scalars`` method, or None to skip
            logging.
        num_epoch: number of train + eval epochs to run.
        device: device the model and every batch are moved to.

    Raises:
        ValueError: if a loader yields no batches.
    """
    tag_prefix = model.__class__.__name__
    was_training = model.training
    # Batches are moved to ``device`` below, so the model has to live there
    # too; nn.Module.to moves the parameters in place.
    model.to(device)

    def run_epoch(mode, epoch):
        is_train = mode == 'train'
        model.train(is_train)

        epoch_loss = 0.0
        epoch_top1 = 0.0
        epoch_top3 = 0.0
        num_batches = 0
        # No autograd graph is needed for evaluation passes.
        with torch.set_grad_enabled(is_train):
            for in_data, labels in loaders[mode]:
                in_data, labels = in_data.to(device), labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                logits = model(in_data)
                batch_loss = model.loss(logits, labels)

                epoch_loss += batch_loss.item()
                epoch_top1 += model.top_k_acc(logits, labels, k=1)
                epoch_top3 += model.top_k_acc(logits, labels, k=3)
                num_batches += 1

                if is_train:
                    batch_loss.backward()
                    optimizer.step()

        if num_batches == 0:
            raise ValueError('loader for mode %r yielded no batches' % mode)

        # sum over all batches / number of batches
        epoch_loss /= num_batches
        epoch_top1 /= num_batches
        epoch_top3 /= num_batches

        print('epoch %d %s loss %.4f top1 %.4f top3 %.4f' % (epoch, mode, epoch_loss, epoch_top1, epoch_top3))
        # log to tensorboard
        if writer is not None:
            metrics = (
                ('loss', epoch_loss),
                ('top1', epoch_top1),
                ('top3', epoch_top3),
            )
            for metric_name, value in metrics:
                writer.add_scalars('%s_%s' % (tag_prefix, metric_name),
                                   tag_scalar_dict={mode: value},
                                   global_step=epoch)

    try:
        for epoch in range(num_epoch):
            run_epoch('train', epoch)
            run_epoch('eval', epoch)
    finally:
        # Leave the model in the train/eval mode the caller handed it over in.
        model.train(was_training)

## Holdout validation for choosing hyper-parameters

In [4]:
# Holdout grid search: every hidden size is tried with every optimizer
# configuration, each for 10 epochs on the holdout split.

# The loaders do not depend on the configuration, so build them once.
holdout_loaders = {
    'train': torch.utils.data.DataLoader(holdout_train_data, batch_size=32, shuffle=True),
    'eval': torch.utils.data.DataLoader(holdout_eval_data, batch_size=32, shuffle=True),
}
optim_confs = [
    {'optim': 'adam', 'lr': 0.001},
    {'optim': 'sgd', 'lr': 0.1},
    {'optim': 'sgd', 'lr': 0.01},
]

for n_hidden in [32, 64]:
    for optim_conf in optim_confs:
        # A fresh model per configuration so the runs are independent.
        model = CnnFromScratch(n_hidden=n_hidden)
        if optim_conf['optim'] == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=optim_conf['lr'])
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=optim_conf['lr'])
        # e.g. "32_adam_0.001"; also used as the log sub-directory name.
        conf_str = '%d_%s_%s' % (n_hidden, optim_conf['optim'], optim_conf['lr'])
        print(conf_str)
        writer = SummaryWriter('./logs/cnn_scratch/%s' % (conf_str))
        try:
            run(
                model=model,
                loaders=holdout_loaders,
                optimizer=optimizer,
                writer=writer,
                num_epoch=10,
                device='cpu'
            )
        finally:
            # Close the writer so its events are flushed and the file handle
            # is released before the next configuration starts.
            writer.close()

32_adam_0.001
epoch 0 train loss 2.8782 top1 0.2135 top3 0.4182
epoch 0 eval loss 2.0605 top1 0.3944 top3 0.6891
epoch 1 train loss 1.7449 top1 0.4777 top3 0.7673
epoch 1 eval loss 1.5698 top1 0.5270 top3 0.8091
epoch 2 train loss 1.3929 top1 0.5760 top3 0.8448
epoch 2 eval loss 1.3536 top1 0.5829 top3 0.8504
epoch 3 train loss 1.1874 top1 0.6328 top3 0.8810
epoch 3 eval loss 1.2666 top1 0.6076 top3 0.8664
epoch 4 train loss 1.0622 top1 0.6669 top3 0.9001
epoch 4 eval loss 1.0640 top1 0.6694 top3 0.8981
epoch 5 train loss 0.9827 top1 0.6890 top3 0.9120
epoch 5 eval loss 0.9844 top1 0.6887 top3 0.9129
epoch 6 train loss 0.9265 top1 0.7039 top3 0.9193
epoch 6 eval loss 0.9332 top1 0.7059 top3 0.9209
epoch 7 train loss 0.8821 top1 0.7181 top3 0.9268
epoch 7 eval loss 0.9212 top1 0.7084 top3 0.9198
epoch 8 train loss 0.8477 top1 0.7283 top3 0.9313
epoch 8 eval loss 0.8868 top1 0.7211 top3 0.9238
epoch 9 train loss 0.8172 top1 0.7363 top3 0.9355
epoch 9 eval loss 0.8360 top1 0.7324 top3 0.9

## Training final model
Selected hyper-parameters:
- Hidden layer size: 32
- Optimizer: Adam, learning rate 0.001

In [5]:
# Train the final model 5 times from scratch on the full training set and
# evaluate each run on eval_data.

# The loaders are the same for every run, so build them once.
final_loaders = {
    'train': torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True),
    'eval': torch.utils.data.DataLoader(eval_data, batch_size=32, shuffle=True),
}

for i in range(5):
    # NOTE(review): the notes above this cell say hidden layer size 32 was
    # selected, but this trains with n_hidden=64 -- confirm which is intended.
    model = CnnFromScratch(n_hidden=64)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    conf_str = 'final_'+str(i)
    print(conf_str)
    writer = SummaryWriter('./logs/cnn_scratch/%s' % (conf_str))
    try:
        run(
            model=model,
            loaders=final_loaders,
            optimizer=optimizer,
            writer=writer,
            num_epoch=20,
            device='cpu'
        )
    finally:
        # Close the writer so its events are flushed and the file handle is
        # released before the next run starts.
        writer.close()

final_0
epoch 0 train loss 2.3090 top1 0.3533 top3 0.5909
epoch 0 eval loss 1.4254 top1 0.5661 top3 0.8350
epoch 1 train loss 1.1889 top1 0.6300 top3 0.8812
epoch 1 eval loss 1.0889 top1 0.6597 top3 0.8972
epoch 2 train loss 0.9718 top1 0.6904 top3 0.9136
epoch 2 eval loss 0.9607 top1 0.6961 top3 0.9144
epoch 3 train loss 0.8717 top1 0.7208 top3 0.9273
epoch 3 eval loss 0.8733 top1 0.7206 top3 0.9282
epoch 4 train loss 0.8051 top1 0.7394 top3 0.9362
epoch 4 eval loss 0.7971 top1 0.7448 top3 0.9377
epoch 5 train loss 0.7500 top1 0.7570 top3 0.9437
epoch 5 eval loss 0.7634 top1 0.7581 top3 0.9405
epoch 6 train loss 0.7106 top1 0.7676 top3 0.9488
epoch 6 eval loss 0.7768 top1 0.7474 top3 0.9392
epoch 7 train loss 0.6789 top1 0.7786 top3 0.9528
epoch 7 eval loss 0.6929 top1 0.7730 top3 0.9539
epoch 8 train loss 0.6528 top1 0.7849 top3 0.9560
epoch 8 eval loss 0.6798 top1 0.7782 top3 0.9512
epoch 9 train loss 0.6323 top1 0.7897 top3 0.9572
epoch 9 eval loss 0.6597 top1 0.7864 top3 0.9540
ep

epoch 2 train loss 0.9588 top1 0.6952 top3 0.9166
epoch 2 eval loss 0.9060 top1 0.7110 top3 0.9229
epoch 3 train loss 0.8690 top1 0.7211 top3 0.9289
epoch 3 eval loss 0.8761 top1 0.7222 top3 0.9285
epoch 4 train loss 0.8084 top1 0.7390 top3 0.9375
epoch 4 eval loss 0.8181 top1 0.7407 top3 0.9345
epoch 5 train loss 0.7622 top1 0.7541 top3 0.9424
epoch 5 eval loss 0.7863 top1 0.7499 top3 0.9406
epoch 6 train loss 0.7227 top1 0.7646 top3 0.9471
epoch 6 eval loss 0.7270 top1 0.7640 top3 0.9468
epoch 7 train loss 0.6940 top1 0.7725 top3 0.9519
epoch 7 eval loss 0.7259 top1 0.7651 top3 0.9454
epoch 8 train loss 0.6656 top1 0.7816 top3 0.9540
epoch 8 eval loss 0.6868 top1 0.7757 top3 0.9504
epoch 9 train loss 0.6454 top1 0.7865 top3 0.9561
epoch 9 eval loss 0.6703 top1 0.7788 top3 0.9546
epoch 10 train loss 0.6289 top1 0.7907 top3 0.9593
epoch 10 eval loss 0.6665 top1 0.7819 top3 0.9533
epoch 11 train loss 0.6077 top1 0.7985 top3 0.9627
epoch 11 eval loss 0.6397 top1 0.7895 top3 0.9572
epoch 