# CNN from scratch

## Setup


In [1]:
import torch
import torch.nn as nn
from tensorboardX import SummaryWriter
from pa2_sample_code import get_datasets

# Load the training and evaluation datasets supplied by the sample code.
train_data, eval_data = get_datasets()

# Carve a random 80/20 hold-out split out of the training set; the smaller
# part is only used to compare hyper-parameter settings.
holdout_eval_len = len(train_data) - int(len(train_data) * 0.8)
holdout_train_len = len(train_data) - holdout_eval_len
holdout_train_data, holdout_eval_data = torch.utils.data.random_split(
    train_data,
    [holdout_train_len, holdout_eval_len],
)

## Define model

In [2]:
class CnnFromScratch(nn.Module):
    """Small CNN classifier: a convolutional encoder followed by an MLP head.

    The encoder stacks un-padded 3x3 convolutions and one max-pool and ends
    in a sigmoid.  Its output is reshaped to ``N_FEATURES`` values per
    sample, so the spatial size must have shrunk to 1x1 by then; a 1x28x28
    input does (28 -> 26 -> 12 -> 5 -> 3 -> 1).

    Args:
        n_hidden: width of the single hidden layer of the predictor.
        n_classes: number of output logits (47 by default).
    """

    # Channels produced by the last encoder convolution, i.e. the length of
    # the per-sample feature vector fed to the predictor.
    N_FEATURES = 32

    def __init__(self, n_hidden, n_classes=47):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=0),
            nn.Conv2d(in_channels=16, out_channels=self.N_FEATURES, kernel_size=3, stride=1, padding=0),
            nn.Sigmoid()
        )

        self.predictor = nn.Sequential(
            nn.Linear(in_features=self.N_FEATURES, out_features=n_hidden),
            nn.ReLU(),
            nn.Linear(in_features=n_hidden, out_features=n_classes)
        )

        # Summed here and divided by the batch size in ``loss``.
        self.loss_func = nn.CrossEntropyLoss(reduction='sum')

    def forward(self, in_data):
        """Return the class logits, shape ``(batch, n_classes)``, for ``in_data``."""
        # ``view`` fails loudly if the encoder output is not exactly
        # N_FEATURES values per sample (i.e. the input size is unsupported).
        img_features = self.encoder(in_data).view(in_data.size(0), self.N_FEATURES)
        logits = self.predictor(img_features)
        return logits

    def loss(self, logits, labels):
        """Return the cross-entropy averaged over the batch, as a 0-d tensor."""
        return self.loss_func(logits, labels) / logits.size(0)

    def top_k_acc(self, logits, labels, k=1):
        """Return the fraction of samples whose label is among the top ``k`` logits.

        Args:
            logits: tensor of shape ``(n, n_classes)``.
            labels: tensor of shape ``(n,)`` holding class indices.
            k: how many of the highest-scoring classes count as a hit.

        Returns:
            A Python float in ``[0, 1]``; ``0.0`` for an empty batch.
        """
        num_samples = labels.size(0)
        if num_samples == 0:
            # Nothing to score; avoids dividing by zero below.
            return 0.0
        _, k_labels_pred = torch.topk(logits, k=k, dim=1)  # shape (n, k)
        # The k predicted indices of a row are distinct, so each row can
        # match its label at most once.
        hits = k_labels_pred.eq(labels.unsqueeze(dim=1)).any(dim=1)
        return hits.sum().item() / num_samples
        

## Refactored runner function

In [3]:
def run(model, loaders, optimizer, writer, num_epoch=10, device='cpu'):
    """Train and evaluate ``model`` for ``num_epoch`` epochs.

    Each epoch makes one optimisation pass over ``loaders['train']`` and then
    one gradient-free pass over ``loaders['eval']``.  After every pass the
    mean loss, top-1 and top-3 accuracy are printed and, if ``writer`` is
    given, logged with ``writer.add_scalars``.  The means are weighted by
    batch size, so a short final batch does not skew them.

    Args:
        model: module providing ``loss(logits, labels)`` and
            ``top_k_acc(logits, labels, k)`` besides ``forward``.  It is not
            moved by this function and must already live on ``device``.
        loaders: mapping with the keys ``'train'`` and ``'eval'``; each value
            is an iterable of ``(inputs, labels)`` tensor batches.
        optimizer: optimizer that updates the parameters of ``model``.
        writer: object with an ``add_scalars`` method, or ``None`` to skip
            logging.  It is not closed here; the caller owns it.
        num_epoch: number of train + eval rounds.
        device: device every batch is moved to before the forward pass.

    Raises:
        ValueError: if a loader yields no samples.
    """
    def run_epoch(mode, epoch):
        is_train = mode == 'train'
        # Put layers such as dropout / batch-norm into the matching mode.
        model.train(is_train)

        loss_sum = 0.0
        top1_sum = 0.0
        top3_sum = 0.0
        num_samples = 0
        # Only record the autograd graph when backward() will be called.
        with torch.set_grad_enabled(is_train):
            for in_data, labels in loaders[mode]:
                in_data, labels = in_data.to(device), labels.to(device)
                batch_size = labels.size(0)

                if is_train:
                    optimizer.zero_grad()

                logits = model(in_data)
                batch_loss = model.loss(logits, labels)
                batch_top1 = model.top_k_acc(logits, labels, k=1)
                batch_top3 = model.top_k_acc(logits, labels, k=3)

                # Per-batch means are turned back into sums so the epoch
                # figures are true per-sample averages.
                loss_sum += batch_loss.item() * batch_size
                top1_sum += batch_top1 * batch_size
                top3_sum += batch_top3 * batch_size
                num_samples += batch_size

                if is_train:
                    batch_loss.backward()
                    optimizer.step()

        if num_samples == 0:
            raise ValueError('loader %r yielded no samples' % mode)

        epoch_loss = loss_sum / num_samples
        epoch_top1 = top1_sum / num_samples
        epoch_top3 = top3_sum / num_samples

        print('epoch %d %s loss %.4f top1 %.4f top3 %.4f' % (epoch, mode, epoch_loss, epoch_top1, epoch_top3))
        # log to tensorboard
        if writer is not None:
            model_name = model.__class__.__name__
            for metric, value in (('loss', epoch_loss),
                                  ('top1', epoch_top1),
                                  ('top3', epoch_top3)):
                writer.add_scalars('%s_%s' % (model_name, metric),
                                   tag_scalar_dict={mode: value},
                                   global_step=epoch)

    # Leave the model in the mode the caller handed it over in.
    was_training = model.training
    try:
        for epoch in range(num_epoch):
            run_epoch('train', epoch)
            run_epoch('eval', epoch)
    finally:
        model.train(was_training)

## Holdout validation for choosing hyper-parameters

In [4]:
# Grid search over hidden width and optimizer settings, scored on the
# hold-out split.  Every configuration logs to its own tensorboard run.
for n_hidden in [32, 64]:
    for optim_conf in [
        {'optim':'adam', 'lr':0.001},
        {'optim':'sgd', 'lr':0.1},
        {'optim':'sgd', 'lr':0.01}
    ]:
        model = CnnFromScratch(n_hidden=n_hidden)
        if optim_conf['optim'] == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=optim_conf['lr'])
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=optim_conf['lr'])
        conf_str = str(n_hidden)+'_'+optim_conf['optim']+'_'+str(optim_conf['lr'])
        print(conf_str)
        writer = SummaryWriter('./logs/cnn_scratch/%s' % (conf_str))
        try:
            run(
                model=model,
                loaders={
                    'train': torch.utils.data.DataLoader(holdout_train_data, batch_size=32, shuffle=True),
                    # Sample order is irrelevant for evaluation, so no shuffling.
                    'eval': torch.utils.data.DataLoader(holdout_eval_data, batch_size=32, shuffle=False)
                },
                optimizer=optimizer,
                writer=writer,
                num_epoch=10,
                device='cpu'
            )
        finally:
            # Flush and release the event file even if training fails.
            writer.close()

32_adam_0.001
epoch 0 train loss 2.7725 top1 0.2398 top3 0.4626
epoch 0 eval loss 1.7474 top1 0.4736 top3 0.7831
epoch 1 train loss 1.4188 top1 0.5678 top3 0.8426
epoch 1 eval loss 1.2034 top1 0.6316 top3 0.8846
epoch 2 train loss 1.0835 top1 0.6607 top3 0.9008
epoch 2 eval loss 1.0265 top1 0.6805 top3 0.9085
epoch 3 train loss 0.9338 top1 0.7011 top3 0.9213
epoch 3 eval loss 0.9174 top1 0.7073 top3 0.9253
epoch 4 train loss 0.8451 top1 0.7272 top3 0.9344
epoch 4 eval loss 0.8906 top1 0.7158 top3 0.9312
epoch 5 train loss 0.7868 top1 0.7458 top3 0.9407
epoch 5 eval loss 0.7939 top1 0.7429 top3 0.9403
epoch 6 train loss 0.7403 top1 0.7581 top3 0.9461
epoch 6 eval loss 0.7539 top1 0.7598 top3 0.9457
epoch 7 train loss 0.7049 top1 0.7684 top3 0.9510
epoch 7 eval loss 0.7017 top1 0.7751 top3 0.9498
epoch 8 train loss 0.6731 top1 0.7794 top3 0.9541
epoch 8 eval loss 0.6949 top1 0.7780 top3 0.9529
epoch 9 train loss 0.6528 top1 0.7856 top3 0.9568
epoch 9 eval loss 0.6839 top1 0.7775 top3 0.9

## Training final model
Selected hyper-parameters:
- Hidden units (`n_hidden`): 64
- Optimizer: Adam, learning rate 0.001

In [5]:
# Train the selected configuration five times from fresh random weights,
# evaluating on the evaluation set; each repetition gets its own
# tensorboard run.
for i in range(5):
    model = CnnFromScratch(n_hidden=64)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    conf_str = 'final_'+str(i)
    print(conf_str)
    writer = SummaryWriter('./logs/cnn_scratch/%s' % (conf_str))
    try:
        run(
            model=model,
            loaders={
                'train': torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True),
                # Sample order is irrelevant for evaluation, so no shuffling.
                'eval': torch.utils.data.DataLoader(eval_data, batch_size=32, shuffle=False)
            },
            optimizer=optimizer,
            writer=writer,
            num_epoch=20,
            device='cpu'
        )
    finally:
        # Flush and release the event file even if training fails.
        writer.close()

final_0
epoch 0 train loss 2.4859 top1 0.3039 top3 0.5359
epoch 0 eval loss 1.5378 top1 0.5255 top3 0.8149
epoch 1 train loss 1.2666 top1 0.6044 top3 0.8673
epoch 1 eval loss 1.1240 top1 0.6439 top3 0.8873
epoch 2 train loss 1.0364 top1 0.6720 top3 0.9032
epoch 2 eval loss 1.0010 top1 0.6811 top3 0.9067
epoch 3 train loss 0.9239 top1 0.7036 top3 0.9196
epoch 3 eval loss 0.9013 top1 0.7052 top3 0.9226
epoch 4 train loss 0.8480 top1 0.7281 top3 0.9297
epoch 4 eval loss 0.8534 top1 0.7227 top3 0.9271
epoch 5 train loss 0.7903 top1 0.7443 top3 0.9393
epoch 5 eval loss 0.8013 top1 0.7426 top3 0.9368
epoch 6 train loss 0.7527 top1 0.7546 top3 0.9435
epoch 6 eval loss 0.7738 top1 0.7398 top3 0.9409
epoch 7 train loss 0.7176 top1 0.7656 top3 0.9484
epoch 7 eval loss 0.7760 top1 0.7447 top3 0.9392
epoch 8 train loss 0.6937 top1 0.7722 top3 0.9511
epoch 8 eval loss 0.7055 top1 0.7669 top3 0.9505
epoch 9 train loss 0.6697 top1 0.7788 top3 0.9535
epoch 9 eval loss 0.6968 top1 0.7698 top3 0.9496
ep

epoch 2 train loss 1.0662 top1 0.6590 top3 0.9003
epoch 2 eval loss 1.0111 top1 0.6770 top3 0.9062
epoch 3 train loss 0.9445 top1 0.6979 top3 0.9179
epoch 3 eval loss 0.9109 top1 0.7088 top3 0.9217
epoch 4 train loss 0.8555 top1 0.7246 top3 0.9312
epoch 4 eval loss 0.8829 top1 0.7159 top3 0.9230
epoch 5 train loss 0.7980 top1 0.7421 top3 0.9387
epoch 5 eval loss 0.8174 top1 0.7363 top3 0.9351
epoch 6 train loss 0.7550 top1 0.7542 top3 0.9436
epoch 6 eval loss 0.7587 top1 0.7560 top3 0.9413
epoch 7 train loss 0.7235 top1 0.7645 top3 0.9469
epoch 7 eval loss 0.7397 top1 0.7623 top3 0.9441
epoch 8 train loss 0.6951 top1 0.7705 top3 0.9502
epoch 8 eval loss 0.7274 top1 0.7652 top3 0.9463
epoch 9 train loss 0.6730 top1 0.7791 top3 0.9537
epoch 9 eval loss 0.7145 top1 0.7641 top3 0.9451
epoch 10 train loss 0.6511 top1 0.7863 top3 0.9557
epoch 10 eval loss 0.6797 top1 0.7812 top3 0.9509
epoch 11 train loss 0.6357 top1 0.7906 top3 0.9583
epoch 11 eval loss 0.6651 top1 0.7825 top3 0.9514
epoch 