<a href="https://colab.research.google.com/github/ishandahal/stats453-deep_learning_torch/blob/main/Conv/Network_in_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Network in Network CIFAR-10 Classifier
Based on:
- Lin, Min, Qiang Chen, and Shuicheng Yan. "Network in network." arXiv preprint arXiv:1312.4400 (2013).

### Imports

In [None]:
import time


import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset

from torchvision import datasets
from torchvision import transforms

import matplotlib.pyplot as plt
from PIL import Image


# Only touch the cuDNN backend when a CUDA device is actually present.
# Setting the `deterministic` flag asks cuDNN for deterministic kernels so
# that repeated runs with the same seed are comparable.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

## Model Settings 

In [None]:
### SETTINGS
##########################

# Hyperparameters
RANDOM_SEED = 1        # passed to torch.manual_seed before model init and training
LEARNING_RATE = 0.001  # step size handed to the Adam optimizer
BATCH_SIZE = 256       # mini-batch size shared by all three data loaders
NUM_EPOCHS = 50        # number of full passes over the training subset

# Architecture
NUM_CLASSES = 10       # number of output classes of the classifier

# Other
# Use the first GPU when one is available, otherwise fall back to the CPU so
# the notebook still runs (slowly) on machines without CUDA instead of
# crashing on the first `.to(DEVICE)` call.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
GRAYSCALE = False      # not referenced by the cells visible in this notebook

In [None]:
### CIFAR-10 Dataset

# The official 50,000-image training split is divided by position:
# the first 49,000 images are used for training, the last 1,000 for validation.
train_indices = torch.arange(49000)
valid_indices = torch.arange(49000, 50000)

train_and_valid = datasets.CIFAR10(root='data',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=True)

train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)

# NOTE: `train` must be the boolean False. The string 'False' is truthy and
# would silently load the *training* split again, making the reported test
# accuracy meaningless.
test_dataset = datasets.CIFAR10(root='data',
                                train=False,
                                transform=transforms.ToTensor())

## DataLoader 

# Only the training loader shuffles; validation and test order is fixed.
train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=4,
                          shuffle=True)

valid_loader = DataLoader(valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=4)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=4)

### Checking dataset 

# Print the shape of the first batch of every loader as a quick sanity check.
for loader in (train_loader, valid_loader, test_loader):
    for image, labels in loader:
        print("Image batch dimensions: ", image.size())
        print("Labels batch dimensions: ", labels.size())
        break

Files already downloaded and verified
Image batch dimensions:  torch.Size([256, 3, 32, 32])
Labels batch dimensions:  torch.Size([256])
Image batch dimensions:  torch.Size([256, 3, 32, 32])
Labels batch dimensions:  torch.Size([256])
Image batch dimensions:  torch.Size([256, 3, 32, 32])
Labels batch dimensions:  torch.Size([256])


In [None]:
### Model 

class NiN(nn.Module):
    """Network-in-Network style convolutional classifier.

    Three stages, each made of one spatial convolution followed by two 1x1
    convolutions, with a ReLU after every convolution. The first two stages
    end in a stride-2 pooling layer and dropout. The last stage emits one
    feature map per class and averages each map down to a single value.

    The final ``AvgPool2d(kernel_size=8)`` combined with the reshape in
    ``forward`` requires an 8x8 map at that point, which is what a 32x32
    input produces after the two stride-2 pooling layers.

    Args:
        num_classes: number of output classes; sets the channel count of the
            last 1x1 convolution.
    """

    def __init__(self, num_classes):
        super(NiN, self).__init__()
        self.classes = num_classes
        self.classifier = nn.Sequential(
            # Stage 1: 32x32 -> 16x16 (for 32x32 inputs)
            nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(160, 96, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Dropout(0.5),

            # Stage 2: 16x16 -> 8x8
            nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
            nn.Dropout(0.5),

            # Stage 3: one feature map per class, then global average pooling
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            # Use num_classes here instead of a hard-coded 10 so the
            # constructor argument actually controls the output size.
            nn.Conv2d(192, num_classes, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=8, stride=1, padding=0),

        )

    def forward(self, x):
        """Return ``(logits, probas)``, each of shape ``(batch, num_classes)``."""
        x = self.classifier(x)
        # The pooled map is (batch, num_classes, 1, 1); drop the spatial dims.
        # `view` raises if the map is not 1x1, which flags a wrong input size.
        logits = x.view(x.size(0), x.size(1))
        probas = torch.softmax(logits, dim=1)
        return logits, probas

In [None]:
# Seed first so that the randomly initialised weights are reproducible.
torch.manual_seed(RANDOM_SEED)

# Build the network and move its parameters to the target device in one go.
model = NiN(NUM_CLASSES).to(DEVICE)

# Adam over every parameter of the network.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
def compute_accuracy(model, data_loader, device):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    The model is put into evaluation mode for the duration of the call, so
    layers such as dropout do not distort the predictions, and gradient
    tracking is disabled. The model's previous train/eval mode is restored
    before returning.

    Args:
        model: module whose forward pass returns ``(logits, probas)``.
        data_loader: iterable yielding ``(features, targets)`` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        A 0-dim float tensor holding the accuracy in the range [0, 100].

    Raises:
        ValueError: if ``data_loader`` yields no examples.
    """
    was_training = model.training
    model.eval()
    correct_pred, num_examples = 0, 0
    try:
        with torch.no_grad():
            for features, targets in data_loader:

                features = features.to(device)
                targets = targets.to(device)

                logits, probas = model(features)
                # The predicted class is the one with the highest probability.
                _, predicted_labels = torch.max(probas, 1)
                num_examples += targets.size(0)
                correct_pred += (predicted_labels == targets).sum()
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if num_examples == 0:
        raise ValueError("data_loader yielded no examples; accuracy is undefined")
    return correct_pred.float()/num_examples * 100
    

start_time = time.time()

# use random seed for reproducibility (here batch shuffling)
torch.manual_seed(RANDOM_SEED)

for epoch in range(NUM_EPOCHS):

    # Training mode: the Dropout layers are active while fitting.
    model.train()

    for batch_idx, (features, targets) in enumerate(train_loader):

        ### PREPARE MINIBATCH
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        # cross_entropy expects raw logits, not the softmax probabilities.
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 120:
            print (f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                   f'Batch {batch_idx:03d}/{len(train_loader):03d} |' 
                   f' Cost: {cost:.4f}')

    # Switch to evaluation mode so Dropout is disabled while measuring
    # accuracy; otherwise the reported numbers are noisy and too low.
    # model.train() at the top of the next epoch re-enables it.
    model.eval()
    # no need to build the computation graph for backprop when computing accuracy
    with torch.set_grad_enabled(False):
        train_acc = compute_accuracy(model, train_loader, device=DEVICE)
        valid_acc = compute_accuracy(model, valid_loader, device=DEVICE)
        print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')

    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')

elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {elapsed:.2f} min')

Epoch: 001/050 | Batch 000/192 | Cost: 2.3047
Epoch: 001/050 | Batch 120/192 | Cost: 2.1502
Epoch: 001/050 Train Acc.: 23.81% | Validation Acc.: 22.70%
Time elapsed: 0.53 min
Epoch: 002/050 | Batch 000/192 | Cost: 2.0659
Epoch: 002/050 | Batch 120/192 | Cost: 2.0531
Epoch: 002/050 Train Acc.: 26.16% | Validation Acc.: 23.90%
Time elapsed: 1.08 min
Epoch: 003/050 | Batch 000/192 | Cost: 2.0478
Epoch: 003/050 | Batch 120/192 | Cost: 1.8817
Epoch: 003/050 Train Acc.: 34.45% | Validation Acc.: 33.80%
Time elapsed: 1.65 min
Epoch: 004/050 | Batch 000/192 | Cost: 1.9123
Epoch: 004/050 | Batch 120/192 | Cost: 1.8682
Epoch: 004/050 Train Acc.: 37.00% | Validation Acc.: 36.40%
Time elapsed: 2.24 min
Epoch: 005/050 | Batch 000/192 | Cost: 1.7955
Epoch: 005/050 | Batch 120/192 | Cost: 1.8183
Epoch: 005/050 Train Acc.: 39.05% | Validation Acc.: 38.20%
Time elapsed: 2.81 min
Epoch: 006/050 | Batch 000/192 | Cost: 1.6957
Epoch: 006/050 | Batch 120/192 | Cost: 1.7128
Epoch: 006/050 Train Acc.: 40.07%

In [None]:
# Evaluate on the test loader with Dropout disabled and without tracking
# gradients; the training loop leaves the model in whatever mode it last set.
model.eval()
with torch.no_grad():
    test_acc = compute_accuracy(model, test_loader, DEVICE)
print(f"Test accuracy: {test_acc:.2f}%")

Test accuracy: 70.93%
