# Training a ConvNet on CIFAR10


In [None]:
import numpy as np
from datetime import datetime 
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
import torchvision
import matplotlib.pyplot as plt
import ipdb
import time
import collections
from functools import partial
from cifar_model import ConvNet
from utils import *
# Pick the compute device: use the GPU when CUDA is available, otherwise fall
# back to the CPU so the notebook still runs on machines without a GPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# ---- run configuration ----

# Seed handed to torch.manual_seed() before the model is created.
RANDOM_SEED = 42

# Optimiser step size (passed to Adam as lr=).
LEARNING_RATE = 1e-3

# Mini-batch size for the training and validation loaders, and the number of
# worker processes every DataLoader is created with.
BATCH_SIZE = 128
num_workers = 10

# Dataset constants: N_CLASSES is passed to ConvNet(); IMG_SIZE is not
# referenced in the cells visible here.
IMG_SIZE = 32
N_CLASSES = 10

## Data

In [None]:
# Training-time preprocessing: random horizontal flip and a small random
# affine jitter, followed by conversion to a tensor.
transform_train = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(degrees=15, translate=(0.05, 0.05)),
        # transforms.RandomRotation(15),
        transforms.ToTensor(),
    ]
)

# Evaluation-time preprocessing: tensor conversion only, no augmentation.
transform_test = transforms.Compose([transforms.ToTensor()])

# Datasets. Only the training split asks for a download; the validation split
# is read from the same root directory.
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
valid_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform_test,
)

# Data loaders. Training batches are shuffled, validation batches are not.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)
# NOTE(review): despite its name, test_loader iterates train_dataset (including
# the training augmentation), one sample per batch. Confirm this is intended.
test_loader = DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=num_workers,
)

In [None]:
# Fetch the first batch from test_loader (batch_size=1, so one image) and show
# it. `sample` receives the second element of the batch.
# permute(1, 2, 0) moves the first axis of the image to the end before plotting
# (presumably channels-first -> channels-last for imshow; TODO confirm).
image, sample = next(iter(test_loader))
plt.imshow(image[0].permute(1, 2, 0))

In [None]:
# Latest output of each hooked layer, keyed by the layer's name.
activations = collections.defaultdict(list)


def save_activation(name, mod, inp, out):
    """Forward-hook callback that records a layer's most recent output.

    Intended to be bound to a layer name with ``functools.partial`` and then
    registered as a forward hook, which supplies ``mod``, ``inp`` and ``out``.

    Args:
        name: Key under which the output is stored in ``activations``.
        mod: The module that produced the output (unused).
        inp: The module's input (unused).
        out: The module's output; replaces any previous entry for ``name``.
    """
    # Keep the object exactly as received: it is neither detached nor moved
    # to the CPU here.
    activations.update({name: out})

In [None]:
def training_loop(model, criterion, optimizer, train_loader, valid_loader, epochs, device, print_every=1):
    '''
    Train ``model`` for ``epochs`` epochs, validating after every epoch.

    A ``save_activation`` forward hook is attached to every ``nn.Conv2d``
    module of the model so that ``train`` can read the layer outputs from the
    module-level ``activations`` dict. The hooks are removed again before this
    function returns (also when an epoch raises), so calling it repeatedly
    does not pile up duplicate hooks on the same model.

    Args:
        model: Network to train; its forward pass is driven by ``train``,
            ``validate`` and ``get_accuracy``.
        criterion: Loss function passed through to ``train`` and ``validate``.
        optimizer: Optimizer passed through to ``train``.
        train_loader: Loader used for training and for the train accuracy.
        valid_loader: Loader used for validation loss and accuracy.
        epochs: Number of epochs to run.
        device: Device passed through to the helpers.
        print_every: Report losses and accuracies every this many epochs.

    Returns:
        ``(model, optimizer, (train_losses, valid_losses))`` where the two
        lists hold one loss value per epoch.
    '''
    train_losses = []
    valid_losses = []

    # Keep the handles returned by register_forward_hook so the hooks can be
    # detached once training is over.
    hook_handles = [
        module.register_forward_hook(partial(save_activation, name))
        for name, module in model.named_modules()
        if isinstance(module, nn.Conv2d)
    ]
    logged_layers = len(hook_handles)
    # writer = SummaryWriter()

    try:
        for epoch in range(epochs):
            # training
            model, optimizer, train_loss = train(train_loader, model, criterion, optimizer, device, activations, logged_layers)
            train_losses.append(train_loss)
            # Only consumed by the disabled TensorBoard logging below.
            weights, biases = get_weights_biases(model)
#             activations = (out1, out2, out3, out4)
#             for i in range(len(weights)):
#                 writer.add_histogram('Layer' + str(i+1) + '/weights', weights[i], epoch)
#                 writer.add_histogram('Layer' + str(i+1) + '/biases', biases[i], epoch)
#                 writer.add_histogram('Layer' + str(i+1) + '/activations', activations[i], epoch)

            # validation (no gradients needed)
            with torch.no_grad():
                model, valid_loss = validate(valid_loader, model, criterion, device)
                valid_losses.append(valid_loss)

            # Report on the last epoch of every `print_every`-sized window.
            if epoch % print_every == (print_every - 1):
                train_acc = get_accuracy(model, train_loader, device=device)
                valid_acc = get_accuracy(model, valid_loader, device=device)
                print(f'{datetime.now().time().replace(microsecond=0)} --- '
                      f'Epoch: {epoch}\t'
                      f'Train loss: {train_loss:.4f}\t'
                      f'Valid loss: {valid_loss:.4f}\t'
                      f'Train accuracy: {100 * train_acc:.2f}\t'
                      f'Valid accuracy: {100 * valid_acc:.2f}')
#                 writer.add_scalar('Accuracy/train', train_acc, epoch)
#                 writer.add_scalar('Accuracy/test', valid_acc, epoch)
#             writer.add_scalar('Loss/train', train_loss, epoch)
#             writer.add_scalar('Loss/test', valid_loss, epoch)
    finally:
        # Detach the hooks so they neither accumulate across calls nor keep
        # firing on forward passes made after training.
        for handle in hook_handles:
            handle.remove()
    # writer.close()
    plot_losses(train_losses, valid_losses)
    return model, optimizer, (train_losses, valid_losses)

In [None]:
def train(train_loader, model, criterion, optimizer, device, activations, logged_layers):
    '''
    Function for the training step of the training loop.

    Runs one pass over ``train_loader``. For every batch the loss is
    ``criterion(y_hat, y_true)`` plus an activation penalty: the 2-norm of
    each entry of ``activations``, weighted by ``index * 0.2 * beta``.
    NOTE: the weight grows with the entry's position, so the first entry
    (index 0) contributes nothing to the loss.

    Args:
        train_loader: Iterable of ``(X, y_true)`` batches; must expose
            ``.dataset`` because the epoch loss is averaged over its length.
        model: Module whose forward pass returns ``(y_hat, probs)``.
        criterion: Loss applied to ``(y_hat, y_true)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        activations: Mapping of layer name -> latest layer output. It is
            expected to be refreshed on every forward pass (e.g. by forward
            hooks registered by the caller).
        logged_layers: Number of entries expected in ``activations``.

    Returns:
        ``(model, optimizer, epoch_loss)`` with ``epoch_loss`` the
        sample-weighted mean loss over the epoch.
    '''
    norm = 2       # order of the norm used for the activation penalty
    beta = 0.0001  # base strength of the activation penalty

    model.train()
    running_loss = 0
    # Per-layer running maximum over the whole epoch. It is created once,
    # before the batch loop, so the value printed below covers every batch
    # (and exists even when the loader yields nothing).
    max_activities = np.zeros((logged_layers))

    for X, y_true in train_loader:
        optimizer.zero_grad()
        X = X.to(device)
        y_true = y_true.to(device)

        # Forward pass
        y_hat, _ = model(X)

        loss = criterion(y_hat, y_true)
        for layer_idx, output in enumerate(activations.values()):
            # Track the maximum on a detached value so the bookkeeping never
            # touches the autograd graph.
            layer_max = float(output.detach().max())
            if layer_max > max_activities[layer_idx]:
                max_activities[layer_idx] = layer_max
            # Activation penalty, weighted by the layer's position.
            loss = loss + layer_idx * 0.2 * beta * torch.norm(output, norm)

        # Backward pass
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * X.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print("Max activations: {}".format(max_activities))
    return model, optimizer, epoch_loss

In [None]:
# Seed PyTorch's RNG before the network is constructed.
torch.manual_seed(RANDOM_SEED)

# Build the network and move it to the selected device.
model = ConvNet(N_CLASSES)
model = model.to(DEVICE)
# model = nn.DataParallel(model)  # torch.cuda.device_count()

# Cross-entropy loss, optimised with Adam plus a small weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-5,
)

In [None]:
# Train for 80 epochs; the returned loss histories are discarded here.
model, optimizer, _ = training_loop(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    epochs=80,
    device=DEVICE,
)

In [None]:
# Both helpers are defined outside this notebook (presumably in `utils`).
# Judging by its name, biggest_abs_weight reports the largest-magnitude weight
# of the model (TODO confirm). get_weights_biases returns the two collections
# that the following cells inspect.
biggest_abs_weight(model)
weights, biases = get_weights_biases(model)

In [None]:
# Largest value of each entry in `weights`.
[w.max() for w in weights]

In [None]:
# Smallest value of each entry in `weights`.
[w.min() for w in weights]

In [None]:
# Largest value of each entry in `biases`.
[b.max() for b in biases]

In [None]:
# Smallest value of each entry in `biases`.
[b.min() for b in biases]

In [None]:
# Persist only the model's state dict (not the whole module object) to disk.
# Reminder for whoever reloads it: call model.eval() after loading.
torch.save(model.state_dict(), "./cifar-convnet.pth") # don't forget to set model.eval() after loading

In [None]:
# Bare expression: the notebook displays the model's repr as the cell output.
model

In [None]:
# IPython magic (not plain Python): opens the interactive debugger, post-mortem
# on the most recent exception. Leftover debugging aid; only works in IPython.
%debug

In [None]:
# Print the name and size of every entry in the model's state dict.
# state_dict() is built once and iterated with .items(), instead of being
# rebuilt on every loop iteration just to look up a single entry.
for param_tensor, tensor in model.state_dict().items():
    print(param_tensor, "\t", tensor.size())
    