# Implementing a ConvNet in PyTorch


## Setup

In [None]:
import numpy as np
from datetime import datetime 
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
import torchvision
import matplotlib.pyplot as plt
import ipdb
import time
from cifar_model import ConvNet
from utils import *
# Select the compute device: use the GPU when CUDA is available, otherwise
# fall back to the CPU so the notebook still runs on machines without one.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# parameters
# Seed handed to torch.manual_seed before the model is constructed.
RANDOM_SEED = 42
# Learning rate given to the Adam optimizer.
LEARNING_RATE = 0.001
# Batch size of the training and validation loaders (test_loader uses 1).
BATCH_SIZE = 128
# Value passed as num_workers to every DataLoader.
num_workers = 10

# Presumably the image side length in pixels; not referenced by the visible cells.
IMG_SIZE = 32
# Number of classes passed to the ConvNet constructor.
N_CLASSES = 10

## Data

In [None]:
# Preprocessing pipeline: ToTensor converts each image to a tensor and scales
# it to the [0, 1] range. No Normalize step is applied (a single-channel
# Normalize was previously left disabled here).
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 splits stored under ./data; only the training split requests a download.
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
valid_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform,
)

# Batched loaders: the training data is shuffled, the validation data is not.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)
# test_loader reads the same held-out split as valid_loader, one shuffled
# sample at a time.
test_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)

In [None]:
def training_loop(model, criterion, optimizer, train_loader, valid_loader, epochs, device, print_every=1):
    """Train `model` for up to `epochs` epochs, logging progress to TensorBoard.

    Every epoch runs one pass of `train` over `train_loader` followed by one
    pass of `validate` over `valid_loader`. Per-layer weight, bias and
    activation histograms and both losses are written to a `SummaryWriter`
    (default log directory) each epoch. Every `print_every`-th epoch the
    train/validation accuracies are computed, logged and printed, and the
    loop stops early once the validation accuracy reaches 99.1 %.

    Args:
        model: network to train; forwarded to `train`, `validate`,
            `get_weights_biases` and `get_accuracy`.
        criterion: loss function forwarded to `train` and `validate`.
        optimizer: optimizer forwarded to `train`.
        train_loader: loader over the training data.
        valid_loader: loader over the validation data.
        epochs: maximum number of epochs to run.
        device: device forwarded to `train`, `validate` and `get_accuracy`.
        print_every: accuracies are evaluated and a summary line is printed
            on every `print_every`-th epoch.

    Returns:
        Tuple `(model, optimizer, (train_losses, valid_losses))`; each list
        holds one loss value per completed epoch.
    """
    train_losses = []
    valid_losses = []
    writer = SummaryWriter()
    try:
        for epoch in range(epochs):
            # training
            model, optimizer, train_loss, out1, out2, out3, out4 = train(
                train_loader, model, criterion, optimizer, device)

            # Per-layer histograms; the activations are whatever `train`
            # returned for this epoch.
            weights, biases = get_weights_biases(model)
            activations = (out1, out2, out3, out4)
            for i, weight in enumerate(weights):
                tag = 'Layer' + str(i + 1)
                writer.add_histogram(tag + '/weights', weight, epoch)
                writer.add_histogram(tag + '/biases', biases[i], epoch)
                writer.add_histogram(tag + '/activations', activations[i], epoch)

            train_losses.append(train_loss)
            writer.add_scalar('Loss/train', train_loss, epoch)

            # validation
            with torch.no_grad():
                model, valid_loss = validate(valid_loader, model, criterion, device)
            valid_losses.append(valid_loss)
            # Log the validation loss here (the training loss already goes
            # to 'Loss/train').
            writer.add_scalar('Loss/test', valid_loss, epoch)

            if epoch % print_every == (print_every - 1):
                train_acc = get_accuracy(model, train_loader, device=device)
                valid_acc = get_accuracy(model, valid_loader, device=device)
                writer.add_scalar('Accuracy/train', train_acc, epoch)
                writer.add_scalar('Accuracy/test', valid_acc, epoch)
                print(f'{datetime.now().time().replace(microsecond=0)} --- '
                      f'Epoch: {epoch}\t'
                      f'Train loss: {train_loss:.4f}\t'
                      f'Valid loss: {valid_loss:.4f}\t'
                      f'Train accuracy: {100 * train_acc:.2f}\t'
                      f'Valid accuracy: {100 * valid_acc:.2f}')
                # Early stop once the validation accuracy target is reached.
                if 100 * valid_acc >= 99.1:
                    break
    finally:
        # Flush and release the event file even if an epoch raised.
        writer.close()
    plot_losses(train_losses, valid_losses)
    return model, optimizer, (train_losses, valid_losses)

In [None]:
def train(train_loader, model, criterion, optimizer, device):
    """Run one training epoch over `train_loader`.

    The model is expected to return five values per batch:
    `(y_hat, probs, out1, out2, out3)`. The optimized loss is
    `criterion(y_hat, y_true)` plus weighted 2-norm penalties on `out1`,
    `out2`, `out3` and `y_hat`. The largest value seen in each of
    `out1`/`out2`/`out3` and the largest absolute value of `y_hat` over the
    epoch are printed at the end.

    Args:
        train_loader: iterable of `(X, y_true)` batches exposing `.dataset`.
        model: network to train; switched to training mode.
        criterion: loss applied to `(y_hat, y_true)`.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.

    Returns:
        Tuple `(model, optimizer, epoch_loss, out1, out2, out3, y_hat)`.
        `epoch_loss` is the penalized loss summed over samples and divided by
        `len(train_loader.dataset)`; the four tensors are the outputs of the
        last batch, detached from the autograd graph.

    Raises:
        ValueError: if `train_loader` yields no batches.
    """
    model.train()

    # Activation-penalty settings; constant for the whole epoch.
    norm = 2
    beta = 0.0001

    running_loss = 0
    # Running maxima are kept as plain floats so no autograd graph is
    # retained between iterations.
    peak1 = peak2 = peak3 = peak4 = 0.0
    out1 = out2 = out3 = y_hat = None
    for X, y_true in train_loader:
        optimizer.zero_grad()
        X = X.to(device)
        y_true = y_true.to(device)

        # Forward pass (the second model output is not used here).
        y_hat, _, out1, out2, out3 = model(X)

        peak1 = max(peak1, out1.detach().max().item())
        peak2 = max(peak2, out2.detach().max().item())
        peak3 = max(peak3, out3.detach().max().item())
        peak4 = max(peak4, y_hat.detach().abs().max().item())

        loss = (criterion(y_hat, y_true)
                + 0.1 * beta * torch.norm(out1, norm)
                + 0.3 * beta * torch.norm(out2, norm)
                + 5 * beta * torch.norm(out3, norm)
                + 20 * beta * torch.norm(y_hat, norm))

        # Backward pass
        loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch figure is per sample.
        running_loss += loss.item() * X.size(0)

    if y_hat is None:
        raise ValueError('train_loader yielded no batches')

    epoch_loss = running_loss / len(train_loader.dataset)
    print("Max activations: {0:.2f}, {1:.2f}, {2:.2f}, {3:.2f}".format(peak1, peak2, peak3, peak4))
    # Detach so callers that only inspect or log these do not keep the graph alive.
    return (model, optimizer, epoch_loss,
            out1.detach(), out2.detach(), out3.detach(), y_hat.detach())

In [None]:
# Seed PyTorch's global RNG before the model is constructed.
torch.manual_seed(RANDOM_SEED)

# Build the network with N_CLASSES outputs and move it to the selected device.
model = ConvNet(N_CLASSES).to(DEVICE)
# Disabled: multi-GPU wrapping of the model.
#model = nn.DataParallel(model) #torch.cuda.device_count()
# Adam over all model parameters; the weight_decay option is left disabled.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)#, weight_decay=0.00001)
# Loss function handed to training_loop as `criterion`.
criterion = nn.CrossEntropyLoss()

In [None]:
# Train for at most 40 epochs; the returned loss history is discarded.
model, optimizer, _ = training_loop(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    epochs=40,
    device=DEVICE,
)

In [None]:
# Both helpers are presumably provided by `from utils import *` — confirm.
# The return value of biggest_abs_weight is not kept here.
biggest_abs_weight(model)
# Fetch the model's weights and biases for the inspection cells that follow.
weights, biases = get_weights_biases(model)

In [None]:
# Maximum of each entry in `weights`.
[w.max() for w in weights]

In [None]:
# Minimum of each entry in `weights`.
[w.min() for w in weights]

In [None]:
# Maximum of each entry in `biases`.
[b.max() for b in biases]

In [None]:
# Minimum of each entry in `biases`.
[b.min() for b in biases]

In [None]:
# Save only the model's state_dict (not the module object) to ./cifar-convnet.pth.
# Reminder: call model.eval() after loading these weights back into a model.
torch.save(model.state_dict(), "./cifar-convnet.pth")

In [None]:
# IPython magic: opens the interactive post-mortem debugger on the most recent exception.
%debug

In [None]:
# Print the name and size of every entry in the model's state_dict.
# state_dict() is built once and walked with items() rather than being
# rebuilt and re-indexed for every key.
for param_name, param_value in model.state_dict().items():
    print(param_name, "\t", param_value.size())
    