In [11]:
import sys
sys.path.append('..')

In [12]:
import os
import torch
import torch.nn as nn
import numpy as np
from pathlib import Path
from tqdm import tqdm
from itertools import product
from dataclasses import dataclass
from matplotlib import pyplot as plt
from timeit import default_timer as timer
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models
from typing import List, Dict
from CommonFunctions import enhance_plot
from Models import SimpleCNN, ModularVGG
from Utils import *
%matplotlib inline

In [13]:
PARAMS = LearningParameters()
KERNEL_PARAMS = dict(kernel_size=3, stride=1, padding=1)

def VGG11():
    model_config = dict(
        CONV=dict(
            conv_layers=[[64],
                         [128],
                         [256, 256],
                         [512, 512],
                         [512, 512]],
            kernel_params=KERNEL_PARAMS,
            batch_norm=PARAMS.batch_norm
        ),
        FC=dict(
            layer_sizes=[4096, 4096, 1000],
            dropout=PARAMS.dropout
        )
    )
    return ModularVGG(input_layer_size=3,
                      num_of_classes=10,
                      image_size=32,
                      config=model_config)
PARAMS

LearningParameters(batch_size=128, cpu_count=4, learning_rate=0.05, momentum=0.9, weight_decay=0.001, epochs=150, device=device(type='cpu'), dropout=0.3, batch_norm=True)

In [14]:
def train_step_with_reg(model, dataloader, criterion, accuracy_function, optimizer, device,
                        L1_lambda=0, L2_lambda=0):
    train_loss, accuracy = 0, 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = criterion(y_pred, y)
        L1_norm = sum(p.abs().sum() for p in model.parameters())
        L2_norm = sum((p**2).sum() for p in model.parameters())
        train_loss += loss.item()
        accuracy += accuracy_function(y_pred.argmax(dim=1), y)
        optimizer.zero_grad()
        loss_regularized = loss + L1_norm*L1_lambda + L2_norm*L2_lambda
        loss_regularized.backward()
        optimizer.step()
    train_loss /= (data_len := len(dataloader))
    accuracy /= data_len
    return train_loss, accuracy


def plot_loss(title, epochs, loss_dict, axes_size, file):
    with plt.style.context('cyberpunk'):
        figure, axes = plt.subplots(*axes_size, layout='constrained', figsize=(6, 8))
        x_axis = range(1, epochs + 1)
        for ax, ((L1, L2), (loss_train, loss_val)) in zip(figure.axes, loss_dict.items()):
            ax.plot(x_axis, loss_train, label='Train')
            ax.plot(x_axis, loss_val, label='Test')
            ax.set(title=f'L1 = {L1:.2f}\nL2 = {L2:.2f}')
            enhance_plot(figure, axes, glow=True, alpha_gradient=0.4)
        figure.suptitle(title)
        figure.savefig(f'./images/{file}.png')

# VGG with L1, L2 and no regularization

In [17]:
data_path = Path('./data')
loss_dict = {}
_, _, train_dataloader, test_dataloader = load_CIFAR10(
    data_path, normalized_transform(), subset=(8000, 2000))

PARAMS.epochs = 10
L1_list = [0, 0.1, 0]
L2_list = [0, 0, 0.1]
model_names = ['vanilla', 'L1 regularization', 'L2 regularization']

for model_name, L1, L2 in zip(L1_list, L2_list, model_names):
    print(model_name)
    model = VGG11()
    model.to(PARAMS.device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), 
                                lr=PARAMS.learning_rate,
                                momentum=PARAMS.momentum)
    train_time_start = timer()
    train_loss = []
    val_loss = []
    for epoch in tqdm(range(PARAMS.epochs)):
        train_metrics = train_step_with_reg(
            model, train_dataloader, loss_fn, accuracy, optimizer, PARAMS.device,
            L1_lambda=L1, L2_lambda=L2
        )
        test_metrics = test_step(
            model, test_dataloader, loss_fn, accuracy, PARAMS.device)
        train_loss.append(train_metrics[0])
        val_loss.append(test_metrics[0])
    loss_dict[model_name] = (train_loss, val_loss)
    train_time = timer() - train_time_start
    print(f'Time: {train_time:.3f} seconds')

Files already downloaded and verified
Files already downloaded and verified
0


  0%|          | 0/10 [00:00<?, ?it/s]


TypeError: only integer tensors of a single element can be converted to an index

In [ ]:
title = 'Comparison between vanilla, L1 and L2 VGG11'
with plt.style.context('cyberpunk'):
    figure, axes = plt.subplot_mosaic([['vanilla', 'vanilla'], 
                                       ['L1', 'L2']],
                                      layout='constrained',
                                      figsize=(6, 8))
    x_axis = range(1, PARAMS.epochs + 1)
    for ax, (model_type, (train, val)) in zip(figure.axes, loss_dict.items()):
        ax.plot(x_axis, train, label='Train')
        ax.plot(x_axis, val, label='Validation')
        ax.set(xlabel='Epochs', ylabel='Loss', title=model_type)
    figure.suptitle(title)