In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to local source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from ds_toolkit.general_utils.gpu_utils import addGPU

# NOTE(review): addGPU is an external helper whose effect is not visible in this
# notebook — presumably it selects/reserves a GPU for this kernel; confirm.
addGPU()

In [3]:
import sys
# Add the parent directory to the import search path (relative to the kernel's
# working directory) — presumably so the `utils` package imported in the next
# cell can be found; confirm the notebook is always launched from its own folder.
sys.path.append('../')

In [4]:
from utils.data import CIFAR10_dataset
from utils.misc import (
    show_random_images,
    get_device,
    set_seed,
    is_cuda,
    show_model_summary,
    show_img_grid,
    show_random_images_for_each_class,
    get_wrong_predictions
)
from utils.model import Net
from utils.training import train
from utils.testing import test

In [5]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim
from tqdm import tqdm
import pandas as pd
import numpy as np
import torchvision

import wandb

In [6]:
# Seed first (helper called with its defaults) so everything created afterwards
# starts from the same random state, then capture the CUDA flag and the device
# handle that later cells pass to the data loaders and to `.to(device)`.
set_seed()
cuda, device = is_cuda(), get_device()

In [7]:
# Build the train and test loaders in one pass: the train split is constructed
# first, then the test split, both receiving the same `cuda` flag.
train_loader, test_loader = (
    CIFAR10_dataset(train=is_train_split, cuda=cuda).get_loader()
    for is_train_split in (True, False)
)

In [8]:
# Visual sanity check of the training data before any model is built.
show_random_images(train_loader)

In [9]:
# Empty placeholders. `results` is re-created by every training cell below;
# `models` is not used anywhere in the visible part of the notebook.
results, models = {}, {}

In [10]:
# Create a new Net instance, move it to the selected device and print its
# summary. (3, 32, 32) is presumably the per-image input shape
# (channels, height, width) — confirm against show_model_summary.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [11]:
# Upper bound used by the training loop's range(0, EPOCHS + 1).
EPOCHS = 120
# Placeholders: `results` is overwritten by the training cell that follows;
# `models` is not used in the visible part of the notebook.
results, models = {}, {}

In [12]:
# Experiment: "In this run dilated conv is fixed."
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    notes="""In this run dilated conv is fixed."""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.2,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()

In [13]:
# Redundant with the run.finish() call in the training cell above; kept as a
# manual way to close the current W&B run.
run.finish()

In [14]:
# Re-instantiate Net so the next experiment does not continue from the
# previously trained weights held in `net`.
# NOTE(review): presumably utils/model.py was edited between experiments and
# autoreload picks up the new definition — confirm.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [15]:
# Upper bound used by the training loop's range(0, EPOCHS + 1).
EPOCHS = 120
# Placeholders: `results` is overwritten by the training cell that follows;
# `models` is not used in the visible part of the notebook.
results, models = {}, {}

In [16]:
# Experiment "b1_b2_more_kernels".
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
# NOTE(review): the next cell repeats this experiment with identical settings;
# run only one of the two.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    name="b1_b2_more_kernels",
    # The triple-quoted literal must be closed with three quotes; with only two
    # the string is unterminated and the whole cell fails to parse.
    notes="""More kernels on Block 1 and Block 2. Brining total params to 179k+"""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.2,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()

In [17]:
# Experiment "b1_b2_more_kernels".
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    name="b1_b2_more_kernels",
    notes="""More kernels on Block 1 and Block 2. Brining total params to 179k+"""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.2,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()

In [18]:
# Redundant with the run.finish() call in the training cell above; kept as a
# manual way to close the current W&B run.
run.finish()

In [19]:
# Re-instantiate Net so the next experiment does not continue from the
# previously trained weights held in `net`, then print the model summary.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [20]:
# Upper bound used by the training loop's range(0, EPOCHS + 1).
EPOCHS = 120
# Placeholders: `results` is overwritten by the training cell that follows;
# `models` is not used in the visible part of the notebook.
results, models = {}, {}

In [21]:
# Experiment "lr_plateau_factor_0p5": same training recipe, but the plateau
# scheduler uses factor=0.5.
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    name="lr_plateau_factor_0p5",
    notes="""LR Plateau factor to 0.5 No change in model."""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()

In [22]:
# Redundant with the run.finish() call in the training cell above; kept as a
# manual way to close the current W&B run.
run.finish()

In [23]:
# Re-instantiate Net so the next experiment does not continue from the
# previously trained weights held in `net`, then print the model summary.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [24]:
# Upper bound used by the training loop's range(0, EPOCHS + 1).
EPOCHS = 120
# Placeholders: `results` is overwritten by the training cell that follows;
# `models` is not used in the visible part of the notebook.
results, models = {}, {}

In [25]:
# Experiment "dropout_to_0p1".
# NOTE(review): the dropout value itself is not set in this cell — presumably
# it was changed inside the Net definition; confirm.
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    name="dropout_to_0p1",
    notes="""Reduce dropout value to 0.1"""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()

In [26]:
# run.finish()  # not needed here: the training cell above already calls run.finish()

In [27]:
# Re-instantiate Net so the next experiment does not continue from the
# previously trained weights held in `net`, then print the model summary.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [28]:
# Upper bound used by the training loop's range(0, EPOCHS + 1).
EPOCHS = 120
# Placeholders: `results` is overwritten by the training cell that follows;
# `models` is not used in the visible part of the notebook.
results, models = {}, {}

In [29]:
# Experiment "inc_b3_ksize_dec_b1_ksize".
# NOTE(review): the architecture change described in `notes` is not made in
# this cell — presumably it lives in the Net definition; confirm.
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    name="inc_b3_ksize_dec_b1_ksize",
    notes="""Increase Block 3 kernel size, and reduce Block kernel size. Total params to 195280"""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()

In [30]:
# Create a new Net instance on the selected device and print its summary.
# Note: the following cell repeats this instantiation, so this instance is
# replaced before any training happens.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [31]:
# Re-instantiate Net so the next experiment does not continue from the
# previously trained weights held in `net`, then print the model summary.
net = Net()
net = net.to(device)
show_model_summary(net, (3, 32, 32))

In [32]:
# Upper bound used by the training loop's range(0, EPOCHS + 1).
EPOCHS = 120
# Placeholders: `results` is overwritten by the training cell that follows;
# `models` is not used in the visible part of the notebook.
results, models = {}, {}

In [33]:
# Experiment "progressive_kernel_size_blks".
# NOTE(review): the architecture change described in `notes` is not made in
# this cell — presumably it lives in the Net definition; confirm.
# Starts a W&B run, trains `net` with SGD + ReduceLROnPlateau, logs per-epoch
# metrics to W&B and keeps a local copy of them in `results`.
run = wandb.init(
    project='cifar_10_lte200k_npool',
    entity='askmuhsin',
    reinit=True,
    name="progressive_kernel_size_blks",
    notes="""Progressive kernel size on all blocks. Total params to 194,104"""
)
# Per-epoch history; every list gains exactly one entry per epoch.
results = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'epoch': [],
}
optimizer = optim.SGD(
    net.parameters(),
    lr=0.015, momentum=0.9
)
# Multiplies the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps, never going below `min_lr`.
# NOTE(review): the scheduler is handed to both train() and test(); which of
# them calls lr_scheduler.step(), and with which metric, is not visible here.
lr_scheduler = ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5,
    patience=3, min_lr=0.0001, verbose=True
)

try:
    # NOTE: range(0, EPOCHS + 1) performs EPOCHS + 1 passes (epoch 0..EPOCHS).
    for epoch in range(0, EPOCHS + 1):
        print(f"[EPOCH {epoch} / {EPOCHS}] -- ")
        train_loss_batch, train_acc_batch = train(
            net, device,
            train_loader,
            optimizer,
            lr_scheduler
        )
        # Presumably per-batch sequences; reduced to one number per epoch.
        train_acc = np.mean(train_acc_batch)
        train_loss = np.mean(train_loss_batch)
        test_loss, test_acc = test(
            net, device,
            test_loader,
            lr_scheduler
        )
        wandb.log(
            {
                "train_acc": train_acc,
                "test_acc": test_acc,
                "train_loss": train_loss,
                "test_loss": test_loss,
                "lr": optimizer.param_groups[0]['lr'],
            }
        )

        # Explicit name -> value mapping; avoids eval() on variable names.
        epoch_metrics = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'epoch': epoch,
        }
        for metric, value in epoch_metrics.items():
            results[metric].append(value)
finally:
    # Close the W&B run even when training is interrupted or raises.
    run.finish()