In [None]:
# Load the lab_black extension (presumably auto-formats cells with Black; confirm it is installed).
%load_ext lab_black
# Switch IPython's completer to its greedy mode.
%config IPCompleter.greedy=True

In [None]:
import multiprocessing
import time
from pathlib import Path
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import torchvision

torch.backends.cudnn.benchmark = False

from torchsummary import summary
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm

from ray import tune
from ray.tune.schedulers import ASHAScheduler

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Dataloader Example

The following class reads the data for the third assignment and wraps it in a torch `Dataset` object, so you can easily use a `DataLoader` to train your model.

Due to the size limit on Moodle, the data for this assignment should be downloaded from:

https://drive.google.com/file/d/1khzPamThzWScipEfMmOPevtfWV7Tx6UL/view?usp=sharing


Make sure that the file "hw3.npz" is located properly (in this example, it should be in the same folder as this notebook).

 



In [None]:
class STLData(Dataset):
    """Torch ``Dataset`` over one split of the assignment's ``.npz`` archive.

    The archive is read through its positional keys: ``arr_0``/``arr_1`` hold
    the training images/labels, ``arr_2``/``arr_3`` the validation pair and
    ``arr_4``/``arr_5`` the test pair. Images are stored as ``float32``
    without any rescaling.

    Args:
        mode: Split selector. It must contain ``"train"``, ``"val"`` or
            ``"test"``; the substrings are checked in that order.
        transform: Optional callable applied to every image returned by
            ``__getitem__``.
        path: Location of the archive. Defaults to ``"hw3.npz"`` in the
            current working directory.

    Raises:
        ValueError: If ``mode`` matches none of the three splits.
    """

    # (substring to look for in ``mode``, (image key, label key)), in lookup order.
    _SPLIT_KEYS = (
        ("train", ("arr_0", "arr_1")),
        ("val", ("arr_2", "arr_3")),
        ("test", ("arr_4", "arr_5")),
    )

    def __init__(self, mode="", transform=None, path="hw3.npz"):
        keys = next((k for name, k in self._SPLIT_KEYS if name in mode), None)
        if keys is None:
            raise ValueError("mode should be 'train', 'val' or 'test'")
        images_key, labels_key = keys

        # np.load returns a lazily-read NpzFile; the context manager closes the
        # underlying file handle once the two arrays have been materialised.
        with np.load(path) as data:
            images = data[images_key]
            self.labels = data[labels_key]

        self.images = images.astype(np.float32)
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.images[idx, :]
        labels = self.labels[idx]
        # Compare against None so that a transform object that happens to be
        # falsy is still applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, labels

Here is an example of how you can create a dataloader.
First read the data. Note that the `STLData` class works with the `torchvision.transforms` that are required in HW3.

In [None]:
# Build the three splits; STLData picks the split from the mode string.
train_set = STLData("train", transform=torchvision.transforms.ToTensor())
val_set = STLData("val", transform=torchvision.transforms.ToTensor())
test_set = STLData("test", transform=torchvision.transforms.ToTensor())

batch_size = 100
# The factor of zero keeps data loading in the main process.
n_workers = 0 * multiprocessing.cpu_count()
trainloader = DataLoader(
    train_set,
    batch_size=batch_size,
    num_workers=n_workers,
    shuffle=True,
)
image_batch, labels = next(iter(trainloader))

# Preview the first eight images of the batch on a 2x4 grid, with the label
# printed under each image.
fig, ax_arr = plt.subplots(2, 4)
for i in range(8):
    row, col = divmod(i, 4)
    ax = ax_arr[row, col]
    # Scale to [0, 1] and move channels last for imshow.
    img = (image_batch[i] / 255.0).permute(1, 2, 0)
    ax.imshow(img)
    ax.get_yaxis().set_visible(False)
    ax.set_xlabel(labels[i].item())
    ax.set_xticklabels([])
fig.set_figwidth(10)
fig.set_figheight(5)
plt.subplots_adjust(wspace=0.3, hspace=0.01)
plt.show()

For a batch size of 100, a dataloader for your training data can be created as shown above.

# Load data

In [None]:
# Rebuild the three dataset splits; every split converts samples with ToTensor.
train_set, val_set, test_set = (
    STLData(split, transform=torchvision.transforms.ToTensor())
    for split in ("train", "val", "test")
)

# Define our main functions

In [None]:
def plot_log(log, model_config, save=False, select=True):
    """Plot the per-epoch loss and accuracy series of a training run.

    Series whose key contains ``"loss"`` are drawn on the left axis, series
    whose key contains ``"acc"`` on a twin right axis. Keys matching neither
    are not plotted.

    Args:
        log: Mapping from series name to a sequence with one value per epoch.
        model_config: Mapping providing ``"num_epochs"`` (length of the
            x-axis) and, when ``save`` is true, ``"lr"`` for the file name.
        save: If true, write the figure to
            ``./LR_<lr>_<num_epochs>.jpg`` before showing it.
        select: If true, mark the minimum of every loss series and the
            maximum of every accuracy series with an ``x`` marker.

    Returns:
        When ``select`` is true, a dict with the same keys as ``log`` whose
        values are ``[epoch, value]`` of the marked point (``None`` for keys
        that are neither a loss nor an accuracy). Otherwise ``None``.
    """
    fig, ax1 = plt.subplots()
    fig.set_figheight(7.5)
    fig.set_figwidth(12)
    # ax1 carries the losses, ax2 (sharing the x-axis) the accuracies.
    ax2 = ax1.twinx()

    epochs = model_config.get("num_epochs")
    x_axis = np.linspace(1, epochs, epochs)
    colors = cm.rainbow(np.linspace(0, 1, len(log)))
    selected = dict.fromkeys(log)

    for (key, values), c in zip(log.items(), colors):
        if "loss" in key:
            axis, arg_best, best, prefix = ax1, np.argmin, np.amin, "Min."
        elif "acc" in key:
            axis, arg_best, best, prefix = ax2, np.argmax, np.amax, "Max."
        else:
            # Unknown series: skip it instead of recording the marker that was
            # computed for the previous key.
            continue

        axis.plot(x_axis, values, color=c, label=key)

        if select:
            # Epochs are 1-based on the x-axis, hence the +1.
            x = arg_best(values) + 1
            y = best(values)
            axis.plot(
                x,
                y,
                color=c,
                label=f"{prefix} {key}",
                markersize=16,
                marker="x",
            )
            selected[key] = [x, y]

    ax1.set_ylabel("Loss")
    ax1.set_xlabel("Epochs")
    ax2.set_ylabel("Accuracy (%)")

    # 0 = 'best', 7 = 'center right'
    fig.legend(loc=7, bbox_to_anchor=(1.1, 0.5))

    if save:
        plt.savefig(f"./LR_{model_config['lr']}_{model_config['num_epochs']}.jpg")

    plt.show()

    if select:
        return selected

In [None]:
def test_model(net, data_generator, loss_fn):
    """Function to easily test model on specified dataset"""

    device = "cuda" if torch.cuda.is_available() else "cpu"
    net.eval()
    with torch.no_grad():
        batch_loss, batch_steps = 0.0, 0
        correct_pred, total_pred = 0, 0

        for batch_id, (data, label) in enumerate(data_generator):
            data, label = data.to(device), label.long().to(device)

            output = net(data)
            batch_loss += loss_fn(output, label).item()
            batch_steps += 1

            # indices where probability is maximum
            _, pred_label = torch.max(output, 1)
            correct_pred += (pred_label == label).sum().item()
            total_pred += label.shape[0]

        # average loss/acc across ALL batches
        # i.e. ACROSS specified dataset
        avg_loss = batch_loss / batch_steps
        avg_acc = correct_pred / total_pred

    return avg_loss, avg_acc

In [None]:
def train_model(config):
    """Train ``config["model"]`` on the STL data and record per-epoch metrics.

    Each epoch runs one mixed-precision pass over the training split with
    Adam and cross-entropy loss. It then evaluates the model on the train,
    validation and test splits with ``test_model``.

    Args:
        config: Mapping with the keys
            ``"model"`` (``nn.Module`` exposing a ``_name`` attribute),
            ``"lr"`` (Adam learning rate),
            ``"lr_scheduler"`` (truthy to halve the learning rate when the
            validation accuracy plateaus),
            ``"batch_size"``, ``"num_epochs"``,
            ``"log_training"`` (truthy to print progress and return the log),
            ``"log_interval"`` (print every N epochs; only read when
            ``"log_training"`` is truthy) and
            ``"save_model"`` (truthy to checkpoint the weights to
            ``models/<name>/<name>_<epoch>.pt`` under the current working
            directory whenever validation accuracy improves).

    Returns:
        When ``config["log_training"]`` is truthy, a dict with the arrays
        ``train_loss``, ``val_loss``, ``train_acc``, ``val_acc`` and
        ``test_acc`` (one entry per epoch). Otherwise ``None``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    use_cuda = device == "cuda"
    # The factor of zero keeps data loading in the main process.
    n_workers = 0 * torch.cuda.device_count()
    num_epochs = config["num_epochs"]

    logger = {
        name: np.zeros(num_epochs)
        for name in ("train_loss", "val_loss", "train_acc", "val_acc", "test_acc")
    }

    #### LOAD DATA ####
    def make_loader(mode, shuffle):
        """Build the DataLoader for one split with the shared settings."""
        return DataLoader(
            STLData(mode=mode, transform=torchvision.transforms.ToTensor()),
            batch_size=config["batch_size"],
            num_workers=n_workers,
            shuffle=shuffle,
            # Pinned host memory only helps host-to-GPU copies; asking for it
            # without CUDA just triggers a warning.
            pin_memory=use_cuda,
        )

    train_dataloader = make_loader("train", shuffle=True)
    val_dataloader = make_loader("val", shuffle=False)
    test_dataloader = make_loader("test", shuffle=False)

    #### INSTANTIATE MODEL ####
    net = config["model"].to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=config["lr"])
    # Mixed precision is switched off explicitly on CPU-only hosts, so the
    # scaler and autocast become no-ops rather than emitting warnings.
    scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)

    if config["lr_scheduler"]:
        # "max" mode: the monitored quantity is the validation accuracy.
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, "max", 0.5, patience=num_epochs // 10, verbose=True
        )

    #### BEGIN TRAINING ####
    start_time = time.time()
    best_val_acc = 0
    for j in range(num_epochs):
        ## START OF EPOCH ##
        train_loss, train_steps = 0.0, 0
        # test_model leaves the network in eval mode, so re-enable training.
        net.train()
        for data, label in train_dataloader:
            data, label = data.to(device), label.long().to(device)

            # forward
            with torch.cuda.amp.autocast(enabled=use_cuda):
                output = net(data)
                loss = loss_function(output, label)

            # backward
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            train_steps += 1

        ## END OF EPOCH ##

        # average training loss for 1 epoch
        train_loss /= train_steps

        # evaluate the epoch's weights on all three splits
        _, train_acc = test_model(net, train_dataloader, loss_function)
        val_loss, val_acc = test_model(net, val_dataloader, loss_function)
        _, test_acc = test_model(net, test_dataloader, loss_function)

        if config["lr_scheduler"]:
            scheduler.step(val_acc)

        logger["train_loss"][j] = train_loss
        logger["val_loss"][j] = val_loss
        logger["train_acc"][j] = train_acc
        logger["val_acc"][j] = val_acc
        logger["test_acc"][j] = test_acc

        if config["log_training"] and (j + 1) % config["log_interval"] == 0:
            print(
                f"Epoch:{j+1}/{config['num_epochs']}",
                f"Train Loss: {logger['train_loss'][j]:.4f}",
                f"Train Acc: {logger['train_acc'][j]:.4f}",
                f"Val Loss: {logger['val_loss'][j]:.4f}",
                f"Val Acc: {logger['val_acc'][j]:.4f}",
                f"Test Acc: {logger['test_acc'][j]:.4f}",
            )

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            if config["save_model"]:
                # exist_ok=True makes directory creation idempotent and avoids
                # the check-then-create race of exists() + mkdir().
                checkpoint_dir = Path.cwd() / "models" / net._name
                checkpoint_dir.mkdir(parents=True, exist_ok=True)

                # pad with appropriate number of zeros i.e. epoch 10 named as 010
                checkpoint_num = str(j + 1).zfill(len(str(num_epochs)))
                model_path = checkpoint_dir / f"{net._name}_{checkpoint_num}.pt"
                torch.save(net.state_dict(), model_path)

    print(f"{config['num_epochs']} epochs took {time.time() - start_time:.2f}s")

    if config["log_training"]:
        return logger

# ShallowCNN

In [None]:
class ShallowCNN(nn.Module):
    """Small CNN: three strided convolutions, one max-pool, two linear layers.

    ``fc1`` expects 1152 flattened features. A 96x96 input produces exactly
    that after the convolutions and the pooling (128 channels x 3 x 3). The
    network returns 10 raw logits per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: every convolution halves the resolution (stride 2).
        self.conv1 = nn.Conv2d(3, 96, kernel_size=7, stride=2, padding=0)
        self.conv2 = nn.Conv2d(96, 64, kernel_size=5, stride=2, padding=0)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=0)

        # Classifier head.
        self.fc1 = nn.Linear(in_features=1152, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

        # Parameter-free layers shared across the forward pass.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=3)
        self.relu = nn.ReLU()

        # Name used for the checkpoint folder and file names.
        self._name = type(self).__name__

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of logits."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.relu(conv(x))

        # Pool, then collapse everything except the batch dimension.
        features = self.maxpool(x).flatten(start_dim=1)

        hidden = self.relu(self.fc1(features))
        return self.fc2(hidden)

In [None]:
# Fresh shallow network and the settings that train_model reads for this run.
shallow_net = ShallowCNN().to(device)
model_cfg = dict(
    model=shallow_net,
    lr=1e-4,
    lr_scheduler=False,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=True,
    num_epochs=60,
)

In [None]:
# Train the shallow network and keep the per-epoch metrics for plotting.
log_shallow = train_model(model_cfg)

In [None]:
# Plot the loss and accuracy curves recorded while training the shallow network.
plot_log(log_shallow, model_cfg)

In [None]:
# Rebuild the architecture and restore saved weights for the analysis below.
shallow_net = ShallowCNN().to(device)
shallow_net.eval()
# NOTE(review): an earlier note here listed checkpoints 210, 240 and 1490,
# presumably from a longer run; confirm which epoch should be loaded.
model_path = f"models/{shallow_net._name}/{shallow_net._name}_20.pt"
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU also loads on a CPU-only machine.
shallow_net.load_state_dict(torch.load(model_path, map_location=device))

In [None]:
# Validation loader for the per-class confidence analysis; shuffling is off so
# the sample order is fixed.
val_data = STLData(mode="val", transform=torchvision.transforms.ToTensor())
val_dataloader = DataLoader(
    dataset=val_data,
    batch_size=model_cfg["batch_size"],
    shuffle=False,
    num_workers=0,
    pin_memory=False,
)

In [None]:
# 3D list, where index = class label
# each row of top level list, format: [prediction prob, True or False, image data]
correct_bin = [[] for _ in range(10)]
wrong_bin = [[] for _ in range(10)]
with torch.no_grad():
    shallow_net.eval()
    # loop through val dataset, collect all scores per class
    for batch_id, (data, label) in enumerate(val_dataloader):
        data, label = data.to(device), label.to(device)
        output = shallow_net(data)
        # apply softmax
        probs = F.softmax(output, dim=1)

        # output True/False for each batch
        all_idx = torch.argmax(output, 1) == label
        for idx, val in enumerate(all_idx):
            label_ = label[idx]
            # store in correct_bin
            if val == True:
                correct_bin[label_].append([probs[idx].max(), all_idx[idx], data[idx]])
            # store in wrong_bin
            elif val == False:
                wrong_bin[label_].append([probs[idx].max(), all_idx[idx], data[idx]])

# Visualization methods

In [None]:
def process_bin(data_bin, num_img=5):
    """Show the most confident images of every class as one image grid.

    Each inner list of ``data_bin`` is sorted by its first element (the
    prediction probability), and the ``num_img`` highest-scoring images form
    one row of the grid. A class with fewer than ``num_img`` entries is padded
    with blank (all-zero) tiles instead of raising an ``IndexError``.

    Args:
        data_bin: List with one inner list per class; each entry is
            ``[probability, correct flag, image tensor]``.
        num_img: Number of images shown per class (grid columns).
    """
    # Take the image shape from the first stored sample. If every bin is
    # empty, fall back to (3, 96, 96).
    img_shape = next(
        (entry[2].shape for group in data_bin for entry in group), (3, 96, 96)
    )
    # Zero-initialised so that slots without an image render as blank tiles.
    all_imgs = torch.zeros(len(data_bin) * num_img, *img_shape)
    for i, list_2d in enumerate(data_bin):
        # sort based on first element i.e. probabilities (tensor or float)
        top_k = sorted(list_2d, key=lambda x: float(x[0]), reverse=True)[:num_img]
        offset = i * num_img
        for j, entry in enumerate(top_k):
            all_imgs[offset + j] = entry[2]

    # Scale pixel values to [0, 1] for imshow.
    all_imgs = all_imgs / 255.0
    out = torchvision.utils.make_grid(all_imgs, nrow=num_img)
    fig, ax = plt.subplots(figsize=(10, 20))
    ax.imshow(out.permute(1, 2, 0), interpolation="nearest", aspect="auto")
    ax.axis("off")

In [None]:
# Highest-probability correctly classified images, one grid row per class.
process_bin(correct_bin)

In [None]:
# Highest-probability misclassified images, one grid row per true class.
process_bin(wrong_bin)

In [None]:
# # loop through different classes
# fig, ax = plt.subplots(10, 5)
# for i, list_2d in enumerate(correct_bin):
#     # sort based on first element i.e. probabilities
#     top_5 = sorted(list_2d, key=lambda x: x[0], reverse=True)[:5]
#     for j in range(5):
#         img = (top_5[j][2] / 255.0).permute(1, 2, 0).cpu()
#         ax[i, j].imshow(img)
#         ax[i, j].axes.get_yaxis().set_visible(False)
#         ax[i, j].axes.get_xaxis().set_visible(False)
# fig.set_figheight(10)
# fig.set_figwidth(10)
# plt.subplots_adjust(wspace=0.00, hspace=0.00)


# DeepCNN

In [None]:
class DeepCNN(nn.Module):
    """Four convolutional blocks, global average pooling and a linear classifier.

    Each block halves the spatial resolution with a stride-2 convolution, so a
    96x96 input leaves the blocks as a 6x6 map with 192 channels. That map is
    averaged to one value per channel and projected to 10 logits.
    """

    def __init__(self):
        super(DeepCNN, self).__init__()

        self.blocks = self._build_blocks()
        # since the output of our conv blocks is (6,6)
        self.gap = nn.AvgPool2d(kernel_size=6, stride=1)
        self.fc1 = nn.Linear(192, 10)

        # Name used for the checkpoint folder and file names.
        self._name = self.__class__.__name__

    def _build_blocks(self):
        """Chain the conv blocks into a ``Sequential`` with named entries."""
        conv_blk_dims = [3, 32, 64, 128, 192]
        blocks_list = []
        for i in range(len(conv_blk_dims) - 1):
            conv_block = self._create_conv_block(conv_blk_dims[i], conv_blk_dims[i + 1])
            blocks_list.append((f"Conv-Blk-{i+1}", conv_block))

        return nn.Sequential(OrderedDict(blocks_list))

    def _create_conv_block(self, in_channels, out_channels):
        """Create conv_block based on in/out channels"""
        conv_block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, (3, 3), stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, (1, 1), stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, (3, 3), stride=1, padding=1),
            nn.ReLU(),
        )
        return conv_block

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of logits."""
        x = self.blocks(x)
        # Flatten from dim 1 rather than squeeze(): squeeze() would also drop
        # a batch dimension of size 1 and return (10,) instead of (1, 10).
        x = torch.flatten(self.gap(x), 1)
        x = self.fc1(x)
        return x

In [None]:
# NOTE(review): the cells shown below train and evaluate `deep_net`, not this
# instance; presumably left over from inspecting the model. Confirm before removing.
net = DeepCNN()

In [None]:
# Fresh deep network and the settings that train_model reads for this run.
deep_net = DeepCNN().to(device)
model_cfg = dict(
    model=deep_net,
    lr=2.5e-4,
    lr_scheduler=True,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=False,
    num_epochs=100,
)

In [None]:
# Train the deep network and keep the per-epoch metrics for plotting.
log = train_model(model_cfg)

In [None]:
# Plot the loss and accuracy curves recorded while training the deep network.
plot_log(log, model_cfg)

In [None]:
# Evaluate the trained deep network on the test split; the cell displays the
# (average loss, accuracy) tuple returned by test_model.
loss_function = nn.CrossEntropyLoss()
test_dataloader = DataLoader(
    dataset=STLData(mode="test", transform=torchvision.transforms.ToTensor()),
    batch_size=model_cfg["batch_size"],
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)
test_model(net=deep_net, data_generator=test_dataloader, loss_fn=loss_function)