In [None]:
%load_ext lab_black
%config IPCompleter.greedy=True

In [None]:
import multiprocessing
import time
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import torchvision

torch.backends.cudnn.benchmark = False

from torchsummary import summary
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt

from ray import tune
from ray.tune.schedulers import ASHAScheduler

# Dataloader Example

The following class reads the data for the third assignment and wraps it in a torch `Dataset` object. With this, you can easily use a `DataLoader` to train your model.

Due to the file-size limit on Moodle, the data for this assignment must be downloaded from:

https://drive.google.com/file/d/1khzPamThzWScipEfMmOPevtfWV7Tx6UL/view?usp=sharing


Make sure that the file "hw3.npz" is located properly (in this example, it should be in the same folder as this notebook).

 



In [None]:
class STLData(Dataset):
    """Torch ``Dataset`` over one split of the arrays stored in an ``.npz`` archive.

    The archive is read positionally: ``arr_0``/``arr_1`` hold the training
    images/labels, ``arr_2``/``arr_3`` the validation ones and
    ``arr_4``/``arr_5`` the test ones.

    Args:
        mode: Split selector. Any string containing ``"train"``, ``"val"`` or
            ``"test"`` (checked in that order) selects the matching split.
        transform: Optional callable applied to every image in ``__getitem__``.
        data_path: Location of the archive. Defaults to ``"hw3.npz"``, resolved
            against the current working directory.

    Raises:
        ValueError: If ``mode`` names none of the three splits.
    """

    # (substring looked for in ``mode``, images key, labels key)
    _SPLIT_KEYS = (
        ("train", "arr_0", "arr_1"),
        ("val", "arr_2", "arr_3"),
        ("test", "arr_4", "arr_5"),
    )

    def __init__(self, mode="", transform=None, data_path="hw3.npz"):
        # Validate the split name before touching the disk.
        keys = next(
            (
                (images_key, labels_key)
                for name, images_key, labels_key in self._SPLIT_KEYS
                if name in mode
            ),
            None,
        )
        if keys is None:
            raise ValueError("mode should be 'train', 'val' or 'test'")
        images_key, labels_key = keys

        # Indexing an NpzFile materialises the array, so the archive can be
        # closed as soon as both arrays have been read.
        with np.load(data_path) as data:
            images = data[images_key]
            labels = data[labels_key]

        # Images are kept in their original value range, only cast to float32.
        self.images = np.asarray(images, dtype=np.float32)
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.images[idx, :]
        labels = self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample, labels

Here is an example of how you can create a dataloader.
First, read the data. Note that the `STLData` class works with the `torchvision.transforms` that are required in HW3.

In [None]:
# The reworked STLData class selects its split by name ("train" / "val" /
# "test"); each split gets its own ToTensor transform.
train_set, val_set, test_set = (
    STLData(split, transform=torchvision.transforms.ToTensor())
    for split in ("train", "val", "test")
)

# Shuffled training loader with one worker process per CPU core.
batch_size = 100
n_workers = multiprocessing.cpu_count()
trainloader = DataLoader(
    train_set, shuffle=True, batch_size=batch_size, num_workers=n_workers
)

# Preview the first eight images of one batch on a 2 x 4 grid.
image_batch, labels = next(iter(trainloader))
for panel in range(8):
    plt.subplot(2, 4, panel + 1)
    # Move axis 0 to the last position before handing the array to imshow.
    img = np.moveaxis(image_batch[panel].numpy(), 0, 2)
    plt.imshow(img / 255.0)

Now, for a batch size of 100, you can create a dataloader for your training data as follows.

# Load data

In [None]:
# Build the train / validation / test datasets, each with its own ToTensor
# transform.
train_set, val_set, test_set = (
    STLData(split, transform=torchvision.transforms.ToTensor())
    for split in ("train", "val", "test")
)

# Define our main functions

In [None]:
def plot_log(log, model_config, save=False):
    """Plot the per-epoch curves recorded in ``log``.

    Args:
        log: Mapping with ``"train_loss"``, ``"val_loss"`` and ``"acc"``
            entries, each holding one value per epoch.
        model_config: Mapping providing ``"num_epochs"`` (number of points on
            the x-axis) and, when ``save`` is true, ``"lr"`` for the file name.
        save: When true, also write the figure to
            ``./LR_<lr>_<num_epochs>.jpg``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    epochs = model_config["num_epochs"]
    # Epochs are numbered from 1 on the x-axis.
    x_axis = np.arange(1, epochs + 1)

    fig, ax = plt.subplots(figsize=(16, 10))

    ax.plot(x_axis, log["train_loss"], label="Train Loss")
    ax.plot(x_axis, log["val_loss"], label="Validation Loss")
    ax.plot(x_axis, log["acc"], label="Validation Accuracy")

    # The axis carries both the losses and the accuracy curve.
    ax.set_ylabel("Loss / Accuracy")
    ax.set_xlabel("Epochs")
    ax.legend(loc="best", prop={"size": 12})

    if save:
        fig.savefig(f"./LR_{model_config['lr']}_{model_config['num_epochs']}.jpg")
    plt.show()
    # The previous trailing ``return selected`` referenced an undefined name and
    # made every call fail with NameError; nothing needs to be returned.

In [None]:
def test_model(net, data_generator, loss_fn):
    """Function to easily test model on specified dataset"""

    device = "cuda" if torch.cuda.is_available() else "cpu"
    net.eval()
    with torch.no_grad():
        batch_loss, batch_steps = 0.0, 0
        correct_pred, total_pred = 0, 0

        for batch_id, (data, label) in enumerate(data_generator):
            label = label.long()
            data, label = data.to(device), label.to(device)

            output = net(data)
            batch_loss += loss_fn(output, label).item()
            batch_steps += 1

            # indices where probability is maximum
            _, pred_label = torch.max(output, 1)
            correct_pred += (pred_label == label).sum().item()
            total_pred += label.shape[0]

        # average loss/acc across ALL batches
        # i.e. ACROSS specified dataset
        avg_loss = batch_loss / batch_steps
        avg_acc = correct_pred / total_pred

    return avg_loss, avg_acc

In [None]:
def train_model(config):
    """Train ``config["model"]`` on the train split, validating after every epoch.

    Args:
        config: Mapping with the keys
            ``"model"`` (module to train; moved to the selected device),
            ``"lr"`` (Adam learning rate),
            ``"batch_size"`` (used for both loaders),
            ``"num_epochs"``,
            ``"log_training"`` (print progress and return the log),
            ``"log_interval"`` (print every this many epochs),
            ``"save_model"`` (checkpoint whenever validation accuracy improves).

    Returns:
        When ``config["log_training"]`` is true, a dict with the arrays
        ``"train_loss"``, ``"val_loss"`` and ``"acc"`` (one entry per epoch);
        otherwise ``None``.
    """
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    # Batches are loaded in the main process.
    n_workers = 0

    num_epochs = config["num_epochs"]
    logger = {
        "train_loss": np.zeros(num_epochs),
        "val_loss": np.zeros(num_epochs),
        "acc": np.zeros(num_epochs),
    }

    #### LOAD DATA ####
    b_size = config["batch_size"]

    train_data = STLData(mode="train", transform=torchvision.transforms.ToTensor())
    train_dataloader = DataLoader(
        train_data,
        batch_size=b_size,
        num_workers=n_workers,
        shuffle=True,
        # pinned host memory only helps (and only works) with a GPU
        pin_memory=use_cuda,
    )

    val_data = STLData(mode="val", transform=torchvision.transforms.ToTensor())
    val_dataloader = DataLoader(
        val_data,
        batch_size=b_size,
        num_workers=n_workers,
        shuffle=False,
        pin_memory=use_cuda,
    )

    #### INSTANTIATE MODEL ####
    net = config["model"].to(device)
    # Fall back to the class name for models that do not define ``_name``.
    model_name = getattr(net, "_name", net.__class__.__name__)

    loss_function = nn.CrossEntropyLoss()

    optimizer = optim.Adam(net.parameters(), lr=config["lr"])

    # Halve the learning rate when validation accuracy stops improving.
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer, "max", 0.5, patience=num_epochs // 10, verbose=True
    )
    # Mixed precision is only enabled when training on the GPU.
    scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)

    #### BEGIN TRAINING ####
    start_time = time.time()
    best_val_acc = 0
    for epoch in range(num_epochs):
        ## START OF EPOCH ##
        # Validation at the end of the previous epoch may have left the model
        # in eval mode; make sure every training epoch runs in train mode.
        net.train()

        train_loss, train_steps = 0.0, 0
        for data, label in train_dataloader:
            label = label.long()
            data, label = data.to(device), label.to(device)

            optimizer.zero_grad()

            # forward
            with torch.cuda.amp.autocast(enabled=use_cuda):
                output = net(data)
                loss = loss_function(output, label)

            # backward
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            train_steps += 1

        ## END OF EPOCH ##

        # average training loss for 1 epoch
        train_loss /= train_steps

        # test model on validation dataset
        val_loss, val_acc = test_model(net, val_dataloader, loss_function)

        scheduler.step(val_acc)

        logger["train_loss"][epoch] = train_loss
        logger["val_loss"][epoch] = val_loss
        logger["acc"][epoch] = val_acc

        if config["log_training"] and (epoch + 1) % config["log_interval"] == 0:
            print(
                f"Epoch:{epoch+1}/{config['num_epochs']} \
                Train Loss: {logger['train_loss'][epoch]:.6f} \
                Val Loss: {logger['val_loss'][epoch]:.6f} \
                Val Acc: {logger['acc'][epoch]:.6f}"
            )

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            if config["save_model"]:
                # make sure folder is created to place saved checkpoints;
                # exist_ok avoids a race between the check and the creation
                save_dir = Path.cwd() / "models" / model_name
                save_dir.mkdir(parents=True, exist_ok=True)

                # pad with appropriate number of zeros i.e. epoch 10 named as 010
                checkpoint_num = str(epoch + 1).zfill(len(str(num_epochs)))
                model_path = save_dir / f"{model_name}_{checkpoint_num}.pt"
                torch.save(net.state_dict(), model_path)

    print(f"{config['num_epochs']} epochs took {time.time() - start_time:.2f}s")

    if config["log_training"]:
        return logger
    return None

# ShallowCNN

In [None]:
class ShallowCNN(nn.Module):
    """Three strided convolutions, one max-pool and a two-layer linear head.

    ``fc1`` consumes 1152 flattened features and ``fc2`` emits 10 scores per
    sample.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stack: every layer uses stride 2 and no padding.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=7, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=64, kernel_size=5, stride=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2)

        # Linear head.
        self.fc1 = nn.Linear(in_features=1152, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

        # Parameter-free layers reused inside forward().
        self.maxpool = nn.MaxPool2d(3, stride=3)
        self.relu = nn.ReLU()

        # Used by the training loop to name checkpoint files.
        self._name = type(self).__name__

    def forward(self, x):
        """Return the (un-normalised) class scores for the batch ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.relu(conv(x))

        pooled = self.maxpool(x)

        # collapse everything but the batch dimension
        flat = pooled.flatten(start_dim=1)

        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
net = ShallowCNN().to(device)

# Settings consumed by train_model (and by plot_log for the x-axis / file name).
model_cfg = {
    "model": net,
    "lr": 1e-3,
    "batch_size": 128,
    "log_training": True,
    "log_interval": 3,
    "save_model": False,
    "num_epochs": 100,
}

In [None]:
# Train the model described by model_cfg. train_model returns its per-epoch
# log only when config["log_training"] is true, which it is in model_cfg.
log = train_model(model_cfg)

In [None]:
# Score the trained network on the held-out test split; the cell's output is
# the (average loss, accuracy) pair returned by test_model.
test_data = STLData(mode="test", transform=torchvision.transforms.ToTensor())
test_dataloader = DataLoader(
    test_data,
    shuffle=False,
    batch_size=model_cfg["batch_size"],
    pin_memory=True,
    num_workers=0,
)
loss_function = nn.CrossEntropyLoss()
test_model(net, test_dataloader, loss_function)

In [None]:
# Plot the train loss, validation loss and validation accuracy stored in `log`.
plot_log(log, model_cfg)


# DeepCNN

In [None]:
class DeepCNN(nn.Module):
    """Stack of convolutional blocks, global average pooling and a linear head.

    Each block halves the spatial resolution once (its first convolution has
    stride 2) and maps ``conv_blk_dims[i]`` channels to ``conv_blk_dims[i + 1]``.
    The final feature map is averaged over its spatial dimensions, so the
    linear layer always sees ``conv_blk_dims[-1]`` features regardless of the
    input resolution.

    Args:
        conv_blk_dims: Channel counts; the first entry is the number of input
            channels, every following entry the output width of one block.
        num_classes: Number of scores produced per sample.

    Raises:
        ValueError: If ``conv_blk_dims`` has fewer than two entries.
    """

    def __init__(self, conv_blk_dims=(3, 32, 64, 128, 192), num_classes=10):
        super().__init__()

        self.conv_blk_dims = tuple(conv_blk_dims)
        if len(self.conv_blk_dims) < 2:
            raise ValueError("conv_blk_dims needs an input and at least one output width")

        self.blocks = self._build_blocks()
        # Global average pooling: reduce every channel to a single value.
        self.g_avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(self.conv_blk_dims[-1], num_classes)

        # Used by the training loop to name checkpoint files.
        self._name = self.__class__.__name__

    def _build_blocks(self):
        """Chain one conv block per consecutive pair in ``self.conv_blk_dims``."""
        channel_pairs = zip(self.conv_blk_dims[:-1], self.conv_blk_dims[1:])
        return nn.Sequential(
            *(self._create_conv_block(c_in, c_out) for c_in, c_out in channel_pairs)
        )

    def _create_conv_block(self, in_channels, out_channels):
        """Create conv_block based on in/out channels"""
        conv_block = nn.Sequential(
            # strided 3x3 convolution: changes the width and halves the resolution
            nn.Conv2d(in_channels, out_channels, (3, 3), stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, (1, 1), stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, (3, 3), stride=1, padding=1),
            nn.ReLU(),
        )
        return conv_block

    def forward(self, x):
        """Return the (un-normalised) class scores for the batch ``x``."""
        x = self.blocks(x)
        x = self.g_avgpool(x)
        # drop the two singleton spatial dimensions left by the pooling
        x = torch.flatten(x, 1)
        x = self.fc1(x)

        return x

In [None]:
# NOTE: this rebinds `net`, which earlier in the notebook referred to the
# ShallowCNN instance stored in model_cfg["model"].
net = DeepCNN()

In [None]:
# Show the layer structure of the network.
print(net)