In [None]:
%load_ext lab_black
%matplotlib inline
%config IPCompleter.greedy=True

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import torchvision

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import time
from pathlib import Path

In [None]:
# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Four DataLoader worker processes per visible GPU (0 on a CPU-only machine,
# which makes the DataLoader load batches in the main process).
n_workers = torch.cuda.device_count() * 4

## Q3 dataloader

The following class reads the data for Q3 and wraps it in a torch `Dataset` object. With this, you can easily
use a `DataLoader` to train your model.

Make sure that the data file is located where the class expects it: as written below, it loads `./data/hw2_Q4_data.npz`, i.e. the file must sit in a `data` folder next to this notebook.

 



In [None]:
class Q3Data(Dataset):
    """Torch ``Dataset`` over the image/label arrays stored in an ``.npz`` archive.

    The archive is read by position: ``arr_0``/``arr_1`` hold the training
    images/labels and ``arr_2``/``arr_3`` the test images/labels. Images are
    transposed so that a sample is selected along axis 0, converted to
    ``float32`` and divided by 255.
    NOTE(review): presumably the stored images are (features, samples) with
    pixel values in 0..255 -- confirm against the data file.

    Parameters
    ----------
    trn_tst : int
        ``0`` selects the training split; any other value selects the test split.
    data_path : str or path-like
        Location of the ``.npz`` archive. Defaults to the path this notebook
        has always used.
    """

    def __init__(self, trn_tst=0, data_path="./data/hw2_Q4_data.npz"):
        # np.load returns an NpzFile that keeps the archive open; the context
        # manager releases the file handle once the arrays have been read.
        with np.load(data_path) as data:
            if trn_tst == 0:
                # trainloader
                images, labels = data["arr_0"], data["arr_1"]
            else:
                # testloader
                images, labels = data["arr_2"], data["arr_3"]

        self.labels = labels
        self.images = np.float32(images.T) / 255.0

    def __len__(self):
        """Number of samples, taken from the label array."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` for ``idx`` (int, list, or tensor index)."""
        if torch.is_tensor(idx):
            # numpy cannot be indexed with a torch tensor directly
            idx = idx.tolist()

        sample = self.images[idx, :]
        labels = self.labels[idx]
        return sample, labels

Here is an example of how you can create dataloaders for the Q3 training and test data:

In [None]:
# Training split: served in mini-batches of 100 and reshuffled on every pass.
trainset = Q3Data(trn_tst=0)
trainloader = DataLoader(
    dataset=trainset,
    batch_size=100,
    shuffle=True,
)

# Test split: same batch size, but kept in its stored order.
testset = Q3Data(trn_tst=1)
testloader = DataLoader(
    dataset=testset,
    batch_size=100,
    shuffle=False,
)

In [None]:
def plot_log(log, model_config, select=False, save=False):
    """Plot the per-epoch training loss and test accuracy stored in ``log``.

    Parameters
    ----------
    log : dict
        Maps metric names to per-epoch sequences. ``"train_loss"`` and
        ``"acc"`` are the two curves that get plotted.
    model_config : dict
        ``"num_epochs"`` is always read; ``"save_interval"`` is read when
        ``select`` is true; ``"lr"`` is read when ``save`` is true.
    select : bool
        When true, look only at the epochs that fall on a save interval, find
        the minimum of every metric whose name contains ``"loss"`` and the
        maximum of every metric whose name contains ``"acc"``, mark them on
        the plot and return them.
    save : bool
        When true, write the figure to ``./LR_<lr>_<num_epochs>.jpg``.

    Returns
    -------
    dict or None
        With ``select=True``: metric name -> 0-based epoch index of the
        selected epoch (epoch ``idx + 1`` in 1-based counting). Otherwise
        ``None``.
    """
    fig, ax = plt.subplots()
    selected = {}
    if select:
        interval = model_config["save_interval"]
        for key in log:
            if "loss" in key:
                pick, label = np.argmin, "Min "
            elif "acc" in key:
                pick, label = np.argmax, "Max "
            else:
                # not a metric we know how to rank
                continue

            # Epochs interval, 2*interval, ... live at indices interval-1,
            # 2*interval-1, ...; only those are candidates for selection.
            candidates = np.asarray(log[key])[interval - 1 :: interval]
            if candidates.size == 0:
                # fewer epochs than one save interval: nothing to select
                continue

            # map the position among the candidates back to an epoch index
            actual_idx = (int(pick(candidates)) + 1) * interval - 1
            selected[key] = actual_idx

            print(key, actual_idx)
            print(actual_idx, log.get(key)[actual_idx])

            # The curves below use a 1-based epoch axis, so the marker for
            # 0-based index i belongs at x = i + 1.
            ax.plot(
                actual_idx + 1,
                log.get(key)[actual_idx],
                label=label + key,
                markersize=16,
                marker="X",
            )

    epochs = model_config.get("num_epochs")
    x_axis = np.linspace(1, epochs, epochs)

    ax.plot(x_axis, log.get("train_loss"), label="Train Loss")
    ax.plot(x_axis, log.get("acc"), label="Test Accuracy")
    # both metrics share this axis
    ax.set_ylabel("Loss / Accuracy")
    ax.set_xlabel("Epochs")

    fig.set_figheight(10)
    fig.set_figwidth(16)

    ax.legend(loc="best", prop={"size": 12})
    if save:
        fig.savefig(f"./LR_{model_config['lr']}_{model_config['num_epochs']}.jpg")
    plt.show()
    if select:
        return selected

In [None]:
def test_model(net, data_generator, loss_fn):
    """Function to easily test model on specified dataset"""

    device = "cuda" if torch.cuda.is_available() else "cpu"

    with torch.no_grad():
        batch_loss, batch_steps = 0.0, 0
        correct_pred, total_pred = 0, 0

        for batch_id, (data, label) in enumerate(data_generator):
            data, label = data.to(device), label.to(device)

            output = net(data)
            batch_loss += loss_fn(output, label).item()
            batch_steps += 1

            # indices where probability is maximum
            _, val_pred = torch.max(output, 1)

            correct_pred += (val_pred == label).sum().item()
            total_pred += label.shape[0]

        # average loss/acc across ALL batches
        # i.e. ACROSS specified dataset
        avg_loss = batch_loss / batch_steps
        avg_acc = correct_pred / total_pred

    return avg_loss, avg_acc

In [None]:
def train_model(config):
    """Train ``config["model"]`` on the training split, evaluating on the test split after every epoch.

    Parameters
    ----------
    config : dict
        Required keys:

        - ``"model"``: the ``nn.Module`` to train; must expose a ``_name``
          attribute, which names the checkpoint folder and files.
        - ``"num_epochs"``, ``"batch_size"``: loop length and loader batch size.
        - ``"lr"``, ``"weight_decay"``: passed to ``optim.Adam``.
        - ``"lr_variable"``: when truthy, the learning rate is multiplied by
          0.1 at the milestone epochs.
        - ``"log_training"``, ``"log_interval"``: print progress every
          ``log_interval`` epochs and return the log.
        - ``"save_model"``, ``"save_interval"``: save the ``state_dict`` every
          ``save_interval`` epochs under ``./models/<_name>/``.

        Optional key:

        - ``"lr_milestones"``: list of epochs for the LR decay (default ``[80]``).

    Returns
    -------
    dict or None
        ``{"train_loss": ndarray, "acc": ndarray}`` with one entry per epoch
        when ``config["log_training"]`` is truthy, otherwise ``None``.
    """

    device = "cuda" if torch.cuda.is_available() else "cpu"
    n_workers = 4 * torch.cuda.device_count()

    num_epochs = config["num_epochs"]
    logger = {
        "train_loss": np.zeros(num_epochs),
        "acc": np.zeros(num_epochs),
    }

    #### LOAD DATA ####
    b_size = config["batch_size"]

    train_data = Q3Data(0)
    train_dataloader = DataLoader(
        train_data,
        batch_size=b_size,
        num_workers=n_workers,
        shuffle=True,
        pin_memory=False,
    )

    test_data = Q3Data(1)
    test_dataloader = DataLoader(
        test_data,
        batch_size=b_size,
        num_workers=n_workers,
        shuffle=False,
        pin_memory=False,
    )

    #### INSTANTIATE MODEL ####
    net = config["model"].to(device)

    loss_function = nn.CrossEntropyLoss()

    optimizer = optim.Adam(
        net.parameters(),
        lr=config["lr"],
        weight_decay=config["weight_decay"],
    )

    scheduler = None
    if config["lr_variable"]:
        # default milestone: the approximate epoch (80) at which training converged
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, milestones=config.get("lr_milestones", [80]), gamma=0.1
        )

    #### PREPARE CHECKPOINTING ####
    # Everything here is loop-invariant, so it is set up once instead of per epoch.
    checkpoint_dir = Path.cwd() / "models" / net._name
    if config["save_model"]:
        checkpoint_dir.mkdir(parents=True, exist_ok=True)
    lr_str = "VarLR" if config["lr_variable"] else "FixedLR"
    # pad with appropriate number of zeros i.e. epoch 10 named as 010
    checkpoint_width = len(str(num_epochs))

    #### BEGIN TRAINING ####
    start_time = time.time()
    for j in range(num_epochs):
        # make sure training-only layers are active, whatever mode the
        # model was left in by the caller or by the previous evaluation
        net.train()

        ## START OF BATCH ##
        train_loss, train_steps = 0.0, 0
        for data, label in train_dataloader:
            data, label = data.to(device), label.to(device)

            output = net(data)

            loss = loss_function(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_steps += 1

        ## END OF BATCH ##

        # average training loss for 1 epoch
        train_loss /= train_steps
        _, test_acc = test_model(net, test_dataloader, loss_function)

        # Advance the LR schedule once per epoch, after the optimizer steps;
        # without this call the scheduler never changes the learning rate.
        if scheduler is not None:
            scheduler.step()

        logger["train_loss"][j] = train_loss
        logger["acc"][j] = test_acc

        if config["log_training"] and (j + 1) % config["log_interval"] == 0:
            print(
                f"Epoch:{j+1}/{num_epochs} "
                f"Train Loss: {logger['train_loss'][j]:.6f} "
                f"Test Acc: {logger['acc'][j]:.6f}"
            )

        if config["save_model"] and (j + 1) % config["save_interval"] == 0:
            checkpoint_num = str(j + 1).zfill(checkpoint_width)
            model_path = checkpoint_dir / f"{net._name}_{lr_str}_{checkpoint_num}.pt"
            torch.save(net.state_dict(), model_path)

    print(f"{config['num_epochs']} epochs took {time.time() - start_time:.2f}s")

    if config["log_training"]:
        return logger

# Exploring our data

In [None]:
# Draw one shuffled batch of eight training samples and show them on a 2x4 grid.
tmp_loader = DataLoader(Q3Data(0), batch_size=8, num_workers=4, shuffle=True)
image_batch, labels = next(iter(tmp_loader))

fig, ax_arr = plt.subplots(2, 4)
# ax_arr.flat walks the grid row by row, so panel i sits at (i // 4, i % 4)
for i, panel in enumerate(ax_arr.flat):
    img = image_batch[i].numpy()
    # each flat sample is viewed as a 28x28 grayscale image
    panel.imshow(img.reshape([28, 28]), cmap="gray")
    # hide the y axis; the x label carries the class label instead
    panel.axes.get_yaxis().set_visible(False)
    panel.set_xlabel(labels[i].item())
fig.set_size_inches(20, 10)
plt.show()

# CustomMLP

In [None]:
class CustomMLP(nn.Module):
    """Fully connected network: 784 -> 256 -> 128 -> 64 -> 128 -> 10.

    The four hidden layers are followed by ReLU; the last layer returns raw,
    un-normalised class scores. Layers are stored as ``fc1`` .. ``fc5`` and
    ``_name`` holds the class name.
    """

    def __init__(self):
        super().__init__()
        widths = (784, 256, 128, 64, 128, 10)
        # Registers fc1..fc5 in order, one Linear per consecutive width pair.
        for position, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(n_in, n_out))

        self._name = type(self).__name__

    def forward(self, x):
        """Map a batch of 784-feature inputs to 10 class scores."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))
        return self.fc5(x)

In [None]:
# Build the network on the selected device and describe the training run:
# fixed learning rate, no weight decay, progress printed and a checkpoint
# written every 10 epochs.
net = CustomMLP().to(device)
model_config = dict(
    model=net,
    lr=2e-4,
    lr_variable=False,
    weight_decay=0,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=True,
    save_interval=10,
    num_epochs=300,
)

In [None]:
# Run the training loop described by model_config. train_model returns its
# per-epoch log ("train_loss" and "acc" arrays) only when
# config["log_training"] is truthy; otherwise it returns None.
log = train_model(model_config)

In [None]:
# Plot the logged curves. select=True additionally marks, among the epochs
# that fall on a save interval, the lowest training loss and the highest test
# accuracy, and makes plot_log return their 0-based epoch indices.
plot_log(log, model_config, select=True)

In [None]:
# Evaluate one saved checkpoint on the test split.
model = CustomMLP().to(device)
model.eval()

# Checkpoint written after epoch 40 of the fixed-learning-rate run; change the
# suffix (e.g. to 290) to evaluate a different epoch.
model_path = f"models/{model._name}/{model._name}_FixedLR_040.pt"
# map_location lets a checkpoint that was saved on a GPU be restored on a
# CPU-only machine (and the other way round).
model.load_state_dict(torch.load(model_path, map_location=device))

test_data = Q3Data(1)
test_dataloader = DataLoader(
    test_data,
    batch_size=model_config.get("batch_size"),
    num_workers=n_workers,
    # order does not affect accuracy; keep evaluation deterministic
    shuffle=False,
    pin_memory=False,
)

# test_model requires a loss function; the loss it returns is discarded here.
loss_func = nn.CrossEntropyLoss()
_, acc = test_model(model, test_dataloader, loss_func)
print(f"Accuracy on test dataset: {acc}")