In [None]:
# Notebook-wide IPython setup (magics, not regular Python statements).
# Load the lab_black extension.
%load_ext lab_black
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Turn on the IPython completer's "greedy" mode.
%config IPCompleter.greedy=True

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import torchvision

from torchsummary import summary

from ray import tune
from ray.tune.schedulers import ASHAScheduler

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import time
from pathlib import Path

In [None]:
# Pick the compute device: GPU when CUDA is usable, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Four DataLoader workers per visible GPU (evaluates to 0 on a CPU-only machine).
n_workers = torch.cuda.device_count() * 4

# Define Q2Data Class for custom Dataset

The following class reads the data for Q2 and creates a torch `Dataset` object for it. With this, you can easily
use a `DataLoader` to train your model.

Make sure that the file "hw2_Q2_and_Q3_data.npz" is located properly (in this example, it should be in a `data` folder next to this notebook).

 



In [None]:
class Q2Data(Dataset):
    """Torch ``Dataset`` wrapping one split of the homework ``.npz`` archive.

    The archive stores six positional arrays: ``arr_0``/``arr_1`` (train
    images/labels), ``arr_2``/``arr_3`` (validation) and ``arr_4``/``arr_5``
    (test). Image arrays are transposed on load so that the first axis indexes
    samples, then converted to ``float32`` and divided by 255.

    Args:
        mode: String containing ``"train"``, ``"val"`` or ``"test"``. The
            keywords are tested in that order with substring matching.
        ray_tune: When True the archive is looked up two directories above the
            current working directory (Ray Tune runs trials in nested folders).
        data_path: Optional explicit path to the ``.npz`` file. When given it
            overrides the location derived from ``ray_tune``.

    Raises:
        ValueError: If ``mode`` names none of the known splits.
    """

    # (keyword, image key, label key) — order defines matching priority.
    _SPLITS = (
        ("train", "arr_0", "arr_1"),
        ("val", "arr_2", "arr_3"),
        ("test", "arr_4", "arr_5"),
    )

    def __init__(self, mode="", ray_tune=False, data_path=None):
        for keyword, image_key, label_key in self._SPLITS:
            if keyword in mode:
                break
        else:
            # Fail early instead of leaving the instance half-initialised.
            raise ValueError(
                f"mode must contain 'train', 'val' or 'test', got {mode!r}"
            )

        if data_path is None:
            # Ray Tune requires an absolute path; go back 2 folders since ray
            # goes 2 deeper. Only touch parents[1] when it is actually needed.
            actual_cwd = str(Path.cwd().parents[1]) if ray_tune else "."
            data_path = f"{actual_cwd}/data/hw2_Q2_and_Q3_data.npz"

        # NpzFile keeps a file handle open; the context manager releases it.
        with np.load(data_path) as data:
            images = data[image_key].T
            self.labels = data[label_key]

        self.images = np.float32(images) / 255.0

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair(s) at ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.images[idx, :]
        labels = self.labels[idx]
        return sample, labels

## Example on how to load data

In [None]:
# Build one DataLoader per split as a usage example.
b_size = 100
n_workers = 4 * torch.cuda.device_count()

# Only the training split is shuffled; SGD benefits from a fresh order each epoch.
train_data = Q2Data("train")
train_loader = DataLoader(
    train_data, batch_size=b_size, num_workers=n_workers, shuffle=True
)

# Evaluation splits keep the DataLoader default (shuffle=False): ordering does
# not matter for metrics and a fixed order keeps runs reproducible.
val_data = Q2Data("val")
val_loader = DataLoader(
    val_data, batch_size=b_size, num_workers=n_workers, shuffle=False
)
test_data = Q2Data("test")
test_loader = DataLoader(
    test_data, batch_size=b_size, num_workers=n_workers, shuffle=False
)

# Exploring our data

In [None]:
# Draw one random batch of eight training images and show them in a 2x4 grid.
tmp_loader = DataLoader(Q2Data("train"), batch_size=8, num_workers=4, shuffle=True)
image_batch, labels = next(iter(tmp_loader))

fig, ax_arr = plt.subplots(2, 4)
# ax_arr.flat walks the grid row by row, matching the batch order.
for panel, image in zip(ax_arr.flat, image_batch):
    # each sample is a flat vector; reshape it back to a 28x28 picture
    panel.imshow(image.numpy().reshape([28, 28]), cmap="gray")
    panel.axis("off")

fig.set_size_inches(20, 10)  # (width, height)
plt.subplots_adjust(wspace=0.01, hspace=0.01)
plt.show()

# ShallowMLP

## Defining the network

In [None]:
class ShallowMLP(nn.Module):
    """Single-hidden-layer perceptron: 784 -> 32 -> 10 with a ReLU in between."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=10)

        # class name, reused when building checkpoint paths
        self._name = type(self).__name__

    def forward(self, x):
        """Map a batch of flat inputs to 10 unnormalised class scores."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

## Defining the training function

In [None]:
def test_model(net, dataloader_obj, loss_fn):
    """Function to easily test model on specified dataset"""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    with torch.no_grad():
        batch_loss = 0.0
        batch_steps = 0
        correct_pred = 0
        total_pred = 0
        for batch_id, (data, label) in enumerate(dataloader_obj):
            data = data.to(device)
            label = label.to(device)

            output = net(data)
            batch_loss += loss_fn(output, label).item()
            batch_steps += 1

            # indices where probability is maximum
            _, val_pred = torch.max(output, 1)

            correct_pred += (val_pred == label).sum().item()
            total_pred += label.shape[0]

        acc = correct_pred / total_pred
        avg_loss = batch_loss / batch_steps  # average loss across batch

    return avg_loss, acc

In [None]:
def train_model(config):
    """Train a network with SGD, optionally reporting to Ray Tune.

    Args:
        config: Dict with the keys
            ``lr``, ``momentum``: SGD hyper-parameters.
            ``lr_variable``: when True, a ``MultiStepLR`` scheduler multiplies
                the learning rate by 0.1 after epoch 60.
            ``batch_size``, ``num_epochs``: training loop settings.
            ``log_training``, ``log_interval``: print progress every
                ``log_interval`` epochs and return the metric log.
            ``save_model``, ``save_interval``: write a state-dict checkpoint
                every ``save_interval`` epochs under ``models/<net name>/``.
            ``ray_tune_enabled``: report metrics through ``tune.report`` and
                load data relative to the Ray trial directory.
            ``model`` (optional): zero-argument callable returning the
                ``nn.Module`` to train. Defaults to ``DeepMLP``.

    Returns:
        Dict of per-epoch ``train_loss``, ``val_loss`` and ``acc`` arrays when
        ``log_training`` is truthy, otherwise ``None``.
    """
    num_epochs = config["num_epochs"]
    logger = {
        "train_loss": np.zeros(num_epochs),
        "val_loss": np.zeros(num_epochs),
        "acc": np.zeros(num_epochs),
    }

    #### LOAD DATA ####
    ray_tune = config["ray_tune_enabled"]
    b_size = config["batch_size"]
    n_workers = 4 * torch.cuda.device_count()

    train_data = Q2Data("train", ray_tune)
    train_dataloader = DataLoader(
        train_data,
        batch_size=b_size,
        num_workers=n_workers,
        shuffle=True,
        pin_memory=False,
    )

    val_data = Q2Data("val", ray_tune)
    # validation metrics do not depend on sample order, so no shuffling
    val_dataloader = DataLoader(
        val_data,
        batch_size=b_size,
        num_workers=n_workers,
        shuffle=False,
        pin_memory=False,
    )

    #### INSTANTIATE MODEL ####
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Let the caller choose the architecture instead of editing this function;
    # DeepMLP is only looked up when no "model" entry is supplied.
    model_factory = config["model"] if "model" in config else DeepMLP
    net = model_factory().to(device)
    net_name = getattr(net, "_name", net.__class__.__name__)

    loss_function = nn.CrossEntropyLoss()

    optimizer = optim.SGD(
        net.parameters(), lr=config["lr"], momentum=config["momentum"]
    )
    scheduler = None
    if config["lr_variable"]:
        # what approximate epoch does convergence occur?
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[60], gamma=0.1)

    # Checkpoint location is fixed for the whole run: create it once, and only
    # when checkpoints are actually going to be written.
    lr_str = "VarLR" if config["lr_variable"] else "FixedLR"
    checkpoint_dir = Path.cwd() / "models" / net_name
    if config["save_model"]:
        checkpoint_dir.mkdir(parents=True, exist_ok=True)

    #### BEGIN TRAINING ####
    start_time = time.time()
    for j in range(num_epochs):
        # make sure the net is in training mode after any evaluation pass
        net.train()

        ## START OF BATCH ##
        train_loss = 0.0
        train_steps = 0
        for data, label in train_dataloader:
            data = data.to(device)
            label = label.to(device)

            output = net(data)

            loss = loss_function(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_steps += 1

        ## END OF BATCH ##
        train_loss /= train_steps

        # test model on validation dataset
        val_loss, val_acc = test_model(net, val_dataloader, loss_function)

        # send current training result back to Tune
        if config["ray_tune_enabled"]:
            tune.report(loss=(train_loss), accuracy=(val_acc))

        logger["train_loss"][j] = train_loss
        logger["val_loss"][j] = val_loss
        logger["acc"][j] = val_acc

        if config["log_training"] and (j + 1) % config["log_interval"] == 0:
            # adjacent f-strings keep the message on one tidy line; a
            # backslash continuation would embed the source indentation
            print(
                f"Epoch:{j+1}/{num_epochs} "
                f"Train Loss: {train_loss:.6f} "
                f"Val Loss: {val_loss:.6f} "
                f"Acc: {val_acc:.6f}"
            )

        if config["save_model"] and (j + 1) % config["save_interval"] == 0:
            # zero-pad the epoch number so checkpoints sort lexicographically
            checkpoint_num = str(j + 1).zfill(len(str(num_epochs)))
            model_path = checkpoint_dir / f"{net_name}_{lr_str}_{checkpoint_num}.pt"
            torch.save(net.state_dict(), model_path)

        # this is used only to vary learning rate during training
        if scheduler is not None:
            scheduler.step()

    print(f"{num_epochs} epochs took {time.time() - start_time:.2f}s")

    if config["log_training"]:
        return logger

## FixedLR

In [None]:
# create folder for model storage; parents=True also creates "models" itself
# and exist_ok=True makes the cell safe to re-run
Path("models/ShallowMLP").mkdir(parents=True, exist_ok=True)

## Searching for ideal parameters

In [None]:
# Hyper-parameter search with Ray Tune. The search is slow, so it is opt-in:
# flip the flag to True to run it. A flag (rather than a failing assert) keeps
# "Restart & Run All" working for the rest of the notebook.
RUN_TUNE_SEARCH = False

if RUN_TUNE_SEARCH:
    max_epochs = 1000
    search_space = {
        "lr": tune.loguniform(1e-5, 1e-1),
        "lr_variable": False,
        # train_model reads config["momentum"]; keep it fixed during the search
        "momentum": 0.9,
        "batch_size": tune.choice([4, 8, 16, 32, 64]),
        "log_training": False,
        "log_interval": 10,
        "save_model": False,
        "save_interval": 10,
        "num_epochs": max_epochs,
        "ray_tune_enabled": True,
    }
    # enable early stopping; max_t matches the epoch budget of each trial
    asha_scheduler = ASHAScheduler(max_t=max_epochs, grace_period=50)
    # number of samples to run
    n_samples = 20
    # run training with Tune
    analysis = tune.run(
        train_model,
        num_samples=n_samples,
        config=search_space,
        resources_per_trial={"gpu": 1},
        scheduler=asha_scheduler,
        metric="loss",
        mode="min",
        local_dir="./",
    )

### Final model config for our saved model

In [None]:
# Fixed-learning-rate run: 200 epochs of SGD, progress printed every 10 epochs,
# no checkpoints written.
model_config = dict(
    lr=1e-3,
    lr_variable=False,
    momentum=0.9,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=False,
    save_interval=10,
    num_epochs=200,
    ray_tune_enabled=False,
)
log = train_model(model_config)

### Plot train/val loss, and val accuracy

In [None]:
# Plot the per-epoch training loss, validation loss and validation accuracy
# recorded by train_model.
fig, ax = plt.subplots()
epochs = model_config.get("num_epochs")

x_axis = np.linspace(1, epochs, epochs)
ax.plot(x_axis, log.get("train_loss"), label="Train Loss")
ax.plot(x_axis, log.get("val_loss"), label="Validation Loss")
ax.plot(x_axis, log.get("acc"), label="Validation Accuracy")
ax.set_title("ShallowMLP, fixed learning rate")
# accuracy shares the axis with the two loss curves, so label it accordingly
ax.set_ylabel("Loss / Accuracy")
ax.set_xlabel("Epochs")
fig.set_figheight(10)
fig.set_figwidth(16)
ax.legend(loc="best", prop={"size": 20})
plt.show()

### Evaluate model on the test dataset

In [None]:
# Evaluate a saved ShallowMLP (fixed learning rate) checkpoint on the test split.
device = "cuda" if torch.cuda.is_available() else "cpu"
n_workers = 4 * torch.cuda.device_count()

model_config = {
    "lr": 1e-3,
    "lr_variable": False,
    "momentum": 0.9,
    "batch_size": 128,
    "log_training": True,
    "log_interval": 10,
    "save_model": False,
    "save_interval": 10,
    "num_epochs": 200,
    "ray_tune_enabled": False,
}

model = ShallowMLP().to(device)
model_path = "models/ShallowMLP/ShallowMLP_FixedLR_060.pt"
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

test_data = Q2Data("test", ray_tune=False)
# accuracy does not depend on sample order, so the test set is not shuffled
test_dataloader = DataLoader(
    test_data,
    batch_size=model_config.get("batch_size"),
    num_workers=n_workers,
    shuffle=False,
    pin_memory=False,
)

# test_model needs a loss function, but only the accuracy is used here
loss_func = nn.CrossEntropyLoss()
_, acc = test_model(model, test_dataloader, loss_func)
print(f"Accuracy on test dataset: {acc}")

## Multistep Learning Rate

In [None]:
# Multistep-learning-rate run: same settings as the fixed-LR run, but with the
# LR scheduler enabled and a checkpoint written every 10 epochs.
model_config = dict(
    lr=1e-3,
    lr_variable=True,
    momentum=0.9,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=True,
    save_interval=10,
    num_epochs=200,
    ray_tune_enabled=False,
)
log = train_model(model_config)

### Plot train/val loss, and val accuracy

In [None]:
# Plot the per-epoch training loss, validation loss and validation accuracy
# recorded by train_model.
fig, ax = plt.subplots()
epochs = model_config.get("num_epochs")

x_axis = np.linspace(1, epochs, epochs)
ax.plot(x_axis, log.get("train_loss"), label="Train Loss")
ax.plot(x_axis, log.get("val_loss"), label="Validation Loss")
ax.plot(x_axis, log.get("acc"), label="Validation Accuracy")
ax.set_title("ShallowMLP, multistep learning rate")
# accuracy shares the axis with the two loss curves, so label it accordingly
ax.set_ylabel("Loss / Accuracy")
ax.set_xlabel("Epochs")
fig.set_figheight(10)
fig.set_figwidth(16)
ax.legend(loc="best", prop={"size": 20})
plt.show()

### Evaluate model on the test dataset

In [None]:
# Evaluate a saved ShallowMLP (multistep learning rate) checkpoint on the test split.
device = "cuda" if torch.cuda.is_available() else "cpu"
n_workers = 4 * torch.cuda.device_count()

model = ShallowMLP().to(device)
model_path = "models/ShallowMLP/ShallowMLP_VarLR_060.pt"
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

test_data = Q2Data("test", ray_tune=False)
# accuracy does not depend on sample order, so the test set is not shuffled
test_dataloader = DataLoader(
    test_data,
    batch_size=model_config.get("batch_size"),
    num_workers=n_workers,
    shuffle=False,
    pin_memory=False,
)

# test_model needs a loss function, but only the accuracy is used here
loss_func = nn.CrossEntropyLoss()
_, acc = test_model(model, test_dataloader, loss_func)
# same message as the other evaluation cells instead of a bare number
print(f"Accuracy on test dataset: {acc}")

### Evaluating accuracy of all models

In [None]:
# Report test accuracy for every checkpoint written during a training run.
n_epochs = model_config["num_epochs"]
save_interval = model_config["save_interval"]
n_models = n_epochs // save_interval

# NOTE(review): DeepWideMLP is defined in a later cell, so this cell fails on a
# fresh kernel run from top to bottom — confirm which architecture is intended
# here (an earlier version of the path below pointed at ShallowMLP).
model = DeepWideMLP().to(device)
# created just for test_model()
loss_func = nn.CrossEntropyLoss()
test_data = Q2Data("test", ray_tune=False)
# accuracy does not depend on sample order, so the test set is not shuffled
test_dataloader = DataLoader(
    test_data,
    batch_size=model_config.get("batch_size"),
    num_workers=n_workers,
    shuffle=False,
    pin_memory=False,
)

for k in range(n_models):
    # checkpoints are numbered by epoch, one every save_interval epochs
    model_num = str((k + 1) * save_interval).zfill(len(str(n_epochs)))
    # model_path = f"models/ShallowMLP/ShallowMLP_FixedLR_{model_num}.pt"
    model_path = f"models/{model._name}/{model._name}_FixedLR_{model_num}.pt"
    # map_location lets a checkpoint saved on a GPU machine load on a CPU-only one
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    _, acc = test_model(model, test_dataloader, loss_func)

    print(f"{model_path}, Acc {acc:.6f}")

# DeepMLP

In [None]:
# create folder for model storage; parents=True also creates "models" itself
# and exist_ok=True makes the cell safe to re-run
Path("models/DeepMLP").mkdir(parents=True, exist_ok=True)

In [None]:
class DeepMLP(nn.Module):
    """Three-hidden-layer perceptron: 784 -> 32 -> 64 -> 32 -> 10 with ReLUs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=32)
        self.fc4 = nn.Linear(in_features=32, out_features=10)

        # class name, reused when building checkpoint paths
        self._name = type(self).__name__

    def forward(self, x):
        """Map a batch of flat inputs to 10 unnormalised class scores."""
        # every hidden layer is followed by a ReLU; the output layer is not
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        return self.fc4(x)

## Train the model

In [None]:
# Fixed-learning-rate run with a checkpoint written every 10 epochs.
model_config = dict(
    lr=1e-3,
    lr_variable=False,
    momentum=0.9,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=True,
    save_interval=10,
    num_epochs=200,
    ray_tune_enabled=False,
)
log = train_model(model_config)

## Plot train/val loss, and val accuracy

In [None]:
# Plot the per-epoch training loss, validation loss and validation accuracy
# recorded by train_model.
fig, ax = plt.subplots()
epochs = model_config.get("num_epochs")

x_axis = np.linspace(1, epochs, epochs)
ax.plot(x_axis, log.get("train_loss"), label="Train Loss")
ax.plot(x_axis, log.get("val_loss"), label="Validation Loss")
ax.plot(x_axis, log.get("acc"), label="Validation Accuracy")
ax.set_title("DeepMLP")
# accuracy shares the axis with the two loss curves, so label it accordingly
ax.set_ylabel("Loss / Accuracy")
ax.set_xlabel("Epochs")
fig.set_figheight(10)
fig.set_figwidth(16)
ax.legend(loc="best", prop={"size": 20})
plt.show()

## Eval

In [None]:
# Evaluate a saved DeepMLP checkpoint on the test split.
model_config = {
    "lr": 1e-3,
    "lr_variable": False,
    "momentum": 0.9,
    "batch_size": 128,
    "log_training": True,
    "log_interval": 10,
    "save_model": False,
    "save_interval": 10,
    "num_epochs": 200,
    "ray_tune_enabled": False,
}

model = DeepMLP().to(device)
model_path = "models/DeepMLP/DeepMLP_FixedLR_080.pt"
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

test_data = Q2Data("test", ray_tune=False)
# accuracy does not depend on sample order, so the test set is not shuffled
test_dataloader = DataLoader(
    test_data,
    batch_size=model_config.get("batch_size"),
    num_workers=n_workers,
    shuffle=False,
    pin_memory=False,
)

# test_model needs a loss function, but only the accuracy is used here
loss_func = nn.CrossEntropyLoss()
_, acc = test_model(model, test_dataloader, loss_func)
print(f"Accuracy on test dataset: {acc}")

# DeepWideMLP

In [None]:
class DeepWideMLP(nn.Module):
    """Three-hidden-layer perceptron: 784 -> 128 -> 64 -> 128 -> 10 with ReLUs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=10)

        # class name, reused when building checkpoint paths
        self._name = type(self).__name__

    def forward(self, x):
        """Map a batch of flat inputs to 10 unnormalised class scores."""
        # every hidden layer is followed by a ReLU; the output layer is not
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        return self.fc4(x)

## Train the model

In [None]:
# Fixed-learning-rate run: 200 epochs of SGD, progress printed every 10 epochs,
# no checkpoints written.
model_config = dict(
    lr=1e-3,
    lr_variable=False,
    momentum=0.9,
    batch_size=128,
    log_training=True,
    log_interval=10,
    save_model=False,
    save_interval=10,
    num_epochs=200,
    ray_tune_enabled=False,
)
log = train_model(model_config)

## Plot train/val loss, and val accuracy

In [None]:
# Plot the per-epoch training loss, validation loss and validation accuracy
# recorded by train_model.
fig, ax = plt.subplots()
epochs = model_config.get("num_epochs")

x_axis = np.linspace(1, epochs, epochs)
ax.plot(x_axis, log.get("train_loss"), label="Train Loss")
ax.plot(x_axis, log.get("val_loss"), label="Validation Loss")
ax.plot(x_axis, log.get("acc"), label="Validation Accuracy")
ax.set_title("DeepWideMLP")
# accuracy shares the axis with the two loss curves, so label it accordingly
ax.set_ylabel("Loss / Accuracy")
ax.set_xlabel("Epochs")
fig.set_figheight(10)
fig.set_figwidth(16)
ax.legend(loc="best", prop={"size": 20})
plt.show()

## Eval

In [None]:
# Evaluate a saved DeepWideMLP checkpoint on the test split.
model_config = {
    "lr": 1e-3,
    "lr_variable": False,
    "momentum": 0.9,
    "batch_size": 128,
    "log_training": True,
    "log_interval": 10,
    "save_model": False,
    "save_interval": 10,
    "num_epochs": 200,
    "ray_tune_enabled": False,
}

model = DeepWideMLP().to(device)
model_path = "models/DeepWideMLP/DeepWideMLP_FixedLR_080.pt"
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

test_data = Q2Data("test", ray_tune=False)
# accuracy does not depend on sample order, so the test set is not shuffled
test_dataloader = DataLoader(
    test_data,
    batch_size=model_config.get("batch_size"),
    num_workers=n_workers,
    shuffle=False,
    pin_memory=False,
)

# test_model needs a loss function, but only the accuracy is used here
loss_func = nn.CrossEntropyLoss()
_, acc = test_model(model, test_dataloader, loss_func)
print(f"Accuracy on test dataset: {acc}")

In [None]:
# Pull a single batch from the test loader so its contents can be inspected below.
sample = next(iter(test_dataloader))

In [None]:
# Shape of the first element of the batch (the image data).
sample[0].shape

In [None]:
# Shape of the second element of the batch (the labels).
sample[1].shape

In [None]:
# Print a torchsummary overview of a freshly initialised DeepWideMLP,
# using an input size of (batch_size, 784).
batch_size = 1
summary(DeepWideMLP().to(device), (batch_size, 784))

In [None]:
# Same overview for a freshly initialised DeepMLP; batch_size must already be
# defined in the kernel (it is assigned in the DeepWideMLP summary cell).
summary(DeepMLP().to(device), (batch_size, 784))