# Tools Setup and Determine Hardware

### Note: `GPU_Dataset.py` provides the class used for dataset inspection in the GPU environment, and `TinyImageNetModel.py` provides the class used for training a new model

In [None]:
import os, pathlib
import torch
from torchsummary import summary
from torch import nn
from torch.utils.data import DataLoader
from torch.profiler import profile, record_function, ProfilerActivity
from torchvision import datasets
from torchvision.transforms import ToTensor

from tinyimagenet import TinyImageNet
from TinyImageNetModel import TinyImageNetModel, init_weights
from pathlib import Path
from GPU_Dataset import GPUDS

import numpy as np
import gc

import matplotlib.pyplot as plt

# Choose the compute device: prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Dataset Inspection

### Initialize a training and validation set and view data about sample images

In [None]:
# This cell imports our dataset.

# Original Source: https://github.com/ksachdeva/tiny-imagenet-tfds
# Setup our dataset
# ---------------------------------------------------------

def show_samples(dataset, split, samples_to_print=2):
    """Print details of, and display, a few evenly spaced samples of a dataset.

    Args:
        dataset: Indexable dataset whose items are ``(image, class_index)``
            pairs and which exposes an ``idx_to_words`` lookup.
        split: Name of the split, used only in the printed header.
        samples_to_print: Maximum number of samples to show.
    """
    n = len(dataset)
    print(f"TinyImageNet, split \"{split}\", has {n} samples.")
    print(f"Showing info of {samples_to_print} samples...")
    # Keep the stride at least 1 so a tiny dataset cannot produce range(step=0).
    step = max(1, n // max(1, samples_to_print))
    # Slice the range so an uneven division never yields an extra sample.
    for i in range(0, n, step)[:samples_to_print]:
        image, img_class = dataset[i]
        print(f"Sample of class {img_class:3d}\n Shape {image.shape}\n Image Data {image}\n Labels {dataset.idx_to_words[img_class]}")
        # Images come as [C, H, W]; imshow expects [H, W, C] (rows first),
        # so move the channel axis last without swapping height and width.
        image = np.transpose(image, (1, 2, 0))
        plt.imshow(image)
        plt.show()


# expanduser() resolves the leading "~"; harmless if the library also does so.
dataset_path = "~/.torchvision/tinyimagenet/"
dataset_root = Path(dataset_path).expanduser()

# Training dataset
ds_train = TinyImageNet(dataset_root, split="train")
show_samples(ds_train, "train")

# Validation dataset
ds_val = TinyImageNet(dataset_root, split="val")
show_samples(ds_val, "val")

### Export three random images to use as input feature maps for C++ framework

In [None]:
# TODO: Export each of the three inputs to a binary file which will be used to load the images into C++ later
# NOTE: First flatten the array (ex: 4D --> 1D). So 64*64*3 = 12288 element 1D array

# TODO: Print and visualize three inputs from the validation set
#     : Print the storage data type
#     : Print and note the dimensions of each image
#     : Print the memory required to store each image

# Make a directory for our image data
img_dir = os.path.abspath('img_data')
pathlib.Path(img_dir).mkdir(exist_ok=True)

# Dataloaders: one sample per batch, reshuffled on every pass
val_dataloader = DataLoader(ds_val, batch_size=1, shuffle=True)
train_dataloader = DataLoader(ds_train, batch_size=1, shuffle=True)

# Collect the first three (image, label) batches drawn from the validation loader.
i = 0
samples = []
# Batches are presumably laid out as [N, C, H, W] (batch dim + [C, H, W] images) -- TODO confirm
for image, label in val_dataloader:
    if i >= 3:
        # NOTE: a fourth batch has already been fetched at this point, so after the loop
        # `image`/`label` refer to that batch, not to any entry stored in `samples`.
        break
    else:
        samples.append((image, label))
        # Export sample images
        # TODO: Your Code Here
        
        i = i + 1

# Model Loading and Inference

### Load a Pre-Trained Model

In [None]:
# TODO: Load the model
# Now we will load the PT model! Please make sure the pt model file is present in the below directory.
# You can download this from the Canvas Page and place it in the same directory as this notebook.

# model_path = os.path.abspath("/home/<NETID>/path/to/your/lab1/tinyimagenet_model.pt") # Uncomment this to use a non-relative path
# Default: resolve the model file relative to the current working directory.
model_path = os.path.abspath("./tinyimagenet_model.pt")

# TODO: Your Code Here
#model =

# TODO: Print a summary of the model structure
# Your Code Here

### Model Inference on Selected 3 Sample Images

In [None]:
# Running inference on our model
# A single forward pass on a batch of 1. This reuses the `image` and `label`
# variables left over from the earlier validation-loader loop.
image = image.to(device)
# print(image)
pred = torch.softmax(model(image), dim=1)
# Tensor.max(dim=...) returns a (values, indices) pair
top1_guess = pred.max(dim=1)
words = ds_val.idx_to_words
actual_label = words[label.item()]
top1_label = words[top1_guess[1].item()]
print(f'Guess: {top1_label} Confidence %: {top1_guess[0].item() * 100} // Actual: {actual_label}')

In [None]:
# TODO: Run inference for our previous 3 sample images
# NOTE: Turn off gradient computation for inference mode

# TODO: Your Code Here

### Get Accuracy of Top-1, Top-5, and Top-10 classifications of Validation Set

In [None]:
# TODO: Calculate the Top-1, Top-5, and Top-10 Accuracy of the validation dataset
# Counters, all starting at zero. Judging by the names: number of samples evaluated
# and number of top-1 / top-5 / top-10 hits -- TODO confirm when implementing.
total = acc_top1 = acc_top5 = acc_top10 = 0

# TODO: Your Code Here

### Print All Dataset Classes and their Associated Labels

In [None]:
# TODO: Print all of the possible classes of the dataset

# TODO: Your Code Here

## Model Exploration

### Visualize the model layers and filters

In [None]:
# TODO: Visualize the model in Netron (https://netron.app/) and include an image here.

In [None]:
# Visualize the Layer Weights
# Get the first and last Conv2d layers
#conv2d = model.tinyimgnet_model[0]

# We can view the layer weights. Here we consider them as images of feature filters applied to intermediate feature map images.
# TODO: Visualize the 2 convolutional layers filter sets (weights) (one at the beginning and one at the end)

# TODO: Your Code Here

In [None]:
# We can view the layer outputs as well. Here we consider them as images of the spatial location of features.
# TODO: Visualize the 2 convolutional layers outputs (intermediate feature maps) (one at the beginning and one at the end)

# TODO: Your Code Here

### Export Model Params

In [None]:
# TODO: Export the filters/weights so we can use them later
# Make a directory for our model data
model_dir = os.path.abspath('model_data')
pathlib.Path(model_dir).mkdir(exist_ok=True)

# Export each layer's weights and biases
# These are the convolutional and linear (dense) layers
for layer_num, layer in enumerate(model.tinyimgnet_model):
    # Tag the layer kind; anything that is neither nn.Linear nor nn.Conv2d is skipped.
    if isinstance(layer, nn.Linear):
        layer_type = "Linear"
    elif isinstance(layer, nn.Conv2d):
        layer_type = "Conv2d"
    else:
        continue
        
    # TODO: Your Code Here

### Export Intermediate Activations

In [None]:
# TODO: Export the intermediate layer outputs for each of the input for all of the layers
img_dir = os.path.abspath('img_data')
pathlib.Path(img_dir).mkdir(exist_ok=True)

# `samples` is the list of (image, label) pairs collected from the validation
# loader earlier in the notebook; only the image is needed here.
for img_idx, (img, _label) in enumerate(samples):
    # One sub-directory per sample image for its exported activations.
    file_dir = os.path.join(img_dir, f'test_input_{img_idx}')
    pathlib.Path(file_dir).mkdir(exist_ok=True)

    # TODO: Your Code Here

# Inference Performance Profiling

### Profile One Image

In [None]:
# Grab a profiling trace for one of the sample images (CPU activity only)

activities = [ProfilerActivity.CPU]
with profile(activities=activities, record_shapes=True) as prof, record_function("end_to_end_inference"):
    # The device transfer stays inside the profiled region.
    first_image = samples[0][0].to(device)
    model(first_image)

# opening this with perfetto ui is actually pretty neat and way easier than tensorboard
prof.export_chrome_trace("image_0_trace.json")
summary_table = prof.key_averages().table(sort_by="cpu_time_total", row_limit=20)
print(summary_table)

In [None]:
# TODO: Sample Profiling - Inference for a single image:

# Perform the inference profiling:
samples_to_run = [10, 100, 1000]

val_dataloader = DataLoader(ds_val, batch_size=1, shuffle=True)

for samples_num in samples_to_run:
    # Engage profiling
    with profile(activities=activities) as prof:
        with record_function(f"e2e_online_{samples_num}"):
            # Actual inference
            # TODO: Your Code Here
            pass  # placeholder so the cell parses until the TODO is implemented

    # One trace file and one summary table per sample count.
    prof.export_chrome_trace(f"online_inf_{samples_num}_trace.json")
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))


### Online Inference

In [None]:
# TODO: Sample Profiling - Online Inference:

# Vary this from 10, 100, 1000 to simulate multiple online inference
loop_index = [10, 100, 1000]

for idx in loop_index:
    # Starts Profile logging
    with profile(activities=activities) as prof:
        # Label the region with this iteration's count (`idx`), not a variable
        # left over from another cell.
        with record_function(f"e2e_online_{idx}"):
            # Actual online inference
            # TODO: Your Code Here
            pass  # placeholder so the cell parses until the TODO is implemented

    # One trace file per count, so runs do not overwrite each other.
    prof.export_chrome_trace(f"online_inf_{idx}_trace.json")
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))

### Batch Inference

In [None]:
# TODO: Sample Profiling - Batch Inference:

# We would only perform batch inference for a subset of validation set i.e. 1000 images 
# using different batch sizes of 20, 40, 100, 200 

# Batch sizes to profile. Try: 20, 40, 100, 200
batch_sizes = [20, 40, 100, 200]

for size in batch_sizes:
    # Rebuild the validation loader with the batch size under test.
    val_dataloader = DataLoader(ds_val, batch_size=size, shuffle=True)

    # Engage profiling
    with profile(activities=activities) as prof:
        with record_function(f"e2e_batch_{size}"):
            # Actual Batch inference
            # TODO: Your Code Here
            pass  # placeholder so the cell parses until the TODO is implemented

    # One trace file and one summary table per batch size.
    prof.export_chrome_trace(f"batch_inf_{size}_trace.json")
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))


# Model Training

### Set up the Training Options

Important! Use the GPU environment for running code beyond this point.

In [None]:
# Directory that receives the trained models (created if it does not exist yet).
models_dir = os.path.abspath('trained_models')
pathlib.Path(models_dir).mkdir(exist_ok=True)

# From here on the profiler records CUDA activity.
activities = [ProfilerActivity.CUDA]

### Training Loop

In [None]:
import time

# NOTE: This part is also definitely more verbose

def train_network(dataloader, epochs, batch_size, train_type):
    """Train a freshly initialized TinyImageNetModel and save it to disk.

    Args:
        dataloader: Iterable yielding ``(data, labels)`` batches; each batch is
            passed unchanged to ``model.training_step``.
        epochs: Number of full passes over ``dataloader``.
        batch_size: Batch size the dataloader was built with. Used only in the
            log output and in the name of the saved file.
        train_type: Sub-directory of ``models_dir`` (and file-name prefix) under
            which the trained model is stored.

    Returns:
        None. The whole model object is written with ``torch.save`` to
        ``<models_dir>/<train_type>/<train_type>_train_<batch_size>_<epochs>.pt``.
    """
    type_dir = os.path.join(models_dir, train_type)
    pathlib.Path(type_dir).mkdir(parents=True, exist_ok=True)

    # create a new model and initialize it
    model = TinyImageNetModel(lr=.0003, device=device).to(device)
    model.apply(init_weights)

    # the model supplies its own optimizer (Adamax according to the original note -- TODO confirm)
    opt = model.configure_optimizers()
    summary(model, input_size=(3, 64, 64))
    print(f"BEGINNING TRAINING -- BATCH_SIZE={batch_size} FOR {epochs} EPOCHS")

    # switch the model to training mode
    model.train()

    # begin the training loop
    for epoch in range(epochs):
        start_time = time.perf_counter()
        print(f'EPOCH: {epoch}')

        # Reset the running statistics so the printed averages describe this
        # epoch only rather than everything seen since the first epoch.
        epoch_loss = 0.
        epoch_accuracy = 0.
        num_batches = 0

        for data, labels in dataloader:
            loss, accuracy = model.training_step((data, labels))
            opt.zero_grad()
            loss.backward()
            opt.step()
            # Accumulate plain Python floats: adding the loss tensor itself
            # would keep autograd history alive across iterations.
            epoch_loss += float(loss)
            epoch_accuracy += float(accuracy)
            num_batches += 1

        gc.collect()
        elapsed_time = time.perf_counter() - start_time
        # Avoid a division by zero when the dataloader yields no batches.
        denom = max(num_batches, 1)
        print(f"Elapsed time for Epoch {epoch}: {elapsed_time:.2f} seconds")
        print(f"Average loss for Epoch {epoch}: {epoch_loss / denom}\t Train accuracy: {epoch_accuracy / denom}")

    # Save the model at the end of each training run
    torch.save(model, os.path.join(type_dir, f'{train_type}_train_{batch_size}_{epochs}.pt'))


### Training with Different Batch Sizes

In [None]:
# TODO: Attempt to train your own model with different batch sizes

# Move the whole dataset to the GPU before training
gpu_dataset = GPUDS(ds_train, device)

# Number of epochs used for every batch-size run
epoch_size = 20

for batch_size in [32, 64, 128]:
    # Setup our batched datasets
    # Use timer to track training time
    # TODO: Your Code Here
    pass  # placeholder so the cell parses until the TODO is implemented

# Release cached GPU memory once all runs are done
torch.cuda.empty_cache()

### Training with Different Epochs

In [None]:
# TODO: Train your model with 3 different numbers of epochs
# Batch size shared by every epoch-count run
batch_size = 32

# Setup your datasets
# TODO: Your Code Here

for epoch_size in [3, 10, 100]:
    # Run training
    # TODO: Your Code Here
    pass  # placeholder so the cell parses until the TODO is implemented

# Release cached GPU memory once all runs are done
torch.cuda.empty_cache()

# Validate the Newly Trained Models

In [None]:
    # TODO: Get the top-1 and top-5 of your newly trained model using different BATCHES
    # TODO: Your Code Here

In [None]:
    # TODO: Get the top-1 and top-5 of your newly trained model using different EPOCHS
    # TODO: Your Code Here

# Above and Beyond

### Add sections below for optional content in section 4 of the lab manual