# CNN Part 1: Building a CNN Classifier with PyTorch 

In [None]:
import os

import torch
import torch.nn as nn
from torchvision import datasets, models, transforms

from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

# Point torch.hub's download/cache directory at $SCRATCH.
# Raises KeyError if the SCRATCH environment variable is not set.
torch.hub.set_dir(os.environ['SCRATCH'])

### Downloading dataset

Copy the DesignSafe dataset archive to your `$SCRATCH` directory, extract it there, and remove the archive.

In [None]:
# Copy the dataset archive into $SCRATCH.
! cp /work2/10000/zw427/data.tar.gz $SCRATCH
# Extract the archive inside $SCRATCH.
! tar zxf $SCRATCH/data.tar.gz -C $SCRATCH
# List the extracted Dataset_2 directory as a quick sanity check.
! ls $SCRATCH/Dataset_2
# Delete the archive now that its contents have been extracted.
! rm $SCRATCH/data.tar.gz

### Hyperparameters

This notebook will use the following hyperparameters:

In [None]:
# Hyperparameters shared by the cells below:
#   lr         - learning rate passed to the optimizer
#   batch_size - batch size passed to the dataloaders
#   epochs     - number of epochs passed to train()
hp = {"lr":1e-4, "batch_size":16, "epochs":5}

## Dataset Loaders and Transforms

Define the paths to the training and validation sets (no test set is used in this notebook).

In [None]:
# Dataset locations under $SCRATCH (raises KeyError if SCRATCH is unset).
train_path = os.path.join(os.environ['SCRATCH'], "Dataset_2/Train/")
val_path   = os.path.join(os.environ['SCRATCH'], "Dataset_2/Validation/")
# No test split is configured; downstream helpers treat None as "skip".
test_path  = None

Define a dataset loader.

In [None]:
def load_datasets(train_path, val_path, test_path):
    """Build ImageFolder datasets for the train, validation and test splits.

    Every image is resized to 244x244 and converted to a tensor.

    Args:
        train_path: Root directory of the training images.
        val_path: Root directory of the validation images.
        test_path: Root directory of the test images, or None to skip it.

    Returns:
        A tuple (train_dataset, val_dataset, test_dataset); test_dataset is
        None when test_path is None.
    """
    # NOTE(review): 244 may be a typo for ResNet's usual 224 - confirm.
    resize_then_tensor = transforms.Compose([
        transforms.Resize((244, 244)),
        transforms.ToTensor(),
    ])

    def _folder(root):
        # The same transform pipeline is applied to every split.
        return datasets.ImageFolder(root, transform=resize_then_tensor)

    train_dataset = _folder(train_path)
    val_dataset = _folder(val_path)
    if test_path is None:
        test_dataset = None
    else:
        test_dataset = _folder(test_path)

    print(f"Train set size: {len(train_dataset)}, Validation set size: {len(val_dataset)}")
    return train_dataset, val_dataset, test_dataset

In [None]:
# Load the splits; test_set is None because test_path is None.
train_set, val_set, test_set = load_datasets(train_path, val_path, test_path)

## Construct Dataloaders 

Define a dataloader constructor.

In [None]:
def construct_dataloaders(train_set, val_set, test_set, batch_size, shuffle=True):
    """Wrap the datasets in DataLoaders.

    Args:
        train_set: Training dataset.
        val_set: Validation dataset.
        test_set: Test dataset, or None when there is no test split.
        batch_size: Number of samples per batch for every loader.
        shuffle: Whether to shuffle the training data. The validation and
            test loaders are never shuffled.

    Returns:
        A tuple (train_dataloader, val_dataloader, test_dataloader);
        test_dataloader is None when test_set is None.
    """
    # instantiate the DataLoaders
    train_dataloader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=shuffle
    )
    val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)

    # Decide from the argument itself, not from a notebook-level global.
    if test_set is not None:
        test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
    else:
        test_dataloader = None

    return train_dataloader, val_dataloader, test_dataloader

In [None]:
# Build the loaders with the configured batch size; training data is shuffled.
train_dataloader, val_dataloader, test_dataloader = construct_dataloaders(train_set, val_set, test_set, hp["batch_size"], True)

## Visualizing the DesignSafe Dataset

Before moving on to building the CNN models, visualize the dataset first.

In [None]:
# Show a 3x3 grid of randomly drawn training images, titled by class.
# NOTE(review): label_map assumes class indices 0/1/2 correspond to
# low/medium/high damage - confirm against train_set.class_to_idx.
label_map = {
    0: 'low damage',
    1: 'medium damage',
    2: 'high damage',
}
fig, axs = plt.subplots(3, 3, figsize=(8, 8))
for ax in axs.ravel():
    # Draw one random index into the training set.
    sample_idx = torch.randint(len(train_set), size=(1,)).item()
    img, label = train_set[sample_idx]
    # Move the first axis last before handing the image to imshow.
    img_last_axis = img.permute(1, 2, 0)
    ax.imshow(img_last_axis)
    ax.set_title(label_map[label])
fig.tight_layout()
plt.show()

## Building the Neural Network
### ResNet
Instantiate a ResNet-18 model initialized with weights pretrained on ImageNet.

In [None]:
# ResNet-18 initialized with the "IMAGENET1K_V1" pretrained weights.
resnet = models.resnet18(weights="IMAGENET1K_V1")

### Transfer Learning
Freezing all the weights of the network:

In [None]:
# Turn off gradient tracking for every existing parameter so the
# pretrained weights stay fixed during training.
for weight in resnet.parameters():
    weight.requires_grad_(False)

Print last fully connected layer.

In [None]:
# Display the model's current final fully connected layer.
resnet.fc

Add a new final fully connected layer. 

In [None]:
# Width of the features feeding the existing classifier layer.
num_ftrs = resnet.fc.in_features

# New classification head: hidden layer of the same width, ReLU, then a
# 3-way output. These layers are created after the freeze step above.
head_layers = [
    nn.Linear(num_ftrs, num_ftrs),
    nn.ReLU(),
    nn.Linear(num_ftrs, 3),
]
fc = nn.Sequential(*head_layers)

# Swap the new head in for the original final layer.
resnet.fc = fc

## Training the Neural Network
### Define Loss Function and Optimizer

In [None]:
# Adam over all of resnet's parameters, using the learning rate from hp.
# NOTE(review): the frozen parameters are passed in as well; presumably only
# the new head receives gradients and is updated - confirm.
opt = torch.optim.Adam(resnet.parameters(),lr=hp["lr"])
# Cross-entropy loss for the classification task.
loss_fn = nn.CrossEntropyLoss()

Note that the learning rate passed to the optimizer comes from the `hp` dictionary defined earlier:

In [None]:
# Display the current hyperparameters.
hp

### Train and Model Evaluation Functions

In [None]:
@torch.no_grad()
def eval_model(data_loader, model, loss_fn, DEVICE):
    """Compute the mean loss and accuracy of ``model`` over ``data_loader``.

    Both metrics are averaged per sample, so a short final batch is weighted
    correctly. ``loss_fn`` is assumed to return the mean loss of a batch
    (the default for ``nn.CrossEntropyLoss``); that value is weighted by the
    batch size rather than divided by it a second time.

    Args:
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module to evaluate; it is switched to eval mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        DEVICE: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(loss, accuracy)`` of 0-dim tensors on ``DEVICE``.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    # Accumulate on-device to avoid a host sync on every batch.
    total_loss = torch.zeros((), device=DEVICE)
    total_correct = torch.zeros((), device=DEVICE)
    total_samples = 0

    for x, y in data_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        pred = model(x)
        batch_size = len(x)
        # Undo the per-batch mean so the final division is per sample.
        total_loss += loss_fn(pred, y) * batch_size
        total_correct += torch.sum(torch.argmax(pred, dim=1) == y)
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("eval_model received an empty data loader")

    return total_loss / total_samples, total_correct / total_samples

def train(train_loader, val_loader, model, opt, loss_fn, epochs, DEVICE):
    """Train ``model`` and report train/validation metrics after each epoch.

    For each epoch the function runs one pass over ``train_loader``
    (forward, backward, optimizer step), prints the elapsed time and the
    per-sample average training loss and accuracy, then evaluates on
    ``val_loader`` with ``eval_model`` and prints the result.

    ``loss_fn`` is assumed to return the mean loss of a batch (the default
    for ``nn.CrossEntropyLoss``).

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        val_loader: Iterable of validation batches passed to ``eval_model``.
        model: Module to train.
        opt: Optimizer holding the parameters to update.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        epochs: Number of passes over ``train_loader``.
        DEVICE: Device the batches are moved to.
    """
    for epoch in range(epochs):
        model.train(True)
        # Running sums are kept as detached on-device tensors: adding the
        # raw ``loss`` would carry its autograd history along.
        running_loss = torch.zeros((), device=DEVICE)
        running_correct = torch.zeros((), device=DEVICE)
        seen = 0
        print(f"Epoch {epoch+1}/{epochs}:")
        start_time = datetime.now()
        for x, y in train_loader:

            # move data to gpu
            x, y = x.to(DEVICE), y.to(DEVICE)

            # compute model prediction
            pred = model(x)

            # compute model loss
            loss = loss_fn(pred, y)

            ## backpropagation
            # reset gradient calculations
            opt.zero_grad()

            # compute gradients
            loss.backward()

            # update model parameters via optimization step
            opt.step()

            # Weight by batch size so a short final batch counts correctly.
            batch_size = len(x)
            running_loss += loss.detach() * batch_size
            running_correct += torch.sum(torch.argmax(pred.detach(), dim=1) == y)
            seen += batch_size

        end_time = datetime.now()
        # total_seconds() also covers runs longer than a day.
        print(f"Time: {int((end_time - start_time).total_seconds())}s")
        denom = max(seen, 1)  # avoid dividing by zero on an empty loader
        avg_loss = running_loss.item() / denom
        avg_acc = running_correct.item() / denom
        print(f"Average train loss: {avg_loss}, Average train accuracy: {avg_acc}")
        val_loss, val_acc = eval_model(val_loader, model, loss_fn, DEVICE)
        print(f"Val loss: {val_loss}, Val accuracy: {val_acc}\n")

### Check for GPU and move model to correct device 

In [None]:
# Use the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Display the selected device.
device

Pass resnet model to gpu (or cpu if gpu is not found).

In [None]:
# Move the model to the selected device; the trailing semicolon suppresses
# the notebook's echo of the returned module.
resnet.to(device);

### Train Model 
Tasks:
1. Monitor Val accuracy change along epochs
2. Monitor Val accuracy vs. train accuracy

In [None]:
# Train for hp["epochs"] epochs, printing train/validation metrics per epoch.
train(train_dataloader, val_dataloader, resnet, opt, loss_fn, hp["epochs"], device)

##  Additional Exercise

Above, you trained a ResNet18 model with a learning rate of 1e-4 for 5 epochs. Now train the model with learning rates of 1e-5 and 1e-3, and compare the training speed and performance. Which learning rate is best: 1e-5, 1e-4, or 1e-3?

In [None]:
# Repeat the experiment for each additional learning rate, starting from a
# fresh pretrained model every time so the runs are comparable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
loss_fn = nn.CrossEntropyLoss()

for lr in [1e-5, 1e-3]:
    hp["lr"] = lr
    print(hp)

    # Rebuild the model exactly as above: pretrained backbone, frozen
    # weights, and a new 3-class head.
    resnet = models.resnet18(weights="IMAGENET1K_V1")
    for param in resnet.parameters():
        param.requires_grad = False
    num_ftrs = resnet.fc.in_features
    resnet.fc = nn.Sequential(
        nn.Linear(num_ftrs, num_ftrs),
        nn.ReLU(),
        nn.Linear(num_ftrs, 3),
    )
    resnet.to(device)

    # The optimizer must be created after the new model so that it updates
    # this model's parameters rather than the previous run's.
    opt = torch.optim.Adam(resnet.parameters(), lr=hp["lr"])

    print(f"start training with learning rate {lr}")
    train(train_dataloader, val_dataloader, resnet, opt, loss_fn, hp["epochs"], device)