# Lab 2 - AD HW
We train a ResNet-20 model on CIFAR-10 with image augmentation (random horizontal flip and random crop).


In [1]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
USE_COLAB = True

In [3]:
if USE_COLAB:
    from google.colab import drive

    drive.mount("/content/drive")

Mounted at /content/drive


In [4]:
if USE_COLAB:
    %cd /content/drive/MyDrive/Duke/DNN

/content/drive/MyDrive/Duke/DNN


In [5]:
# Residual Network 20 (ResNet20)
# input is nx3x32x32, output is nx10

# Residual Block
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut connection.

    The main path is conv -> batch norm -> ReLU -> conv -> batch norm. The
    shortcut is added to the result and a final ReLU is applied.

    Args:
        inplanes: number of channels of the incoming feature map.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution; the projection shortcut uses
            the same stride so both paths end with the same spatial size.
    """

    def __init__(self, inplanes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=inplanes,
            out_channels=planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, out_channels=planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        # The identity shortcut only works when input and output have the same
        # shape. Any change in channel count (wider OR narrower) or a stride
        # other than 1 needs a projection, otherwise the addition in `forward`
        # fails with a shape mismatch.
        if inplanes != planes or stride != 1:
            # Option B: 1x1 convolution + batch norm projection shortcut
            self.downsample = nn.Sequential(
                nn.Conv2d(
                    in_channels=inplanes,
                    out_channels=planes,
                    kernel_size=1,
                    stride=stride,
                    padding=0,
                    bias=False,
                ),
                nn.BatchNorm2d(planes),
            )
        else:
            self.downsample = None
        self.stride = stride

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Project the input when its shape differs from the main path output.
        out += identity if self.downsample is None else self.downsample(identity)
        out = F.relu(out)
        return out


# ResNet
class ResNet(nn.Module):
    """Residual network: stem convolution, three residual stages, pooled linear head.

    Args:
        block: residual block class, constructed as
            ``block(inplanes=..., planes=..., stride=...)``.
        layers: indexable with three entries giving the number of blocks in
            stage 1, 2 and 3.
        num_classes: number of output logits.
    """

    def __init__(self, block, layers, num_classes=10):
        super().__init__()
        stem_channels = 16
        # `inplanes` tracks the channel count fed into the next stage.
        self.inplanes = stem_channels
        self.conv0 = nn.Conv2d(
            3, stem_channels, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn0 = nn.BatchNorm2d(stem_channels)
        # Stages 2 and 3 double the width and halve the resolution.
        self.layer1 = self._make_layer(block, 16, layers[0], 1)
        self.layer2 = self._make_layer(block, 32, layers[1], 2)
        self.layer3 = self._make_layer(block, 64, layers[2], 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, block, planes, blocks, stride):
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

        Only the first block sees the previous stage's channel count and the
        requested stride; the remaining blocks map ``planes -> planes`` at
        stride 1. Updates ``self.inplanes`` for the next stage.
        """
        stage = nn.Sequential()
        in_channels, step = self.inplanes, stride
        for position in range(blocks):
            stage.add_module(
                str(position),
                block(inplanes=in_channels, planes=planes, stride=step),
            )
            in_channels, step = planes, 1
        self.inplanes = planes
        return stage

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        features = F.relu(self.bn0(self.conv0(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

### Sanity Check

In [6]:
import random

# Sanity check: for a few random batch sizes n, the network must map an
# (n, 3, 32, 32) input to (n, 10) logits.
rand_num_images = random.sample(range(1, 100), 3)

# The architecture does not depend on the batch size, so build the model once
# instead of re-creating it on every iteration.
test_model = ResNet(BasicBlock, [3, 3, 3])

# Only output shapes are inspected, so skip autograd bookkeeping.
with torch.no_grad():
    for i in rand_num_images:
        dummy_input = torch.randn(i, 3, 32, 32)
        test_output = test_model(dummy_input)
        print("Input size: ", dummy_input.size(), "Output size: ", test_output.size())
        assert test_output.size() == torch.Size([i, 10])

Input size:  torch.Size([92, 3, 32, 32]) Output size:  torch.Size([92, 10])
Input size:  torch.Size([95, 3, 32, 32]) Output size:  torch.Size([95, 10])
Input size:  torch.Size([72, 3, 32, 32]) Output size:  torch.Size([72, 10])


### Define transformations

In [7]:
# useful libraries
import torchvision
import torchvision.transforms as transforms

#############################################
# your code here
# specify preprocessing function
# Validation pipeline: tensor conversion followed by per-channel normalization,
# with no augmentation.
transform_val = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)
        ),
    ]
)

# Training pipeline: same conversion and normalization, with a random
# horizontal flip and a random 32x32 crop (after 2 pixels of padding) applied
# in between.
transform_train = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=2),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)
        ),
    ]
)
#############################################

### Load data with transformations

In [8]:
# do NOT change these
from tools.dataset import CIFAR10
from torch.utils.data import DataLoader

# a few arguments, do NOT change these
# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100

# Worker processes per loader: at most 4, but never more than the number of
# CPUs the machine reports. Asking for more workers than CPUs makes DataLoader
# emit a warning and can slow loading down.
# NOTE(review): the captured output of this cell looks like the tail of that
# warning -- confirm on the target runtime.
NUM_WORKERS = min(4, os.cpu_count() or 1)

#############################################
# your code here
# construct dataset
train_set = CIFAR10(
    root=DATA_ROOT,
    mode="train",
    download=True,
    transform=transform_train,
)

val_set = CIFAR10(
    root=DATA_ROOT,
    mode="val",
    download=True,
    transform=transform_val,
)

# construct dataloader
# Training batches are reshuffled every epoch; validation order is irrelevant
# for the aggregate metrics, so it is left unshuffled.
train_loader = DataLoader(
    train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS
)
val_loader = DataLoader(
    val_set,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,  # Does not matter here
    num_workers=NUM_WORKERS,
)
############################################

Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified


  cpuset_checked))


### Instantiate the ResNet-20 model and deploy it to the available device (GPU if present, otherwise CPU).

In [9]:
# specify the device for computation
#############################################
# your code here
# Pick the compute device: an NVIDIA GPU first, then Apple's Metal backend,
# and finally the CPU.
# `torch.backends.mps` is not present in every PyTorch build, so look it up
# defensively; accessing it directly would raise AttributeError on builds
# without it whenever CUDA is unavailable.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():  # check if NVIDIA GPU is available
    device = torch.device("cuda:0")
elif _mps_backend is not None and _mps_backend.is_available():  # Apple's Metal
    device = torch.device("mps")
    print(
        f"Checking pytorch is built with mps activated: {_mps_backend.is_built()}"
    )
else:
    device = torch.device("cpu")

# Report where the computation will run.
if device.type in ("cuda", "mps"):
    print(f"Running on {device} GPU...")
else:
    print("Running on CPU...")

# Build the network with three residual blocks in each of its three stages,
# then move its parameters and buffers onto the selected device.
model = ResNet(block=BasicBlock, layers=[3, 3, 3])
model = model.to(device)

#############################################

Running on cuda:0 GPU...


### Set up the loss function and optimizer

In [10]:
import torch.nn as nn
import torch.optim as optim

# hyperparameters, do NOT change right now
INITIAL_LR = 0.1  # initial learning rate
MOMENTUM = 0.9  # momentum for optimizer
REG = 1e-3  # L2 regularization strength (passed to SGD as weight decay)

#############################################
# your code here
# Loss: cross-entropy between the model's logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum and weight decay over every model parameter.
optimizer = optim.SGD(
    model.parameters(),
    lr=INITIAL_LR,
    momentum=MOMENTUM,
    weight_decay=REG,
)
#############################################

### Train model

In [11]:
# some hyperparameters
# total number of training epochs
from http.cookiejar import DefaultCookiePolicy
import pickle  # for saving accuracy and loss

# total number of passes over the training set
EPOCHS = 150

# BONUS: step learning-rate schedule -- at the start of each listed epoch the
# learning rate is multiplied by DECAY
DECAY_EPOCHS = [70, 100]
DECAY = 0.1

# directory that receives the best checkpoint
CHECKPOINT_FOLDER = "./saved_model_resnet"

# running state of the training/validation process
best_val_acc = 0  # best validation accuracy seen so far
current_learning_rate = INITIAL_LR  # learning rate currently set on the optimizer

# per-epoch history, one entry appended per epoch
avg_train_acc, avg_train_loss = [], []
avg_val_acc, avg_val_loss = [], []

In [12]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: network to evaluate or train.
        loader: iterable yielding ``(inputs, targets)`` batches.
        criterion: loss function called as ``criterion(outputs, targets)``.
        device: device the batches are copied to before the forward pass.
        optimizer: when given, the model is switched to train mode and updated
            after every batch; when ``None``, the model is switched to eval
            mode and gradient tracking is disabled.

    Returns:
        A tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
        Each batch loss is weighted by its batch size so that a smaller final
        batch does not skew the average (assumes ``criterion`` returns the
        mean loss of the batch).
    """
    training = optimizer is not None
    model.train(training)

    total_examples = 0
    correct_examples = 0
    loss_sum = 0.0

    # Gradients are only needed while training; disabling them during
    # validation saves memory.
    with torch.set_grad_enabled(training):
        for inputs, targets in loader:
            # copy the batch to the compute device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # forward pass and loss
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            if training:
                # clear stale gradients, backpropagate, update the weights
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # bookkeeping: correct predictions and sample-weighted loss
            batch_size = targets.shape[0]
            _, preds = outputs.max(1)
            total_examples += batch_size
            correct_examples += preds.eq(targets).sum().item()
            loss_sum += loss.item() * batch_size

    return loss_sum / total_examples, correct_examples / total_examples


print("==> Training starts!")
print("=" * 50)


for i in range(0, EPOCHS):
    # handle the learning rate scheduler.
    # BONUS: at each decay epoch, scale the learning rate of every parameter group
    if i in DECAY_EPOCHS and i != 0:
        current_learning_rate = current_learning_rate * DECAY
        for param_group in optimizer.param_groups:
            param_group["lr"] = current_learning_rate
        print("Current learning rate has decayed to %f" % current_learning_rate)

    print("Epoch %d:" % i)

    # Train the model for 1 epoch.
    train_loss, train_acc = _run_epoch(
        model, train_loader, criterion, device, optimizer
    )
    print("Training loss: %.4f, Training accuracy: %.4f" % (train_loss, train_acc))

    # store train accuracy and loss
    avg_train_loss.append(train_loss)
    avg_train_acc.append(train_acc)

    # Validate on the validation dataset
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (val_loss, val_acc))

    # store val accuracy and loss
    avg_val_loss.append(val_loss)
    avg_val_acc.append(val_acc)

    # save the model checkpoint whenever validation accuracy improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {
            "state_dict": model.state_dict(),
            "epoch": i,
            "lr": current_learning_rate,
            "val_acc": val_acc,
            "train_acc": train_acc,
        }
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, "resnet.pth"))

print("=" * 50)
print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

==> Training starts!
Epoch 0:
Training loss: 1.6789, Training accuracy: 0.3673
Validation loss: 1.4321, Validation accuracy: 0.4820
Saving ...
Epoch 1:
Training loss: 1.2258, Training accuracy: 0.5529
Validation loss: 1.5645, Validation accuracy: 0.4686
Epoch 2:
Training loss: 0.9803, Training accuracy: 0.6505
Validation loss: 1.1122, Validation accuracy: 0.6148
Saving ...
Epoch 3:
Training loss: 0.8360, Training accuracy: 0.7076
Validation loss: 0.8656, Validation accuracy: 0.6972
Saving ...
Epoch 4:
Training loss: 0.7532, Training accuracy: 0.7416
Validation loss: 1.2042, Validation accuracy: 0.6310
Epoch 5:
Training loss: 0.7147, Training accuracy: 0.7545
Validation loss: 1.3267, Validation accuracy: 0.5584
Epoch 6:
Training loss: 0.6974, Training accuracy: 0.7608
Validation loss: 0.8328, Validation accuracy: 0.7216
Saving ...
Epoch 7:
Training loss: 0.6643, Training accuracy: 0.7714
Validation loss: 0.9125, Validation accuracy: 0.6934
Epoch 8:
Training loss: 0.6584, Training accura

In [None]:
# # load the best model checkpoint and continue training
# print("==> Loading the best model checkpoint ...")
# checkpoint = torch.load(os.path.join(CHECKPOINT_FOLDER, "resnet.pth"))
# model.load_state_dict(checkpoint["state_dict"])

In [None]:
# print("==> Training starts again!")
# print("=" * 50)

# EPOCHS = 100


# for i in range(0, EPOCHS):
#     # handle the learning rate scheduler.
#     # BONUS
#     if i in DECAY_EPOCHS and i != 0:
#         current_learning_rate = current_learning_rate * DECAY
#         for param_group in optimizer.param_groups:
#             param_group["lr"] = current_learning_rate
#         print("Current learning rate has decayed to %f" % current_learning_rate)

#     #######################
#     # your code here
#     # switch to train mode
#     model.train()

#     #######################

#     print("Epoch %d:" % i)
#     # this help you compute the training accuracy
#     total_examples = 0
#     correct_examples = 0

#     train_loss = 0  # track training loss if you want

#     # Train the model for 1 epoch.
#     for batch_idx, (inputs, targets) in enumerate(train_loader):
#         ####################################
#         # your code here
#         # copy inputs to device
#         inputs = inputs.to(device)

#         # compute the output and loss
#         outputs = model(inputs)
#         targets = targets.to(device)  # don't forget to also copy targets to device
#         loss = criterion(outputs, targets)

#         # zero the gradient
#         optimizer.zero_grad()

#         # backpropagation
#         loss.backward()

#         # apply gradient and update the weights
#         optimizer.step()

#         # count the number of correctly predicted samples in the current batch
#         _, preds = outputs.max(1)
#         total_examples += targets.shape[0]
#         correct_examples += preds.eq(targets).sum().item()
#         # track loss
#         train_loss += loss.item()

#         ####################################

#     avg_loss = train_loss / len(train_loader)
#     avg_acc = correct_examples / total_examples
#     print("Training loss: %.4f, Training accuracy: %.4f" % (avg_loss, avg_acc))


#     # Validate on the validation dataset
#     #######################
#     # your code here
#     # switch to eval mode
#     model.eval()

#     #######################

#     # this help you compute the validation accuracy
#     total_examples = 0
#     correct_examples = 0

#     val_loss = 0  # again, track the validation loss if you want

#     # disable gradient during validation, which can save GPU memory
#     with torch.no_grad():
#         for batch_idx, (inputs, targets) in enumerate(val_loader):
#             ####################################
#             # your code here
#             # copy inputs to device
#             inputs = inputs.to(device)

#             # compute the output and loss
#             outputs = model(inputs)
#             targets = targets.to(device)  # don't forget to also copy targets to device
#             loss = criterion(outputs, targets)

#             # count the number of correctly preds samples in the current batch
#             _, preds = outputs.max(1)
#             total_examples += targets.shape[0]
#             correct_examples += preds.eq(targets).sum().item()
#             # track loss
#             val_loss += loss.item()

#             ####################################

#     avg_loss = val_loss / len(val_loader)
#     avg_acc = correct_examples / total_examples
#     print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

#     # save the model checkpoint
#     if avg_acc > best_val_acc:
#         best_val_acc = avg_acc
#         if not os.path.exists(f"{CHECKPOINT_FOLDER}_cont"):
#             os.makedirs(f"{CHECKPOINT_FOLDER}_cont")
#         print("Saving ...")
#         state = {
#             "state_dict": model.state_dict(),
#             "epoch": i,
#             "lr": current_learning_rate,
#             "loss": avg_loss,
#             "acc": avg_acc,
#         }
#         torch.save(state, os.path.join(f"{CHECKPOINT_FOLDER}_cont", "resnet.pth"))

In [13]:
# save/load the training and validation accuracy and loss
# True: write the histories collected during training to disk.
# False: read previously saved histories back instead.
SAVE_NEW_DATA = True

# File names, in the same order as the history lists below.
_metric_files = ("train_acc.pkl", "train_loss.pkl", "val_acc.pkl", "val_loss.pkl")

if SAVE_NEW_DATA:
    # Make sure the target folder exists even if no checkpoint was written.
    os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
    _histories = (avg_train_acc, avg_train_loss, avg_val_acc, avg_val_loss)
    for _filename, _history in zip(_metric_files, _histories):
        with open(os.path.join(CHECKPOINT_FOLDER, _filename), "wb") as f:
            pickle.dump(_history, f)
else:
    # NOTE: pickle.load can execute arbitrary code; only load files that were
    # written by this notebook.
    _loaded = []
    for _filename in _metric_files:
        with open(os.path.join(CHECKPOINT_FOLDER, _filename), "rb") as f:
            _loaded.append(pickle.load(f))
    avg_train_acc, avg_train_loss, avg_val_acc, avg_val_loss = _loaded

### Save predictions

In [14]:
DATA_ROOT = "./data"
BATCH_SIZE = 100

# Test-time preprocessing: tensor conversion and per-channel normalization
# only, without any random augmentation.
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)
        ),
    ]
)

test_set = CIFAR10(
    root=DATA_ROOT,
    mode="test",
    download=True,
    transform=transform_test,
)

# The loader must NOT shuffle: predictions are written out by position, so the
# sample order has to stay exactly as stored in the dataset. Shuffling would
# scramble the Id -> Label pairing and push test accuracy towards random
# guessing.
test_loader = DataLoader(
    dataset=test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=1,
)

#########################################################
# use your model to generate predictions on test data
# and save the results into variable "results"
# "results" should be either a numpy array or a torch tensor with length of 10000

# initialize a resnet and load trained weights
# initialize a resnet and load trained weights
net = ResNet(BasicBlock, [3, 3, 3])
# `map_location` remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine can still be loaded on a machine where
# that GPU is not available.
# NOTE: `state_dict` holds the whole checkpoint dictionary; the model weights
# live under its "state_dict" key.
state_dict = torch.load(
    os.path.join(CHECKPOINT_FOLDER, "resnet.pth"), map_location=device
)  # change the path to your own checkpoint file
net.load_state_dict(state_dict["state_dict"])
net.to(device)

# remember to switch to eval mode whenever you are making inference
net.eval()

# Collect the predicted class index of every test sample, batch by batch.
# NOTE(review): assumes the test-mode loader yields image batches only (no
# labels) -- confirm against tools.dataset.CIFAR10.
results = []
with torch.no_grad():
    for x in test_loader:
        results.append(net(x.to(device)).argmax(1))

# convert results to numpy array
results = torch.cat(results).cpu().numpy()
assert len(results) == 10000

#########################################################
# Write one "Id,Label" row per test sample, where Id is the sample's position
# in `results`.
predictions_path = os.path.join(CHECKPOINT_FOLDER, "predictions.csv")
with open(predictions_path, "w") as fp:
    fp.write("Id,Label\n")
    fp.writelines(
        "%d,%d\n" % (sample_id, label) for sample_id, label in enumerate(results)
    )

Using downloaded and verified file: ./data/cifar10_test_F22.zip
Extracting ./data/cifar10_test_F22.zip to ./data
Files already downloaded and verified
