# Dataset

## Description

## Pre-processing

### Resizing

### Random cropping

### Corrupting

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import sampler
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as T
import pickle
import numpy as np

In [2]:
from logger import Logger

In [3]:
device = torch.device('cpu')

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [4]:
# Prepare dataset
batch_size = 128
train_mean = [107.59252, 103.2752, 106.84143]
train_std = [63.439133, 59.521027, 63.240288]
# Preprocessing
transform = T.Compose([
                T.Normalize(train_mean, train_std)
            ])

train_set = TensorDataset(torch.load('train_x.pt'), torch.load('train_y.pt'))
val_set = TensorDataset(torch.load('val_x.pt'), torch.load('val_y.pt'))
test_set = TensorDataset(torch.load('test_x.pt'), torch.load('test_y.pt'))

train_rotate_set = TensorDataset(torch.load('train_x_rotate.pt'), torch.load('train_y_rotate.pt'))
val_rotate_set = TensorDataset(torch.load('val_x_rotate.pt'), torch.load('val_y_rotate.pt'))
test_rotate_set = TensorDataset(torch.load('test_x_rotate.pt'), torch.load('test_y_rotate.pt'))

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

train_rotate_loader = DataLoader(train_rotate_set, batch_size=batch_size, shuffle=True)
val_rotate_loader = DataLoader(val_rotate_set, batch_size=batch_size, shuffle=True)
test_rotate_loader = DataLoader(test_rotate_set, batch_size=batch_size, shuffle=True)

# Training Procedures

## Regular Training

In [5]:
# Set up training pipelines
def train_main(model, optimizer, loader_train, loader_val, epochs=1, model_path=None, early_stop_patience = 0):
    """
    Train the main branch
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Logger object with loss and accuracy data
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    logger = Logger()
    last_loss = float('inf')
    for e in range(epochs):
        num_correct = 0
        num_samples = 0
        total_loss = 0.0
        count = 0
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)
            total_loss += loss.item()

            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"\r[Epoch {e}, Batch {t}] train_loss: {loss.item()}", end='')
            count += 1

        # Conclude Epoch
        train_loss = total_loss / count
        train_acc = float(num_correct) / num_samples
        val_loss, val_acc = evaluate_main(model, loader_val)
        logger.log(train_loss, train_acc, val_loss, val_acc)
        
        with open(model_path.split('.')[0] + '.pkl', 'wb') as output_file:
            pickle.dump(logger, output_file)

        # Early Stopping
        if logger.check_early_stop(early_stop_patience):
            print("[Early Stopped]")
            break
        else:
            if last_loss > val_loss:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss improved from %.4f to %.4f. Saving model to {model_path}." % (last_loss, val_loss))
                if model_path is not None:
                    torch.save(model.state_dict(), model_path)
            else:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss did not improve from %.4f" % (last_loss))
            last_loss = val_loss
    return logger

        

In [6]:
def train_both(model, optimizer, loader_train, loader_val, epochs=1, model_path=None, early_stop_patience = 0):
    """
    Train the main and auxillary branch
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Logger object with loss and accuracy data
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    logger = Logger()
    last_loss = float('inf')
    for e in range(epochs):
        num_correct = 0
        num_samples = 0
        running_loss = 0.0
        count = 0
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss_main = F.cross_entropy(scores[0], y[:, 0])
            loss_auxillary = F.cross_entropy(scores[1], y[:, 1])
            loss = loss_main + loss_auxillary
            running_loss += loss_main.item()

            _, preds = scores[0].max(1)
            num_correct += (preds == y[:, 0]).sum()
            num_samples += preds.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"\r[Epoch {e}, Batch {t}] train_loss: {loss.item()}", end='')
            count += 1

        # Conclude Epoch
        train_loss = running_loss / count
        train_acc = float(num_correct) / num_samples
        val_loss, val_acc = evaluate_both(model, loader_val)
        logger.log(train_loss, train_acc, val_loss, val_acc)

        with open(model_path.split('.')[0] + '.pkl', 'wb') as output_file:
            pickle.dump(logger, output_file)
            
        # Early Stopping
        if logger.check_early_stop(early_stop_patience):
            print("[Early Stopped]")
            break
        else:
            if last_loss > val_loss:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss improved from %.4f to %.4f. Saving model to {model_path}." % (last_loss, val_loss))
                if model_path is not None:
                    torch.save(model.state_dict(), model_path)
            else:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss did not improve from %.4f" % (last_loss))
            last_loss = val_loss
    return logger


In [7]:
def evaluate_main(model, loader):
    """
    Evaluate main branch accuracy
    Outputs: loss and accuracy
    """
    num_correct = 0
    num_samples = 0
    ave_loss = 0.0
    count = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            # print(scores.shape)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
            ave_loss += loss.item()
            count += 1
        acc = float(num_correct) / num_samples
        return ave_loss / count, acc

In [8]:
def evaluate_both(model, loader):
    """
    Evaluate main branch accuracy in model with two predictions
    Outputs: loss and accuracy
    """
    num_correct = 0
    num_samples = 0
    ave_loss = 0.0
    count = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            loss_main = F.cross_entropy(scores[0], y[:, 0])
            # print(scores.shape)
            _, preds = scores[0].max(1)
            num_correct += (preds == y[:, 0]).sum()
            # print(f"num_correct: {num_correct}")
            num_samples += preds.size(0)
            # print(f"num_samples: {num_samples}")
            ave_loss += loss_main.item()
            count += 1
        acc = float(num_correct) / num_samples
        return ave_loss / count, acc


In [9]:
def evaluate_non_rotate(model, loader):
    """
    Evaluate main branch accuracy in model with two predictions
    Outputs: loss and accuracy
    """
    from random import randrange
    num_correct = 0
    num_samples = 0
    ave_loss = 0.0
    count = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
#             random_number = randrange(4)
#             if random_number == 0:
#                 pass
#             elif random_number == 1:
#                 for i in range(x.size()[0]):
#                     x[i][0] = torch.rot90(x[i][0])
#                     x[i][1] = torch.rot90(x[i][1])
#                     x[i][2] = torch.rot90(x[i][2])
#             elif random_number == 2:
#                 for i in range(x.size()[0]):
#                     x[i][0] = torch.rot90(x[i][0], 2)
#                     x[i][1] = torch.rot90(x[i][1], 2)
#                     x[i][2] = torch.rot90(x[i][2], 2)
#             elif random_number == 3:
#                 for i in range(x.size()[0]):
#                     x[i][0] = torch.rot90(x[i][0], 3)
#                     x[i][1] = torch.rot90(x[i][1], 3)
#                     x[i][2] = torch.rot90(x[i][2], 3)
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            loss_main = F.cross_entropy(scores[0], y)
            # print(scores.shape)
            _, preds = scores[0].max(1)
            num_correct += (preds == y).sum()
            #print(f"num_correct: {num_correct}")
            num_samples += preds.size(0)
            #print(f"num_samples: {num_samples}")
            ave_loss += loss_main.item()
            count += 1
        acc = float(num_correct) / num_samples
        return ave_loss / count, acc


## Online Training

In [10]:
def ttt_online(model, loader, loader_spinned, optimizer):
    """
    Online TTT with image spinning task
    Outputs: loss and accuracy
    """
    for x, y in loader_spinned:
        x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
        y = y.to(device=device, dtype=torch.long)
        scores = model(x)
        loss_auxillary = F.cross_entropy(scores[1], y[:, 1])
        optimizer.zero_grad()
        loss_auxillary.backward()
        optimizer.step()
    return evaluate_main(model, loader)
    

# Experiment 1: Baseline ResNet18

In [11]:
# Experiment 1: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-4
from models import BaselineResNet
model_base_1 = BaselineResNet(43)
optimizer = optim.Adam(model_base_1.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_1, optimizer, train_loader, val_loader, epochs=30, model_path='model_base_1.pth', early_stop_patience=5)

[Epoch 0] train_acc: 0.12248150981892375, val_acc:0.3569697742634868, val_loss improved from inf to 2.2720. Saving model to model_base_1.pth.
[Epoch 1] train_acc: 0.6198355011476664, val_acc:0.7555158780767759, val_loss improved from 2.2720 to 0.8263. Saving model to model_base_1.pth.
[Epoch 2] train_acc: 0.8285195103289977, val_acc:0.8477235046550184, val_loss improved from 0.8263 to 0.5327. Saving model to model_base_1.pth.
[Epoch 3] train_acc: 0.8886444784493751, val_acc:0.884836117842112, val_loss improved from 0.5327 to 0.4087. Saving model to model_base_1.pth.
[Epoch 4] train_acc: 0.9238714613618975, val_acc:0.8942736895804106, val_loss improved from 0.4087 to 0.3710. Saving model to model_base_1.pth.
[Epoch 5] train_acc: 0.9361769956643713, val_acc:0.922203800535646, val_loss improved from 0.3710 to 0.2781. Saving model to model_base_1.pth.
[Epoch 6] train_acc: 0.9510010201479214, val_acc:0.9234791480678485, val_loss improved from 0.2781 to 0.2671. Saving model to model_base_1.p

<logger.Logger at 0x25be5ab5100>

## Evaluation
### Uncorrupted Images

In [12]:
from models import BaselineResNet
model_path = 'model_base_1.pth'
model_base_1 = BaselineResNet(43)
params = torch.load(model_path)
model_base_1.load_state_dict(params)
model_base_1 = model_base_1.to(device=device)
evaluate_main(model_base_1, test_loader)

(0.43273189961910247, 0.9043547110055423)

### Corrupted Images

# Experiment 1.5: Baseline CNN

In [11]:
# Experiment 1: Train a baseline CNN: no branch
lr = 1e-3
wd = 1e-4
from cnn_models import BaselineCNN
cnn_model_base_1 = BaselineCNN(43)
optimizer = optim.Adam(cnn_model_base_1.parameters(), lr=lr, weight_decay=wd)
train_main(cnn_model_base_1, optimizer, train_loader, val_loader, epochs=30, model_path='cnn_model_base_1.pth', early_stop_patience=5)

[Epoch 0] train_acc: 0.4020976791634787, val_acc:0.6983803086341028, val_loss improved from inf to 1.0278. Saving model to cnn_model_base_1.pth.
[Epoch 1] train_acc: 0.8483167559296098, val_acc:0.8686392041831399, val_loss improved from 1.0278 to 0.4386. Saving model to cnn_model_base_1.pth.
[Epoch 2] train_acc: 0.927250701351696, val_acc:0.907537303915317, val_loss improved from 0.4386 to 0.3340. Saving model to cnn_model_base_1.pth.
[Epoch 3] train_acc: 0.9595766386125988, val_acc:0.9270501211580155, val_loss improved from 0.3340 to 0.2903. Saving model to cnn_model_base_1.pth.
[Epoch 4] train_acc: 0.9738587095128793, val_acc:0.9296008162224206, val_loss did not improve from 0.2903
[Epoch 5] train_acc: 0.9751657740372354, val_acc:0.9164647366407346, val_loss did not improve from 0.3384
[Epoch 6] train_acc: 0.9775567457281306, val_acc:0.9380181099349573, val_loss improved from 0.3908 to 0.2848. Saving model to cnn_model_base_1.pth.
[Epoch 7] train_acc: 0.9848890589135425, val_acc:0.93

<logger.Logger at 0x25b0f00e0d0>

## Evaluation
### Uncorrupted Images

In [13]:
from cnn_models import BaselineCNN
model_path = 'cnn_model_base_1.pth'
cnn_model_base_1 = BaselineCNN(43)
params = torch.load(model_path)
cnn_model_base_1.load_state_dict(params)
cnn_model_base_1 = cnn_model_base_1.to(device=device)
evaluate_main(cnn_model_base_1, test_loader)

(0.4685808004754962, 0.9095011876484561)

# Experiment 2: ResNet18 with Auxillary Branch (No Online Training)

In [11]:
# Experiment 1: Train a ResNet18 with auxillary branch
lr = 5e-4
wd = 1e-5
from models import ResNetTwoBranch
exp_2_model_1 = ResNetTwoBranch()
optimizer = optim.Adam(exp_2_model_1.parameters(), lr=lr, weight_decay=wd)
train_both(exp_2_model_1, optimizer, train_rotate_loader, val_rotate_loader, epochs=50, model_path='exp_2_model_1.pth', early_stop_patience=5)

[Epoch 0] train_acc: 0.16449885233358838, val_acc:0.28650682310929726, val_loss improved from inf to 2.4156. Saving model to exp_2_model_1.pth.
[Epoch 1] train_acc: 0.4454699056363173, val_acc:0.49295370488458107, val_loss improved from 2.4156 to 1.6854. Saving model to exp_2_model_1.pth.
[Epoch 2] train_acc: 0.6814986610558531, val_acc:0.7458232368320368, val_loss improved from 1.6854 to 0.8470. Saving model to exp_2_model_1.pth.
[Epoch 3] train_acc: 0.851361259882683, val_acc:0.891914296645836, val_loss improved from 0.8470 to 0.3726. Saving model to exp_2_model_1.pth.
[Epoch 4] train_acc: 0.9133352461106861, val_acc:0.9018620073970157, val_loss improved from 0.3726 to 0.3400. Saving model to exp_2_model_1.pth.
[Epoch 5] train_acc: 0.9386237566947208, val_acc:0.9341601836500446, val_loss improved from 0.3400 to 0.2309. Saving model to exp_2_model_1.pth.
[Epoch 6] train_acc: 0.953049285896455, val_acc:0.9518237469710497, val_loss improved from 0.2309 to 0.1681. Saving model to exp_2_m

<logger.Logger at 0x1d7ce149190>

## Evaluate

In [13]:
from models import ResNetTwoBranch
model_path = 'exp_2_model_1.pth'
exp_2_model_1 = ResNetTwoBranch()
params = torch.load(model_path)
exp_2_model_1.load_state_dict(params)
exp_2_model_1 = exp_2_model_1.to(device=device)
evaluate_both(exp_2_model_1, test_rotate_loader)
#evaluate_non_rotate(exp_2_model_1, test_loader)

(0.39871874469556384, 0.9288796516231196)

# Experiment 2.5: CNN with Auxillary Branch (No Online Training)

In [11]:
# Experiment 1: Train a CNN with auxillary branch
lr = 5e-4
wd = 1e-5
from cnn_models import CNNTwoBranch
exp_2_cnn_model_1 = CNNTwoBranch()
optimizer = optim.Adam(exp_2_cnn_model_1.parameters(), lr=lr, weight_decay=wd)
train_both(exp_2_cnn_model_1, optimizer, train_rotate_loader, val_rotate_loader, epochs=50, model_path='exp_2_cnn_model_1.pth', early_stop_patience=5)

[Epoch 0] train_acc: 0.054641673042591175, val_acc:0.061599285805381966, val_loss improved from inf to 3.4921. Saving model to exp_2_cnn_model_1.pth.
[Epoch 1] train_acc: 0.05527129558786024, val_acc:0.060068868766738934, val_loss did not improve from 3.4921
[Epoch 2, Batch 818] train_loss: 4.8913483619689945

KeyboardInterrupt: 

## Evaluate

In [None]:
# Experiment 3: ResNet18 with Auxillary Branch (Online-Trained)

In [None]:
# Experiment 3: Do online training on the auxillary branch, with pre-trained shared and main branch weights from experiment 1.