# Dataset

## Description

## Pre-processing

### Resizing

### Random cropping

### Corrupting

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import sampler
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

In [2]:
from logger import Logger

In [3]:
device = torch.device('cpu')

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [4]:
# Prepare dataset
batch_size = 128
train_mean = [107.59252, 103.2752, 106.84143]
train_std = [63.439133, 59.521027, 63.240288]
# Preprocessing
transform = T.Compose([
                T.Normalize(train_mean, train_std)
            ])

train_set = TensorDataset(torch.load('train_x.pt'), torch.load('train_y.pt'))
val_set = TensorDataset(torch.load('val_x.pt'), torch.load('val_y.pt'))
test_set = TensorDataset(torch.load('test_x.pt'), torch.load('test_y.pt'))

#train_rotate_set = TensorDataset(torch.load('train_x_rotate.pt'), torch.load('train_y_rotate.pt'))
#val_rotate_set = TensorDataset(torch.load('val_x_rotate.pt'), torch.load('val_y_rotate.pt'))
#test_rotate_set = TensorDataset(torch.load('test_x_rotate.pt'), torch.load('test_y_rotate.pt'))

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

#train_rotate_loader = DataLoader(train_rotate_set, batch_size=batch_size, shuffle=True)
#val_rotate_loader = DataLoader(val_rotate_set, batch_size=batch_size, shuffle=True)
#test_rotate_loader = DataLoader(test_rotate_set, batch_size=batch_size, shuffle=True)

# Training Procedures

## Regular Training

In [5]:
# Set up training pipelines
def train_main(model, optimizer, loader_train, loader_val, epochs=1, model_path=None, early_stop_patience = 0):
    """
    Train the main branch
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Logger object with loss and accuracy data
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    logger = Logger()
    last_loss = float('inf')
    for e in range(epochs):
        num_correct = 0
        num_samples = 0
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"\r[Epoch {e}, Batch {t}] train_loss: {loss.item()}", end='')

        # Conclude Epoch
        train_loss = loss.item()
        train_acc = float(num_correct) / num_samples
        val_loss, val_acc = evaluate_main(model, loader_val)
        logger.log(train_loss, train_acc, val_loss, val_acc)
        
        # Early Stopping
        if logger.check_early_stop(early_stop_patience):
            print("[Early Stopped]")
            break
        else:
            if last_loss > val_loss:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss improved from %.4f to %.4f. Saving model to {model_path}.".format(last_loss, val_loss))
                if model_path is not None:
                    torch.save(model.state_dict(), model_path)
            else:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss did not improve from %.4f".format(last_loss))
            last_loss = val_loss

        

In [6]:
def evaluate_main(model, loader):
    """
    Evaluate main branch accuracy
    Outputs: loss and accuracy
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            # print(scores.shape)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        return loss.item(), acc

## Online Training

In [7]:
def ttt_online(model, loader, loader_spinned, optimizer):
    """
    Online TTT with image spinning task
    Outputs: loss and accuracy
    """
    for x, y in loader_spinned:
        x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
        y = y.to(device=device, dtype=torch.long)
        scores = model(x)
        loss = F.cross_entropy(scores[1], y[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return evaluate_main(model, loader)

# Experiment 1: Baseline ResNet18

In [8]:
# Experiment 1: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-5
from models import BaselineResNet
model = BaselineResNet(58)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
train_main(model, optimizer, train_loader, val_loader, epochs=100, model_path='model.pth', early_stop_patience=5)

Using cache found in /home/haoru/.cache/torch/hub/pytorch_vision_v0.6.0
[Epoch 0, Batch 26] train_loss: 1.4036067724227905[Epoch 0] train_acc: 0.3261390887290168, val_acc:0.2577937649880096, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 1, Batch 26] train_loss: 2.7469780445098877[Epoch 1] train_acc: 0.46792565947242204, val_acc:0.4184652278177458, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 2, Batch 26] train_loss: 1.8481502532958984[Epoch 2] train_acc: 0.5314748201438849, val_acc:0.4172661870503597, val_loss did not improve from %.4f
[Epoch 3, Batch 26] train_loss: 1.8004624843597412[Epoch 3] train_acc: 0.6025179856115108, val_acc:0.4904076738609113, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 4, Batch 26] train_loss: 2.1851603984832764[Epoch 4] train_acc: 0.6193045563549161, val_acc:0.5, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 5, Batch 26] train_loss: 1.8282390832901[Epoch 5]

KeyboardInterrupt: 

## Evaluation
### Uncorrupted Images

In [8]:
from models import BaselineResNet
model_path = 'model.pth'
model = BaselineResNet(58)
params = torch.load(model_path)
model.load_state_dict(params)
model = model.to(device=device)
evaluate_main(model, test_loader)


Using cache found in /home/haoru/.cache/torch/hub/pytorch_vision_v0.6.0


(3.170003652572632, 0.522567703109328)

### Corrupted Images

# Experiment 2: ResNet18 with Auxillary Branch (No Online Training)

In [None]:
# Experiment 2: Train a ResNet18 with auxillary branch

# Experiment 3: ResNet18 with Auxillary Branch (Online-Trained)

In [None]:
# Experiment 3: Do online training on the auxillary branch, with pre-trained shared and main branch weights from experiment 1.