# Dataset

## Description

## Pre-processing

### Resizing

### Random cropping

### Corrupting

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import sampler
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

In [2]:
from logger import Logger

In [3]:
device = torch.device('cpu')

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [4]:
# Prepare dataset
batch_size = 128
train_mean = [107.59252, 103.2752, 106.84143]
train_std = [63.439133, 59.521027, 63.240288]
# Preprocessing
transform = T.Compose([
                T.Normalize(train_mean, train_std)
            ])

train_set = TensorDataset(torch.load('train_x.pt'), torch.load('train_y.pt'))
val_set = TensorDataset(torch.load('val_x.pt'), torch.load('val_y.pt'))
test_set = TensorDataset(torch.load('test_x.pt'), torch.load('test_y.pt'))

#train_rotate_set = TensorDataset(torch.load('train_x_rotate.pt'), torch.load('train_y_rotate.pt'))
#val_rotate_set = TensorDataset(torch.load('val_x_rotate.pt'), torch.load('val_y_rotate.pt'))
#test_rotate_set = TensorDataset(torch.load('test_x_rotate.pt'), torch.load('test_y_rotate.pt'))

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

#train_rotate_loader = DataLoader(train_rotate_set, batch_size=batch_size, shuffle=True)
#val_rotate_loader = DataLoader(val_rotate_set, batch_size=batch_size, shuffle=True)
#test_rotate_loader = DataLoader(test_rotate_set, batch_size=batch_size, shuffle=True)

# Training Procedures

## Regular Training

In [5]:
# Set up training pipelines
def train_main(model, optimizer, loader_train, loader_val, epochs=1, model_path=None, early_stop_patience = 0):
    """
    Train the main branch
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Logger object with loss and accuracy data
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    logger = Logger()
    last_loss = float('inf')
    for e in range(epochs):
        num_correct = 0
        num_samples = 0
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"\r[Epoch {e}, Batch {t}] train_loss: {loss.item()}", end='')

        # Conclude Epoch
        train_loss = loss.item()
        train_acc = float(num_correct) / num_samples
        val_loss, val_acc = evaluate_main(model, loader_val)
        logger.log(train_loss, train_acc, val_loss, val_acc)
        
        # Early Stopping
        if logger.check_early_stop(early_stop_patience):
            print("[Early Stopped]")
            break
        else:
            if last_loss > val_loss:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss improved from %.4f to %.4f. Saving model to {model_path}." % (last_loss, val_loss))
                if model_path is not None:
                    torch.save(model.state_dict(), model_path)
            else:
                print(f"\r[Epoch {e}] train_acc: {train_acc}, val_acc:{val_acc}, val_loss did not improve from %.4f" % (last_loss))
            last_loss = val_loss

        

In [6]:
def evaluate_main(model, loader):
    """
    Evaluate main branch accuracy
    Outputs: loss and accuracy
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float32)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            # print(scores.shape)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        return loss.item(), acc

## Online Training

In [7]:
def ttt_online(model, loader, loader_spinned, optimizer):
    """
    Online TTT with image spinning task
    Outputs: loss and accuracy
    """
    for x, y in loader_spinned:
        x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
        y = y.to(device=device, dtype=torch.long)
        scores = model(x)
        loss = F.cross_entropy(scores[1], y[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return evaluate_main(model, loader)

# Experiment 1: Baseline ResNet18

In [7]:
# Experiment 1: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-5
from models import BaselineResNet
model = BaselineResNet(58)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
train_main(model, optimizer, train_loader, val_loader, epochs=50, model_path='model.pth', early_stop_patience=5)

Downloading: "https://github.com/pytorch/vision/archive/v0.6.0.zip" to C:\Users\hycme/.cache\torch\hub\v0.6.0.zip


[Epoch 0] train_acc: 0.4073741007194245, val_acc:0.420863309352518, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 1] train_acc: 0.595173860911271, val_acc:0.579136690647482, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 2] train_acc: 0.7150779376498801, val_acc:0.6672661870503597, val_loss did not improve from %.4f
[Epoch 3] train_acc: 0.8204436450839329, val_acc:0.6936450839328537, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 4] train_acc: 0.8658573141486811, val_acc:0.8651079136690647, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 5] train_acc: 0.9063249400479616, val_acc:0.8489208633093526, val_loss did not improve from %.4f
[Epoch 6] train_acc: 0.9349520383693045, val_acc:0.7649880095923262, val_loss improved from %.4f to %.4f. Saving model to model.pth.
[Epoch 7] train_acc: 0.9190647482014388, val_acc:0.8237410071942446, val_loss improved from %.4f to %.4f. Saving model to model.pt

KeyboardInterrupt: 

In [11]:
# Experiment 2: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-4
from models import BaselineResNet
model_base_1 = BaselineResNet(58)
optimizer = optim.Adam(model_base_1.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_1, optimizer, train_loader, val_loader, epochs=25, model_path='model_base_1.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.3962829736211031, val_acc:0.4592326139088729, val_loss improved from inf to 1.2396. Saving model to model_base_1.pth.
[Epoch 1] train_acc: 0.6312949640287769, val_acc:0.5821342925659473, val_loss improved from 1.2396 to 1.0678. Saving model to model_base_1.pth.
[Epoch 2] train_acc: 0.7712829736211031, val_acc:0.6600719424460432, val_loss improved from 1.0678 to 0.1275. Saving model to model_base_1.pth.
[Epoch 3] train_acc: 0.8324340527577938, val_acc:0.7068345323741008, val_loss did not improve from 0.1275
[Epoch 4] train_acc: 0.8776978417266187, val_acc:0.8249400479616307, val_loss improved from 1.9390 to 0.0884. Saving model to model_base_1.pth.
[Epoch 5] train_acc: 0.8995803357314148, val_acc:0.7799760191846523, val_loss did not improve from 0.0884
[Epoch 6] train_acc: 0.9246103117505995, val_acc:0.8309352517985612, val_loss did not improve from 0.3738
[Epoch 7] train_acc: 0.9279076738609112, val_acc:0.8189448441247003, val_loss improved from 0.6398 to 0.1083.

In [13]:
# Experiment 3: Train a baseline ResNet18: no branch
lr = 0.5 * 1e-3
wd = 1e-4
from models import BaselineResNet
model_base_2 = BaselineResNet(58)
optimizer = optim.Adam(model_base_2.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_2, optimizer, train_loader, val_loader, epochs=25, model_path='model_base_2.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.4337529976019185, val_acc:0.43585131894484413, val_loss improved from inf to 1.5848. Saving model to model_base_2.pth.
[Epoch 1] train_acc: 0.6672661870503597, val_acc:0.6768585131894485, val_loss improved from 1.5848 to 1.0063. Saving model to model_base_2.pth.
[Epoch 2] train_acc: 0.79181654676259, val_acc:0.7140287769784173, val_loss did not improve from 1.0063
[Epoch 3] train_acc: 0.871552757793765, val_acc:0.8381294964028777, val_loss improved from 1.4350 to 0.2368. Saving model to model_base_2.pth.
[Epoch 4] train_acc: 0.8974820143884892, val_acc:0.8123501199040767, val_loss improved from 0.2368 to 0.0272. Saving model to model_base_2.pth.
[Epoch 5] train_acc: 0.9262589928057554, val_acc:0.8878896882494005, val_loss did not improve from 0.0272
[Epoch 6] train_acc: 0.954736211031175, val_acc:0.9244604316546763, val_loss improved from 0.5714 to 0.0517. Saving model to model_base_2.pth.
[Epoch 7] train_acc: 0.9653776978417267, val_acc:0.8273381294964028, val_l

In [15]:
# Experiment 4: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 2*1e-4
from models import BaselineResNet
model_base_3 = BaselineResNet(58)
optimizer = optim.Adam(model_base_3.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_3, optimizer, train_loader, val_loader, epochs=25, model_path='model_base_3.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.4009292565947242, val_acc:0.4904076738609113, val_loss improved from inf to 1.4700. Saving model to model_base_3.pth.
[Epoch 1] train_acc: 0.5932254196642686, val_acc:0.4262589928057554, val_loss did not improve from 1.4700
[Epoch 2] train_acc: 0.7281175059952039, val_acc:0.6534772182254197, val_loss improved from 4.2047 to 0.8472. Saving model to model_base_3.pth.
[Epoch 3] train_acc: 0.7925659472422062, val_acc:0.7068345323741008, val_loss did not improve from 0.8472
[Epoch 4] train_acc: 0.8694544364508393, val_acc:0.8099520383693045, val_loss improved from 1.0837 to 0.8310. Saving model to model_base_3.pth.
[Epoch 5] train_acc: 0.8946342925659473, val_acc:0.8405275779376499, val_loss improved from 0.8310 to 0.6378. Saving model to model_base_3.pth.
[Epoch 6] train_acc: 0.907673860911271, val_acc:0.8267386091127098, val_loss improved from 0.6378 to 0.2973. Saving model to model_base_3.pth.
[Epoch 7] train_acc: 0.9318045563549161, val_acc:0.829136690647482, val_

In [17]:
# Experiment 5: Train a baseline ResNet18: no branch
lr = 0.8*1e-3
wd = 1e-4
from models import BaselineResNet
model_base_4 = BaselineResNet(58)
optimizer = optim.Adam(model_base_4.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_4, optimizer, train_loader, val_loader, epochs=30, model_path='model_base_4.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.41351918465227816, val_acc:0.32973621103117506, val_loss improved from inf to 6.4801. Saving model to model_base_4.pth.
[Epoch 1] train_acc: 0.634742206235012, val_acc:0.5269784172661871, val_loss improved from 6.4801 to 3.0476. Saving model to model_base_4.pth.
[Epoch 2] train_acc: 0.763189448441247, val_acc:0.5725419664268585, val_loss improved from 3.0476 to 2.2186. Saving model to model_base_4.pth.
[Epoch 3] train_acc: 0.8339328537170264, val_acc:0.5959232613908872, val_loss improved from 2.2186 to 1.7167. Saving model to model_base_4.pth.
[Epoch 4] train_acc: 0.8763489208633094, val_acc:0.8687050359712231, val_loss improved from 1.7167 to 0.6649. Saving model to model_base_4.pth.
[Epoch 5] train_acc: 0.9096223021582733, val_acc:0.9058752997601919, val_loss improved from 0.6649 to 0.0973. Saving model to model_base_4.pth.
[Epoch 6] train_acc: 0.9431954436450839, val_acc:0.9214628297362111, val_loss did not improve from 0.0973
[Epoch 7] train_acc: 0.9577338129

In [8]:
# Experiment 6: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-4
from models import BaselineResNet
model_base_5 = BaselineResNet(58)
optimizer = optim.SGD(model_base_5.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_5, optimizer, train_loader, val_loader, epochs=70, model_path='model_base_5.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.052158273381294966, val_acc:0.1552757793764988, val_loss improved from inf to 3.9343. Saving model to model_base_5.pth.
[Epoch 1] train_acc: 0.1474820143884892, val_acc:0.15347721822541965, val_loss improved from 3.9343 to 3.3593. Saving model to model_base_5.pth.
[Epoch 2] train_acc: 0.15197841726618705, val_acc:0.16606714628297362, val_loss did not improve from 3.3593
[Epoch 3] train_acc: 0.18854916067146282, val_acc:0.2014388489208633, val_loss did not improve from 3.5582
[Epoch 4] train_acc: 0.22422062350119903, val_acc:0.2392086330935252, val_loss improved from 3.6231 to 2.2558. Saving model to model_base_5.pth.
[Epoch 5] train_acc: 0.25524580335731417, val_acc:0.2607913669064748, val_loss improved from 2.2558 to 2.0427. Saving model to model_base_5.pth.
[Epoch 6] train_acc: 0.27473021582733814, val_acc:0.2637889688249401, val_loss did not improve from 2.0427
[Epoch 7] train_acc: 0.28042565947242204, val_acc:0.2709832134292566, val_loss improved from 3.5269 

[Epoch 65] train_acc: 0.5984712230215827, val_acc:0.5833333333333334, val_loss improved from 2.5253 to 1.4069. Saving model to model_base_5.pth.
[Epoch 66] train_acc: 0.6032673860911271, val_acc:0.5377697841726619, val_loss did not improve from 1.4069
[Epoch 67] train_acc: 0.6059652278177458, val_acc:0.5755395683453237, val_loss improved from 2.2139 to 1.9056. Saving model to model_base_5.pth.
[Epoch 68] train_acc: 0.6095623501199041, val_acc:0.579136690647482, val_loss improved from 1.9056 to 1.4252. Saving model to model_base_5.pth.
[Epoch 69] train_acc: 0.6101618705035972, val_acc:0.5785371702637889, val_loss did not improve from 1.4252


In [9]:
# Experiment 7: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-4
from models import BaselineResNet
model_base_6 = BaselineResNet(58)
model_path = 'model_base_5.pth'
params = torch.load(model_path)
model_base_6.load_state_dict(params)
model_base_6 = model_base_6.to(device=device)
optimizer = optim.SGD(model_base_6.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_6, optimizer, train_loader, val_loader, epochs=70, model_path='model_base_6.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.612410071942446, val_acc:0.5785371702637889, val_loss improved from inf to 2.0093. Saving model to model_base_6.pth.
[Epoch 1] train_acc: 0.612410071942446, val_acc:0.5053956834532374, val_loss did not improve from 2.0093
[Epoch 2] train_acc: 0.6157074340527577, val_acc:0.5893285371702638, val_loss improved from 2.5192 to 1.3822. Saving model to model_base_6.pth.
[Epoch 3] train_acc: 0.6151079136690647, val_acc:0.5623501199040767, val_loss improved from 1.3822 to 1.2687. Saving model to model_base_6.pth.
[Epoch 4] train_acc: 0.6172062350119905, val_acc:0.5599520383693045, val_loss did not improve from 1.2687
[Epoch 5] train_acc: 0.6193045563549161, val_acc:0.5833333333333334, val_loss improved from 2.4156 to 2.0095. Saving model to model_base_6.pth.
[Epoch 6] train_acc: 0.6267985611510791, val_acc:0.4982014388489209, val_loss improved from 2.0095 to 1.4769. Saving model to model_base_6.pth.
[Epoch 7] train_acc: 0.6263489208633094, val_acc:0.5185851318944844, val_

[Epoch 64] train_acc: 0.7636390887290168, val_acc:0.697242206235012, val_loss did not improve from 0.6093
[Epoch 65] train_acc: 0.7658872901678657, val_acc:0.6816546762589928, val_loss improved from 1.8317 to 1.4549. Saving model to model_base_6.pth.
[Epoch 66] train_acc: 0.7648381294964028, val_acc:0.7194244604316546, val_loss improved from 1.4549 to 0.7013. Saving model to model_base_6.pth.
[Epoch 67] train_acc: 0.770083932853717, val_acc:0.4922062350119904, val_loss did not improve from 0.7013
[Epoch 68] train_acc: 0.7661870503597122, val_acc:0.6900479616306955, val_loss did not improve from 0.7527
[Epoch 69] train_acc: 0.7693345323741008, val_acc:0.7170263788968825, val_loss did not improve from 1.0467


In [None]:
# Experiment 8: Train a baseline ResNet18: no branch
lr = 1e-3
wd = 1e-4
from models import BaselineResNet
model_base_7 = BaselineResNet(58)
model_path = 'model_base_6.pth'
params = torch.load(model_path)
model_base_7.load_state_dict(params)
model_base_7 = model_base_7.to(device=device)
optimizer = optim.SGD(model_base_7.parameters(), lr=lr, weight_decay=wd)
train_main(model_base_7, optimizer, train_loader, val_loader, epochs=70, model_path='model_base_7.pth', early_stop_patience=5)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


[Epoch 0] train_acc: 0.7721822541966427, val_acc:0.6996402877697842, val_loss improved from inf to 0.7670. Saving model to model_base_7.pth.
[Epoch 1] train_acc: 0.7733812949640287, val_acc:0.6936450839328537, val_loss did not improve from 0.7670
[Epoch 2] train_acc: 0.7705335731414868, val_acc:0.5353717026378897, val_loss improved from 1.0371 to 0.8967. Saving model to model_base_7.pth.
[Epoch 3] train_acc: 0.7726318944844125, val_acc:0.7134292565947242, val_loss did not improve from 0.8967
[Epoch 4] train_acc: 0.7741306954436451, val_acc:0.5695443645083933, val_loss improved from 2.0632 to 1.1265. Saving model to model_base_7.pth.
[Epoch 5] train_acc: 0.7774280575539568, val_acc:0.6870503597122302, val_loss did not improve from 1.1265
[Epoch 6] train_acc: 0.7784772182254197, val_acc:0.7008393285371702, val_loss improved from 1.2648 to 0.4302. Saving model to model_base_7.pth.
[Epoch 7] train_acc: 0.7831235011990407, val_acc:0.7482014388489209, val_loss did not improve from 0.4302
[Ep

## Evaluation
### Uncorrupted Images

In [9]:
from models import BaselineResNet
model_path = 'model.pth'
model = BaselineResNet(58)
params = torch.load(model_path)
model.load_state_dict(params)
model = model.to(device=device)
evaluate_main(model, test_loader)


Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


(2.2637951374053955, 0.6228686058174524)

In [12]:
from models import BaselineResNet
model_path = 'model_base_1.pth'
model_base_1 = BaselineResNet(58)
params = torch.load(model_path)
model_base_1.load_state_dict(params)
model_base_1 = model_base_1.to(device=device)
evaluate_main(model_base_1, test_loader)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


(1.1314829587936401, 0.6855566700100301)

In [14]:
from models import BaselineResNet
model_path = 'model_base_2.pth'
model_base_2 = BaselineResNet(58)
params = torch.load(model_path)
model_base_2.load_state_dict(params)
model_base_2 = model_base_2.to(device=device)
evaluate_main(model_base_2, test_loader)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


(1.4556317329406738, 0.6639919759277834)

In [16]:
from models import BaselineResNet
model_path = 'model_base_3.pth'
model_base_3 = BaselineResNet(58)
params = torch.load(model_path)
model_base_3.load_state_dict(params)
model_base_3 = model_base_3.to(device=device)
evaluate_main(model_base_3, test_loader)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


(2.6625049114227295, 0.5125376128385155)

In [18]:
from models import BaselineResNet
model_path = 'model_base_4.pth'
model_base_4 = BaselineResNet(58)
params = torch.load(model_path)
model_base_4.load_state_dict(params)
model_base_4 = model_base_4.to(device=device)
evaluate_main(model_base_4, test_loader)

Using cache found in C:\Users\hycme/.cache\torch\hub\pytorch_vision_v0.6.0


(1.644360065460205, 0.645937813440321)

### Corrupted Images

# Experiment 2: ResNet18 with Auxillary Branch (No Online Training)

In [None]:
# Experiment 2: Train a ResNet18 with auxillary branch

# Experiment 3: ResNet18 with Auxillary Branch (Online-Trained)

In [None]:
# Experiment 3: Do online training on the auxillary branch, with pre-trained shared and main branch weights from experiment 1.