In [24]:
import torch
import torchvision
import torchvision.transforms as T
import torchvision.models as models


import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions.laplace import Laplace
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import  tqdm
import seaborn as sns
import pickle as pkl
from pathlib import Path


import sys
sys.path.append('..')
from thresholdout import Thresholdout

In [2]:
# Preferred accelerator is GPU index 3. Fall back to the first GPU, and then
# to the CPU, so the notebook still runs on hosts that expose fewer devices.
_PREFERRED_GPU = 3
if torch.cuda.is_available() and torch.cuda.device_count() > _PREFERRED_GPU:
    DEVICE = torch.device("cuda", _PREFERRED_GPU)
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda", 0)
else:
    DEVICE = torch.device("cpu")

# Root directory under which the datasets are downloaded / cached.
DATA_ROOT = Path('../data')

In [3]:
# Check that PyTorch can see a CUDA device; the result is shown as the cell output.
torch.cuda.is_available()

True

# Experiment Setup
Start from an untrained ResNet-18 and train it on a binary classification problem, then extend it to recognise 3 classes, then 4, and so on up to all 10 classes. While training is ongoing, we monitor overfitting using Thresholdout.


**TODO:** What do we expect? What are we proving?

## Model & Data Prep

In [5]:
# Per-channel mean / std used to standardise the RGB inputs.
normalize = T.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Preprocessing applied to every image: resize to 256, centre-crop to 224,
# convert to a tensor, then standardise with `normalize`.
TRANSFORMS = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    normalize,
])

# Class names; a name's position in this tuple is its integer label
# (see CLASS_TO_IDX).
CLASSES = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

In [6]:
def imshow(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display a normalised channel-first image with matplotlib.

    The image is un-normalised channel-wise (``x * std + mean``) before it is
    shown. The default ``mean`` / ``std`` are the values passed to
    ``T.Normalize`` in this notebook, so images produced by ``TRANSFORMS``
    are rendered with their original colours.

    Args:
        img: image laid out as (channels, height, width); either a torch
            tensor or anything ``np.asarray`` accepts.
        mean: per-channel mean that was subtracted during normalisation.
        std: per-channel standard deviation that was divided out.
    """
    # Tensors may live on the GPU or carry gradients; bring them to NumPy.
    if torch.is_tensor(img):
        img = img.detach().cpu().numpy()
    npimg = np.asarray(img, dtype=np.float32)

    # Reshape the statistics to (C, 1, 1) so they broadcast over H and W.
    channel_std = np.asarray(std, dtype=np.float32).reshape(-1, 1, 1)
    channel_mean = np.asarray(mean, dtype=np.float32).reshape(-1, 1, 1)
    npimg = npimg * channel_std + channel_mean     # unnormalize

    # Keep float values inside the range matplotlib renders without warnings.
    npimg = np.clip(npimg, 0.0, 1.0)

    # matplotlib expects channel-last images: (H, W, C).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [7]:
# Map every class name to its integer label, i.e. its position in CLASSES.
CLASS_TO_IDX = {name: label for label, name in enumerate(CLASSES)}

In [8]:
# Show the mapping to confirm each class name gets its position in CLASSES.
CLASS_TO_IDX

{'plane': 0,
 'car': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [9]:
def extract_cl_idxs(targets, *classes):
    """Return the positions in ``targets`` whose label is one of ``classes``.

    Args:
        targets: sequence of integer labels, one per sample.
        *classes: class names; each must be a key of ``CLASS_TO_IDX``.

    Returns:
        A list of sample indices, grouped by class in the order the class
        names were given (all matches of the first class, then the second, ...).

    Raises:
        KeyError: if a class name is not present in ``CLASS_TO_IDX``.
    """
    wanted_labels = [CLASS_TO_IDX[name] for name in classes]
    labels = np.array(targets)

    return [
        position
        for label in wanted_labels
        for position in np.asarray(labels == label).nonzero()[0]
    ]

In [10]:
def select_classes(trainset, testset, *classes):
    """Restrict both datasets, in place, to the samples of the given classes.

    ``targets`` and ``data`` of each dataset are overwritten with the filtered
    subset. Labels keep their original values; they are not re-numbered.

    Args:
        trainset: dataset exposing ``targets`` (label sequence) and ``data``
            (indexable array of samples).
        testset: dataset with the same two attributes.
        *classes: class names to keep, forwarded to ``extract_cl_idxs``.

    Returns:
        The same ``(trainset, testset)`` objects after filtering.
    """
    for dataset in (trainset, testset):
        keep = extract_cl_idxs(dataset.targets, *classes)

        dataset.targets = [dataset.targets[pos] for pos in keep]
        dataset.data = dataset.data[keep]

    return trainset, testset
    
    

In [13]:
def get_cl_dataset(*classes):
    """Load CIFAR-10 and keep only the requested classes.

    Both splits are read from (and, if missing, downloaded to)
    ``DATA_ROOT / 'cifar-10-data'`` and use ``TRANSFORMS`` as preprocessing.

    Args:
        *classes: class names to keep, forwarded to ``select_classes``.

    Returns:
        ``(trainset, testset)`` restricted to the requested classes.
    """
    cifar_root = DATA_ROOT / 'cifar-10-data'

    # Training split first, then the test split.
    train_split, test_split = [
        torchvision.datasets.CIFAR10(
            root=cifar_root,
            train=is_train,
            download=True,
            transform=TRANSFORMS,
        )
        for is_train in (True, False)
    ]

    return select_classes(train_split, test_split, *classes)

In [14]:
# Build the two-class (plane vs. car) train / test subsets for the first, binary stage.
trainset, testset = get_cl_dataset('plane', 'car')

Files already downloaded and verified
Files already downloaded and verified


In [16]:
# Sanity check: shapes of the image arrays after filtering to the two classes.
trainset.data.shape, testset.data.shape

((10000, 32, 32, 3), (2000, 32, 32, 3))

In [17]:
# Sanity check: the label lists must be as long as the corresponding image arrays.
len(trainset.targets), len(testset.targets)

(10000, 2000)

In [27]:
def change_num_cl(model, cls):
    """Replace ``model.fc`` with a fresh linear head with one output per class.

    The new layer is randomly initialised; weights of the previous head are
    not carried over. Its input width is taken from the existing head (falling
    back to 512 when the current ``fc`` does not expose ``in_features``), and
    it is placed on the same device / dtype as the head it replaces, so the
    head can be swapped after the model has already been moved to a GPU.

    Args:
        model: module with an ``fc`` attribute used as the classification head.
        cls: sized collection of classes; only ``len(cls)`` is used.
    """
    old_head = getattr(model, 'fc', None)
    in_features = getattr(old_head, 'in_features', 512)

    new_head = nn.Linear(in_features, len(cls))

    # Follow the old head's placement to avoid CPU/GPU mismatches in forward().
    old_weight = getattr(old_head, 'weight', None)
    if old_weight is not None:
        new_head = new_head.to(device=old_weight.device, dtype=old_weight.dtype)

    model.fc = new_head

    

In [28]:
# ResNet-18 created without pretrained weights.
model = models.resnet18(pretrained=False)
# Swap the classification head for one with an output per selected class.
change_num_cl(model, ['plane', 'car'])
# Move the model to the chosen device; as the cell's last expression this also prints the architecture.
model.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## Thresholdout (with budget)

In [30]:
class Thresholdout:
    """Answer statistic queries from the training set while guarding a holdout set.

    A query is answered with the training-set value as long as it agrees with
    the holdout value up to a noisy threshold; otherwise a noise-perturbed
    holdout value is returned and the noisy threshold is re-drawn.

    NOTE: this definition shadows the ``Thresholdout`` name imported from the
    ``thresholdout`` module in the imports cell.

    Attributes set by ``__init__``:
        tolerance: base noise unit from which all scales are derived.
        laplace_eps / laplace_gamma / laplace_eta: zero-mean Laplace
            distributions with scale 2x, 4x and 8x ``tolerance``.
        train, holdout: the two datasets handed to every query.
        base_threshold: noise-free threshold, ``4 * tolerance``.
        T: current noisy threshold, ``base_threshold`` plus ``laplace_gamma`` noise.
    """

    def __init__(self, train, holdout, tolerance=0.01/4, scale_factor=4):
        """Store the datasets and draw the initial noisy threshold.

        Args:
            train: training dataset, passed as-is to the queried statistic.
            holdout: holdout dataset, passed as-is to the queried statistic.
            tolerance: base scale for the threshold and the Laplace noise.
            scale_factor: accepted for interface compatibility; currently
                unused (the multipliers 2, 4 and 8 below are fixed).
        """
        self.tolerance = tolerance

        self.laplace_eps = Laplace(torch.tensor([0.0]), torch.tensor([2*self.tolerance]))
        self.laplace_gamma = Laplace(torch.tensor([0.0]), torch.tensor([4*self.tolerance]))
        self.laplace_eta = Laplace(torch.tensor([0.0]), torch.tensor([8*self.tolerance]))

        self.train = train
        self.holdout = holdout

        # Keep the noise-free threshold so T can be re-drawn around it later.
        self.base_threshold = 4*tolerance
        self.T = self.base_threshold + self.noise(self.laplace_gamma)
        # TODO: a query budget is not implemented yet.
        # self.budget = ???

    def noise(self, dist):
        """Draw one sample from ``dist`` and return it as a Python float."""
        return dist.sample().item()

    def verify_statistic(self, phi):
        """Evaluate ``phi`` on both datasets and decide which value to release.

        Args:
            phi: ``phi(dataset) -> statistic``; function returning the average
                of some statistic over the dataset it is given.

        Returns:
            A tuple ``(value, delta, agreed)`` where ``delta`` is the absolute
            train/holdout difference. If ``delta`` exceeds the noisy threshold,
            ``value`` is the holdout statistic plus ``laplace_eps`` noise and
            ``agreed`` is ``False``; otherwise ``value`` is the training
            statistic and ``agreed`` is ``True``.
        """
        train_val = phi(self.train)
        holdout_val = phi(self.holdout)

        delta = abs(train_val - holdout_val)

        if delta > self.T + self.noise(self.laplace_eta):
            # Re-draw the threshold around its base value. Adding fresh noise
            # to the previous T would let the noise accumulate and the
            # threshold drift away from base_threshold over many queries.
            self.T = self.base_threshold + self.noise(self.laplace_gamma)
            return holdout_val + self.noise(self.laplace_eps), delta, False

        return train_val, delta, True

SyntaxError: invalid syntax (<ipython-input-30-4ad856e4dcdf>, line 13)

# Experiment Routine

In [51]:
def run():
    """Placeholder for the experiment routine; it currently does nothing."""
    return None