# Improving Patch Classification
In this notebook, I'm going to explore how to improve the patch classifier as the current one is not really performing. In fact let's try out a bunch of the pretrained ones from torch vision. We have quite large datasets so it makes sense to try and train the whole network, not just the end layers.

In [1]:
# standard project preamble
from pathgen.utils.seeds import set_seed
from pathgen.utils.paths import project_root

experiment_name = "all"
experiment_root = project_root() / "experiments" / experiment_name

global_seed = 987654321
set_seed(global_seed)

In [2]:
# standard data science imports
import numpy as np

# standard pytorch imports
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

# pytorch data loading imports
from torch.utils.data import DataLoader, RandomSampler
from torchvision.datasets import ImageFolder

# torchvision
import torchvision
import torchvision.transforms as transforms

In [3]:
# define our hyper parameters
batch_size = 128
num_epochs = 30
learning_rate = 0.00001  # 0.001 - takes 25mins (log10 grid search?)

In [4]:
# set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# set up the data loaders for training
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])  # these values are what the pretrained models expect
])

train_set = ImageFolder(experiment_root / 'train_patches', transform=transform)  # 70,000 samples (35,000 per class) - non-fake
valid_set = ImageFolder(experiment_root / 'valid_patches', transform=transform)  # 30,000 samples (15,000 per class) - non-fake

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, worker_init_fn=np.random.seed(global_seed), num_workers=32)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, worker_init_fn=np.random.seed(global_seed), num_workers=32)

# downsample to speed up training to test the code
#train_sampler = RandomSampler(train_set, replacement=True, num_samples=256)
#valid_sampler = RandomSampler(valid_set, replacement=True, num_samples=256)

#train_loader = DataLoader(train_set, batch_size=batch_size, sampler=train_sampler, worker_init_fn=np.random.seed(global_seed), num_workers=32)
#valid_loader = DataLoader(valid_set, batch_size=batch_size, sampler=valid_sampler, worker_init_fn=np.random.seed(global_seed), num_workers=32)

## Saving and loading models
Let's add some functions for loading an saving the state of the model and training using a checkpoint.

In [6]:
def save_checkpoint(epoch, model, optimizer, path):
    print(f"saving checkpoint to {path}")
    state = { 'epoch': epoch, 
              'model_state_dict': model.state_dict(),
              'optimizer_state_dict': optimizer.state_dict() }
    torch.save(state, path)

def load_checkpoint(model, optimizer, path):
    print("loading checkpoint")
    state = torch.load(path)
    epoch = state["epoch"]
    model.load_state_dict(state["state_dict"])
    optimizer.load_state_dict(state["optimizer"])
    return epoch, model, optimizer, loss

## Set up the models that we are going to test
We are going to test using VGG16, Resnet-18, InceptionV3.

In [7]:
# preamble
from torchsummary import summary

# get and example image from the dataset so we can generate the summaries
img, _ = train_set[0]
img.to(device)

tensor([[[ 0.2796,  0.0056, -0.3369,  ..., -0.5082, -0.2513, -0.2171],
         [ 0.5364,  0.3138,  0.0227,  ..., -0.3027, -0.0629,  0.0398],
         [ 0.9474,  0.7248,  0.6049,  ...,  0.7248,  1.0159,  1.2043],
         ...,
         [ 2.2489,  2.2489,  2.2489,  ...,  1.1700, -0.0287, -0.8507],
         [ 2.0948,  2.1804,  2.2147,  ...,  1.3413,  0.2453, -0.6281],
         [ 1.9235,  2.0092,  2.1462,  ...,  1.6667,  0.6392, -0.2342]],

        [[-0.3375, -0.6176, -1.0203,  ..., -0.9678, -0.7052, -0.7052],
         [-0.0399, -0.3025, -0.6001,  ..., -0.6702, -0.4426, -0.3901],
         [ 0.4328,  0.2052,  0.0301,  ...,  0.4853,  0.7654,  0.9580],
         ...,
         [ 2.2010,  2.2360,  2.1485,  ...,  0.8004, -0.4426, -1.3004],
         [ 2.3060,  2.3585,  2.3585,  ...,  0.9930, -0.1275, -1.0378],
         [ 2.2010,  2.2710,  2.3585,  ...,  1.2731,  0.2402, -0.7052]],

        [[ 0.6705,  0.3916,  0.0605,  ...,  0.0779,  0.4439,  0.4614],
         [ 0.9145,  0.6705,  0.4265,  ...,  0

In [8]:
# VGG16 load the model
model_vgg16 = torchvision.models.vgg16(pretrained=False)
model_vgg16.classifier[6] = nn.Sequential(nn.Linear(in_features=4096, out_features=1, bias=True))

# get a summary of the model based on our input shape
model_vgg16.to(device)
summary(model_vgg16, img.shape, batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [128, 64, 256, 256]           1,792
              ReLU-2        [128, 64, 256, 256]               0
            Conv2d-3        [128, 64, 256, 256]          36,928
              ReLU-4        [128, 64, 256, 256]               0
         MaxPool2d-5        [128, 64, 128, 128]               0
            Conv2d-6       [128, 128, 128, 128]          73,856
              ReLU-7       [128, 128, 128, 128]               0
            Conv2d-8       [128, 128, 128, 128]         147,584
              ReLU-9       [128, 128, 128, 128]               0
        MaxPool2d-10         [128, 128, 64, 64]               0
           Conv2d-11         [128, 256, 64, 64]         295,168
             ReLU-12         [128, 256, 64, 64]               0
           Conv2d-13         [128, 256, 64, 64]         590,080
             ReLU-14         [128, 256,

In [9]:
# load in the pretrained resnet18 model and change the number of outputs on the final fc layer
model_resnet18 = torchvision.models.resnet18(pretrained=False)
model_resnet18.fc = nn.Linear(model_resnet18.fc.in_features, 1)

# get a summary of the model based on our input shape
model_resnet18.to(device)
print(model_resnet18)
summary(model_resnet18, img.shape, batch_size)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [10]:
# load in the pretrained densenet model and change the number of outputs on the final fc layer
model_googlenet = torchvision.models.googlenet(pretrained=True)
model_googlenet.fc = nn.Linear(model_googlenet.fc.in_features, 1)

# get a summary of the model based on our input shape
model_googlenet.to(device)
summary(model_googlenet, img.shape, batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [128, 64, 128, 128]           9,408
       BatchNorm2d-2        [128, 64, 128, 128]             128
       BasicConv2d-3        [128, 64, 128, 128]               0
         MaxPool2d-4          [128, 64, 64, 64]               0
            Conv2d-5          [128, 64, 64, 64]           4,096
       BatchNorm2d-6          [128, 64, 64, 64]             128
       BasicConv2d-7          [128, 64, 64, 64]               0
            Conv2d-8         [128, 192, 64, 64]         110,592
       BatchNorm2d-9         [128, 192, 64, 64]             384
      BasicConv2d-10         [128, 192, 64, 64]               0
        MaxPool2d-11         [128, 192, 32, 32]               0
           Conv2d-12          [128, 64, 32, 32]          12,288
      BatchNorm2d-13          [128, 64, 32, 32]             128
      BasicConv2d-14          [128, 64,

## Training
Now that we have the data loader created and the model defined, let's define our loss and optimiser and run the training loop.

In [11]:
from statistics import mean

class LoggedVariable:
    def __init__(self):
        self.batch_values = []
        self.epoch_values = []
        
    def append(self, value):
        self.batch_values.append(value)
        
    def end_epoch(self):
        mean_batch_values = mean(self.batch_values)
        self.epoch_values.append(mean_batch_values)
        self.batch_values = []
        
class Logger:
    def __init__(self):
        self.variables = {}
    
    def __call__(self, key, value):
        if key not in self.variables:
            self.variables[key] = LoggedVariable()
        self.variables[key].append(value)
    
    def end_epoch(self, epoch):
        print(f"end epoch {epoch}", end = '')
        for key, val in self.variables.items():
            val.end_epoch()
            print(f" {key}: {val.epoch_values[epoch]:.2f}", end = '')
        print()
    
    def history(self):
        return { k:v.epoch_values for k, v in self.variables.items() }

In [12]:
def accuracy(scores, targets):
    _, predictions = scores.max(1)
    num_correct = (predictions == targets).sum()
    num_samples = predictions.size(0)
    return num_correct / num_samples

def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))
    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    return acc

In [13]:
from time import time

def fit(model, optimizer_class, criterion, train_loader, valid_loader, device, epochs=20, learning_rate=0.001):
    print(f'fitting model: {type(model).__name__} for {epochs} epochs.')
    
    # set up the optimizer
    optimizer = optimizer_class(model.parameters(), lr=learning_rate)
    
    # initalise stats
    log = Logger()
    start_time_sec = time()
    
    for epoch in range(epochs):
        
        # train and evaluate on the training set
        model.train()
        for batch_idx, (data, targets) in enumerate(train_loader):
            # put X and y for the batch on the GPU is possible
            data = data.to(device=device)
            targets = targets.to(device=device)
            
            # forward pass
            scores = model(data)
            loss = criterion(scores, targets.unsqueeze(1).float())
            
            # backwards pass
            optimizer.zero_grad()
            loss.backward()
            
            # gradient descent
            optimizer.step()
            
            # log the metrics
            acc = binary_acc(scores, targets.unsqueeze(1))
            log('train_acc', acc.item())
            log('train_loss', loss.item())
            
            print('\r', f'train.\t\tepoch: {epoch}\tbatch: {batch_idx + 1}/{len(train_loader)}\tloss: {loss:.3f}\taccuracy: {acc} ', sep='', end='', flush=True)
        
        print()
        
        # evaluate on the validation set
        model.eval()
        for batch_idx, (data, targets) in enumerate(valid_loader):
            # put X and y for the batch on the device
            data = data.to(device=device)
            targets = targets.to(device=device)
            
            # compute the predictions
            scores = model(data)
            
            # computer the metric and log them
            loss = criterion(scores, targets.unsqueeze(1).float())
            acc = binary_acc(scores, targets.unsqueeze(1))
            log('valid_acc', acc.item())
            log('valid_loss', loss.item())
        
            print('\r', f'validate.\tepoch: {epoch}\tbatch: {batch_idx + 1}/{len(valid_loader)}\tloss: {loss:.3f}\taccuracy: {acc} ', sep='', end='', flush=True)        
        
        print()
        
        save_checkpoint(epoch, model, optimizer, experiment_root / f"{type(model).__name__}_checkpoint_{epoch}.ckpt") 
        log.end_epoch(epoch)
        
        print()
    
    end_time_sec       = time()
    total_time_sec     = end_time_sec - start_time_sec
    time_per_epoch_sec = total_time_sec / epochs
    print("training complete.")
    print('Time total:     %5.2f sec' % (total_time_sec))
    print('Time per epoch: %5.2f sec' % (time_per_epoch_sec))
    print()
    
    return log.history()
    

In [14]:
# criterion = nn.BCEWithLogitsLoss()  # combines a Sigmoid layer and the BCELoss in one single class
# history = fit(model_vgg16, optim.Adam, criterion, train_loader, valid_loader, device, epochs=num_epochs, learning_rate=learning_rate)
# history

In [15]:
import json

criterion = nn.BCEWithLogitsLoss()
# models = [model_vgg16, model_resnet18, model_googlenet]
models = [model_resnet18]
for model in models:
    # fit the model
    history = fit(model, optim.Adam, criterion, train_loader, valid_loader, device, epochs=num_epochs)
    
    # save the results
    json.dump(history, open(experiment_root / f"results_{type(model).__name__}.json", "w"))

fitting model: ResNet for 30 epochs.
train.		epoch: 0	batch: 547/547	loss: 0.408	accuracy: 82.0 
validate.	epoch: 0	batch: 235/235	loss: 0.344	accuracy: 85.0 
saving checkpoint to /home/ubuntu/pathgen/experiments/all/ResNet_checkpoint_0.ckpt
end epoch 0 train_acc: 83.09 train_loss: 0.39 valid_acc: 75.20 valid_loss: 0.55

train.		epoch: 1	batch: 547/547	loss: 0.221	accuracy: 91.0 
validate.	epoch: 1	batch: 235/235	loss: 0.269	accuracy: 92.0 
saving checkpoint to /home/ubuntu/pathgen/experiments/all/ResNet_checkpoint_1.ckpt
end epoch 1 train_acc: 86.31 train_loss: 0.33 valid_acc: 86.26 valid_loss: 0.34

train.		epoch: 2	batch: 547/547	loss: 0.293	accuracy: 89.0 
validate.	epoch: 2	batch: 235/235	loss: 0.468	accuracy: 85.0 
saving checkpoint to /home/ubuntu/pathgen/experiments/all/ResNet_checkpoint_2.ckpt
end epoch 2 train_acc: 87.86 train_loss: 0.29 valid_acc: 87.66 valid_loss: 0.31

train.		epoch: 3	batch: 547/547	loss: 0.298	accuracy: 87.0 
validate.	epoch: 3	batch: 235/235	loss: 0.633

Now let's visualise the training process and see how things went.

Let's test the model on the test set to see how it performs.