In [None]:
# Install the third-party packages imported further down (pycm, livelossplot, albumentations)
!pip install pycm livelossplot albumentations
# %pylab inline brings numpy (as `np`) and matplotlib.pyplot (as `plt`) into the namespace;
# later cells use `np` and `plt` without importing them explicitly
%pylab inline

In [None]:
# --- Augmentation backend ---------------------------------------------------
# True  -> albumentations augmentations
# False -> regular torchvision transforms
use_albumentations = False

# --- Architecture to transfer -----------------------------------------------
models_available = ["Googlenet", "Resnet18", "Resnet50", "Inception"]
model_to_train = models_available[3]
# Only relevant for Inception: also train on the auxiliary output
inception_use_aux = True

# How many of the transferred model's leading child modules keep frozen weights
layers_to_freeze = 5

# Inception takes a different input size from the other models
input_size = 299 if model_to_train == "Inception" else 224

# --- Hyperparameters --------------------------------------------------------
lr, momentum, weight_decay = 1e-2, 0.4, 1e-3
batch_size = 64
test_batch_size = 100  # decrease test batch size to reduce RAM usage
n_epochs = 30

# --- Data locations (change these to wherever the data is stored) -----------
train_directory, test_directory = "./train/", "./test/"

# --- Output -----------------------------------------------------------------
# Pick a descriptive file name for the saved weights
model_save_name = 'Inception_frozen_some_no_albumentations.pth'

In [None]:
from sklearn.metrics import accuracy_score # this allows us to evaluate our model at every iteration
from sklearn.metrics import f1_score # this allows us to evaluate our validation accuracy
from sklearn.model_selection import StratifiedShuffleSplit # this allows us to create a random validation split

# These imports help plot the convergence and create the confusion matrix
from livelossplot import PlotLosses
from pycm import *

# These imports help us create models and datasets
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# This allows me to create my own custom dataset
from torch.utils.data import Dataset 
from torchvision.datasets.folder import *

# This allows me to import pretrained models for transfer learning
import torchvision.models as models

# This allows me to do a number of transforms for data augmentation later on
from torchvision.transforms import *
from torchvision.transforms.functional import normalize

# These imports help us write the submission file
import json, csv
import os
import os.path

# We could use albumentations to perform data augmentation
if (use_albumentations):
    from albumentations import Compose
    import albumentations.augmentations.transforms as transforms

# This helps us keep a copy of model state dicts
import copy

# To display random images
from random import randrange

# Enable hardware acceleration.
# The notebook prefers GPU index 3. On a machine that exposes fewer GPUs that index does not
# exist and the first `.to(device)` would fail, so fall back to GPU 0 in that case.
preferred_gpu_index = 3
device = 'cpu'
if torch.cuda.is_available() and torch.cuda.device_count() > 0:
    print("Cuda installed! Running on GPU!")
    gpu_index = preferred_gpu_index if preferred_gpu_index < torch.cuda.device_count() else 0
    device = f'cuda:{gpu_index}'
else:
    print("No GPU available!")

In [None]:
# Per-channel mean and standard deviation of the whole training set (computed in another file)
means = [0.4802, 0.4481, 0.3975]
stds = [0.2302, 0.2265, 0.2262]

if use_albumentations:
    def apply_normalization(X):
        """Scale and standardise ``X`` channel by channel, in place.

        ``X`` is indexed as [number of samples, height, width, channels]. Each of the
        three channels is divided by 255, shifted by the matching entry of ``means``
        and divided by the matching entry of ``stds``. The same (mutated) object is
        returned.
        """
        for channel, (channel_mean, channel_std) in enumerate(zip(means, stds)):
            X[:, :, :, channel] /= 255.
            X[:, :, :, channel] -= channel_mean
            X[:, :, :, channel] /= channel_std
        return X

# Without albumentations, the normalize function of torchvision.transforms is used instead

In [None]:
if (use_albumentations):
    # Rewritten to help separate data from labels
    def my_make_dataset(directory, class_to_idx, extensions=None, is_valid_file=None):
        """Collect file paths and class indices under ``directory`` as two parallel lists.

        Follows the default ``make_dataset`` logic, but keeps data and labels separate
        instead of returning ``(path, class_index)`` pairs.

        Args:
            directory: root folder holding one sub-folder per class.
            class_to_idx: mapping from class (sub-folder) name to class index.
            extensions: allowed file extensions; mutually exclusive with ``is_valid_file``.
            is_valid_file: predicate on a file path; mutually exclusive with ``extensions``.

        Returns:
            ``(data, targets)`` where ``data[i]`` is a file path and ``targets[i]`` its class index.

        Raises:
            ValueError: if both or neither of ``extensions`` / ``is_valid_file`` are given.
        """
        directory = os.path.expanduser(directory)

        # Exactly one of the two filters has to be supplied
        if (extensions is None) == (is_valid_file is None):
            raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time")
        if extensions is not None:
            def is_valid_file(x):
                return has_file_allowed_extension(x, extensions)

        data, targets = [], []
        for target_class in sorted(class_to_idx.keys()):
            target_dir = os.path.join(directory, target_class)
            if not os.path.isdir(target_dir):
                continue  # class listed in the mapping but absent on disk
            class_index = class_to_idx[target_class]
            for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
                candidates = (os.path.join(root, fname) for fname in sorted(fnames))
                accepted = [candidate for candidate in candidates if is_valid_file(candidate)]
                data.extend(accepted)
                targets.extend([class_index] * len(accepted))
        return data, targets


    # A custom dataset class that will work with albumentations. Only minor changes from regular ImageFolder:
    class AlbumentationImageFolder(ImageFolder):
        """ImageFolder variant that preloads every image and applies albumentations augmentations.

        Attributes set in addition to those of the parent constructor:
            imgs: alias of ``samples``.
            data: list with one ``ToTensor()``-converted image per file found by ``my_make_dataset``.
            targets: list of class indices, parallel to ``data``.
            augmentation: optional callable invoked as ``augmentation(image=ndarray)['image']``.
        """

        def __init__(self, root, extensions=IMG_EXTENSIONS, transform=None,
                     target_transform=None, is_valid_file=None, augmentation=None):
            # Only one of `extensions` / `is_valid_file` may be active. A user-supplied
            # predicate therefore disables the extension filter, and the caller's
            # `extensions` (not a hard-coded default) is honoured otherwise, so that
            # `samples` and `data`/`targets` are built from the same set of files.
            active_extensions = extensions if is_valid_file is None else None

            # super(ImageFolder, self) starts the lookup after ImageFolder, i.e. at its parent,
            # whose constructor accepts a loader and an extensions filter.
            super(ImageFolder, self).__init__(root, default_loader, active_extensions,
                                              transform=transform,
                                              target_transform=target_transform,
                                              is_valid_file=is_valid_file)

            # The parent constructor has already discovered the classes; reuse its mapping
            # rather than calling the private `_find_classes` helper again.
            data, targets = my_make_dataset(self.root, self.class_to_idx, active_extensions, is_valid_file)
            self.imgs = self.samples
            # store data and targets separately:
            to_tensor = ToTensor()
            self.data = [to_tensor(self.loader(path)) for path in data]
            self.targets = targets
            self.augmentation = augmentation  # store augmentations to be applied

        def __getitem__(self, idx):
            """Return ``(sample, target)`` for position ``idx``.

            With an augmentation set, the loaded image is augmented, moved to
            channel-first order and returned as a float tensor. Without one, the
            loader's output is returned unchanged.
            """
            path, target = self.samples[idx]
            sample = self.loader(path)
            if self.augmentation is not None:
                augmented = self.augmentation(image=np.asarray(sample))['image']  # apply augmentation
                channel_first = np.transpose(augmented, (2, 0, 1)).astype(np.float32)  # HWC -> CHW
                sample = torch.tensor(channel_first, dtype=torch.float)

            return sample, target
    
    # this will hold our training and validation datasets after separating them from my_data
    # it is functionally the same as AlbumentationImageFolder except it takes in data and targets as inputs rather than a path
    # This is so that we can pass it X_train/y_train and X_val/y_val after getting them with StratifiedShuffleSplit
    class CustomImageTensorDataset(Dataset):
        """In-memory dataset built from already-loaded samples and their labels.

        Args:
            data (Tensor): A tensor containing the data e.g. images
            targets (Tensor): A tensor containing all the labels
            transform (callable, optional): Optional transform, invoked as
                ``transform(image=ndarray)['image']`` on each sample.
        """

        def __init__(self, data, targets, transform=None):
            self.data, self.targets, self.transform = data, targets, transform

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            """Return ``(sample, label)``; the sample is transformed only if a transform is set."""
            sample, label = self.data[idx], self.targets[idx]
            if not self.transform:
                return sample, label

            # The transform is fed a uint8 RGB array and its result is put back in channel-first order
            rgb_pixels = np.array(ToPILImage()(sample).convert("RGB"), dtype=np.uint8)
            transformed = self.transform(image=rgb_pixels)['image']
            channel_first = np.transpose(transformed, (2, 0, 1)).astype(np.float32)
            return torch.tensor(channel_first, dtype=torch.float), label
    
# A custom dataset class that retains filenames for use in creating the csv file
class ImageFolderWithPaths(ImageFolder):
    """ImageFolder whose items additionally carry the image file path.

    Only ``__getitem__`` is overridden: the tuple produced by
    ``ImageFolder.__getitem__`` is extended with the path as its last element.
    """

    def __getitem__(self, index):
        # parent tuple first, then the path stored alongside it in self.imgs
        return super().__getitem__(index) + (self.imgs[index][0],)

In [None]:
# torchvision transforms under an unambiguous alias: when use_albumentations is True the bare
# names `Compose` and `transforms` refer to albumentations, but the test set (and the torchvision
# validation split) must always go through torchvision transforms.
import torchvision.transforms as tv_transforms

# Deterministic preprocessing: only resize and normalize. Defined before the branch so that it
# exists in both modes (the test set below needs it either way).
validation_test_transform = tv_transforms.Compose([
    tv_transforms.Resize(input_size),
    tv_transforms.ToTensor(),
    tv_transforms.Normalize(means, stds)
])

if use_albumentations:
    my_data = AlbumentationImageFolder(train_directory)  # Load all the data

    # Plot a random 5x5 sample of it to observe it
    fig, axarr = plt.subplots(5, 5, figsize=(8, 8))
    n_loaded = len(my_data.data)  # draw indices from the actual dataset size
    for ax in axarr.flatten():
        i = randrange(n_loaded)
        ax.imshow(my_data.data[i].permute(1, 2, 0))
        ax.set_title(my_data.targets[i])
        ax.get_xaxis().set_visible(False)  # turn off the axes to improve visibility
        ax.get_yaxis().set_visible(False)  # since the axes do not provide useful info
    plt.subplots_adjust(hspace=0.3)  # increase spaces between the plots to improve visibility
    plt.show()

    # Create a split that maintains the same % of images per class in the training and validation data
    shuffler = StratifiedShuffleSplit(n_splits=1, test_size=0.05).split(my_data.data, my_data.targets)
    indices = [(train_idx, validation_idx) for train_idx, validation_idx in shuffler][0]

    # Transform list of tensors into higher dimensional tensor
    # This is to allow us to separate it by using indices[i] as an index
    my_data.data = torch.stack(my_data.data)

    # Separate training and validation data based on created indices
    all_targets = np.array(my_data.targets)
    X_train, y_train = my_data.data[indices[0]].float(), torch.from_numpy(all_targets[indices[0]])
    X_val, y_val = my_data.data[indices[1]].float(), torch.from_numpy(all_targets[indices[1]])

    # Apply a number of transforms on the training data to augment our dataset
    data_train = CustomImageTensorDataset(X_train, y_train.long(), transform=Compose([
                        transforms.RandomContrast(),
                        transforms.HorizontalFlip(),
                        transforms.HueSaturationValue(),
                        transforms.Resize(input_size, input_size),
                        transforms.Normalize(means, stds)
                        ]))

    # For validation, only resize and normalize the data
    data_validate = CustomImageTensorDataset(X_val, y_val.long(), transform=Compose([
                        transforms.Resize(input_size, input_size),
                        transforms.Normalize(means, stds)
                        ]))

    # The loaders below are built from these two names in both modes
    train_dataset, validation_dataset = data_train, data_validate
else:  # if not using albumentations
    # Perform different set of augmentations that we found to be optimal
    train_transform = tv_transforms.Compose([
        RandomApply([RandomChoice([RandomCrop(size=[64, 64], padding=6), RandomAffine(0, translate=(0.1, 0.1))])]),
        RandomHorizontalFlip(),
        RandomRotation(25),
        Resize(input_size),
        ToTensor(),
        Normalize(mean=means, std=stds),
    ])

    # Validation must only be resized and normalized. A split of a single ImageFolder would share
    # the training augmentations, so the validation subset indexes a second view of the same folder
    # that carries the deterministic transform.
    if model_to_train == "Inception":
        # Original choice kept: no random augmentation when training Inception.
        # NOTE(review): confirm that skipping augmentation for Inception is intended.
        my_data = ImageFolder(train_directory, transform=validation_test_transform)
        validation_source = my_data
    else:
        my_data = ImageFolder(train_directory, transform=train_transform)
        validation_source = ImageFolder(train_directory, transform=validation_test_transform)

    # Use 95% of the data for training
    train_size = int(0.95 * len(my_data))
    validation_size = len(my_data) - train_size
    # A plain random permutation is okay in place of Stratified Shuffle Split as we do not have
    # a heavy imbalance in the number of items in each class
    shuffled_indices = torch.randperm(len(my_data)).tolist()
    train_dataset = torch.utils.data.Subset(my_data, shuffled_indices[:train_size])
    validation_dataset = torch.utils.data.Subset(validation_source, shuffled_indices[train_size:])


# Create the loaders for all our tests
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
validation_loader = DataLoader(validation_dataset, batch_size=test_batch_size, shuffle=False, num_workers=0)

test_set = ImageFolderWithPaths(test_directory, transform=validation_test_transform)
test_loader = DataLoader(test_set)

In [None]:
# Trains our model on the training set
def train(model, optimizer, criterion, data_loader):
    """Run one training epoch over ``data_loader``.

    Reads the notebook-level settings ``device``, ``input_size`` and ``inception_use_aux``.

    Args:
        model: network to train; may return either a tensor of logits or a tuple whose
            first element is the main logits and whose remaining elements are auxiliary logits.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss taking ``(logits, targets)``.
        data_loader: loader yielding ``(X, y)`` batches and exposing ``.dataset``.

    Returns:
        ``(mean_loss, accuracy)`` averaged over ``len(data_loader.dataset)``. The loss is a
        detached tensor and the accuracy a ``np.float64``; both support ``.item()``.
    """
    model.train()
    train_loss, n_correct = 0, 0
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(X.view(-1, 3, input_size, input_size))

        # Only unpack auxiliary outputs when the model actually returned them; unpacking a plain
        # logits tensor would split it along the batch dimension instead.
        if isinstance(outputs, tuple):
            a2, aux_outputs = outputs[0], [aux for aux in outputs[1:] if aux is not None]
        else:
            a2, aux_outputs = outputs, []

        loss = criterion(a2, y)
        if inception_use_aux:
            for aux in aux_outputs:  # aux also tries to predict the answer
                loss = loss + 0.4 * criterion(aux, y)  # loss is a weighted sum

        loss.backward()
        optimizer.step()

        # detach so the running total does not stay attached to autograd
        train_loss += loss.detach() * X.size(0)
        n_correct += (a2.argmax(dim=1) == y).sum().item()

    n_samples = len(data_loader.dataset)
    return train_loss / n_samples, np.float64(n_correct / n_samples)
  
# Checks our model against the validation set
def validate(model, criterion, data_loader):
    """Compute loss and accuracy of ``model`` on a validation loader without updating it.

    Reads the notebook-level settings ``device`` and ``input_size``.

    Args:
        model: network to evaluate (switched to eval mode here).
        criterion: loss taking ``(logits, targets)``.
        data_loader: loader yielding ``(X, y)`` batches and exposing ``.dataset``.

    Returns:
        ``(mean_loss, accuracy)`` averaged over ``len(data_loader.dataset)``. The loss is a
        tensor and the accuracy a ``np.float64``, so both support ``.item()``.
    """
    model.eval()
    validation_loss, n_correct = 0., 0
    with torch.no_grad():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            a2 = model(X.view(-1, 3, input_size, input_size))
            validation_loss += criterion(a2, y) * X.size(0)
            # argmax of the logits equals argmax of their log-softmax
            n_correct += (a2.argmax(dim=1) == y).sum().item()

    n_samples = len(data_loader.dataset)
    return validation_loss / n_samples, np.float64(n_correct / n_samples)
  
# Evaluates our model's % accuracy
def evaluate(model, data_loader):
    """Collect the model's predicted classes and the true labels over ``data_loader``.

    Reads the notebook-level settings ``device`` and ``input_size``.

    Returns:
        ``(predictions, labels)`` as two NumPy arrays concatenated over all batches.
    """
    model.eval()
    predicted_batches, label_batches = [], []
    for inputs, labels in data_loader:
        with torch.no_grad():
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs.view(-1, 3, input_size, input_size))
            # index of the largest log-probability per row
            _, batch_predictions = torch.max(F.log_softmax(logits, dim=1), 1)
            label_batches.append(labels.cpu().numpy())
            predicted_batches.append(batch_predictions.cpu().numpy())

    return np.concatenate(predicted_batches, 0), np.concatenate(label_batches, 0)

# Generates predictions indexed with the right file names
def predict(model, data_loader):
    """Predict a class for every item of ``data_loader`` and keep the matching file names.

    The loader must yield ``(X, y, z)`` triples, where ``z`` holds the file names of the batch.
    Reads the notebook-level settings ``device`` and ``input_size``.

    Returns:
        ``(predictions, files)`` as two NumPy arrays concatenated over all batches.
    """
    model.eval()
    name_batches, predicted_batches = [], []  # name_batches holds the test image names
    for inputs, labels, names in data_loader:
        with torch.no_grad():
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs.view(-1, 3, input_size, input_size))
            # index of the largest log-probability per row
            _, batch_predictions = torch.max(F.log_softmax(logits, dim=1), 1)
            predicted_batches.append(batch_predictions.cpu().numpy())
            name_batches.append(names)

    return np.concatenate(predicted_batches, 0), np.concatenate(name_batches, 0)

# Generates predictions by averaging multiple models
def multimodel_predict(models, data_loader):
    """Predict classes by averaging the outputs of several models.

    The loader must yield ``(X, y, z)`` triples, where ``z`` holds the file names of the batch.
    Reads the notebook-level settings ``device`` and ``input_size``.

    Args:
        models: non-empty collection of models; each must return a logits tensor of the same shape.
        data_loader: iterable of ``(X, y, z)`` batches.

    Returns:
        ``(predictions, files)`` as two NumPy arrays concatenated over all batches.

    Raises:
        ValueError: if ``models`` is empty.
    """
    models = list(models)  # iterated once per batch, so materialise generators
    if not models:
        raise ValueError("multimodel_predict needs at least one model")
    for model in models:
        model.eval()

    files, y_preds = [], []
    with torch.no_grad():
        for X, y, z in data_loader:
            X = X.to(device).view(-1, 3, input_size, input_size)
            # Stack to (n_models, batch, n_classes) and average over the model axis. The outputs
            # are batched tensors, so they cannot be stored in a 1-D NumPy array of scalars.
            a2 = torch.stack([model(X) for model in models]).mean(dim=0)
            y_pred = a2.argmax(dim=1)
            y_preds.append(y_pred.cpu().numpy())
            files.append(z)

    return np.concatenate(y_preds, 0), np.concatenate(files, 0)

In [None]:
# First create the requested model
n_classes = 200  # size of every replaced output layer

if model_to_train == "Googlenet":
    model = models.googlenet(pretrained=True)
elif model_to_train == "Resnet18":
    model = models.resnet18(pretrained=True)
elif model_to_train == "Resnet50":
    model = models.resnet50(pretrained=True)
elif model_to_train == "Inception" and not inception_use_aux:
    model = models.inception_v3(pretrained=True, aux_logits=False, transform_input=True)
elif model_to_train == "Inception":
    model = models.inception_v3(pretrained=True, transform_input=True)
    # In this case only we need to also change the final layer of the aux output:
    num_ftrs = model.AuxLogits.fc.in_features
    model.AuxLogits.fc = nn.Linear(num_ftrs, n_classes)
else:
    # Fail with a clear message instead of a NameError on `model` further down
    raise ValueError(f"Unknown model_to_train {model_to_train!r}; expected one of {models_available}")

# Change the last layer to have the right number of classes
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, n_classes)

# Move to the device only after the heads were replaced, so the new layers are moved as well
model = model.to(device)

In [None]:
# Walk over the model's direct children, numbering them from 1: the first `layers_to_freeze`
# children get their parameters frozen, every later child is left trainable.
ct = 0  # stays 0 if the model has no children; otherwise ends as the number of children
for ct, child in enumerate(model.children(), start=1):
    keep_trainable = ct > layers_to_freeze
    for param in child.parameters():
        param.requires_grad = keep_trainable

print(ct) # Print how many layers in total there were, for info

# Regardless of how many layers were frozen, the last layer must stay trainable
for param in model.fc.parameters():
    param.requires_grad = True

In [None]:
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss()

# Set these up to be updated while training, so that if we end up overfitting
# we still save the best version of the model
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

liveloss = PlotLosses()
for epoch in range(n_epochs):
    train_loss, train_accuracy = train(model, optimizer, criterion, train_loader)
    validation_loss, validation_accuracy = validate(model, criterion, validation_loader)

    # float() accepts 0-dim tensors, NumPy scalars and plain Python floats alike, whereas
    # `.item()` fails on a plain float.
    logs = {
        'log loss': float(train_loss),
        'accuracy': float(train_accuracy),
        'val_log loss': float(validation_loss),
        'val_accuracy': float(validation_accuracy),
    }

    if logs['val_accuracy'] > best_acc:  # If we got a better validation accuracy, save it
        best_acc = logs['val_accuracy']
        best_model_wts = copy.deepcopy(model.state_dict())

    liveloss.update(logs)
    liveloss.draw()

In [None]:
# Write the state dict with the best validation accuracy next to the notebook
path = f"./{model_save_name}"
torch.save(best_model_wts, path)