## Need to install the latest version of PyTorch and PIL for this.

Get them from here: https://pytorch.org/get-started/locally/ and https://pypi.org/project/Pillow/

The code below is for Windows only.

## GPU memory of at least 8 GB is needed to run this or the error 'CUDA out of memory' will show.

In [None]:
# Imports here
import torch
import numpy as np
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
%matplotlib inline
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from PIL import Image
from PIL import ImageFile

## Training Phase
## Load the training and validation data

### NOTE: Right now the validation directory has only 1 dummy image (refer to last part of the Report pdf document for reason). 

From report:
"Before the final submission, only 1 dummy image is placed in the validation directory (as this code requires validation directory with at least 1 image to run, feel free to randomly copy and paste any amount of images from the training set to this directory to use as validation images when running it). This is to ensure that the maximum number of images are used for training instead of validation as previous validation runs have already identified the best model."

Feel free to randomly copy and paste any amount of images from the training set to this directory to use as validation images when running it.

In [None]:
# Dataset root folders, relative to the working directory. train_dir and
# valid_dir are passed to datasets.ImageFolder in the data-loading cell.
#train_dir = 'train'  # alternative training folder, currently disabled
train_dir = 'train_expanded_character'
test_dir = 'test'  # assigned again in the prediction-phase cell
valid_dir = 'valid'


In [None]:
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

# DataLoader settings: no worker subprocesses (data is loaded in the main
# process) and 50 samples per batch.
num_workers = 0
batch_size = 50

# Per-channel mean / std used to normalize the image tensors
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Validation preprocessing: resize, centre-crop to 224x224, convert to a
# FloatTensor and normalize. No augmentation is applied to validation images.
valid_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Training preprocessing: identical to validation, plus a small random
# brightness jitter that serves as data augmentation.
augmentation_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ColorJitter(brightness=.05, saturation=0, contrast=0),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Build the datasets; ImageFolder reads the images from one sub-folder per class
train_data = datasets.ImageFolder(train_dir, transform=augmentation_transforms)
valid_data = datasets.ImageFolder(valid_dir, transform=valid_transforms)

# Shuffled index list covering the complete training set (nothing is held out)
num_train = len(train_data)
print("Total number of training images: " + str(num_train))
indices = [*range(num_train)]
np.random.shuffle(indices)
train_size = int(np.floor(num_train))
train_idx = indices[:train_size]

# Samplers that draw the selected indices in random order
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(list(range(len(valid_data))))

# Batch loaders used by the training cells further down
loader_options = dict(batch_size=batch_size, num_workers=num_workers)
trainloader = torch.utils.data.DataLoader(
    train_data, sampler=train_sampler, **loader_options)
validloader = torch.utils.data.DataLoader(
    valid_data, sampler=valid_sampler, **loader_options)

# Building and training the classifier


* Load a [pre-trained network](http://pytorch.org/docs/master/torchvision/models.html)
* Freeze some top layers while only training the bottom layers' weights
* Define a fully-connected layer below
* Train the classifier layers using backpropagation using the pre-trained network to get the features
* Track the loss and accuracy on the validation set to determine the best hyperparameters

### Note that validation directory has only 1 dummy image as mentioned earlier at the start of this notebook, under 'load the training and validation data' section. To properly look at validation accuracy, feel free to randomly copy and paste any amount of images from the training set to this directory to use as validation images when running it. 


In [None]:
# Load torchvision's ResNeXt-101 (32x8d) with pretrained weights; the bare
# `model` expression below displays its layer structure as the cell output.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of a `weights=` argument - confirm against the installed version.
model = models.resnext101_32x8d(pretrained=True)
model

In [None]:
# Index of the first layer3 sub-block that remains trainable
layer_freeze_onwards = 18

# Modules whose parameters are excluded from backprop: the stem
# (conv1 / bn1 / relu / maxpool), layer1, layer2 and the first
# `layer_freeze_onwards` sub-blocks of layer3. Refer to the Appendix in the
# code for more details on the layer3 split.
frozen_modules = [
    model.conv1,
    model.bn1,
    model.relu,
    model.maxpool,
    model.layer1,
    model.layer2,
    model.layer3[0:layer_freeze_onwards],
]
for module in frozen_modules:
    for param in module.parameters():
        param.requires_grad = False

# Swap the pretrained head for a new fully-connected layer (2048 -> 42 outputs)
model.fc = nn.Sequential(nn.Linear(2048, 42))

print(model)

### Displays the layers in layer 3 that are going to be trained
### Note that all layers in layer 4 are going to be trained

In [None]:
# Show the layer3 sub-blocks from index `layer_freeze_onwards` onward, i.e. the
# slice whose parameters are handed to optimizer1 in the optimizer cells.
model.layer3[layer_freeze_onwards:]

In [None]:
# Remember whether a CUDA device can be used; later cells branch on this flag
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available!  Training on GPU ...')
else:
    print('CUDA is not available.  Training on CPU ...')

### Specify Optimizers and the Respective Learning Rate, and Loss Criterion

In [None]:
# CrossEntropyLoss is applied to the raw model output, so no log-softmax
# layer is needed on the model.
criterion = nn.CrossEntropyLoss()

# One Adam optimizer per trainable part of the network - the tail of layer3,
# all of layer4 and the new fc head - all using the same learning rate.
learning_rate = 0.00005
optimizer1 = optim.Adam(
    model.layer3[layer_freeze_onwards:].parameters(), lr=learning_rate)
optimizer2 = optim.Adam(model.layer4.parameters(), lr=learning_rate)
optimizer3 = optim.Adam(model.fc.parameters(), lr=learning_rate)

# Run the model on the GPU when CUDA is available
if train_on_gpu:
    model.cuda()

### Training of model weights, forward and back propagation, calculating train/validation loss and accuracy
### Do NOT worry about validation accuracy as there is only 1 image in validation folder as mentioned earlier, refer to last section of report for more details.
### First run 4 epochs at learning rate 0.00005

In [None]:
# number of epochs to train the model
n_epochs = 4

# Lowest validation loss seen so far; starts at infinity so the first epoch
# always counts as an improvement. `np.inf` is used because the capitalised
# alias `np.Inf` was removed in NumPy 2.0.
valid_loss_min = np.inf

# Not used by this loop; retained so the cell keeps defining the same names.
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

for epoch in range(1, n_epochs+1):

    # Per-epoch running sums: loss weighted by batch size, number of correct
    # top-1 predictions and number of samples processed.
    train_loss = 0.0
    valid_loss = 0.0
    train_correct = 0
    valid_correct = 0
    train_seen = 0
    valid_seen = 0

    ###################
    # train the model #
    ###################

    model.train()
    for batch_idx, (data, target) in enumerate(trainloader):
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        optimizer3.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer1.step()
        optimizer2.step()
        optimizer3.step()
        # accumulate loss (weighted by batch size) and correct predictions
        train_loss += loss.item()*data.size(0)
        train_correct += (output.argmax(dim=1) == target).sum().item()
        train_seen += data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    # No gradients are needed for validation; disabling autograd avoids
    # building the graph and lowers GPU memory use.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(validloader):
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # accumulate loss (weighted by batch size) and correct predictions
            valid_loss += loss.item()*data.size(0)
            valid_correct += (output.argmax(dim=1) == target).sum().item()
            valid_seen += data.size(0)

    # calculate average losses
    train_loss = train_loss/len(trainloader.dataset)
    valid_loss = valid_loss/len(validloader.dataset)
    # Accuracy is computed per sample, so a smaller final batch is not
    # over-weighted the way an average of per-batch means would be.
    train_accuracy = train_correct/train_seen
    valid_accuracy = valid_correct/valid_seen

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \t Training Acc: {:.6f} \tValidation Loss: {:.6f} \tValidation Acc: {:.6f}'.format(
        epoch, train_loss, train_accuracy, valid_loss, valid_accuracy))

    # report and remember a new best (lowest) validation loss
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).'.format(
        valid_loss_min,
        valid_loss))
        valid_loss_min = valid_loss

### Change learning rate to 0.00001, run 4 more epochs, the full code for setting optimizer and training model is repeated in the next 2 blocks for ease of customization should the need arise.

In [None]:
# CrossEntropyLoss is applied to the raw model output, so no log-softmax
# layer is needed on the model.
criterion = nn.CrossEntropyLoss()

# Fresh Adam optimizers for the trainable parts of the network (tail of
# layer3, layer4, fc head), now with the reduced learning rate.
learning_rate = 0.00001
optimizer1 = optim.Adam(
    model.layer3[layer_freeze_onwards:].parameters(), lr=learning_rate)
optimizer2 = optim.Adam(model.layer4.parameters(), lr=learning_rate)
optimizer3 = optim.Adam(model.fc.parameters(), lr=learning_rate)

# Run the model on the GPU when CUDA is available
if train_on_gpu:
    model.cuda()

In [None]:
# number of epochs to train the model
n_epochs = 4

# Not used by this loop; retained so the cell keeps defining the same names.
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

# `valid_loss_min` is not reset here: it carries over from the first training
# cell, which must have been run beforehand.
for epoch in range(1, n_epochs+1):

    # Per-epoch running sums: loss weighted by batch size, number of correct
    # top-1 predictions and number of samples processed.
    train_loss = 0.0
    valid_loss = 0.0
    train_correct = 0
    valid_correct = 0
    train_seen = 0
    valid_seen = 0

    ###################
    # train the model #
    ###################

    model.train()
    for batch_idx, (data, target) in enumerate(trainloader):
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        optimizer3.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer1.step()
        optimizer2.step()
        optimizer3.step()
        # accumulate loss (weighted by batch size) and correct predictions
        train_loss += loss.item()*data.size(0)
        train_correct += (output.argmax(dim=1) == target).sum().item()
        train_seen += data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    # No gradients are needed for validation; disabling autograd avoids
    # building the graph and lowers GPU memory use.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(validloader):
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # accumulate loss (weighted by batch size) and correct predictions
            valid_loss += loss.item()*data.size(0)
            valid_correct += (output.argmax(dim=1) == target).sum().item()
            valid_seen += data.size(0)

    # calculate average losses
    train_loss = train_loss/len(trainloader.dataset)
    valid_loss = valid_loss/len(validloader.dataset)
    # Accuracy is computed per sample, so a smaller final batch is not
    # over-weighted the way an average of per-batch means would be.
    train_accuracy = train_correct/train_seen
    valid_accuracy = valid_correct/valid_seen

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \t Training Acc: {:.6f} \tValidation Loss: {:.6f} \tValidation Acc: {:.6f}'.format(
        epoch, train_loss, train_accuracy, valid_loss, valid_accuracy))

    # report and remember a new best (lowest) validation loss
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).'.format(
        valid_loss_min,
        valid_loss))
        valid_loss_min = valid_loss

### Change learning rate to 0.000001, run 10 more epochs

In [None]:
# CrossEntropyLoss is applied to the raw model output, so no log-softmax
# layer is needed on the model.
criterion = nn.CrossEntropyLoss()

# Fresh Adam optimizers for the trainable parts of the network (tail of
# layer3, layer4, fc head), with the learning rate lowered once more.
learning_rate = 0.000001
optimizer1 = optim.Adam(
    model.layer3[layer_freeze_onwards:].parameters(), lr=learning_rate)
optimizer2 = optim.Adam(model.layer4.parameters(), lr=learning_rate)
optimizer3 = optim.Adam(model.fc.parameters(), lr=learning_rate)

# Run the model on the GPU when CUDA is available
if train_on_gpu:
    model.cuda()

In [None]:
# number of epochs to train the model
n_epochs = 10

# Not used by this loop; retained so the cell keeps defining the same names.
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

# `valid_loss_min` is not reset here: it carries over from the first training
# cell, which must have been run beforehand.
for epoch in range(1, n_epochs+1):

    # Per-epoch running sums: loss weighted by batch size, number of correct
    # top-1 predictions and number of samples processed.
    train_loss = 0.0
    valid_loss = 0.0
    train_correct = 0
    valid_correct = 0
    train_seen = 0
    valid_seen = 0

    ###################
    # train the model #
    ###################

    model.train()
    for batch_idx, (data, target) in enumerate(trainloader):
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        optimizer3.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer1.step()
        optimizer2.step()
        optimizer3.step()
        # accumulate loss (weighted by batch size) and correct predictions
        train_loss += loss.item()*data.size(0)
        train_correct += (output.argmax(dim=1) == target).sum().item()
        train_seen += data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    # No gradients are needed for validation; disabling autograd avoids
    # building the graph and lowers GPU memory use.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(validloader):
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # accumulate loss (weighted by batch size) and correct predictions
            valid_loss += loss.item()*data.size(0)
            valid_correct += (output.argmax(dim=1) == target).sum().item()
            valid_seen += data.size(0)

    # calculate average losses
    train_loss = train_loss/len(trainloader.dataset)
    valid_loss = valid_loss/len(validloader.dataset)
    # Accuracy is computed per sample, so a smaller final batch is not
    # over-weighted the way an average of per-batch means would be.
    train_accuracy = train_correct/train_seen
    valid_accuracy = valid_correct/valid_seen

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \t Training Acc: {:.6f} \tValidation Loss: {:.6f} \tValidation Acc: {:.6f}'.format(
        epoch, train_loss, train_accuracy, valid_loss, valid_accuracy))

    # report and remember a new best (lowest) validation loss
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).'.format(
        valid_loss_min,
        valid_loss))
        valid_loss_min = valid_loss

## Prediction Phase
### Load the test data

This is similar to loading the training and validation data and uses the same preprocessing steps as the validation set; the code is copied here to allow for adjustments if needed (not encouraged).

In [None]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image's file path.

    Each item is the tuple produced by ``datasets.ImageFolder`` with the
    path of the image file appended as an extra trailing element.
    """

    def __getitem__(self, index):
        """Return the parent class's item for ``index`` plus the file path."""
        base_item = super().__getitem__(index)
        # self.imgs holds one entry per image; element 0 is the file path
        file_path = self.imgs[index][0]
        return (*base_item, file_path)

from torchvision import datasets
import torchvision.transforms as transforms

# Folder that holds the images to predict
test_dir = 'test'
# Number of samples per batch during prediction
batch_size = 64

# Deterministic preprocessing (no augmentation): resize, centre-crop to
# 224x224, convert to a FloatTensor and normalize per channel.
test_preprocessing_steps = [
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
test_transforms = transforms.Compose(test_preprocessing_steps)

# Test dataset whose items also include the image file path
test_data = ImageFolderWithPaths(test_dir, transform=test_transforms)

# Batch loader over the test set; no sampler is given, so the default
# sequential order is used.
testloader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size)

### Run model in evaluation mode through data, predict the top 5 classes for data using the top 5 probabilities

In [None]:
    # Invert ImageFolder's class-name -> index mapping so that predicted
    # indices can be translated back into class names.
    idx_to_class = {value: key for key, value in train_data.class_to_idx.items()}

    predicted = []        # flat list of class names, 5 per image (best first)
    data_to_predict = []  # file path of every image, in prediction order

    ##########################
    # predict the test data  #
    ##########################
    # set model to evaluation mode
    model.eval()
    # Inference only: disabling autograd avoids building the graph and
    # lowers GPU memory use.
    with torch.no_grad():
        for batch_idx, (data, target, paths) in enumerate(testloader):
            # Only the images are needed on the GPU; `target` is just the
            # folder index assigned by ImageFolder and is not used here.
            if train_on_gpu:
                data = data.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # indices of the 5 highest-scoring classes per image
            top_p, top_class = output.topk(5, dim=1)
            top_class_np = top_class.cpu().numpy()
            for each in top_class_np:
                for idx in each:
                    predicted.append(idx_to_class[idx])

            data_to_predict.extend(paths)
            print('Done with batch ',batch_idx)

    predicted_array = np.array(predicted)
    print(predicted_array.reshape(-1,5))
    print(data_to_predict)



### Only the top class is needed as we just want the class with the highest probability, extract this top class and print it out

In [None]:
import os

# One row per image, 5 columns ordered from most to least likely class
predicted_array_reshaped = predicted_array.reshape(-1,5)

# Keep only the first column, i.e. the highest-probability class per image
predicted_list = [row[0] for row in predicted_array_reshaped]

print(predicted_list)

# Reduce every path to its bare file name. os.path.basename replaces the
# former fixed-width slice (name[11:]), which only worked when the directory
# prefix was exactly 11 characters long.
dataname_list = [os.path.basename(name) for name in data_to_predict]

print(dataname_list)

### Create dataframe of prediction and output to csv

In [None]:
import pandas as pd

# Submission table: file names in one column, top-1 predicted class in the other
df = pd.DataFrame({'Filename': dataname_list, 'Prediction': predicted_list})
# Number of distinct classes that were predicted (shown as the cell output)
df['Prediction'].nunique()
#df.head()

In [None]:
# Write the predictions to submission.csv without the DataFrame index column
df.to_csv('submission.csv', index = False)