# 1. Import required libraries

In [None]:
# Importing required libraries

!pip install pycm livelossplot
%pylab inline
from sklearn.metrics          import accuracy_score
from sklearn.preprocessing    import StandardScaler
from sklearn.model_selection  import StratifiedShuffleSplit
from livelossplot             import PlotLosses
from pycm                     import *
from torch.utils.data         import Dataset
from torchvision.transforms   import Compose, ToTensor, Normalize, RandomRotation, ToPILImage, RandomHorizontalFlip, RandomVerticalFlip, RandomAffine, ColorJitter, Lambda, Resize
from torch.utils.data         import TensorDataset, DataLoader, random_split
from torchvision.datasets     import ImageFolder

import os
import torch
import torch.nn               as nn
import torch.nn.functional    as F
import torchvision.transforms as transforms
import torchvision.models     as models
import matplotlib.pyplot      as plt
import numpy                  as np
import pandas as pd


# 'seaborn-deep' was renamed to 'seaborn-v0_8-deep' in newer matplotlib
# releases; try the original name first and fall back to the new one so the
# notebook runs on both old and new installations.
try:
    plt.style.use('seaborn-deep')
except OSError:
    plt.style.use('seaborn-v0_8-deep')

def set_seed(seed):
    """
    Use this to set ALL the random seeds to a fixed value and take out any randomness from cuda kernels

    Args:
        seed (int): value used to seed Python's `random`, NumPy and PyTorch (CPU and every CUDA device)
    Returns:
        bool: always True
    """
    # imported locally so the function does not depend on `%pylab inline`
    # having injected a `random` name into the notebook namespace
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # The cuDNN auto-tuner (benchmark mode) may select different convolution
    # algorithms from run to run, so it is switched off and cuDNN is asked to
    # use deterministic algorithms only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True

    return True


# Pick the compute device once; the rest of the notebook moves models and
# tensors to `device`.
_gpu_ready = torch.cuda.device_count() > 0 and torch.cuda.is_available()
device = 'cuda' if _gpu_ready else 'cpu'
print("Cuda installed! Running on GPU!" if _gpu_ready else "No GPU available!")


# 2. Pre-processing:

In [None]:
# print the working directory so the relative dataset path below can be checked
cwd = os.getcwd()
print(cwd)
# root folder of the dataset; the 'train' and 'test' sub-folders are appended to it when loading
path = '../input/acseminiproject/'
# list the contents of the dataset folder (shown as the cell output)
os.listdir(path)

In [None]:
## transformations & normalisation ##

# per-channel statistics used to normalise every image
_norm_mean = [0.4802, 0.4481, 0.3975]
_norm_std = [0.277, 0.2691, 0.2821]

# training set transformation: resize -> tensor -> normalise
raw_transform_train = Compose([
    Resize(224),
    ToTensor(),
    Normalize(mean=_norm_mean, std=_norm_std),
])

# validation set transformation: same pipeline as the training one
raw_transform_valid = Compose([
    Resize(224),
    ToTensor(),
    Normalize(mean=_norm_mean, std=_norm_std),
])

# read the image folders, applying the transformations and normalisation on access:
train_data_raw = ImageFolder(path + 'train', transform=raw_transform_train)
test_data_raw = ImageFolder(path + 'test', transform=raw_transform_valid)

In [None]:
## optimal hyperparameters ##

seed = 42  # random seed passed to set_seed below
lr = 5e-4  # learning rate
momentum = 0.6  # optimisation momentum parameter
batch_size = 64  # training batch size
test_batch_size = 500  # test batch size
n_epochs = 7  # number of epochs to train over
weight_decay = 1e-5  # L2 weight-decay parameter
# use the `seed` variable so the value is defined in exactly one place
set_seed(seed)

In [None]:
def generate_train_val_loaders(validation=True, split=0.8):
    """
    Explanation:
        This function instantiates data loaders from the module-level `train_data_raw`
        dataset, using the module-level `batch_size` and `test_batch_size`.
    Args:
        validation (bool): if True the data is randomly split into training and validation subsets
        split (float): fraction of the data used for training, 0.8 corresponds to 80/20;
            only used when `validation` is True
    Returns:
        train_loader: DataLoader for training data (shuffled)
        validation_loader: DataLoader for validation data (not shuffled),
            or the placeholder 0 when `validation` is False
    Raises:
        ValueError: if `validation` is True and `split` is not in the interval (0, 1]
    """
    if not validation:
        # final case, where we train the model using all of the available data;
        # 0 is kept as the "no validation loader" placeholder for existing callers
        train_loader = DataLoader(train_data_raw, batch_size=batch_size, shuffle=True, num_workers=0)
        return train_loader, 0

    # reject fractions that would produce a negative or oversized subset length
    if not 0.0 < split <= 1.0:
        raise ValueError(f"split must be in the interval (0, 1], got {split!r}")

    # splitting into validation and training datasets
    train_size = int(split * len(train_data_raw))
    validation_size = len(train_data_raw) - train_size
    train_dataset, validation_dataset = random_split(train_data_raw, [train_size, validation_size])

    # instantiate DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    validation_loader = DataLoader(validation_dataset, batch_size=test_batch_size, shuffle=False, num_workers=0)
    return train_loader, validation_loader


# build the loaders with an 80% training / 20% validation split
train_loader, validation_loader = generate_train_val_loaders(validation=True, split=0.8)

# # check it's working:
# print(len(train_loader.dataset)) # check length
# print(len(validation_loader.dataset)) # check length

In [None]:
# Drop any references left over from an earlier run and force a garbage
# collection pass (noted by the original author as mostly fixing a CUDA
# out-of-memory problem).
import gc

model = learn = None
gc.collect()

# 3. Define our CNN:

In [None]:
def choose_model(name, printmod=True):
    """
    Explanation:
        This function loads the desired pre-trained model and replaces its output layer
        with a 200-unit linear layer, as we have 200 classes.
    Args:
        name (string): name of model; one of "resnet18", "resnext50_32x4d",
            "wide_resnet101_2" or "vgg19_bn"
        printmod (bool): will print model if True
    Returns:
        model: the desired CNN, with output layer set to 200, moved to `device`
    Raises:
        ValueError: if `name` is not one of the supported model names
    """
    n_classes = 200

    if name in ("resnet18", "resnext50_32x4d", "wide_resnet101_2"):
        # these models expose their final linear layer as `fc`
        model = getattr(models, name)(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, n_classes)
    elif name == "vgg19_bn":
        # here the final linear layer is element 6 of `classifier`
        model = models.vgg19_bn(pretrained=True)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, n_classes)
    else:
        # fail with a clear error instead of printing and then crashing on an unbound variable
        raise ValueError(f"please input a valid model name, got {name!r}.")

    # move the whole network, including the freshly created output layer, to the target device
    model = model.to(device)

    if printmod:
        print(model)

    return model

# instantiate the network (printmod=True also prints its architecture)
model = choose_model("wide_resnet101_2", printmod=True)


# NOTE - if you are only using a model to predict the test data (i.e. not training), skip sections 4 and 5 below

# 4. Training, evaluation and validation functions:

In [None]:
## note - for resnet (224 shape). If you wish to re-use this code with a different model you may have to change dimensions within the code ##
def train(model, optimizer, criterion, data_loader):
    """
    Explanation:
        Trains the model for one pass over `data_loader`.
    Args:
        model: network to train; every batch is reshaped to (-1, 3, 224, 224) before the forward pass
        optimizer: optimiser, stepped once per batch
        criterion: loss function, called as criterion(outputs, targets)
        data_loader (DataLoader): yields (X, y) batches; `data_loader.dataset` must support len()
    Returns:
        train_loss (Tensor): 0-dim tensor, sum of (batch loss * batch size) divided by the dataset size
        train_accuracy (np.float64): number of correctly classified samples divided by the dataset size
    """
    model.train()
    n_samples = len(data_loader.dataset)
    loss_sum = torch.zeros((), device=device)
    n_correct = 0
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        a2 = model(X.view(-1, 3, 224, 224))
        loss = criterion(a2, y)
        loss.backward()
        optimizer.step()
        # detach so the running total is a plain value and is not attached to the autograd graph
        loss_sum = loss_sum + loss.detach() * X.size(0)
        # the predicted class is the index of the largest output
        n_correct += (a2.detach().argmax(dim=1) == y).sum().item()
    # both return values support .item(), which the training loop relies on
    return loss_sum / n_samples, np.float64(n_correct) / n_samples

def validate(model, criterion, data_loader):
    """
    Explanation:
        Evaluates the model on `data_loader` without computing gradients.
    Args:
        model: network to evaluate; every batch is reshaped to (-1, 3, 224, 224) before the forward pass
        criterion: loss function, called as criterion(outputs, targets)
        data_loader (DataLoader): yields (X, y) batches; `data_loader.dataset` must support len()
    Returns:
        validation_loss (Tensor): 0-dim tensor, sum of (batch loss * batch size) divided by the dataset size
        validation_accuracy (np.float64): number of correctly classified samples divided by the dataset size
    """
    model.eval()
    n_samples = len(data_loader.dataset)
    loss_sum = torch.zeros((), device=device)
    n_correct = 0
    with torch.no_grad():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            a2 = model(X.view(-1, 3, 224, 224))
            loss_sum = loss_sum + criterion(a2, y) * X.size(0)
            # the predicted class is the index of the largest output
            n_correct += (a2.argmax(dim=1) == y).sum().item()
    # both return values support .item(), which the training loop relies on
    return loss_sum / n_samples, np.float64(n_correct) / n_samples

# SGD with momentum and L2 weight decay
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss()

# generate_train_val_loaders returns the placeholder 0 instead of a DataLoader
# when it is called with validation=False; validation is skipped in that case
has_validation = isinstance(validation_loader, DataLoader)

liveloss = PlotLosses()
for epoch in range(n_epochs):
    logs = {}
    train_loss, train_accuracy = train(model, optimizer, criterion, train_loader)
    # float() accepts 0-dim tensors, NumPy scalars and plain Python floats alike
    logs['log loss'] = float(train_loss)
    logs['accuracy'] = float(train_accuracy)

    if has_validation:
        validation_loss, validation_accuracy = validate(model, criterion, validation_loader)
        logs['val_log loss'] = float(validation_loss)
        logs['val_accuracy'] = float(validation_accuracy)

    # redraw the live loss/accuracy plot after every epoch
    liveloss.update(logs)
    liveloss.draw()

# 5. Save our model:

In [None]:
model_save_name = 'RESNET_101_wide_le5_fulldata.pth'
# kept in its own variable so that the dataset root stored in `path` is not
# overwritten (re-running the data-loading cell would otherwise break)
model_save_path = f"./{model_save_name}"
# only the weights (state dict) are saved, not the whole model object
torch.save(model.state_dict(), model_save_path)

# 6. Generate predicted labels for test set in .csv file

## This code loads the Team Dropout model (here wide_resnet101_2; please make sure that wide_resnet101_2 is also the model chosen in section 3). Note that on Google Drive our best model is called "best_resnet101wide_21_02_2020.pth". Skip this part if you wish to use the model trained and saved in section 5.
Note: please set the path to the directory in which you have stored the model.


In [None]:
# map_location remaps the stored tensors onto the current device, so that a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine
model.load_state_dict(torch.load("../wide_resnet101_2_fulltrain.pth", map_location=device))

-----------------------------------------------------------------------------

In [None]:
def test(model, data_loader):
    """
    Explanation:
        This function makes predictions on the test data from a single model
    Args:
        model (nn.Module): A pytorch neural net model
        data_loader (DataLoader): A pytorch dataloader for the test data, yielding
            (images, labels, file_paths) batches; the labels are ignored
    Returns:
        y_preds (np.array): An array containing the predicted class label (index of the largest output) per image
        file_names: (np.array): An array containing the filenames; this is used to construct the kaggle submission
    """
    model.eval()
    y_preds, file_names = [], []
    with torch.no_grad():
        for X, _, file_paths in data_loader:
            # as_tensor is a no-op for tensors and also accepts array-like batches;
            # move the batch to the GPU if one is being used
            X = torch.as_tensor(X).to(device)

            # forward pass through the model
            a2 = model(X)

            # the predicted class is the index of the largest output
            y_preds.append(a2.argmax(dim=1).cpu().numpy())

            # keep only the file name, dropping the directory part of each path
            file_names.extend(os.path.basename(name) for name in file_paths)

    if not y_preds:
        # an empty loader yields empty result arrays instead of an error from np.concatenate
        return np.array([], dtype=np.int64), np.array([], dtype=str)
    return np.concatenate(y_preds, 0), np.array(file_names)


# Dataset wrapper needed by the `test` function, which expects every sample to carry its file path
class ImageFolderWithPaths(ImageFolder):
    """ImageFolder variant whose samples also include the image file path.

    Extends torchvision.datasets.ImageFolder.
    Inspiration for this class from https://gist.github.com/andrewjong/6b02ff237533b3b2c554701fb53d5c4d
    """

    def __getitem__(self, index):
        """Return the regular ImageFolder sample for `index` with the file path appended as the last element."""
        # what ImageFolder itself returns for this index
        sample = super().__getitem__(index)
        # first entry of the matching `imgs` record is the image file path
        image_path = self.imgs[index][0]
        return sample + (image_path,)

In [None]:
# root folder holding the test images
# NOTE(review): this differs from the '../input/acseminiproject/' root used for the training data - confirm which one is intended
path = '../input/miniproject/'
os.listdir(path+'test')

# this transformation changes the PIL image to a tensor. This transform is only necessary for the test dataset as it uses ImageFolderWithPaths
# NOTE(review): images are resized to 254 here, while the training/validation transforms resize to 224 - confirm this is intentional
test_transform = Compose([
    Resize(254),
    ToTensor(),
    Normalize(mean=[0.4802, 0.4481, 0.3975],
                         std=[0.277, 0.2691, 0.2821]),
])

# Again assuming that the folder path is correct relative to file position
os.listdir(path)
# dataset whose samples also carry the image file path
test_data_raw = ImageFolderWithPaths(path+'test', transform=test_transform)
# one image per batch, in folder order (no shuffling)
test_loader = DataLoader(test_data_raw, batch_size=1, shuffle=False, num_workers=0)

In [None]:
# Make the predictions and construct the output dataframe
y_preds, filenames = test(model, test_loader)
# construct dataframe from the results: one row per image with its file name and predicted label
submission = pd.DataFrame({'Filename': filenames, 'Label': y_preds})
# NOTE: head() is not the last expression of the cell, so its result is not displayed
submission.head()
# write the submission file without the dataframe index column
submission.to_csv('wide_resnet101_2.csv', index=False)