In [None]:
# Standard library: csv writes the output file, os extracts file names from paths
import csv
import os

# NumPy is used to concatenate the per-batch predictions
import numpy as np

# These imports help us create models and datasets
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# This allows me to import pretrained models for transfer learning
import torchvision.models as models

# This allows me to do a number of transforms for data augmentation later on
from torchvision.transforms import *
from torchvision.transforms.functional import normalize

# Enable hardware acceleration.
# The original setup always used GPU index 3; keep that as the preferred card,
# but fall back to GPU 0 when the machine has fewer GPUs so that the device
# string is never an invalid ordinal.
PREFERRED_GPU_INDEX = 3

device = 'cpu'
if torch.cuda.is_available() and torch.cuda.device_count() > 0:
    print("Cuda installed! Running on GPU!")
    if PREFERRED_GPU_INDEX < torch.cuda.device_count():
        device = 'cuda:{}'.format(PREFERRED_GPU_INDEX)
    else:
        device = 'cuda:0'
else:
    print("No GPU available!")

In [None]:
# Per-channel means and stds of the entire training set (calculated in another file)
means = [0.4802, 0.4481, 0.3975]
stds = [0.2302, 0.2265, 0.2262]


def _single_view_transform(size):
    """Pipeline without TTA: resize, convert to a tensor, then normalize."""
    return Compose([
        Resize(size),
        ToTensor(),
        Normalize(means, stds),
    ])


def _flipped_and_original(image):
    """Stack the horizontally flipped image and the original one (in that order)."""
    to_tensor = ToTensor()
    flipped = RandomHorizontalFlip(p=1)(image)
    return torch.stack([to_tensor(flipped), to_tensor(image)])


def _normalize_views(views):
    """Normalize every stacked view with the training-set statistics."""
    return torch.stack([normalize(view, means, stds) for view in views])


def _flip_tta_transform(size):
    """Pipeline with TTA (Test-Time Augmentation): resize, build both views, normalize each."""
    return Compose([
        Resize(size),
        Lambda(_flipped_and_original),
        Lambda(_normalize_views),
    ])


# Inception models are fed images resized with Resize(299)
validation_test_transform_inception = _single_view_transform(299)
validation_test_transform_inception_tta = _flip_tta_transform(299)

# Non-inception models are fed images resized with Resize(224)
validation_test_transform = _single_view_transform(224)
# (the TTA variant is not used here, but available as a functionality)
validation_test_transform_tta = _flip_tta_transform(224)

In [None]:
class ImageFolderWithPaths(ImageFolder):
    """ImageFolder variant whose items also carry the path of the image file.

    Extends torchvision.datasets.ImageFolder; only __getitem__ is overridden.
    """

    def __getitem__(self, index):
        """Return the parent's item tuple with the image file path appended."""
        # Whatever ImageFolder would normally return for this index
        base_item = super().__getitem__(index)
        # First field of the matching entry in self.imgs is the file path
        file_path = self.imgs[index][0]
        return base_item + (file_path,)
    
# Folder containing the test images
image_folder = "./test/"


def _dataset_and_loader(transform):
    """Build a path-aware test dataset for `transform` and a default DataLoader over it."""
    dataset = ImageFolderWithPaths(image_folder, transform=transform)
    return dataset, DataLoader(dataset)


# For the prediction with 2 models (one loader per expected input size):
test_set, test_loader = _dataset_and_loader(validation_test_transform)
test_set_inception, test_loader_inception = _dataset_and_loader(validation_test_transform_inception)

# For the prediction with just inception and TTA:
test_set_tta, test_loader_tta = _dataset_and_loader(validation_test_transform_inception_tta)

In [None]:
# Generate predictions while applying test-time augmentation
def predict_tta(model, data_loader):
    """Predict class indices by averaging the model outputs over every view of each image.

    Args:
        model: network that maps a (N, C, H, W) batch to (N, num_classes) scores.
        data_loader: iterable yielding (X, y, z) batches, where X has shape
            (batch, n_views, C, H, W), y is the label batch (not used here) and
            z is the sequence of file paths of the batch.

    Returns:
        A pair (predictions, files) of 1-D NumPy arrays: the predicted class
        index of every image and the matching file paths. Both arrays are
        empty when the loader yields no batch.
    """
    model.eval()

    # Send the inputs to wherever the model actually lives; only a model without
    # parameters falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device(device)

    files, y_preds = [], []
    with torch.no_grad():
        for X, _, z in data_loader:
            X = X.to(model_device)
            # There is a new dimension in each X - the number of transforms of each image
            batch_size, n_crops, c, h, w = X.size()
            # Fold the views into the batch dimension, using the real image size
            # instead of a hard-coded 3x299x299
            scores = model(X.reshape(-1, c, h, w))
            # Average out the predictions on every transform
            scores = scores.view(batch_size, n_crops, -1).mean(1)
            # log_softmax is monotonic, so the argmax of the raw scores is the same class
            y_preds.append(scores.argmax(dim=1).cpu().numpy())
            files.append(z)

    if not y_preds:
        # np.concatenate raises on an empty list; return empty results instead
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=str)
    return np.concatenate(y_preds, 0), np.concatenate(files, 0)

# Predict by averaging the predictions of many models
def multimodel_predict(model, inception_model, data_loader, inception_data_loader):
    """Predict class indices from the average of the outputs of two models.

    Args:
        model: the non-inception network.
        inception_model: the inception network.
        data_loader: iterable yielding (X, y, z) batches for `model`, where z
            is the sequence of file paths of the batch.
        inception_data_loader: iterable yielding the batches for
            `inception_model`. It is consumed in lockstep with `data_loader`
            (via zip), so both loaders must yield the same images in the same
            order.

    Returns:
        A pair (predictions, files) of 1-D NumPy arrays: the predicted class
        index of every image and the matching file paths (taken from
        `data_loader`). Both arrays are empty when no batch is yielded.
    """
    model.eval()
    inception_model.eval()

    def _device_of(net):
        # Device of the network's first parameter; a parameter-less network
        # falls back to the notebook-level `device`.
        first_param = next(net.parameters(), None)
        return first_param.device if first_param is not None else torch.device(device)

    model_device = _device_of(model)
    inception_device = _device_of(inception_model)

    files, y_preds = [], []
    with torch.no_grad():
        # This is necessary as model and inception model expect inputs from different loaders:
        for (X, _, z), (Xi, _, _) in zip(data_loader, inception_data_loader):
            # Each model receives its inputs on its own device
            X = X.to(model_device)
            Xi = Xi.to(inception_device)
            # Collapse any leading dimensions into the batch while keeping the
            # real (C, H, W) of each loader instead of hard-coded image sizes
            a0 = model(X.reshape(-1, *X.shape[-3:]))
            a1 = inception_model(Xi.reshape(-1, *Xi.shape[-3:]))
            # Average out the predictions (on a common device)
            a2 = (a0 + a1.to(a0.device)) / 2.
            # log_softmax is monotonic, so the argmax of the raw scores is the same class
            y_preds.append(a2.argmax(dim=1).cpu().numpy())
            files.append(z)

    if not y_preds:
        # np.concatenate raises on an empty list; return empty results instead
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=str)
    return np.concatenate(y_preds, 0), np.concatenate(files, 0)

# Prediction using TTA

Run the following cells to predict with a single Inception v3 model using Test-Time Augmentation (TTA).

In [None]:
# The place where we stored the model we used with TTA:
model_path = "../Models/Inceptionv3_5_layers_frozen_horizontal_flip.pth"

# This model had aux outputs enabled
model = models.inception_v3(pretrained=True, aux_logits=True, transform_input=True)

# Change number of classes in aux and regular outputs:
num_ftrs = model.AuxLogits.fc.in_features
model.AuxLogits.fc = nn.Linear(num_ftrs, 200)

num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 200)

# Load the weights of our trained model.
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a GPU can still be loaded when that GPU is not available.
model.load_state_dict(torch.load(model_path, map_location=device))

# Send the whole model (including the replaced heads) to cuda if available
model = model.to(device)

In [None]:
# Predict with the single inception model, averaging over the views produced by
# the TTA loader; returns the predicted class indices and the image file paths.
my_predictions, my_files = predict_tta(model, test_loader_tta)

# Predict using Multi-model

Run the following cells to predict using the average of the outputs of two models (Inception v3 and ResNet-18):

In [None]:
# Checkpoint of the inception model used in the two-model average.
# NOTE(review): this path has a doubled "Models/Models" folder, unlike the other
# checkpoint paths in this notebook - confirm it matches the directory layout.
model1_path = "../Models/Models/inceptionv3_5_layers_frozen_full_training_set_default_augmentations.pth"
# Checkpoint of the ResNet model used in the two-model average.
model2_path = "../Models/Resnet_no_layers_frozen_augmented_cropped_full_training_set.pth"

In [None]:
# This version of inception was trained without auxiliary outputs:
model1 = models.inception_v3(pretrained=True, aux_logits=False, transform_input=True)

# Only need to change number of classes in the last layer
num_ftrs = model1.fc.in_features
model1.fc = nn.Linear(num_ftrs, 200)

# Load our saved weights.
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a GPU can still be loaded when that GPU is not available.
model1.load_state_dict(torch.load(model1_path, map_location=device))

# Move the model only AFTER the final layer has been replaced: a layer created
# after the move would stay on the CPU and break the forward pass on GPU.
model1 = model1.to(device)

In [None]:
# ResNet-18 architecture; no pretrained weights are requested here because our
# own learned weights are loaded below
model2 = models.resnet18()

# Change the number of classes in the output
num_ftrs = model2.fc.in_features
model2.fc = nn.Linear(num_ftrs, 200)

# Load our learned weights.
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a GPU can still be loaded when that GPU is not available.
model2.load_state_dict(torch.load(model2_path, map_location=device))

# Send the model to the same device as the inputs (it was previously left on the CPU)
model2 = model2.to(device)

In [None]:
# Argument order matters: the non-inception model (model2, ResNet-18) and its
# loader come first, then the inception model (model1) and its loader.
my_predictions, my_files = multimodel_predict(model2, model1, test_loader, test_loader_inception)

# Create the submission file

Regardless of how the predictions were generated, this cell saves them to `submission_1.csv` in CSV format: a `Filename,Label` header followed by one row per test image.

In [None]:
# os.path.basename keeps only the file name, whatever path separator the
# platform uses (splitting on '/' alone leaves folder names in Windows paths)
# we use .lower to have the format as jpeg instead of JPEG
my_files_clean = [os.path.basename(str(file_path)).lower() for file_path in my_files]

# Open a file and write it in csv format
with open('submission_1.csv', 'w', encoding="ISO-8859-1", newline='') as myfile:
    wr = csv.writer(myfile)
    wr.writerow(("Filename", "Label"))
    # One row per image: cleaned file name, predicted class index
    wr.writerows(zip(my_files_clean, my_predictions))