In [None]:
from typing import Tuple
import random
import torch
import torch.nn.functional as F
import copy
import numpy as np
import torchvision
import math
import e2cnn.nn as enn
from e2cnn.nn import init
from e2cnn import gspaces
from PIL import Image
import os
import json
import torch
import pickle
from torch.utils.data import Dataset, DataLoader, Subset
import pandas as pd
import cv2
from torch.optim.lr_scheduler import ReduceLROnPlateau
import albumentations as A
import torchtoolbox.transform as transforms
from sklearn.metrics import accuracy_score, roc_auc_score
import sklearn
from tqdm.autonotebook import tqdm
import isic_train
device = 'cuda' if torch.cuda.is_available() else 'cpu'

def predict(model, batch_size, number_of_tta=5):
    """Predict the test set with test-time augmentation (TTA).

    The test set is passed through ``model`` ``number_of_tta`` times. The
    notebook-level ``test_transform`` is assigned to the dataset, so each pass
    is expected to see a differently augmented version of every image
    (assumes ``MelanomaDataset`` applies ``transforms`` on every item access
    -- TODO confirm in ``isic_train``). The sigmoid outputs of all passes are
    averaged.

    Relies on the notebook-level globals ``test_df``, ``test_transform``,
    ``meta_features`` and ``device`` being defined before it is called.

    Args:
        model: network called as ``model([images, meta])``; put into eval mode
            here. Its output is assumed to have shape ``(batch, 1)`` so that it
            can be accumulated into the ``(N, 1)`` prediction buffer.
        batch_size: number of samples per batch of the test loader.
        number_of_tta: number of augmented passes over the test set
            (defaults to 5, the value previously hard-coded).

    Returns:
        CPU float32 tensor of shape ``(len(test_set), 1)`` holding the mean
        sigmoid prediction per test sample, in ``test_df`` row order.

    Raises:
        ValueError: if ``number_of_tta`` is smaller than 1.
    """
    if number_of_tta < 1:
        raise ValueError("number_of_tta must be at least 1")

    # Build the test dataset. The previous code passed `train_transform`, a
    # name that is never defined in this notebook (NameError); the pipeline
    # prepared for test-time augmentation is `test_transform`.
    test_set = isic_train.MelanomaDataset(df=test_df,
                                          imfolder='test',
                                          train=False,
                                          transforms=test_transform,
                                          meta_features=meta_features)

    # shuffle=False keeps batch k aligned with rows [k*batch_size, ...) of the
    # prediction buffer below.
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False, num_workers=0)
    length_dataset = len(test_set)

    # setting model to evaluation mode
    model.eval()

    # running sum of the predictions over all TTA passes (kept on the CPU)
    test_preds = torch.zeros(size=(length_dataset, 1), device="cpu", dtype=torch.float32)

    with torch.no_grad():
        for _ in range(number_of_tta):
            # len(test_loader) is the number of batches, including a final partial one
            for k, images in tqdm(enumerate(test_loader), total=len(test_loader)):
                # each batch is indexed as [image_batch, meta_batch]
                images[0] = images[0].to(device)
                images[1] = images[1].to(device)

                out = model(images)
                pred = torch.sigmoid(out)

                # the last batch may be smaller than batch_size, so the slice
                # length is taken from the batch itself
                start = k * test_loader.batch_size
                test_preds[start:start + images[0].shape[0]] += pred.cpu()

    # mean over the TTA passes
    return test_preds / number_of_tta



In [None]:
# Metadata tables (one row per image) for the train and test splits.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')


# Random augmentation pipeline used for test-time augmentation.
# Keep this in sync with the augmentations that were used during training.
tta_steps = [
    # project-specific augmentation from isic_train
    isic_train.AdvancedHairAugmentation(hairs_folder='mel_hairs'),
    # random geometric changes
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomAffine(degrees=0, scale=(0.8,1.2), shear=(-20,20)),
    # random colour changes
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    # project-specific augmentation from isic_train, applied with p=0.5
    isic_train.Microscope(p=0.5),
    # tensor conversion followed by per-channel normalisation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]),
]
test_transform = transforms.Compose(tta_steps)


# preprocessing of meta data, taken from https://www.kaggle.com/nroman/melanoma-pytorch-starter-efficientnet
# NOTE: this cell appends columns to train_df/test_df, so it must be run exactly
# once per kernel session (re-running it would add the site_* columns again).

# One-hot encoding of location of imaged site. Both splits are encoded together
# so that they end up with the same set of site_* columns (including site_nan).
n_train = train_df.shape[0]
concat = pd.concat([train_df['anatom_site_general_challenge'], test_df['anatom_site_general_challenge']], ignore_index=True)
dummies = pd.get_dummies(concat, dummy_na=True, dtype=np.uint8, prefix='site')
train_df = pd.concat([train_df, dummies.iloc[:n_train]], axis=1)
# the test rows follow the train rows in `dummies`; the index is reset so it lines up with test_df
test_df = pd.concat([test_df, dummies.iloc[n_train:].reset_index(drop=True)], axis=1)

# encoding the sex of patients (male -> 1, female -> 0), -1 if it is missing
train_df['sex'] = train_df['sex'].map({'male': 1, 'female': 0}).fillna(-1)
test_df['sex'] = test_df['sex'].map({'male': 1, 'female': 0}).fillna(-1)

# normalizing the age of the patients to use it for classification as well, 0 if the age is missing.
# Both splits are divided by the TRAINING maximum: the test split was previously
# divided by its own maximum, which puts the feature on a different scale than
# the one the model was trained on whenever the two maxima differ.
# NOTE(review): assumes training also divided by train_df['age_approx'].max() -- confirm in isic_train.
age_scale = train_df['age_approx'].max()
train_df['age_approx'] = (train_df['age_approx'] / age_scale).fillna(0)
test_df['age_approx'] = (test_df['age_approx'] / age_scale).fillna(0)

# filling missing values for patient_ids
train_df['patient_id'] = train_df['patient_id'].fillna(0)

# names of the meta data columns fed to the model: sex, age and the one-hot site columns.
# The raw 'anatom_site_general_challenge' column also contains 'site_' and is excluded explicitly.
meta_features = ['sex', 'age_approx'] + [
    col for col in train_df.columns
    if 'site_' in col and col != 'anatom_site_general_challenge'
]


# Ensemble over the cross-validation folds: for every fold a fresh model is
# built, the fold's best weights are loaded and the test set is predicted with
# test-time augmentation.
fold_predictions = []
for fold in range(1, 6):
    # initialize the model you want to make your submission with
    model = isic_train.DenseNet(32, [6,12,24,16], len(meta_features)).to(device)
    # map_location makes the checkpoint loadable on the device selected above,
    # e.g. weights saved from a GPU run when this notebook falls back to the CPU
    weights = torch.load(f"best_model_fold_{fold}", map_location=device)
    # strict=True: fail loudly if the checkpoint does not match the architecture
    model.load_state_dict(weights, strict=True)
    fold_predictions.append(predict(model, 256))

# keep the per-fold names available for any later cells that refer to them
(prediction_fold_1,
 prediction_fold_2,
 prediction_fold_3,
 prediction_fold_4,
 prediction_fold_5) = fold_predictions

#summing up the predictions of each fold and averaging them
final_prediction = (prediction_fold_1 + prediction_fold_2 + prediction_fold_3 + prediction_fold_4 + prediction_fold_5)/5

#writing the final_predictions to the submission.csv file
# NOTE(review): predictions are in test.csv row order; this assumes
# sample_submission.csv lists the images in the same order -- confirm.
sub = pd.read_csv('sample_submission.csv')
sub['target'] = final_prediction.cpu().numpy().reshape(-1,)
sub.to_csv('submission.csv', index=False)