In [None]:
# Mount Google Drive inside the Colab runtime; every path used below lives
# under the '/content/drive' mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# --- Locations on the mounted Google Drive -----------------------------------
drive_root = '/content/drive/My Drive/'

data_dir = drive_root + 'DATA_DIR/'
save_folder = drive_root + 'Full_Data/Ensemble_ensemblers/'

# One results folder per base model; each is read for 'test_classification.csv'.
results_root = drive_root + 'Eval on Test Data Results/'
vgg = results_root + 'VGG/'
squeezenet = results_root + 'SqueezeNet/'
resnet101 = results_root + 'ResNet101/'
resnet50 = results_root + 'ResNet50/'
inception = results_root + 'Inception/'
densenet = results_root + 'DenseNet/'

# Previously saved ensemble outputs (stored in the save folder).
ensemble_1 = save_folder + 'classification_ensemble_1.csv'
ensemble_2 = save_folder + 'classification_ensemble_2.csv'
ensemble_3 = save_folder + 'classification_ensemble_3.csv'

# --- Hard-coded per-model AUC scores ----------------------------------------
# These feed the softmax that produces the weights of the weighted average.
vgg_auc, squeezenet_auc, resnet101_auc = 0.933, 0.895, 0.948
resnet50_auc, inception_auc, densenet_auc = 0.935, 0.939, 0.940

# When True, the output cell writes the ensemble predictions to `save_folder`.
save = False

## Initialise

In [None]:
import sys
# Add the Drive root to the module search path. No import in the visible
# notebook relies on it — presumably kept for project-local modules (TODO confirm).
sys.path.append('/content/drive/My Drive/')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import os
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image, ImageDraw
from sklearn.metrics import roc_auc_score, roc_curve, auc
from tqdm import tqdm
import math

from __future__ import print_function
from __future__ import division

import time
import copy

import itertools  
import collections 

## Import Data

In [None]:
# Read the ground-truth label tables for the three splits. `na_filter=False`
# keeps empty cells as empty strings instead of converting them to NaN.
labels_tr, labels_dev, labels_test = (
    pd.read_csv(data_dir + split_file, na_filter=False)
    for split_file in ('train.csv', 'dev.csv', 'test.csv')
)

In [None]:
def _read_predictions(csv_path):
    """Load one prediction CSV, keeping empty cells as empty strings."""
    return pd.read_csv(csv_path, na_filter=False)


# Test-set predictions of every base model.
_TEST_FILE = 'test_classification.csv'
vgg_pred = _read_predictions(vgg + _TEST_FILE)
squeezenet_pred = _read_predictions(squeezenet + _TEST_FILE)
resnet101_pred = _read_predictions(resnet101 + _TEST_FILE)
resnet50_pred = _read_predictions(resnet50 + _TEST_FILE)
inception_pred = _read_predictions(inception + _TEST_FILE)
densenet_pred = _read_predictions(densenet + _TEST_FILE)

# Outputs of the previously saved ensembles.
ensemble_1_pred = _read_predictions(ensemble_1)
ensemble_2_pred = _read_predictions(ensemble_2)
ensemble_3_pred = _read_predictions(ensemble_3)


In [None]:
def _as_lookup(frame):
    """Map each image name in `frame` to its predicted probability."""
    return dict(zip(frame['image_name'], frame['prediction']))


# image_name -> prediction, one lookup per base model ...
vgg_dict = _as_lookup(vgg_pred)
squeezenet_dict = _as_lookup(squeezenet_pred)
resnet101_dict = _as_lookup(resnet101_pred)
resnet50_dict = _as_lookup(resnet50_pred)
inception_dict = _as_lookup(inception_pred)
densenet_dict = _as_lookup(densenet_pred)

# ... and one per previously saved ensemble.
ensemble_1_dict = _as_lookup(ensemble_1_pred)
ensemble_2_dict = _as_lookup(ensemble_2_pred)
ensemble_3_dict = _as_lookup(ensemble_3_pred)

## Ensemble Models

### average

In [None]:
# Unweighted average: for every image, the mean of the probabilities predicted
# by the base models.
model_dicts = [
    vgg_dict,
    squeezenet_dict,
    resnet101_dict,
    resnet50_dict,
    inception_dict,
    densenet_dict,
]

# Derive the divisor from the list itself so it can never drift out of sync
# with the models being combined (it was previously hard-coded to 3 while six
# models were summed, which doubled every "average").
# Assumes every model predicted every image; an image missing from one model
# would still be divided by the full model count.
numofdicts = len(model_dicts)

# defaultdict(float) starts every image at 0.0 so contributions can be summed.
Cdict = collections.defaultdict(float)

for model_preds in model_dicts:
    for key, val in model_preds.items():
        Cdict[key] += val / numofdicts

# Probabilities in first-seen image order (the order of the first model's file).
preds_prob = list(Cdict.values())

### majority voting

In [None]:
# Majority voting: a model votes "positive" for an image when its probability
# is above 0.5. The ensemble output is the mean probability of the winning
# side; a tie (or a negative majority) resolves to the negative side.
ensemble_members = (
    vgg_dict,
    squeezenet_dict,
    resnet101_dict,
    resnet50_dict,
    inception_dict,
    densenet_dict,
)

# Per-image tallies: number of models seen, votes for each side and the sum of
# the probabilities on each side.
counter_dict = collections.defaultdict(float)
vote_dict = collections.defaultdict(float)
novote_dict = collections.defaultdict(float)
vote_prob_dict = collections.defaultdict(float)
novote_prob_dict = collections.defaultdict(float)

for member in ensemble_members:
    for key, val in member.items():
        counter_dict[key] += 1
        if val > 0.5:
            vote_dict[key] += 1
            vote_prob_dict[key] += val
        else:
            novote_dict[key] += 1
            novote_prob_dict[key] += val

# Resolve each image once all votes are in.
final_dict = collections.defaultdict(float)
for key, models_seen in counter_dict.items():
    positive_share = vote_dict[key] / models_seen
    if positive_share > 0.5:
        final_dict[key] = vote_prob_dict[key] / vote_dict[key]
    else:
        final_dict[key] = novote_prob_dict[key] / novote_dict[key]

preds_prob = list(final_dict.values())

### weighted average

In [None]:
# Turn the per-model AUCs into ensemble weights that sum to 1. Multiplying by
# 150 before the softmax sharpens the distribution so that small AUC gaps
# translate into clearly different weights.
AUC = torch.tensor([
    vgg_auc,
    squeezenet_auc,
    resnet101_auc,
    resnet50_auc,
    inception_auc,
    densenet_auc,
])
outputs = torch.softmax(AUC * 150, dim=0)
print(outputs)

In [None]:
# Weighted average: each model's probability is scaled by that model's softmax
# weight from `outputs`, and the scaled values are summed per image.
model_dicts = [
    vgg_dict,
    squeezenet_dict,
    resnet101_dict,
    resnet50_dict,
    inception_dict,
    densenet_dict,
]

# `outputs` must hold exactly one weight per model, in the same order as
# `model_dicts` (the order in which the AUC tensor was built).
assert len(outputs) == len(model_dicts), 'need exactly one weight per model'

# defaultdict(float) starts every image at 0.0 so contributions can be summed.
Cdict = collections.defaultdict(float)

# Take the model index from the position of its dict rather than from a running
# counter with hard-coded 1000-entry buckets: the weight stays correct for any
# number of predictions per model and can never index past `outputs`.
for model, model_preds in enumerate(model_dicts):
    weight = outputs[model]
    for key, val in model_preds.items():
        Cdict[key] += val * weight

# The accumulated values are 0-dim tensors; convert them to plain Python floats.
preds_prob_values = list(Cdict.values())
preds_prob = [weighted_sum.item() for weighted_sum in preds_prob_values]


In [None]:
# Scratch check: print the bucket index obtained by integer-dividing a running
# counter by `num_images` (0 for the first ten values, 1 for the next ten, ...).
num_images = 10
n = 0

while n < 80:
    model = n // num_images
    n += 1
    print(model)

### output ensemble 


In [None]:
# Write the ensemble predictions to CSV when saving is enabled.
# The predictions come from the models' test-set outputs, so they are paired
# with the test-set image names (they were previously paired with the dev-set
# names, which mislabels every row). This relies on `preds_prob` being in the
# same row order as `labels_test`, as the AUC cell also assumes — TODO confirm.
# pandas raises a ValueError if the two columns differ in length.
if save:
  cls_res = pd.DataFrame({'image_name': labels_test.image_name, 'prediction': preds_prob})
  cls_res.to_csv(save_folder + 'classification_ensemble_ensemblers.csv', columns=['image_name', 'prediction'], sep=',', index=None)
  print('classification.csv generated.')

## Calculate AUC

In [None]:
# Ground truth as 0.0 / 1.0: the annotation column is cast to bool and then to
# float. Presumably an empty annotation means "negative" — TODO confirm.
gt = labels_test['annotation'].astype(bool).astype(float).values

# Hard decisions at a 0.5 threshold.
pred = [1 if prob >= 0.5 else 0 for prob in preds_prob]

# Positions where the thresholded prediction disagrees with the ground truth.
indecies = [idx for idx in range(len(pred)) if gt[idx] != pred[idx]]

print(len(indecies))

In [None]:
# ROC curve of the ensemble probabilities against the ground truth; only the
# false/true positive rates are kept (the thresholds are not used), and the
# area under that curve is the ensemble's AUC.
roc_points = roc_curve(gt, preds_prob)
fpr, tpr = roc_points[:2]
roc_auc = auc(fpr, tpr)

In [None]:
# Square ROC plot on the unit box, with the AUC shown in the legend.
axes_options = {'xlim': [0, 1], 'ylim': [0, 1], 'aspect': 'equal'}
fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=axes_options)

ax.plot(fpr, tpr, label=f'AUC: {roc_auc:.03}')
ax.legend(loc="lower right")
ax.set_title('ROC curve')
# Kept last: grid() returns None, so the cell shows no stray repr.
ax.grid(linestyle='dashed')