This notebook is meant to be used together with getPredictions (in the misc_scripts folder).

Once the CNN weights have been collected and getPredictions has written each model's predictions to csv files, this notebook ensembles those csv predictions.

In [None]:
import keras
import numpy as np
import pandas as pd

from tqdm import tqdm_notebook

In [None]:
# define metric
# balancedAccuracy is imported from a project-local module that is not shown here;
# presumably it builds a balanced-accuracy metric for `num_classes` classes — TODO confirm.
from balancedAccuracy import balancedAccuracy
num_classes = 3  # number of target classes passed to the metric factory
# NOTE(review): bacc_metric is not referenced by any other cell visible in this notebook.
bacc_metric = balancedAccuracy(num_classes)

In [None]:
# plot_confusion_matrix function
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Tick labels for the confusion-matrix plots; they are applied to matrix
# indices 0, 1, 2 in this order.
# NOTE(review): assumes the label columns of the datasets follow the same order — confirm.
class_names = ["MEL", "NV", "BKL"]

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Plot a confusion matrix on the current figure and return balanced accuracy.

    Args:
        cm: Square confusion matrix of counts (rows = true labels,
            columns = predicted labels).
        classes: Class names used as tick labels, in matrix index order.
        normalize: If True, display each row divided by its row sum
            (per-class recall) instead of the raw counts.
        title: Title drawn above the plot.
        cmap: Matplotlib colormap used for the heat map.

    Returns:
        The balanced accuracy, i.e. the mean per-class recall taken over the
        classes that have at least one true sample. NaN if no class has any
        true sample. The value does not depend on ``normalize``.
    """
    cm = np.asarray(cm)
    row_totals = cm.sum(axis=1)
    has_support = row_totals > 0

    # Row-normalise without dividing by zero: rows with no true samples stay
    # at 0 instead of turning the whole plot (and the score) into NaN.
    recall = np.zeros(cm.shape, dtype=float)
    np.divide(cm, row_totals[:, np.newaxis], out=recall,
              where=has_support[:, np.newaxis])

    # Balanced accuracy is always derived from the recall matrix; the mean of
    # the raw diagonal counts would not be an accuracy at all.
    if has_support.any():
        balanced_acc = np.mean(np.diag(recall)[has_support])
    else:
        balanced_acc = np.nan

    if normalize:
        shown = recall
        print("Normalized confusion matrix")
    else:
        shown = cm
        print('Confusion matrix, without normalization')

    print(balanced_acc)

    plt.imshow(shown, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; anything else (normalised values or
    # float-valued counts) gets two decimals, since 'd' rejects floats.
    fmt = 'd' if np.issubdtype(shown.dtype, np.integer) else '.2f'
    thresh = shown.max() / 2.
    for i, j in itertools.product(range(shown.shape[0]), range(shown.shape[1])):
        # White text on dark cells, black text on light cells.
        plt.text(j, i, format(shown[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if shown[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out only after the axis labels exist so they are not clipped.
    plt.tight_layout()

    return balanced_acc
    


## Load Data (for labels)

In [None]:
# Validation labels come from the augmented 3-class HAM archive.
# NOTE(review): np.load on an .npz returns a lazily-read archive that stays
# open; close it (or use `with`) once no later cell needs `data`.
data = np.load(".../3HAMAUG.npz") # augmented HAM dataset with 3 classes numpy file

# Keep only the first three columns of the stored validation targets.
# NOTE(review): presumably the stored array has extra columns beyond the 3 classes — confirm.
targetValList = data["targetValList"][:,:3]

# Test labels are used exactly as stored (no column slicing here).
testData = np.load(".../3TESTHAM.npz")
targetTestList = testData["targetTestList"]

## Ensemble Classifiers

In [None]:
def valMatrix(y_pred, save=False):
    """Plot the normalized validation confusion matrix and print balanced accuracy.

    Args:
        y_pred: Predictions for the validation set. Either a 1-D sequence of
            class indices, or a 2-D table of per-class scores which is
            reduced to class indices with argmax over axis 1.
        save: If True, also write the figure to ``valMatrix.png`` (dpi 500),
            mirroring what ``testMatrix`` does for the test set.
    """
    # True class index per sample, taken from the validation target table.
    y_true = np.asarray(targetValList).argmax(1)

    # Accept lists and DataFrames as well as arrays.
    y_pred = np.asarray(y_pred)
    if y_pred.ndim != 1:
        y_pred = y_pred.argmax(1)

    cnf_matrix = confusion_matrix(y_true, y_pred)
    np.set_printoptions(precision=2)
    plt.figure()
    balanced_acc = plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                                         title='Normalized Validation confusion matrix')
    print("Balanced Accuracy: " + str(balanced_acc))
    plt.tight_layout()
    # Save before show(): showing may clear the current figure.
    if save:
        plt.savefig('valMatrix.png', dpi=500)

    plt.show()

In [None]:
def testMatrix(y_pred, save=False):
    """Show the normalized confusion matrix for the test set.

    Args:
        y_pred: Test-set predictions. A one-dimensional input is used as
            class indices directly; any other shape is reduced with argmax
            over axis 1.
        save: When true, the figure is written to ``testMatrix.png`` at
            500 dpi before it is shown.
    """
    true_labels = targetTestList.copy().argmax(1)

    # Only non-flat predictions need collapsing to class indices.
    is_flat = len(y_pred.shape) == 1
    pred_labels = y_pred if is_flat else y_pred.argmax(1)

    matrix = confusion_matrix(true_labels, pred_labels)
    np.set_printoptions(precision=2)
    plt.figure()
    score = plot_confusion_matrix(
        matrix,
        classes=class_names,
        normalize=True,
        title='Normalized Test Confusion Matrix',
    )
    print("Balanced Accuracy: " + str(score))
    plt.tight_layout()
    if save:
        plt.savefig('testMatrix.png', dpi=500)

    plt.show()

In [None]:
# TODO: If using this file, fill in your own csv predictions and path

# Folder prefix for the prediction csv files (placeholder value; it is
# concatenated directly with the file names below, so keep the trailing slash).
PATH = ".../"

# using validation predicts
# index_col=0 makes the first csv column the row index rather than a data column.
vgg_pred = pd.read_csv(PATH+"vgg16-validation.csv",
                      index_col=0)

# using test predicts
vgg_pred2 = pd.read_csv(PATH+"vgg16-test.csv",
                          index_col=0)

In [None]:
# Baseline: confusion matrices for the single VGG16 model on the validation
# and test predictions loaded from csv.
valMatrix(np.array(vgg_pred))
testMatrix(np.array(vgg_pred2))

In [None]:
def _average_predictions(prediction_frames):
    """Return the per-sample mean of several models' prediction tables.

    Args:
        prediction_frames: Non-empty list of equally shaped tables
            (DataFrames or arrays), one per model, with one row per sample.
            Rows must be in the same sample order in every table.

    Returns:
        A list holding one averaged score vector per sample.

    Raises:
        ValueError: If the list is empty or the tables differ in shape.
    """
    if not prediction_frames:
        raise ValueError("at least one prediction table is required")
    # Shape (n_models, n_samples, n_columns); np.stack rejects mismatched shapes.
    stacked = np.stack([np.asarray(frame, dtype=float) for frame in prediction_frames])
    return list(stacked.mean(axis=0))


# Averaged predictions on the validation set.
avg_pred_val = _average_predictions([
    vgg_pred,
    # TODO: add the other models' validation prediction DataFrames here
])

# Averaged predictions on the test set.
avg_pred_test = _average_predictions([
    vgg_pred2,
    # TODO: add the other models' test prediction DataFrames here
])
    

In [None]:
# Confusion matrices for the averaged predictions on the validation and test sets.
valMatrix(np.array(avg_pred_val))
testMatrix(np.array(avg_pred_test))

## RBF SVM Ensemble (train on validation)

In [None]:
# SVM training features: every model's validation predictions placed side by
# side, one row per sample.
svm_val_x = pd.concat([
    vgg_pred,
    # TODO: add the other models' validation prediction DataFrames here
], axis=1)
# Training targets: class index of each validation sample.
svm_val_y = np.argmax(targetValList, axis=1)
print(svm_val_x.shape, svm_val_y.shape)

# SVM test features: list the models in the same order as above so the
# feature columns line up with those the SVM is fitted on.
svm_test_x = pd.concat([
    vgg_pred2,
    # TODO: add the other models' test prediction DataFrames here
], axis=1)
svm_test_y = np.argmax(targetTestList, axis=1)
print(svm_test_x.shape, svm_test_y.shape)

In [None]:
from sklearn.svm import SVC

# RBF-kernel SVM fitted on the validation-set features; fit() returns the
# estimator itself, so construction and training are chained.
svmEnsemble = SVC(
    kernel="rbf",
    C=.1,
    gamma="auto",
    class_weight="balanced",
    random_state=8888,
).fit(svm_val_x, svm_val_y)

In [None]:
# Validation confusion matrix for the SVM ensemble. The SVM was fitted on
# these same samples, so this is a training-set score, not a held-out one.
valMatrix(svmEnsemble.predict(svm_val_x))

In [None]:
# Test-set confusion matrix for the SVM ensemble; save=True also writes the
# figure to testMatrix.png.
testMatrix(svmEnsemble.predict(svm_test_x), save=True)