# Model Ensembling

## Contents:<a class="anchor" id="contents"></a>
* [Setup and Data preparation](#setup)
* [Loading Models to Ensemble](#loading)
* [Building Ensembles](#buildingensembles)
* [Evaluating Ensembles](#evaluatingensembles)

## Setup and Data preparation <a class="anchor" id="setup"></a>
----------------------------------

In [1]:
%load_ext jupyternotify
import time
import tensorflow as tf
import keras.backend as K
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve
from keras import models, layers, optimizers, regularizers
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.utils import to_categorical
from keras import Input
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import keras
import os, shutil
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping

from keras_gcnn.layers import GConv2D, GBatchNorm
from keras_gcnn.layers.pooling import GroupPool

#Loading my custom functions from a separate Jupyter notebook
from ipynb.fs.full.my_functions import build_and_compile_model, build_and_compile_model_GCNN, fit_model_to_generator, plot_auc, area_under_ROC_curve
from ipynb.fs.full.my_functions import plot_results,plot_graphs, plot_smooth, plot_smooth_graphs, fit_model_to_directory_generator, evaluate_auc
from ipynb.fs.full.my_functions import build_and_compile_dense_model, crop, memory_required, count_conv_layers
from ipynb.fs.full.my_functions import model_ensemble_evaluation, acc_comparison, save_history, load_history

<IPython.core.display.Javascript object>

Using TensorFlow backend.


In [2]:
from keras.applications.nasnet import NASNetMobile
from keras.applications.xception import Xception
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, Input, Concatenate, GlobalMaxPooling2D
from keras.optimizers import Adam

### Data preparation and generator setup

In [3]:
# Root folder of the converted PCam images; the three split folders are
# derived from it below.
base_dir = "C:/GitRepos/FINAL PROJECT DATA/Histopathologic Cancer Detection/WholePCamSetFromGithub/converted_images/"
tr_dir, va_dir, te_dir = (
    os.path.join(base_dir, split) for split in ("train", "valid", "test")
)

# Settings shared by both directory iterators.
batch_size = 64
targ_size = (96, 96)
classification = "binary"

# Pixel values are multiplied by 1/255; no augmentation is configured, so the
# same generator object is reused for the validation and test folders.
train_datagen = ImageDataGenerator(rescale=1. / 255)

# NOTE(review): `shuffle` is not passed, so the Keras default applies and
# batches are drawn in shuffled order -- confirm model_ensemble_evaluation
# does not rely on a fixed sample order.
flow_options = dict(
    target_size=targ_size,
    batch_size=batch_size,
    class_mode=classification,
)

# NOTE(review): validation_generator is not referenced by any evaluation cell
# visible in this notebook; only test_generator is passed on.
validation_generator = train_datagen.flow_from_directory(va_dir, **flow_options)
test_generator = train_datagen.flow_from_directory(te_dir, **flow_options)

Found 32768 images belonging to 2 classes.
Found 32768 images belonging to 2 classes.


## Loading Models to Ensemble <a class="anchor" id="loading"></a>
----------------------------------

#### Highest performing P4M Alternate blocks model

In [3]:
# Rebuild the P4M "alternate blocks" dense network and restore its saved
# weights. The arguments are presumably the configuration the checkpoint was
# trained with -- load_weights needs a matching architecture.
initial_filters = 12
weight_decay = 1e-4
P4M_Altblocks6 = build_and_compile_dense_model(
    initial_filters,
    growth_rate=8,
    dense_blocks=6,
    conv_layers=1,
    is_gconv=True,
    gconv_type="D4",
    padding="same",
    dropout=0.2,
    img_size=96,
    opt="DenseSGD",
    weight_decay=weight_decay,
    output="sigmoid",
    labels=1,
    bc_model=False,
)
# NOTE(review): the variable is named "Altblocks6" but the checkpoint file is
# "P4M_Altblocks4..." -- confirm this is the intended file. The folder is also
# spelled "PCAM_Dense" here and "PCam_Dense" in the other loading cells, which
# only resolves to the same place on a case-insensitive filesystem.
P4M_Altblocks6.load_weights("../model_saves/PCAM/PCAM_Dense/P4M_Altblocks4BestWeights229epochs.hdf5")

#### Highest performing P4M-BC model

In [4]:
# Rebuild the P4M-BC dense network (group convolutions, bc_model=True) and
# restore its saved weights. The arguments are presumably the configuration
# the checkpoint was trained with -- load_weights needs a matching architecture.
initial_filters = 3
weight_decay = 1e-4
P4M_BC3 = build_and_compile_dense_model(
    initial_filters,
    growth_rate=3,
    dense_blocks=6,
    conv_layers=3,
    is_gconv=True,
    gconv_type="D4",
    padding="same",
    dropout=0.2,
    img_size=96,
    opt="DenseSGD",
    weight_decay=weight_decay,
    output="sigmoid",
    labels=1,
    bc_model=True,
)
P4M_BC3.load_weights("../model_saves/PCAM/PCam_Dense/P4M_BC3BestWeights224epochs.hdf5")

#### Best CNN Model

In [5]:
# Rebuild the plain-CNN dense network (is_gconv=False) and restore its saved
# weights. The arguments are presumably the configuration the checkpoint was
# trained with -- load_weights needs a matching architecture.
initial_filters = 24
weight_decay = 1e-4
CNN_Best = build_and_compile_dense_model(
    initial_filters,
    growth_rate=24,
    dense_blocks=6,
    conv_layers=3,
    is_gconv=False,
    # NOTE(review): gconv_type is still passed although is_gconv is False;
    # presumably it is ignored by the builder -- confirm in my_functions.
    gconv_type="D4",
    padding="same",
    dropout=0.2,
    img_size=96,
    opt="DenseSGD",
    weight_decay=weight_decay,
    output="sigmoid",
    labels=1,
    bc_model=True,
)
CNN_Best.load_weights("../model_saves/PCAM/PCam_Dense/CNN_BestBestWeights141epochs.hdf5")

#### Xception/NASNet model sourced from Kaggle, taken for experimentation purposes. 
https://www.kaggle.com/greg115/histopathologic-cancer-detector-lb-0-958

In [6]:
# Architecture taken from
# https://www.kaggle.com/greg115/histopathologic-cancer-detector-lb-0-958
# Two backbones share one input; their pooled features are concatenated and
# fed to a single sigmoid unit.

input_shape = (96, 96, 3)
inputs = Input(input_shape)

# Both backbones are created without their classification head.
xception = Xception(include_top=False, input_shape=input_shape)
nas_net = NASNetMobile(include_top=False, input_shape=input_shape)

# Global-average-pool each backbone's feature map, then join along the last axis.
xception_features = GlobalAveragePooling2D()(xception(inputs))
nas_net_features = GlobalAveragePooling2D()(nas_net(inputs))
outputs = Concatenate(axis=-1)([xception_features, nas_net_features])
outputs = Dropout(0.5)(outputs)
outputs = Dense(1, activation='sigmoid')(outputs)

# Pretrained backbones, additionally trained on PCam for about 12 epochs by me.
preTrainedModel = Model(inputs, outputs)
preTrainedModel.compile(
    optimizer=Adam(lr=0.0001, decay=0.00001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Restore the weights saved after that additional training.
preTrainedModel.load_weights("../model_saves/PCAM/pretrainedModelFinal.h5")

## Building Ensembles <a class="anchor" id="buildingensembles"></a>
----------------------------------

In [17]:
# Every ensemble contains P4M_BC3; the lists differ in which other models join it.

# Pairs
ens = [P4M_BC3, P4M_Altblocks6]
ens2 = [P4M_BC3, CNN_Best]
ens3 = [P4M_BC3, preTrainedModel]

# Triples
ens4 = [P4M_BC3, P4M_Altblocks6, CNN_Best]
ens5 = [P4M_BC3, P4M_Altblocks6, preTrainedModel]
ens6 = [P4M_BC3, CNN_Best, preTrainedModel]

# All four models
ens7 = [P4M_BC3, P4M_Altblocks6, CNN_Best, preTrainedModel]

## Evaluating Ensembles <a class="anchor" id="evaluatingensembles"></a>
----------------------------------

#### Ensemble One

In [8]:
# Evaluate the P4M_BC3 + P4M_Altblocks6 pair on the test iterator with weighting enabled.
# NOTE(review): the recorded output suggests the result is a tuple of two lists
# with one entry per member plus a final ensemble entry -- presumably accuracy
# and AUC; confirm in my_functions.
ens_scores = model_ensemble_evaluation(ens, test_generator, weighted=True)
print("Ensemble scores:", ens_scores)

val scores:  [0.9120145631067961, 0.9044296116504854]
weights [0.5020878570235511, 0.497912142976449]
Ensemble scores: ([0.9106924019607843, 0.904296875, 0.9153645833333334], [0.9705873727207059, 0.9653383141383142, 0.9727790031790032])


#### Ensemble Two

In [9]:
# Evaluate the P4M_BC3 + CNN_Best pair on the test iterator with weighting enabled.
ens2_scores = model_ensemble_evaluation(ens2, test_generator, weighted=True)
print("Ensemble scores:", ens2_scores)

val scores:  [0.9118628640776699, 0.9001820388349514]
weights [0.5032231059020511, 0.4967768940979489]
Ensemble scores: ([0.9109987745098039, 0.899203431372549, 0.9210707720588235], [0.9706987857871879, 0.9587126783447024, 0.9750973545269976])


#### Ensemble Three

In [18]:
# Evaluate the P4M_BC3 + preTrainedModel pair on the test iterator with weighting enabled.
ens3_scores = model_ensemble_evaluation(ens3, test_generator, weighted=True)
print("Ensemble scores:", ens3_scores)

val scores:  [0.9050364077669902, 0.8073422330097088]
weights [0.5285258681785967, 0.4714741318214033]
Ensemble scores: ([0.9121859681372549, 0.8044577205882353, 0.9088158700980392], [0.9710595847516397, 0.9415384711596657, 0.9711987578180499])


#### Ensemble Four

In [9]:
# Evaluate the P4M_BC3 + P4M_Altblocks6 + CNN_Best triple on the test iterator with weighting enabled.
ens4_scores = model_ensemble_evaluation(ens4, test_generator, weighted=True)
print("Ensemble scores:", ens4_scores)

val scores:  [0.8986650485436893, 0.9033677184466019, 0.9165655339805825]
weights [0.33056191060766693, 0.33229172479214325, 0.33714636460018965]
Ensemble scores: ([0.8993566176470589, 0.9044500612745098, 0.9094669117647058, 0.9239047181372549], [0.9580732113398553, 0.9656385737886108, 0.9704416093893364, 0.9761926718261242])


#### Ensemble Five

In [19]:
# Evaluate the P4M_BC3 + P4M_Altblocks6 + preTrainedModel triple on the test iterator with weighting enabled.
ens5_scores = model_ensemble_evaluation(ens5, test_generator, weighted=True)
print("Ensemble scores:", ens5_scores)

val scores:  [0.917627427184466, 0.908373786407767, 0.7996055825242718]
weights [0.3494915645944072, 0.34596718280563904, 0.3045412525999538]
Ensemble scores: ([0.9094669117647058, 0.9034926470588235, 0.8060278799019608, 0.9139476102941176], [0.9705011203622216, 0.9648682663017941, 0.9410835249765556, 0.9733200040183655])


#### Ensemble Six

In [20]:
# Evaluate the P4M_BC3 + CNN_Best + preTrainedModel triple on the test iterator with weighting enabled.
ens6_scores = model_ensemble_evaluation(ens6, test_generator, weighted=True)
print("Ensemble scores:", ens6_scores)

val scores:  [0.9059466019417476, 0.8956310679611651, 0.8056735436893204]
weights [0.34747192645604236, 0.34351544772211556, 0.3090126258218421]
Ensemble scores: ([0.9121859681372549, 0.9008501838235294, 0.8053002450980392, 0.9158624387254902], [0.971287811950585, 0.9591961299427535, 0.9420490075191347, 0.9769506100231218])


#### Ensemble Seven

In [21]:
# Evaluate the four-model ensemble on the test iterator with weighting enabled.
ens7_scores = model_ensemble_evaluation(ens7, test_generator, weighted=True)
print("Ensemble scores:", ens7_scores)

val scores:  [0.9121662621359223, 0.9042779126213593, 0.8983616504854369, 0.8029429611650486]
weights [0.2593039803355039, 0.2570615377980939, 0.2553797058950364, 0.22825477597136573]
Ensemble scores: ([0.9103860294117647, 0.9045649509803921, 0.8992800245098039, 0.8059895833333334, 0.9200367647058824], [0.970480145082907, 0.9654041323730757, 0.9584806363504418, 0.9414159798170825, 0.9771637213885354])
