# Import Libraries and set seeds

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pylab
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import json
import pickle
import random as python_random
import os


import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import ResNet50, InceptionResNetV2

from keras import models, layers
from keras.optimizers import RMSprop, SGD
from keras.callbacks import EarlyStopping, ModelCheckpoint

from utils import *
from analysis_utils import *

# Seed every random number generator used below so runs are repeatable.
# NOTE: TensorFlow is seeded with 1234, NumPy and Python's `random` with 123.
tf.random.set_seed(1234)
python_random.seed(123)
np.random.seed(123)

# Load Dataset

In [None]:
# Read the metadata table that describes the dataset
df = pd.read_csv('./meta.csv')

# Hold out 20% of the rows for testing, then 30% of the remainder for
# validation; both splits are stratified on the 'gender' column.
train_df, test_df = train_test_split(
    df, stratify=df['gender'], test_size=0.2, random_state=0)
train_df, val_df = train_test_split(
    train_df, stratify=train_df['gender'], test_size=0.3, random_state=0)

# Three independent generators, each only rescaling its inputs by 1/255
train_datagen, validation_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Callbacks

In [None]:
# Early-stopping callback shared by every training run below: it watches the
# validation loss (lower is better), stops after 7 epochs without improvement
# and restores the best weights seen.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=7,
    restore_best_weights=True,
    verbose=1,
)

# Train with base model set-up

In [None]:
# Base Model Parameters
IMG_SIZE_1 = 224        # image size used for the ResNet50 generators
IMG_SIZE_2 = 299        # image size used for the InceptionResNetV2 generators
BATCH_SIZE = 128
EPOCHS = 30
LEARNING_RATE = 1e-4
NUM_NEURONS = 1024      # width of the dense layer in the classification head
DROPOUT_RATE = 0.5
MOMENTUM = 0.9
OPTIMISER = 'SGD'

# Folder for base model results.
# makedirs also creates the missing parent './results' (os.mkdir would raise
# FileNotFoundError on a fresh checkout) and is a no-op when the folder exists.
SAVE_PATH = './results/base_models'

os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
# Build the data generators for both base models: one set per input image size
# (IMG_SIZE_1 first, then IMG_SIZE_2), all using the same BATCH_SIZE.
generators_224, generators_299 = (
    generate_data(train_df, val_df, test_df, img_size, BATCH_SIZE)
    for img_size in (IMG_SIZE_1, IMG_SIZE_2)
)

## ResNet50 Base Model

### Set Parameters for Base ResNet50

In [None]:
# Settings specific to the ResNet50 base model
START_LAYER = 143  # index of the first layer that is left trainable
TITLE = 'resnet50_base'  # run title handed to fit() and base_model_plot()

### Create Model for Base ResNet50

In [None]:
# Download ResNet50 weights (ImageNet, without the classification head)
resnet50_conv = ResNet50(weights='imagenet', include_top=False, input_tensor=None, input_shape=(224,224,3))

# Freeze every layer before START_LAYER; layers from START_LAYER on stay trainable
for layer in resnet50_conv.layers[:START_LAYER]:
    layer.trainable = False
for layer in resnet50_conv.layers[START_LAYER:]:
    layer.trainable = True

# Build base model: conv base -> flatten -> dense(relu) -> dropout -> sigmoid unit
resnet50_base = models.Sequential()
resnet50_base.add(resnet50_conv)
resnet50_base.add(layers.Flatten())
resnet50_base.add(layers.Dense(NUM_NEURONS, activation='relu'))
resnet50_base.add(layers.Dropout(DROPOUT_RATE))
resnet50_base.add(layers.Dense(1, activation='sigmoid'))

# Compile base model.
# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
# The metric is named 'acc' so the history keys are 'acc' / 'val_acc'.
resnet50_base.compile(loss='binary_crossentropy',
                      optimizer=SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM),
                      metrics=['acc'])

### Train (Fit) Base ResNet50 & save information

In [None]:
# Fit the ResNet50 base model on the 224-sized generators and keep what fit()
# returns as the training history. fit() is not defined in this file
# (presumably it comes from the star-imported helper modules).
resnet50_base_hist = fit(
    resnet50_base, EPOCHS, TITLE,
    generators_224, es, SAVE_PATH,
)

## Plot Histories

In [None]:
# Plot base model histories
# The plots go to their own sub-folder. A dedicated variable is used instead of
# reassigning SAVE_PATH, because later training cells still pass SAVE_PATH to
# fit() and would otherwise save model results into the plot folder.
PLOT_SAVE_PATH = './results/base_models/histories_plot'

# makedirs creates missing parent folders and is a no-op if the folder exists
os.makedirs(PLOT_SAVE_PATH, exist_ok=True)

base_model_plot(resnet50_base_hist, TITLE, PLOT_SAVE_PATH)

## InceptionResNet Base Model

### Set Parameters for Base InceptionResNet

In [None]:
# Settings specific to the InceptionResNet base model
LAST_LAYER = 780
START_LAYER = 759  # index of the first layer that is left trainable
TITLE = 'inc_res_base'  # run title handed to fit() and base_model_plot()

### Create Model for Base InceptionResNet

In [None]:
# Download InceptionResNet weights (ImageNet, without the classification head)
incResV2_conv = InceptionResNetV2(weights='imagenet', include_top=False, input_tensor=None, input_shape=(299,299,3))

# Freeze every layer before START_LAYER; layers from START_LAYER on stay trainable
for layer in incResV2_conv.layers[:START_LAYER]:
    layer.trainable = False
for layer in incResV2_conv.layers[START_LAYER:]:
    layer.trainable = True

# Build base model: conv base -> flatten -> dense(relu) -> dropout -> sigmoid unit
inc_res_base = models.Sequential()
inc_res_base.add(incResV2_conv)
inc_res_base.add(layers.Flatten())
inc_res_base.add(layers.Dense(NUM_NEURONS, activation='relu'))
inc_res_base.add(layers.Dropout(DROPOUT_RATE))
inc_res_base.add(layers.Dense(1, activation='sigmoid'))

# Compile base model.
# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
# The metric is named 'acc' so the history keys are 'acc' / 'val_acc'.
inc_res_base.compile(loss='binary_crossentropy',
                     optimizer=SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM),
                     metrics=['acc'])

### Train (Fit) Base InceptionResNet & save information

In [None]:
# Fit the InceptionResNet base model on the 299-sized generators and keep what
# fit() returns as the training history.
# NOTE(review): SAVE_PATH holds whatever the most recently executed cell assigned
# to it - confirm it points at the base-model results folder before running.
inc_res_base_hist = fit(
    inc_res_base, EPOCHS, TITLE,
    generators_299, es, SAVE_PATH,
)

In [None]:
# Plot base model histories
# The plots go to their own sub-folder; a dedicated variable keeps SAVE_PATH
# (the model results folder used by the training cells) untouched.
PLOT_SAVE_PATH = './results/base_models/histories_plot'

# makedirs creates missing parent folders and is a no-op if the folder exists
os.makedirs(PLOT_SAVE_PATH, exist_ok=True)

base_model_plot(inc_res_base_hist, TITLE, PLOT_SAVE_PATH)

# InceptionResNet Tuning

## Batch Size
### Set Parameters for Batch Sizes

In [None]:
# Batch-size experiment: every setting matches the base model except BATCH_SIZE,
# which is now the list of batch sizes to try.
TITLE = 'inc_res_bs'
BATCH_SIZE = [64, 256]
EPOCHS = 30
LEARNING_RATE = 1e-4
NUM_NEURONS = 1024
DROPOUT_RATE = 0.5
MOMENTUM = 0.9
OPTIMISER = 'SGD'
START_LAYER = 759
LAST_LAYER = 780

# Folder for batch size model results.
# makedirs also creates a missing './results' parent and is a no-op when the
# folder already exists (os.mkdir would fail without the parent).
SAVE_PATH = './results/bs_models'

os.makedirs(SAVE_PATH, exist_ok=True)

# Prepare one set of 299-sized data generators per batch size, keyed by batch size
bs_generators = {
    bs: generate_data(train_df, val_df, test_df, IMG_SIZE_2, bs)
    for bs in BATCH_SIZE
}

### Create Model for Batch Sizes

In [None]:
# One freshly initialised InceptionResNetV2-based classifier per batch size
bs_inc_res_models = []
for bs in BATCH_SIZE:
    # Pre-trained convolutional base (ImageNet weights, no classification head)
    incResV2_conv = InceptionResNetV2(input_shape=(299,299,3), input_tensor=None,
                                      include_top=False, weights='imagenet')

    # Only layers at index START_LAYER and beyond are trainable
    for idx, layer in enumerate(incResV2_conv.layers):
        layer.trainable = idx >= START_LAYER

    # conv base -> flatten -> dense(relu) -> dropout -> single sigmoid unit
    model = models.Sequential([
        incResV2_conv,
        layers.Flatten(),
        layers.Dense(NUM_NEURONS, activation='relu'),
        layers.Dropout(DROPOUT_RATE),
        layers.Dense(1, activation='sigmoid'),
    ])

    # Models are stored in the same order as BATCH_SIZE
    bs_inc_res_models.append(model)

### Train (Fit) Batch Sizes Model & save information

In [None]:
# Train every batch-size model and collect what fit() returns, keyed by run title
bs_history = {}

# bs_generators is keyed by batch size, so the batch size alone selects the
# matching generators; models were built in BATCH_SIZE order.
for model, bs in zip(bs_inc_res_models, BATCH_SIZE):
    # `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers
    model.compile(loss='binary_crossentropy',
                  optimizer=SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM),
                  metrics=['acc'])

    title = '{}_{}'.format(TITLE, bs)
    print('Running {}'.format(title))
    bs_history[title] = fit(model, EPOCHS, title, bs_generators[bs], es, SAVE_PATH)

## Dropout

### Set Parameters for Dropouts

In [None]:
# Dropout experiment: every setting matches the base model except DROPOUT_RATE,
# which is now the list of dropout rates to try.
TITLE = 'inc_res_dropout'
BATCH_SIZE = 128
EPOCHS = 30
LEARNING_RATE = 1e-4
NUM_NEURONS = 1024
DROPOUT_RATE = [0, 0.2]
MOMENTUM = 0.9
OPTIMISER = 'SGD'
START_LAYER = 759
LAST_LAYER = 780

# Folder for dropout model results.
# makedirs also creates a missing './results' parent and is a no-op when the
# folder already exists (os.mkdir would fail without the parent).
SAVE_PATH = './results/dropout_models'

os.makedirs(SAVE_PATH, exist_ok=True)

### Create Model for Dropouts

In [None]:
# One freshly initialised InceptionResNetV2-based classifier per dropout rate
dropout_inc_res_models = []
for dropout in DROPOUT_RATE:
    # Pre-trained convolutional base (ImageNet weights, no classification head)
    incResV2_conv = InceptionResNetV2(input_shape=(299,299,3), input_tensor=None,
                                      include_top=False, weights='imagenet')

    # Only layers at index START_LAYER and beyond are trainable
    for idx, layer in enumerate(incResV2_conv.layers):
        layer.trainable = idx >= START_LAYER

    # conv base -> flatten -> dense(relu) -> dropout(rate under test) -> sigmoid unit
    model = models.Sequential([
        incResV2_conv,
        layers.Flatten(),
        layers.Dense(NUM_NEURONS, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(1, activation='sigmoid'),
    ])

    # Models are stored in the same order as DROPOUT_RATE
    dropout_inc_res_models.append(model)

### Train (Fit) Dropout Model & save information

In [None]:
# Train every dropout model and collect what fit() returns, keyed by dropout rate
dropout_history = {}

# Models were built in DROPOUT_RATE order, so zip pairs each with its rate
for model, dropout in zip(dropout_inc_res_models, DROPOUT_RATE):
    # `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers
    model.compile(loss='binary_crossentropy',
                  optimizer=SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM),
                  metrics=['acc'])

    title = '{}_{}'.format(TITLE, dropout)
    print('Running {}'.format(title))
    dropout_history[dropout] = fit(model, EPOCHS, title, generators_299, es, SAVE_PATH)

## Learning Rate

### Set Parameters for Learning Rate

In [None]:
# Learning-rate experiment: every setting matches the base model except
# LEARNING_RATE, which is now the list of learning rates to try.
TITLE = 'inc_res_lr'
BATCH_SIZE = 128
EPOCHS = 30
LEARNING_RATE = [1e-2, 1e-3]
NUM_NEURONS = 1024
DROPOUT_RATE = 0.5
MOMENTUM = 0.9
OPTIMISER = 'SGD'
START_LAYER = 759
LAST_LAYER = 780

# Folder for learning rate model results.
# makedirs also creates a missing './results' parent and is a no-op when the
# folder already exists (os.mkdir would fail without the parent).
SAVE_PATH = './results/lr_models'

os.makedirs(SAVE_PATH, exist_ok=True)

### Create Model for Learning Rates

In [None]:
# One freshly initialised InceptionResNetV2-based classifier per learning rate
# (the rate itself is only applied later, when each model is compiled)
lr_inc_res_models = []
for lr in LEARNING_RATE:
    # Pre-trained convolutional base (ImageNet weights, no classification head)
    incResV2_conv = InceptionResNetV2(input_shape=(299,299,3), input_tensor=None,
                                      include_top=False, weights='imagenet')

    # Only layers at index START_LAYER and beyond are trainable
    for idx, layer in enumerate(incResV2_conv.layers):
        layer.trainable = idx >= START_LAYER

    # conv base -> flatten -> dense(relu) -> dropout -> single sigmoid unit
    model = models.Sequential([
        incResV2_conv,
        layers.Flatten(),
        layers.Dense(NUM_NEURONS, activation='relu'),
        layers.Dropout(DROPOUT_RATE),
        layers.Dense(1, activation='sigmoid'),
    ])

    # Models are stored in the same order as LEARNING_RATE
    lr_inc_res_models.append(model)

### Train (Fit) Learning Rate Model & save information

In [None]:
# Train every learning-rate model and collect what fit() returns, keyed by learning rate
lr_history = {}

# Models were built in LEARNING_RATE order, so zip pairs each with its rate
for model, lr in zip(lr_inc_res_models, LEARNING_RATE):
    # `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers
    model.compile(loss='binary_crossentropy',
                  optimizer=SGD(learning_rate=lr, momentum=MOMENTUM),
                  metrics=['acc'])

    title = '{}_{}'.format(TITLE, lr)
    print('Running {}'.format(title))
    lr_history[lr] = fit(model, EPOCHS, title, generators_299, es, SAVE_PATH)

## Stacked

### Set Parameters for Stacked

In [None]:
# Stacked experiment: same hyper-parameters as the base model; only the
# classification head built from them differs.
TITLE = 'inc_res_stacked'
BATCH_SIZE = 128
EPOCHS = 30
LEARNING_RATE = 1e-4
NUM_NEURONS = 1024
DROPOUT_RATE = 0.5
MOMENTUM = 0.9
OPTIMISER = 'SGD'
START_LAYER = 759
LAST_LAYER = 780

# Folder for stacked model results.
# makedirs also creates a missing './results' parent and is a no-op when the
# folder already exists (os.mkdir would fail without the parent).
SAVE_PATH = './results/stacked_models'
os.makedirs(SAVE_PATH, exist_ok=True)

### Create Model for Stacked

In [None]:
# Pre-trained InceptionResNetV2 convolutional base (ImageNet weights, no classification head)
incResV2_conv = InceptionResNetV2(
    input_shape=(299,299,3),
    input_tensor=None,
    include_top=False,
    weights='imagenet',
)

# Only layers at index START_LAYER and beyond are trainable
for idx, layer in enumerate(incResV2_conv.layers):
    layer.trainable = idx >= START_LAYER

# Stacked head: two dense(relu) + dropout pairs followed by a single sigmoid unit
stacked_model = models.Sequential([
    incResV2_conv,
    layers.Flatten(),
    layers.Dense(NUM_NEURONS, activation='relu'),
    layers.Dropout(DROPOUT_RATE),
    layers.Dense(NUM_NEURONS, activation='relu'),
    layers.Dropout(DROPOUT_RATE),
    layers.Dense(1, activation='sigmoid'),
])

### Train (Fit) Stacked Model & save information

In [None]:
# Compile and train the stacked model; what fit() returns is kept as its history.
# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
stacked_model.compile(loss='binary_crossentropy',
                      optimizer=SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM),
                      metrics=['acc'])

stacked_history = fit(stacked_model, EPOCHS, TITLE, generators_299, es, SAVE_PATH)

# Load Back Results

In [None]:
def _load_history(path):
    """Unpickle one saved training-history file, closing the file afterwards.

    Only use this on files produced by this project: unpickling untrusted
    data can execute arbitrary code.
    """
    with open(path, "rb") as hist_file:
        return pickle.load(hist_file)


# Collect every saved history in one dict keyed by run title.
# NOTE(review): the titles and './drive/MyDrive/...' paths below differ from the
# titles and './results/...' folders used by the training cells in this file -
# confirm they match where the histories were actually saved.
all_hist = {}

# Load back history for batch_size
BATCH_SIZES = [64,128,256]
for bs in BATCH_SIZES:
    title = 'bs_inc_res_{}'.format(bs)
    filename = './drive/MyDrive/nn_project/inception_resnet/bs_inc_res/bs_hist/{}_hist'.format(title)
    all_hist[title] = _load_history(filename)

# Load back history for learning rate
LEARNING_RATES = [1e-2, 1e-3]
for lr in LEARNING_RATES:
    title = 'lr_inc_res_{}'.format(lr)
    filename = './drive/MyDrive/nn_project/inception_resnet/lr_inc_res/lr_hist/{}_hist'.format(title)
    all_hist[title] = _load_history(filename)

# Load back history for dropout
DROPOUT_RATES = [0, 0.2]
for dr in DROPOUT_RATES:
    title = 'dropout_inc_res_{}'.format(dr)
    filename = './drive/MyDrive/nn_project/inception_resnet/dropout_inc_res/dropout_hist/{}_hist'.format(title)
    all_hist[title] = _load_history(filename)

# Load back history for the stacked model
title = 'stacked_inc_res'
filename = './drive/MyDrive/nn_project/inception_resnet/stacked_inc_res/{}_hist'.format(title)
all_hist[title] = _load_history(filename)

In [None]:
# Print the max validation accuracy of every loaded history and track the best model.
# Local names avoid shadowing the builtin `max` and rebinding `model` to a string.
print('Max Validation Accuracies')
max_acc = {}                               # run title -> best validation accuracy
max_model = {'name': None, 'val_acc': 0}   # best run seen so far
for name, hist in all_hist.items():
    val_accs = hist['val_acc']
    best_val_acc = np.max(val_accs)
    epoch = np.argmax(val_accs) + 1        # argmax is 0-based; epochs are reported 1-based
    last_epoch = len(val_accs)
    print('{:<20}: {} at epoch {:<2} where last epoch is {}'.format(name, best_val_acc, epoch, last_epoch))
    if best_val_acc > max_model['val_acc']:
        max_model['val_acc'] = best_val_acc
        max_model['name'] = name
    max_acc[name] = best_val_acc

print()
print('The best model is {} with val_acc of {}'.format(max_model['name'], max_model['val_acc']))

In [None]:
# Load the saved Keras model stored under the bs_256 results folder
filename = (
    './drive/MyDrive/nn_project/inception_resnet/bs_inc_res/bs_256/bs_inc_res_256_mc.h5'
)
bs_256_model = models.load_model(filename)