## Paper Reading Analysis - Code Implementation
### Model 5 Training, Hyperparameter Search and Evaluation
### Jonathan Alcineus

In [1]:
# These handle the file locations and importing the dataframe from the saved datafile from the authors files
import os
import pandas as pd


# These handle the image processing, editing, or displaying that needs to be performed
import cv2 
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import skimage

# These handle training the convolutional neural network (CNN) model
import tensorflow as tf
import keras
import numpy as np
import sklearn
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, BatchNormalization, MaxPooling2D, GlobalAveragePooling2D, Dropout
from keras.models import Model, load_model
from keras.optimizers import Adam, SGD
import time

import copy

2025-08-30 22:36:27.358667: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-30 22:36:27.428889: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756593387.457962    5266 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756593387.466808    5266 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1756593387.484916    5266 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# This changes the home directory
home_directory = os.path.expanduser("~")
os.chdir(home_directory)

# Then goes to the folder where the data lies
os.chdir('DNNorDermatologist')

# Ensures that we are in the correct folder
print(os.getcwd())

/teamspace/studios/this_studio/DNNorDermatologist


In [3]:
# Begin to build the classifier and the ranges for each model to find the optimal parameters, or searching through hyperparameters
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
from skopt import gp_minimize

space = [Real(1e-6, 0.01, "log-uniform", name='learning_rate'),
          Real(0.1, 0.8, name='dropout'),
          Real(0.8, 1.0, name='momentum'),
          Real(0.9, 1.0, name='beta_1'),
          Real(0.99, 1.0, name='beta_2'),
          Integer(low=5,high=20, name = 'epochs'),
          Integer(low=50, high=225, name='num_dense_nodes'),
          Categorical(categories=['SGD', 'Adam'],
                             name='optimizer_type')
          ]

In [4]:

# The first part to implenment is the creation of random models
if not os.path.isdir('suite_of_models'):
    os.mkdir('suite_of_models')

def make_a_model(learning_rate, dropout, momentum, beta_1, beta_2, num_dense_nodes, optimizer_type):
    # Like in the paper the base model for the image classifcation will be imagenet
    base_model = InceptionV3(weights='imagenet',input_shape=(224, 224, 3), include_top=False)

    # Fine tune the model with extra dense layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_dense_nodes, activation='relu', kernel_initializer='he_normal')(x)
    x = Dropout(rate=dropout)(x)
    predictions = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Selects a type of model optimizer
    if optimizer_type == "Adam":
        optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1, beta_2=beta_2)
    elif optimizer_type == "SGD":
        optimizer = SGD(learning_rate=learning_rate, momentum=momentum)

    model.compile(loss='binary_crossentropy',
          optimizer=optimizer,
          metrics=['accuracy'])
    return model

In [5]:
# Start off with basic parameters and the batch size for the models
batch_size = 16
best_accuracy = {} 
for seed in range(15):
  best_accuracy[seed] = 0.0

In [6]:
# Training on seed 0 for this cell

seed = 4

print('We are currently training on seed:', seed) 
# for each iteration of the hyperparameter search, return a set of parameters
# and feed them into the relevant parts
# run training of the model for this seed, save with seed num
X_train = np.load(f'paper_reading_small_data/trial_{seed}_X_train.npy', allow_pickle=True)
y_train = np.load(f'paper_reading_small_data/trial_{seed}_y_train.npy', allow_pickle=True)
X_test = np.load(f'paper_reading_small_data/trial_{seed}_X_test.npy', allow_pickle=True)
y_test = np.load(f'paper_reading_small_data/trial_{seed}_y_test.npy', allow_pickle=True)

path_best_model = 'inception_saved_trial_{}.keras'.format(seed)
  
@use_named_args(dimensions=space)
def fitness(learning_rate, dropout, momentum, beta_1, beta_2,
              num_dense_nodes, optimizer_type, epochs):

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(learning_rate))
    print('num_dense_nodes:', num_dense_nodes)
    print('dropout:', dropout)
    print('optimizer_type:', optimizer_type)
    print('epochs:', epochs)

    # Create the neural network with these hyper-parameters.
    model = make_a_model(learning_rate=learning_rate, 
                         dropout=dropout, 
                         momentum=momentum, 
                         beta_1=beta_1, beta_2=beta_2,
                         num_dense_nodes=num_dense_nodes, 
                         optimizer_type=optimizer_type)

    # Use Keras to train the model.
    history = model.fit(x=X_train,
                          y=y_train,
                          epochs=epochs,
                          batch_size=batch_size,
                          validation_data= (X_test,y_test))

    # Get the classification accuracy on the validation-set
    # after the last training-epoch.
    accuracy = history.history['val_accuracy'][-1]
    # auc_val = history.history['val_auc'][-1]

    # Print the classification accuracy.
    print()
    print("Accuracy: {0:.2%}".format(accuracy))
    print()

    # Save the model if it improves on the best-found performance.
    # We use the global keyword so we update the variable outside
    # of this function.
    global best_accuracy

    if accuracy > best_accuracy[seed]:
      # Save the new model to harddisk in the recommended Keras format
      model_path = os.path.join('DataSplitted', path_best_model)
      model.save(model_path)
    

      # Update the classification accuracy.
      best_accuracy[seed] = accuracy
      # best_auc = auc_val
          

    # Delete the Keras model with these hyper-parameters from memory.
    del model

    import gc

    keras.backend.clear_session()
    gc.collect()

    try:
      tf.config.experimental.reset_memory_stats("GPU:0")
    except:
      pass  # in case older TF version
    return -accuracy

  
#This conducts the hyperparameter search over each data split for details see: https://scikit-optimize.github.io/#skopt.gp_minimize
search_result = gp_minimize(func=fitness,
                            dimensions=space,
                            acq_func='EI', # Expected Improvement.
                            n_calls=15,
			    n_random_starts = 5,
                            verbose = True)
print('Seed: ',seed)
print("BEST ACCURACY: ", best_accuracy)
print('hyper_params ', search_result.x)

del X_train, y_train, X_test, y_test 

import gc

keras.backend.clear_session()
gc.collect()

We are currently training on seed: 4


Iteration No: 1 started. Evaluating function at random point.
learning rate: 1.2e-04
num_dense_nodes: 98
dropout: 0.30776521376363053
optimizer_type: SGD
epochs: 18


I0000 00:00:1755995456.116816   62857 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13949 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:1e.0, compute capability: 7.5


Epoch 1/18


I0000 00:00:1755995480.549317   64217 service.cc:152] XLA service 0x7ff25c014d90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755995480.549344   64217 service.cc:160]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2025-08-24 00:31:21.496155: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755995484.499733   64217 cuda_dnn.cc:529] Loaded cuDNN version 91200
2025-08-24 00:31:32.804856: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-24 00:31:32.952362: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investiga

[1m 1/52[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m44:51[0m 53s/step - accuracy: 0.6250 - loss: 0.6248

I0000 00:00:1755995512.479062   64217 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m51/52[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 114ms/step - accuracy: 0.5437 - loss: 0.7121

2025-08-24 00:32:08.627750: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-24 00:32:08.775216: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-24 00:32:09.087535: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-24 00:32:09.230566: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 924ms/step - accuracy: 0.5906 - loss: 0.6745 - val_accuracy: 0.6812 - val_loss: 0.6191
Epoch 2/18
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 148ms/step - accuracy: 0.8116 - loss: 0.4458 - val_accuracy: 0.6341 - val_loss: 0.6981
Epoch 3/18
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 148ms/step - accuracy: 0.8418 - loss: 0.3656 - val_accuracy: 0.7899 - val_loss: 0.4415
Epoch 4/18
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 149ms/step - accuracy: 0.8623 - loss: 0.2948 - val_accuracy: 0.8188 - val_loss: 0.3783
Epoch 5/18
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 149ms/step - accuracy: 0.8841 - loss: 0.2490 - val_accuracy: 0.8176 - val_loss: 0.3892
Epoch 6/18
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 149ms/step - accuracy: 0.9263 - loss: 0.1792 - val_accuracy: 0.8225 - val_loss: 0.4386
Epoch 7/18
[1m52/52[0m [32m━━━━━━━

0

In [3]:
# GradCAM and Kernel SHAP Experiments
os.chdir(home_directory)
os.chdir('paper-reading-analysis')

# Library with the methods that I needed
import gradcam_shap
import scipy

from tf_keras_vis.gradcam import Gradcam
from tf_keras_vis.utils.model_modifiers import ReplaceToLinear
from tf_keras_vis.utils.scores import CategoricalScore

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
os.chdir(home_directory)
os.chdir('DNNorDermatologist')
os.chdir('DataSplitted')
seed = 4
model = load_model(f'inception_saved_trial_{seed}.keras')

2025-08-30 22:36:52.522631: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [5]:
import collections
import collections.abc
collections.Iterable = collections.abc.Iterable

from vis.utils import utils
from keras import layers, activations

#Assorted modifications for model compatibility with gradCAM
gmodel = copy.deepcopy(model)

print(gmodel.layers)

layer_idx = utils.find_layer_idx(gmodel,'dense_1')

#swap with softmax with linear classifier for the reasons mentioned above
gmodel.layers[layer_idx].activation = activations.linear
gmodel = utils.apply_modifications(gmodel)



[<InputLayer name=input_layer, built=True>, <Conv2D name=conv2d, built=True>, <BatchNormalization name=batch_normalization, built=True>, <Activation name=activation, built=True>, <Conv2D name=conv2d_1, built=True>, <BatchNormalization name=batch_normalization_1, built=True>, <Activation name=activation_1, built=True>, <Conv2D name=conv2d_2, built=True>, <BatchNormalization name=batch_normalization_2, built=True>, <Activation name=activation_2, built=True>, <MaxPooling2D name=max_pooling2d, built=True>, <Conv2D name=conv2d_3, built=True>, <BatchNormalization name=batch_normalization_3, built=True>, <Activation name=activation_3, built=True>, <Conv2D name=conv2d_4, built=True>, <BatchNormalization name=batch_normalization_4, built=True>, <Activation name=activation_4, built=True>, <MaxPooling2D name=max_pooling2d_1, built=True>, <Conv2D name=conv2d_8, built=True>, <BatchNormalization name=batch_normalization_8, built=True>, <Activation name=activation_8, built=True>, <Conv2D name=conv2d_



In [6]:
os.chdir(home_directory)
os.chdir('paper-reading-analysis')
%run gradcam_shap.py

In [7]:
os.chdir(home_directory)
os.chdir('DNNorDermatologist')

# Get the test dataset of 400 - 200 nevi and 200 melanoma
test_df = pd.read_pickle('NvAndMelNoDuplicatesFullSizeTestSet.zip')

# Change the idx column to be '0' where the diagnosis of the lesion was
# nevi, and '1' when the diagnosis is diagnosis
test_df['idx'] = np.where(test_df['id'] == 'mel', 1 , 0)

# Save a new table 'features' to be test_df, without the idx column
features=test_df.drop(columns=['idx'], axis = 1)
# Create a new table with just the correct diagnosis (0 for melanoma (or nevi), 1 for nevi (or melanoma))
target=test_df['idx']

# Change features to be a numpy array of image pixel data ((R, G, B))
features = np.asarray(features['image'].tolist())

# I want to resize the images 
features = np.array([cv2.resize(image, (224, 224)) for image in features])

# Normalise this data in an alternate table to be values from 0 ... 1
# e.g. 255 -> 1, 0 --> 0
# Normalises for original prediction and evaluation of model, the SHAP funciton below requires non normalised data
# TODO: Standarise this so SHAP takes normalised

features2 = features / 255

# Convert the data to one-hot encoding
target_cat = to_categorical(target, num_classes = 2)

# Get predictions for image data
# e.g.
# Index 0 : [0.9222, 0.0778]
# Index 1 : [0.4500, 0.5500]
# etc..
# This represents likelihood of melanoma and nevi respectively (according to the model)
y_pred = model.predict(features2, verbose=1)
y_pred = [[value[0], 1-value[0]] for value in y_pred]

print(y_pred)

# Create a new dataframe with entries for each element of the test set
# Include an ID, diagnosis, and % likelihoods for each diagnosis from the model
df = pd.DataFrame(columns=['ID', 'Dx', '% Mel', '% Nev'],index=[i for i in range(400)])
df['ID'] = df.index

# Create dictionaries to contain actual diagnosis and probabilities from the model
dx_d = {}
Pmel = {}
Pnev = {}
# Take the actual diagnoses from where we retrieved them earlier
y_test_cat = target_cat

# For each element in the test set:
for ind in range(400):
    # Append the diagnosis and predictions to their respective dictionaries
    if y_test_cat[ind][1] == 1.0:
        diagnosis = 'Melanoma'
    elif y_test_cat[ind][0] == 1.0:
        diagnosis = 'Nevus'
    dx_d[ind] = diagnosis
    Pmel[ind] = y_pred[ind][0]
    Pnev[ind] = y_pred[ind][1]
    
# Take the above dictionaries and insert them into the data frame
df['Dx'] = df['ID'].map(dx_d)
df['% Mel'] = df['ID'].map(Pmel)
df['% Nev'] = df['ID'].map(Pnev)

# Change the prediction likelihoods to be floats 
df = df.astype({"% Mel": float, "% Nev": float})

#df = df.iloc[id_list]

# Print the first 5 entries in the data frame
print('Unseen set') 
print(df)

I0000 00:00:1756593434.328331    5970 service.cc:152] XLA service 0x7f5f7c055930 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1756593434.328381    5970 service.cc:160]   StreamExecutor device (0): Host, Default Version
2025-08-30 22:37:14.425853: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1756593436.041709    5970 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step 
[[np.float32(0.9999862), np.float32(1.3828278e-05)], [np.float32(0.90381753), np.float32(0.096182466)], [np.float32(0.99999905), np.float32(9.536743e-07)], [np.float32(0.99974316), np.float32(0.0002568364)], [np.float32(0.077295326), np.float32(0.9227047)], [np.float32(1.0), np.float32(0.0)], [np.float32(0.9999981), np.float32(1.9073486e-06)], [np.float32(0.9999862), np.float32(1.3828278e-05)], [np.float32(0.999972), np.float32(2.8014183e-05)], [np.float32(0.9802422), np.float32(0.019757807)], [np.float32(1.0), np.float32(0.0)], [np.float32(0.99978036), np.float32(0.00021964312)], [np.float32(0.99998367), np.float32(1.6331673e-05)], [np.float32(0.99999976), np.float32(2.3841858e-07)], [np.float32(0.98588485), np.float32(0.014115155)], [np.float32(0.5735389), np.float32(0.4264611)], [np.float32(0.99923706), np.float32(0.00076293945)], [np.float32(0.9997805), np.float32(0.0002195239)], [np.float32(0.97511566), np.f

In [8]:
os.chdir('..')
os.chdir('paper-reading-analysis')

# I want examine the results, so I will just save them
df.to_csv(f'predictions_model_{seed}.csv')