

# AMLS Assignment Draft
## Task B: CNN on BloodMNIST Dataset

Explore CNN based classifiers on the BloodMNIST dataset.

## Import libraries
The required libraries for this notebook are sklearn, copy, numpy and matplotlib.

In [10]:
## first enable autoreload during development so latest (new) version local code library is reloaded on execution 
## can be commented out when local code development not happening to avoid overhead
%reload_ext autoreload
%autoreload 2

## import libraries
import io
import numpy as np
import matplotlib.pyplot as plt
## import tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy

## removed MedMNIST specific library

## local code library
import AMLS_common as ac

## Set base parameters
Including hyper parameters and data set specifics

In [11]:
parameter = ac.HyperParameters(learning_rate=0.001, 
                               batch_size=128, 
                               num_epochs=50, 
                               num_filter=16,
                               layers=3,
                               dropout_rate=0.2,
                               optimise="Adam",
                               loss="SparseCategoricalCrossentropy()")

## set up lists and parameters
test_list     = []
run_list      = []
## use these lists of values to grid test hyper parameter sensitivity                
epochs_list   = [30]                                ## set of epochs to run for
filter_list   = [32,64]                             ## main filter sizes to use
bs_list       = [128]                               ## dataset batch size
lr_list       = [0.01, 0.001]                       ## learning rates
ly_list       = [4,5]                               ## number of covolution layers
dr_list       = [0.1,0.2,0.3]                       ## selected dropout rates
st_list       = [1,2]                               ## stride list
loss_list     = ['sparse_categorical_crossentropy'] ## loss functions to use
optimise_list = ['Adam']                            ## optimisation functions
padding       = "same"
for ep in epochs_list:
    for bs in bs_list:
        for lr in lr_list:
            for fi in filter_list:
                for ly in ly_list:
                    for dr in dr_list:
                        for ls in loss_list:
                            for op in optimise_list:
                                for st in st_list:
                                    parameter = ac.HyperParameters(learning_rate=lr, 
                                                                   batch_size=bs, 
                                                                   num_epochs=ep, 
                                                                   num_filter=fi,
                                                                   layers=ly,
                                                                   dropout_rate=dr,
                                                                   optimise=op,
                                                                   strides=st,
                                                                   padding="same",
                                                                   loss=ls)          
                                test_list.append([parameter])
## reshape parameters into a test grid that can be read using for loop
test_grid = [hp for sublist in test_list for hp in sublist]
print("test cases:",len(test_grid))

test cases: 24


In [12]:
# data set specifics and control (e.g. verbose) parameters

filebase   = "metrics/"
verbose    = 0                   # to control whether additional in process information is printed

## Load and preprocess the BloodMNIST Data
We load the dataset.

In [13]:
## Loading the data file using common MedMINST loader
data_flag  = 'bloodmnist'        # defines which dataset to load
result_set = ac.medMNIST_load(data_flag,parameter.batch_size)

## check that the loader returned data correctly and then split out
if result_set != []:
    train_dataset = result_set[0]
    test_dataset  = result_set[1]
    val_dataset   = result_set[2]

if verbose == 1:
    print("\nSummary metrics for train_dataset")
    print("type:",type(val_dataset))
    print("length:",len(val_dataset))
    print("shape:",val_dataset)

Using downloaded and verified file: C:\Users\johnc\.medmnist\bloodmnist.npz
Using downloaded and verified file: C:\Users\johnc\.medmnist\bloodmnist.npz
Using downloaded and verified file: C:\Users\johnc\.medmnist\bloodmnist.npz


## Fit the model

In [14]:
## Create instances of the dataclass from the list
for item in test_grid:
    ## Define the model which is then run for all learning rates in set
    print("Run with",item)
    print(item.num_epochs)
    
    ## initialise tqdm callback
    tqdm_callback = ac.TqdmEpochProgress(total_epochs=item.num_epochs)
    
    ## Simple CNN model to support Learning Rate analysis
    ## added desired number of layers
    if item.layers == 3:
        model = Sequential([
            Conv2D(item.num_filter*4, (3, 3), activation=item.default_activation,\
                   input_shape=(28, 28, 3)),                                                        ## Input layer
            Conv2D(item.num_filter, (3, 3), activation=item.default_activation),                    ## Convolution layer 
            MaxPooling2D((2, 2)),                                                                   ## Reduce the features
            Conv2D(item.num_filter, (3, 3), activation=item.default_activation),                    ## Another Convolution layer 
            MaxPooling2D((2, 2)),                                                                   ## Again reduce the features
            Flatten(),                                                                              # Flatten
            Dropout(item.dropout_rate),                                                             ## added dropout
            Dense(8, activation='softmax')                                                          ## Output layer for 8 types 
        ])

    if item.layers == 4:
        model = Sequential([
            Conv2D(item.num_filter*4, (3, 3), padding=item.padding,\
                   activation=item.default_activation,input_shape=(28, 28, 3)),                ## Input layer
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Convolution layer 
            MaxPooling2D((2, 2),strides=item.strides),                                         ## Reduce the features
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Another Convolution layer 
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Added Convolution layer 
            MaxPooling2D((2, 2),strides=item.strides),                                         ## Again reduce the features
            Flatten(),                                                                         ## Flatten
            Dropout(item.dropout_rate),                                                        ## added dropout
            Dense(8, activation='softmax')                                                     ## Output layer for 8 types 
        ])
        
    if item.layers == 5:
        model = Sequential([
            Conv2D(item.num_filter*4, (3, 3), padding=item.padding,\
                   activation=item.default_activation,input_shape=(28, 28, 3)),                ## Input layer
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Convolution layer 
            MaxPooling2D((2, 2),strides=item.strides),                                         ## Reduce the features
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Another Convolution layer 
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Added Convolution layer 
            MaxPooling2D((2, 2),strides=item.strides),                                         ## Again reduce the features
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Another Convolution layer 
            Conv2D(item.num_filter, (3, 3), padding=item.padding,\
                   activation=item.default_activation),                                        ## Added Convolution layer 
            MaxPooling2D((2, 2),strides=item.strides),                                         ## Again reduce the features
            Flatten(),                                                                         ## Flatten
            Dropout(item.dropout_rate),                                                        ## added dropout
            Dense(8, activation='softmax')                                                     ## Output layer for 8 types 
        ])
                
    if verbose == 1:
        print(model.summary())
        
    ## Redirect the summary output to a string
    summary_string  = io.StringIO()
    model.summary(print_fn=lambda x: summary_string.write(x + "\n"))
    summary_content = summary_string.getvalue()
    summary_string.close()

    ## Compile the model
    model.compile(optimizer=item.optimise,                                                   
                  loss=item.loss,
                  metrics='acc')

    ## Fit the model
    history = model.fit(val_dataset, 
                        epochs=item.num_epochs, 
                        batch_size=item.batch_size, 
                        verbose=0,
                        callbacks = [tqdm_callback])
    
    ## Save results to files
    run_list.append(ac.hyper_process(history,summary_content,item))

print("Hyperparameter test run complete")

Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.1, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:38<00:00,  5.29s/epoch, loss=0.364, acc=0.873]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.2, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:36<00:00,  5.21s/epoch, loss=0.327, acc=0.886]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.3, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:35<00:00,  5.17s/epoch, loss=0.411, acc=0.848]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.1, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:42<00:00,  5.42s/epoch, loss=0.474, acc=0.821]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.2, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:42<00:00,  5.41s/epoch, loss=0.386, acc=0.864]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.3, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:56<00:00,  5.88s/epoch, loss=0.352, acc=0.866]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.1, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:32<00:00, 13.08s/epoch, loss=0.266, acc=0.901]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.2, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [07:00<00:00, 14.03s/epoch, loss=0.264, acc=0.901]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.3, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:58<00:00, 13.95s/epoch, loss=0.333, acc=0.884]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.1, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:38<00:00, 13.29s/epoch, loss=0.255, acc=0.914]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.2, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [07:24<00:00, 14.80s/epoch, loss=0.258, acc=0.909]


Run with HyperParameters(learning_rate=0.01, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.3, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [07:17<00:00, 14.57s/epoch, loss=0.322, acc=0.887]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.1, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:45<00:00,  5.52s/epoch, loss=0.409, acc=0.853]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.2, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:54<00:00,  5.82s/epoch, loss=0.351, acc=0.868]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.3, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:47<00:00,  5.57s/epoch, loss=0.356, acc=0.88] 


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.1, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:54<00:00,  5.81s/epoch, loss=0.341, acc=0.876]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.2, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:55<00:00,  5.86s/epoch, loss=0.337, acc=0.877]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=32, strides=2, padding='same', dropout_rate=0.3, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [02:56<00:00,  5.88s/epoch, loss=0.457, acc=0.836]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.1, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:58<00:00, 13.95s/epoch, loss=0.277, acc=0.898]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.2, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [09:43<00:00, 19.45s/epoch, loss=0.212, acc=0.926]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.3, layers=4, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:25<00:00, 12.87s/epoch, loss=0.247, acc=0.919]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.1, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:39<00:00, 13.32s/epoch, loss=0.222, acc=0.918]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.2, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [06:57<00:00, 13.92s/epoch, loss=0.337, acc=0.873]


Run with HyperParameters(learning_rate=0.001, batch_size=128, num_epochs=30, optimise='Adam', loss='sparse_categorical_crossentropy', num_filter=64, strides=2, padding='same', dropout_rate=0.3, layers=5, default_activation='relu')
30


Epoch Progress: 100%|██████████| 30/30 [07:34<00:00, 15.15s/epoch, loss=0.352, acc=0.871]

Hyperparameter test run complete





In [16]:
best_run,run_df = ac.analyse_run(run_list,filebase)
print("\nRun satisfying both smallest min_loss and largest max_acc:")
print(best_run)

feature_importance,coef = ac.analyse_hyperparameters(run_df)
print("\nImpact of Hyperparameters on Accuracy (from Linear Regression):")
print(coef)
print("\nHyperparameter Importance for Accuracy (from Random Forest):")
print(feature_importance)


Run satisfying both smallest min_loss and largest max_acc:
   metrics_file summary_file  min_loss   max_acc  last_loss  last_acc  \
21                            0.176788  0.936916   0.222018   0.91764   

    var_loss   var_acc  learning_rate  batch_size  num_epochs  num_filter  \
21  0.229043  0.033547          0.001         128          30          64   

    strides padding  dropout_rate  layers optimise  \
21        2    same           0.1       5     Adam   

                               loss default_activation  
21  sparse_categorical_crossentropy               relu  
R^2 Score: 0.3415676023988541
Mean Squared Error: 0.00023359836657046708

Impact of Hyperparameters on Accuracy (from Linear Regression):
  Hyperparameter   Coefficient
0  learning_rate -1.534400e-01
1     num_epochs -2.201364e-15
2     num_filter  1.446331e-03
3        strides  6.938894e-17
4         layers -1.059865e-02
5   dropout_rate -3.371952e-02
6     batch_size  0.000000e+00

Hyperparameter Importance fo