

# AMLS Assignment Draft
## Task B: CNN on BloodMNIST Dataset

Explore CNN-based classifiers on the BloodMNIST dataset.

## Import libraries
The required libraries for this notebook are io, numpy, matplotlib, tensorflow (Keras) and medmnist, plus the local code library `MedMNIST_load`.

In [14]:
## first enable autoreload during development so latest (new) version local code library is reloaded on execution 
## can be commented out when local code development not happening to avoid overhead
%load_ext autoreload
%autoreload 2

## import libraries
import io
import numpy as np
import matplotlib.pyplot as plt
## import tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.losses import BinaryCrossentropy
## MedMNIST specific library
import medmnist
from medmnist import BloodMNIST, INFO, Evaluator
## local code library
import MedMNIST_load as ml

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Set base parameters
These include the hyperparameters and the dataset specifics.

In [15]:
# Base hyper-parameters for a single training run
parameter = ml.HyperParameters(
    learning_rate=0.001,
    batch_size=128,
    num_epochs=30,
    optimise="Adam",
    loss="SparseCategoricalCrossentropy()",
    default_activation="relu",
)

## Candidate values for grid testing hyper-parameter sensitivity
# number of epochs to be used
epochs_list = [10, 50, 100, 500]
# dataset batch size
bs_list = [32, 64, 128]
# learning rate
lr_list = [1, 0.1, 0.01, 0.001, 0.0001]

In [16]:
# Dataset specifics and control (e.g. verbose) parameters

# defines which dataset to load
data_flag = 'bloodmnist'
# info about this dataset, looked up in the MedMNIST INFO table
info = INFO[data_flag]
# whether to download the dataset
download = True
# base path handed to ml.graph_and_save when the run's results are saved
filebase = "metrics/"
# controls whether additional in-process information is printed (1 = print)
verbose = 1

## Load and preprocess the BloodMNIST Data
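We load the dataset with the shared MedMNIST loader, which returns the training, test and validation splits already batched.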

In [17]:
# Load the data using the common MedMNIST loader
result_set = ml.medMNIST_load(data_flag, parameter.batch_size)

## Fail fast if the loader did not return all three splits. Carrying on would either raise
## a NameError further down (fresh kernel) or silently reuse datasets left in the kernel by
## an earlier run (hidden state).
if result_set is None or len(result_set) < 3:
    raise RuntimeError(
        f"ml.medMNIST_load({data_flag!r}, ...) did not return train/test/val datasets"
    )

## split out the datasets; the loader returns them in the order train, test, validation
train_dataset = result_set[0]
test_dataset  = result_set[1]
val_dataset   = result_set[2]

if verbose == 1:
    print("\nSummary metrics for train_dataset")
    print("type:",type(train_dataset))
    print("length:",len(train_dataset))      # number of batches, not number of images
    print("shape:",train_dataset)

Using downloaded and verified file: C:\Users\johnc\.medmnist\bloodmnist.npz
Using downloaded and verified file: C:\Users\johnc\.medmnist\bloodmnist.npz
Using downloaded and verified file: C:\Users\johnc\.medmnist\bloodmnist.npz

Summary metrics for train_dataset
type: <class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>
length: 94
shape: <BatchDataset shapes: ((None, 28, 28, 3), (None, 1)), types: (tf.float64, tf.float32)>


## Define the CNN model

In [18]:
# Define the model
if verbose == 1:
    print("Default activation is ",parameter.default_activation)

# Small CNN: three 3x3 convolutions with two 2x2 max-pooling stages, then a softmax classifier
model = Sequential([
    Conv2D(64, (3, 3), activation=parameter.default_activation, input_shape=(28, 28, 3)),   # 1: input convolution
    Conv2D(16, (3, 3), activation=parameter.default_activation),                            # 2: second convolution
    MaxPooling2D((2, 2)),                                                                   # Down-sample the feature maps
    Conv2D(16, (3, 3), activation=parameter.default_activation),                            # 3: third convolution
    MaxPooling2D((2, 2)),                                                                   # Down-sample the feature maps
    Flatten(),                                                                              # Flatten
    Dense(8, activation='softmax')                                                          # Output layer for 8 types
])

# Capture the summary once into a string (it is saved alongside the metrics later) and
# print that text. print(model.summary()) would also print the None that summary() returns.
summary_string = io.StringIO()
model.summary(print_fn=lambda x: summary_string.write(x + "\n"))
summary_content = summary_string.getvalue()
summary_string.close()
print(summary_content)

# Build the optimiser from the configured hyper-parameters so that changing
# parameter.optimise / parameter.learning_rate actually affects training.
# The class is looked up by name on tf.keras.optimizers instead of eval()-ing a string.
optimizer_choice = str(parameter.optimise)+'(learning_rate='+str(parameter.learning_rate)+')'   # human-readable description
optimizer_class  = getattr(tf.keras.optimizers, str(parameter.optimise), None)
if optimizer_class is None:
    raise ValueError(f"Unknown optimiser {parameter.optimise!r}; expected a tf.keras.optimizers class name")
optimizer        = optimizer_class(learning_rate=parameter.learning_rate)

# NOTE(review): parameter.loss is not applied here; the loss stays fixed to sparse categorical
# cross-entropy, which matches the single integer label column and the 8-way softmax output.
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['acc'])

Default activation is  relu
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 26, 26, 64)        1792      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 24, 24, 16)        9232      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 16)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 10, 10, 16)        2320      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 5, 5, 16)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_2 (Dense)           

## Fit the model

In [None]:
# Fit the model
if verbose == 1:
    print(parameter.num_epochs,parameter.batch_size)

# Validate on the validation split so the test split stays untouched for the final evaluation.
# batch_size is not passed: train_dataset is a tf.data dataset that the loader has already
# batched with parameter.batch_size, and Keras expects batch_size to be left unset for datasets.
history = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=parameter.num_epochs,
                    verbose=0)

# Plot the training curves and save them together with the model summary and parameters
ml.graph_and_save(history,summary_content,parameter,filebase,both="Y")

30 128
