In [1]:
import glob
import pandas as pd
import numpy as np
import imageio
from skimage.transform import resize
from scipy import misc

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing import image
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, TensorBoard
from keras.utils import np_utils

Using TensorFlow backend.


In [4]:
import pickle

In [5]:
PATH_DATA = './Data/'
PATH_IMAGES = glob.glob('./Images/*.jpg')

In [6]:
df_meta = pd.read_csv(PATH_DATA + 'HAM10000_metadata.csv')

## I. EDA

In [7]:
df_meta.shape

(10015, 7)

In [8]:
df_meta.sample()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
4929,HAM_0000826,ISIC_0027362,nv,follow_up,45.0,male,back


In [9]:
df_meta['dx'].value_counts()

nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: dx, dtype: int64

In [10]:
df_meta['dx_type'].value_counts()

histo        5340
follow_up    3704
consensus     902
confocal       69
Name: dx_type, dtype: int64

In [11]:
df_meta['age'].value_counts()

45.0    1299
50.0    1187
55.0    1009
40.0     985
60.0     803
70.0     756
35.0     753
65.0     731
75.0     618
30.0     464
80.0     404
85.0     290
25.0     247
20.0     169
5.0       86
15.0      77
10.0      41
0.0       39
Name: age, dtype: int64

In [12]:
df_meta['sex'].value_counts()

male       5406
female     4552
unknown      57
Name: sex, dtype: int64

In [13]:
df_meta['localization'].value_counts()

back               2192
lower extremity    2077
trunk              1404
upper extremity    1118
abdomen            1022
face                745
chest               407
foot                319
unknown             234
neck                168
scalp               128
hand                 90
ear                  56
genital              48
acral                 7
Name: localization, dtype: int64

## Experimentation
### Method 1 results in dead kernel

In [14]:
# Method 1: Load all Images and Scale by 255
# images = []
# for path_image in PATH_IMAGES:
#     image = misc.imread(path_image)
#     image = image / 255
#     images.append(image)

### Method 2 can be completed with approx 3GB memory consumption

In [15]:
# Method 2: Load all Images and Resize to 50%, no scaling
# images = []
# for path_image in PATH_IMAGES:
#     image = misc.imread(path_image)
#     image = misc.imresize(image, size=(300, 225), interp='nearest')
#     images.append(image)
#     print('Completed processing {}'.format(path_image))

### Method 3 can be completed with approx 5GB memory consumption

In [16]:
# Method 3: Load all Images and Resize to 70%, no scaling
# images = []
# for path_image in PATH_IMAGES:
#     image = misc.imread(path_image)
#     image = misc.imresize(image, size=(420, 315), interp='nearest')
#     images.append(image)
#     print('Completed processing {}'.format(path_image))

In [17]:
# images = np.asarray(images)

In [18]:
# np.asarray([images.shape[1], images.shape[2], images.shape[3]])
# array([420, 315,   3])

### Method 4 scaling values takes up entire 16GB + swap

In [19]:
# Method 4: Load all Images and Resize to 50%, with scaling
# images = []
# for path_image in PATH_IMAGES:
#     image = misc.imread(path_image)
#     image = misc.imresize(image, size=(300, 225), interp='nearest')
#     image = image / 255
#     images.append(image)

## Data Visualisation

In [20]:
# This is working, redo this later
# figure = plt.figure()
# count = 0
# for index, row in df_combined.iterrows():
#     if count < 10:
#         plt.imshow(row['image'])
#         plt.axis('off')
#         count += 1
#     else:
#         break

## Modeling - CNN4

In [21]:
# Subset the data and remove dx=nv from the dataset
df_meta = df_meta[df_meta['dx'] != 'nv']

In [22]:
df_meta.shape

(3310, 7)

In [23]:
df_meta['dx'].value_counts()

mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: dx, dtype: int64

In [24]:
# Prepare Train and Test Data
df_meta = df_meta.set_index('image_id')
y = df_meta['dx']
y_train, y_test = train_test_split(y, test_size=0.1, random_state=42)

In [25]:
# Original Size: 600*450

# Method 3: Load all Images and Resize to 50%, no scaling
x_train, x_test = [], []
for image_id in y_train.index.values:
    #Uses too much memory, will revert first to scipy
    #image = imageio.imread('./Images/{}.jpg'.format(image_id))
    #image = resize(image, output_shape=(360, 270), anti_aliasing=False, mode='constant')
    image = misc.imread('./Images/{}.jpg'.format(image_id))
    image = misc.imresize(image, size=(300, 225), interp='nearest')
    x_train.append(image)
    print('Completed processing {}.jpg'.format(image_id))

x_train = np.asarray(x_train)

for image_id in y_test.index.values:
    #Uses too much memory, will revert first to scipy
    #image = imageio.imread('./Images/{}.jpg'.format(image_id))
    #image = resize(image, output_shape=(360, 270), anti_aliasing=False, mode='constant')
    image = misc.imread('./Images/{}.jpg'.format(image_id))
    image = misc.imresize(image, size=(300, 225), interp='nearest')
    x_test.append(image)
    print('Completed processing {}.jpg'.format(image_id))

x_test = np.asarray(x_test)

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  if __name__ == '__main__':
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


Completed processing ISIC_0027419.jpg
Completed processing ISIC_0025250.jpg
Completed processing ISIC_0033491.jpg
Completed processing ISIC_0030730.jpg
Completed processing ISIC_0028352.jpg
Completed processing ISIC_0033722.jpg
Completed processing ISIC_0033174.jpg
Completed processing ISIC_0030391.jpg
Completed processing ISIC_0033999.jpg
Completed processing ISIC_0032826.jpg
Completed processing ISIC_0025612.jpg
Completed processing ISIC_0029600.jpg
Completed processing ISIC_0032692.jpg
Completed processing ISIC_0025814.jpg
Completed processing ISIC_0026714.jpg
Completed processing ISIC_0026811.jpg
Completed processing ISIC_0032842.jpg
Completed processing ISIC_0026644.jpg
Completed processing ISIC_0027332.jpg
Completed processing ISIC_0026336.jpg
Completed processing ISIC_0030995.jpg
Completed processing ISIC_0030659.jpg
Completed processing ISIC_0031565.jpg
Completed processing ISIC_0031146.jpg
Completed processing ISIC_0024981.jpg
Completed processing ISIC_0033736.jpg
Completed pr

Completed processing ISIC_0029545.jpg
Completed processing ISIC_0024729.jpg
Completed processing ISIC_0025700.jpg
Completed processing ISIC_0032763.jpg
Completed processing ISIC_0034103.jpg
Completed processing ISIC_0026219.jpg
Completed processing ISIC_0025154.jpg
Completed processing ISIC_0030034.jpg
Completed processing ISIC_0030712.jpg
Completed processing ISIC_0028043.jpg
Completed processing ISIC_0034058.jpg
Completed processing ISIC_0032135.jpg
Completed processing ISIC_0024839.jpg
Completed processing ISIC_0030825.jpg
Completed processing ISIC_0026113.jpg
Completed processing ISIC_0033505.jpg
Completed processing ISIC_0030394.jpg
Completed processing ISIC_0026369.jpg
Completed processing ISIC_0029627.jpg
Completed processing ISIC_0029823.jpg
Completed processing ISIC_0028389.jpg
Completed processing ISIC_0030344.jpg
Completed processing ISIC_0026493.jpg
Completed processing ISIC_0025389.jpg
Completed processing ISIC_0034000.jpg
Completed processing ISIC_0031169.jpg
Completed pr

Completed processing ISIC_0025874.jpg
Completed processing ISIC_0030770.jpg
Completed processing ISIC_0028370.jpg
Completed processing ISIC_0028548.jpg
Completed processing ISIC_0028481.jpg
Completed processing ISIC_0032219.jpg
Completed processing ISIC_0024913.jpg
Completed processing ISIC_0024612.jpg
Completed processing ISIC_0026056.jpg
Completed processing ISIC_0026790.jpg
Completed processing ISIC_0029502.jpg
Completed processing ISIC_0030526.jpg
Completed processing ISIC_0032422.jpg
Completed processing ISIC_0032245.jpg
Completed processing ISIC_0030521.jpg
Completed processing ISIC_0033860.jpg
Completed processing ISIC_0024972.jpg
Completed processing ISIC_0033284.jpg
Completed processing ISIC_0029974.jpg
Completed processing ISIC_0029824.jpg
Completed processing ISIC_0028984.jpg
Completed processing ISIC_0032836.jpg
Completed processing ISIC_0026213.jpg
Completed processing ISIC_0026941.jpg
Completed processing ISIC_0028188.jpg
Completed processing ISIC_0025005.jpg
Completed pr

Completed processing ISIC_0033668.jpg
Completed processing ISIC_0033494.jpg
Completed processing ISIC_0027609.jpg
Completed processing ISIC_0032194.jpg
Completed processing ISIC_0024900.jpg
Completed processing ISIC_0031334.jpg
Completed processing ISIC_0029541.jpg
Completed processing ISIC_0025466.jpg
Completed processing ISIC_0027991.jpg
Completed processing ISIC_0027090.jpg
Completed processing ISIC_0029705.jpg
Completed processing ISIC_0034065.jpg
Completed processing ISIC_0027071.jpg
Completed processing ISIC_0026556.jpg
Completed processing ISIC_0029233.jpg
Completed processing ISIC_0024688.jpg
Completed processing ISIC_0026350.jpg
Completed processing ISIC_0028739.jpg
Completed processing ISIC_0032622.jpg
Completed processing ISIC_0032095.jpg
Completed processing ISIC_0031228.jpg
Completed processing ISIC_0025807.jpg
Completed processing ISIC_0033651.jpg
Completed processing ISIC_0030925.jpg
Completed processing ISIC_0033158.jpg
Completed processing ISIC_0032316.jpg
Completed pr

Completed processing ISIC_0033392.jpg
Completed processing ISIC_0029163.jpg
Completed processing ISIC_0032456.jpg
Completed processing ISIC_0032051.jpg
Completed processing ISIC_0024853.jpg
Completed processing ISIC_0026697.jpg
Completed processing ISIC_0025196.jpg
Completed processing ISIC_0033483.jpg
Completed processing ISIC_0026475.jpg
Completed processing ISIC_0029791.jpg
Completed processing ISIC_0027649.jpg
Completed processing ISIC_0032476.jpg
Completed processing ISIC_0031943.jpg
Completed processing ISIC_0032031.jpg
Completed processing ISIC_0029606.jpg
Completed processing ISIC_0025885.jpg
Completed processing ISIC_0028035.jpg
Completed processing ISIC_0028899.jpg
Completed processing ISIC_0026704.jpg
Completed processing ISIC_0033847.jpg
Completed processing ISIC_0028579.jpg
Completed processing ISIC_0025439.jpg
Completed processing ISIC_0033279.jpg
Completed processing ISIC_0026430.jpg
Completed processing ISIC_0033300.jpg
Completed processing ISIC_0029837.jpg
Completed pr

Completed processing ISIC_0033630.jpg
Completed processing ISIC_0027622.jpg
Completed processing ISIC_0027084.jpg
Completed processing ISIC_0032128.jpg
Completed processing ISIC_0028537.jpg
Completed processing ISIC_0029929.jpg
Completed processing ISIC_0027826.jpg
Completed processing ISIC_0030970.jpg
Completed processing ISIC_0024498.jpg
Completed processing ISIC_0025832.jpg
Completed processing ISIC_0027872.jpg
Completed processing ISIC_0030261.jpg
Completed processing ISIC_0025928.jpg
Completed processing ISIC_0026171.jpg
Completed processing ISIC_0027388.jpg
Completed processing ISIC_0033260.jpg
Completed processing ISIC_0024511.jpg
Completed processing ISIC_0031095.jpg
Completed processing ISIC_0028120.jpg
Completed processing ISIC_0031189.jpg
Completed processing ISIC_0025064.jpg
Completed processing ISIC_0025663.jpg
Completed processing ISIC_0027700.jpg
Completed processing ISIC_0024581.jpg
Completed processing ISIC_0025867.jpg
Completed processing ISIC_0025083.jpg
Completed pr

Completed processing ISIC_0030935.jpg
Completed processing ISIC_0026687.jpg
Completed processing ISIC_0030445.jpg
Completed processing ISIC_0029010.jpg
Completed processing ISIC_0034070.jpg
Completed processing ISIC_0025409.jpg
Completed processing ISIC_0024436.jpg
Completed processing ISIC_0031108.jpg
Completed processing ISIC_0033752.jpg
Completed processing ISIC_0024943.jpg
Completed processing ISIC_0026831.jpg
Completed processing ISIC_0028411.jpg
Completed processing ISIC_0028543.jpg
Completed processing ISIC_0033001.jpg
Completed processing ISIC_0032725.jpg
Completed processing ISIC_0026796.jpg
Completed processing ISIC_0026045.jpg
Completed processing ISIC_0025234.jpg
Completed processing ISIC_0025376.jpg
Completed processing ISIC_0031428.jpg
Completed processing ISIC_0029831.jpg
Completed processing ISIC_0027989.jpg
Completed processing ISIC_0031253.jpg
Completed processing ISIC_0027030.jpg
Completed processing ISIC_0029281.jpg
Completed processing ISIC_0030187.jpg
Completed pr

Completed processing ISIC_0025578.jpg
Completed processing ISIC_0033256.jpg
Completed processing ISIC_0024865.jpg
Completed processing ISIC_0026769.jpg
Completed processing ISIC_0032797.jpg
Completed processing ISIC_0029585.jpg
Completed processing ISIC_0026766.jpg
Completed processing ISIC_0027037.jpg
Completed processing ISIC_0031545.jpg
Completed processing ISIC_0028465.jpg
Completed processing ISIC_0033732.jpg
Completed processing ISIC_0029014.jpg
Completed processing ISIC_0027394.jpg
Completed processing ISIC_0032660.jpg
Completed processing ISIC_0032462.jpg
Completed processing ISIC_0026086.jpg
Completed processing ISIC_0031376.jpg
Completed processing ISIC_0029849.jpg
Completed processing ISIC_0027884.jpg
Completed processing ISIC_0033397.jpg
Completed processing ISIC_0032897.jpg
Completed processing ISIC_0031016.jpg
Completed processing ISIC_0028329.jpg
Completed processing ISIC_0024351.jpg
Completed processing ISIC_0027162.jpg
Completed processing ISIC_0029486.jpg
Completed pr

Completed processing ISIC_0032504.jpg
Completed processing ISIC_0031513.jpg
Completed processing ISIC_0027650.jpg
Completed processing ISIC_0025851.jpg
Completed processing ISIC_0025818.jpg
Completed processing ISIC_0024987.jpg
Completed processing ISIC_0033499.jpg
Completed processing ISIC_0029002.jpg
Completed processing ISIC_0030314.jpg
Completed processing ISIC_0028765.jpg
Completed processing ISIC_0032808.jpg
Completed processing ISIC_0026409.jpg
Completed processing ISIC_0028814.jpg
Completed processing ISIC_0031496.jpg
Completed processing ISIC_0032424.jpg
Completed processing ISIC_0029893.jpg
Completed processing ISIC_0029596.jpg
Completed processing ISIC_0024832.jpg
Completed processing ISIC_0030759.jpg
Completed processing ISIC_0026074.jpg
Completed processing ISIC_0032179.jpg
Completed processing ISIC_0027678.jpg
Completed processing ISIC_0025465.jpg
Completed processing ISIC_0031130.jpg
Completed processing ISIC_0024571.jpg
Completed processing ISIC_0028760.jpg
Completed pr

Completed processing ISIC_0031554.jpg
Completed processing ISIC_0032876.jpg
Completed processing ISIC_0033020.jpg
Completed processing ISIC_0029698.jpg
Completed processing ISIC_0028229.jpg
Completed processing ISIC_0032170.jpg
Completed processing ISIC_0025188.jpg
Completed processing ISIC_0030512.jpg
Completed processing ISIC_0033465.jpg
Completed processing ISIC_0032987.jpg
Completed processing ISIC_0033368.jpg
Completed processing ISIC_0033068.jpg
Completed processing ISIC_0025838.jpg
Completed processing ISIC_0027064.jpg
Completed processing ISIC_0027663.jpg
Completed processing ISIC_0026275.jpg
Completed processing ISIC_0026378.jpg
Completed processing ISIC_0031401.jpg
Completed processing ISIC_0024623.jpg
Completed processing ISIC_0024772.jpg
Completed processing ISIC_0032756.jpg
Completed processing ISIC_0030240.jpg
Completed processing ISIC_0031026.jpg
Completed processing ISIC_0028050.jpg
Completed processing ISIC_0025265.jpg
Completed processing ISIC_0033123.jpg
Completed pr

Completed processing ISIC_0027527.jpg
Completed processing ISIC_0027606.jpg
Completed processing ISIC_0027344.jpg
Completed processing ISIC_0028688.jpg
Completed processing ISIC_0024652.jpg
Completed processing ISIC_0032511.jpg
Completed processing ISIC_0029279.jpg
Completed processing ISIC_0032040.jpg
Completed processing ISIC_0025303.jpg
Completed processing ISIC_0029288.jpg
Completed processing ISIC_0029947.jpg
Completed processing ISIC_0026927.jpg
Completed processing ISIC_0024665.jpg
Completed processing ISIC_0025277.jpg
Completed processing ISIC_0033088.jpg
Completed processing ISIC_0031350.jpg
Completed processing ISIC_0024412.jpg
Completed processing ISIC_0034068.jpg
Completed processing ISIC_0031007.jpg
Completed processing ISIC_0026720.jpg
Completed processing ISIC_0028231.jpg
Completed processing ISIC_0024931.jpg
Completed processing ISIC_0025209.jpg
Completed processing ISIC_0027057.jpg
Completed processing ISIC_0028173.jpg
Completed processing ISIC_0033559.jpg
Completed pr

Completed processing ISIC_0028858.jpg
Completed processing ISIC_0032552.jpg
Completed processing ISIC_0032420.jpg
Completed processing ISIC_0030706.jpg
Completed processing ISIC_0032356.jpg
Completed processing ISIC_0032450.jpg
Completed processing ISIC_0034085.jpg
Completed processing ISIC_0030758.jpg
Completed processing ISIC_0024747.jpg
Completed processing ISIC_0028807.jpg
Completed processing ISIC_0026282.jpg
Completed processing ISIC_0029570.jpg
Completed processing ISIC_0028445.jpg
Completed processing ISIC_0024829.jpg
Completed processing ISIC_0033553.jpg
Completed processing ISIC_0034239.jpg
Completed processing ISIC_0031339.jpg
Completed processing ISIC_0027875.jpg
Completed processing ISIC_0027326.jpg
Completed processing ISIC_0025285.jpg
Completed processing ISIC_0030511.jpg
Completed processing ISIC_0024337.jpg
Completed processing ISIC_0033872.jpg
Completed processing ISIC_0034011.jpg
Completed processing ISIC_0024707.jpg
Completed processing ISIC_0031050.jpg
Completed pr

Completed processing ISIC_0032887.jpg
Completed processing ISIC_0033868.jpg
Completed processing ISIC_0033687.jpg
Completed processing ISIC_0028969.jpg
Completed processing ISIC_0026930.jpg
Completed processing ISIC_0031408.jpg
Completed processing ISIC_0031243.jpg
Completed processing ISIC_0032709.jpg
Completed processing ISIC_0031459.jpg
Completed processing ISIC_0033151.jpg
Completed processing ISIC_0030722.jpg
Completed processing ISIC_0032768.jpg
Completed processing ISIC_0031570.jpg
Completed processing ISIC_0025512.jpg
Completed processing ISIC_0028977.jpg
Completed processing ISIC_0031449.jpg
Completed processing ISIC_0034028.jpg
Completed processing ISIC_0025531.jpg
Completed processing ISIC_0031986.jpg
Completed processing ISIC_0033646.jpg
Completed processing ISIC_0034140.jpg
Completed processing ISIC_0027673.jpg
Completed processing ISIC_0027693.jpg
Completed processing ISIC_0025425.jpg
Completed processing ISIC_0024800.jpg
Completed processing ISIC_0031191.jpg
Completed pr

Completed processing ISIC_0032246.jpg
Completed processing ISIC_0027171.jpg
Completed processing ISIC_0033488.jpg
Completed processing ISIC_0028017.jpg
Completed processing ISIC_0033458.jpg
Completed processing ISIC_0026316.jpg
Completed processing ISIC_0027888.jpg
Completed processing ISIC_0032019.jpg
Completed processing ISIC_0027560.jpg
Completed processing ISIC_0029372.jpg
Completed processing ISIC_0030689.jpg
Completed processing ISIC_0024420.jpg
Completed processing ISIC_0026626.jpg
Completed processing ISIC_0033204.jpg
Completed processing ISIC_0031378.jpg
Completed processing ISIC_0025955.jpg
Completed processing ISIC_0033114.jpg
Completed processing ISIC_0028201.jpg
Completed processing ISIC_0028820.jpg
Completed processing ISIC_0029129.jpg
Completed processing ISIC_0031119.jpg
Completed processing ISIC_0033576.jpg
Completed processing ISIC_0033344.jpg
Completed processing ISIC_0031601.jpg
Completed processing ISIC_0026968.jpg
Completed processing ISIC_0029843.jpg
Completed pr

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


Completed processing ISIC_0029147.jpg
Completed processing ISIC_0024446.jpg
Completed processing ISIC_0032190.jpg
Completed processing ISIC_0025744.jpg
Completed processing ISIC_0032508.jpg
Completed processing ISIC_0025366.jpg
Completed processing ISIC_0027060.jpg
Completed processing ISIC_0025596.jpg
Completed processing ISIC_0034159.jpg
Completed processing ISIC_0024726.jpg
Completed processing ISIC_0029034.jpg
Completed processing ISIC_0024970.jpg
Completed processing ISIC_0024696.jpg
Completed processing ISIC_0024496.jpg
Completed processing ISIC_0031967.jpg
Completed processing ISIC_0031088.jpg
Completed processing ISIC_0026393.jpg
Completed processing ISIC_0024833.jpg
Completed processing ISIC_0024345.jpg
Completed processing ISIC_0029081.jpg
Completed processing ISIC_0030360.jpg
Completed processing ISIC_0026388.jpg
Completed processing ISIC_0034104.jpg
Completed processing ISIC_0027745.jpg
Completed processing ISIC_0033241.jpg
Completed processing ISIC_0029040.jpg
Completed pr

Completed processing ISIC_0033666.jpg
Completed processing ISIC_0033884.jpg
Completed processing ISIC_0030446.jpg
Completed processing ISIC_0027504.jpg
Completed processing ISIC_0027008.jpg
Completed processing ISIC_0028146.jpg
Completed processing ISIC_0027956.jpg
Completed processing ISIC_0031648.jpg
Completed processing ISIC_0028994.jpg
Completed processing ISIC_0029021.jpg
Completed processing ISIC_0027172.jpg
Completed processing ISIC_0026738.jpg
Completed processing ISIC_0033606.jpg
Completed processing ISIC_0033575.jpg
Completed processing ISIC_0025617.jpg
Completed processing ISIC_0024431.jpg
Completed processing ISIC_0031940.jpg
Completed processing ISIC_0030623.jpg
Completed processing ISIC_0030002.jpg
Completed processing ISIC_0025368.jpg
Completed processing ISIC_0032906.jpg
Completed processing ISIC_0031008.jpg
Completed processing ISIC_0032653.jpg
Completed processing ISIC_0033565.jpg
Completed processing ISIC_0033156.jpg
Completed processing ISIC_0026774.jpg
Completed pr

In [26]:
target_names = y_test.value_counts()

y_train = y_train.values
y_test = y_test.values

In [27]:
def one_hot_encode_object_array(arr):
    # One hot encode a numpy array of objects (e.g. strings)'''
    uniques, ids = np.unique(arr, return_inverse=True)
    return np_utils.to_categorical(ids, len(uniques))

y_train = one_hot_encode_object_array(y_train)
y_test = one_hot_encode_object_array(y_test)

In [28]:
def cnn(size, n_layers):
    # INPUTS
    # size     - size of the input images
    # n_layers - number of layers
    # OUTPUTS
    # model    - compiled CNN

    # Define hyperparamters
    MIN_NEURONS = 20
    MAX_NEURONS = 120 # change this, make it 256, 512
    KERNEL = (3, 3)

    # Determine the # of neurons in each convolutional layer
    steps = np.floor(MAX_NEURONS / (n_layers + 1))
    nuerons = np.arange(MIN_NEURONS, MAX_NEURONS, steps)
    nuerons = nuerons.astype(np.int32)

    # Define a model
    model = Sequential()

    # Add convolutional layers
    for i in range(0, n_layers):
        if i == 0:
            shape = (size[0], size[1], size[2])
            model.add(Conv2D(nuerons[i], KERNEL, input_shape=shape))
        else:
            model.add(Conv2D(nuerons[i], KERNEL))

        model.add(Activation('relu'))
        model.add(Dropout(0.25))

    # Add max pooling layer
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.50))
    model.add(Flatten())
    model.add(Dense(MAX_NEURONS))
    model.add(Activation('relu'))
    model.add(Dense(MAX_NEURONS))
    model.add(Activation('relu'))
    
    # Add output layer
    model.add(Dense(6))
    model.add(Activation('softmax'))
    
    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Print a summary of the model
    model.summary()

    return model

In [29]:
# Instantiate the model
N_LAYERS = 2 # change value from 4 layers
image_size = np.asarray([x_train.shape[1], x_train.shape[2], x_train.shape[3]])
model = cnn(size=image_size, n_layers=N_LAYERS)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 298, 223, 20)      560       
_________________________________________________________________
activation_1 (Activation)    (None, 298, 223, 20)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 298, 223, 20)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 296, 221, 60)      10860     
_________________________________________________________________
activation_2 (Activation)    (None, 296, 221, 60)      0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 296, 221, 60)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 148, 110, 60)      0         
__________

In [30]:
# Training hyperparamters
EPOCHS = 16
BATCH_SIZE = 16

In [31]:
# Evaluate the Train Data
# model.evaluate(x_train, y_train, batch_size=BATCH_SIZE, verbose=1)

In [32]:
# Train the model
model_history = model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1)

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [33]:
# Make a prediction on the test set
test_predictions = model.predict(x_test)
#test_predictions = np.round(test_predictions)

In [34]:
# Check the accuracy
accuracy = accuracy_score(y_test, test_predictions)
print("Accuracy: " + str(accuracy))

Accuracy: 0.3867069486404834


In [35]:
# Check the Previous Value Counts of y_test and compare with the classification report below
target_names

bkl      128
mel      104
bcc       41
akiec     32
vasc      14
df        12
Name: dx, dtype: int64

In [36]:
# Check the Classification Report
print(classification_report(y_test, test_predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        32
           1       0.00      0.00      0.00        41
           2       0.39      1.00      0.56       128
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00       104
           5       0.00      0.00      0.00        14

   micro avg       0.39      0.39      0.39       331
   macro avg       0.06      0.17      0.09       331
weighted avg       0.15      0.39      0.22       331
 samples avg       0.39      0.39      0.39       331



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


## Analysis...
- There is a huge data imbalance which causes the rest of the data to be classified wrongly as nv
- Will re-run this by combining all dx (not nv) as others and compare again
- Update the CNN function by adding dropout layers

In [37]:
# Save the Model
model_file = open('model-eda-preliminary-cnn4.sav','wb')
pickle.dump(model, model_file)
model_file.close()