# Dermatologist-AI Project: Single Network Approach
Workbook for a single network approach that classifies images into melanomas, nevus, or SBK

Import image files

In [1]:
import numpy as np
from sklearn.datasets import load_files       
from keras.utils import np_utils
from glob import glob
import pickle

#define dataset import function
def load_dataset(path, shuffle):
    data = load_files(path, shuffle=shuffle)
    file_paths = np.array(data['filenames'])
    one_hot_labels = np_utils.to_categorical(np.array(data['target']),3)
    return file_paths, one_hot_labels

#import datasets
train_files, train_labels = load_dataset('../data/train', True)
valid_files, valid_labels = load_dataset('../data/valid', True)
test_files, test_labels = load_dataset('../data/test', False)

# load list of skin condition names
skin_names = [item[14:-1] for item in sorted(glob("../data/train/*/"))]

# print statistics about the dataset
print('There are %d total categories.' % len(skin_names))
print('There are %s total images.\n' % len(np.hstack([train_files, valid_files, test_files])))
print('There are %d training images.' % len(train_files))
print('There are %d validation images.' % len(valid_files))
print('There are %d test images.'% len(test_files))

Using TensorFlow backend.


There are 3 total categories.
There are 2750 total images.

There are 2000 training images.
There are 150 validation images.
There are 600 test images.


Convert images into 4D tensors

In [2]:
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
from tqdm import tqdm
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

img_width = 224
img_height = 224

#define image processing functions (from udacity dog project)
def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(img_width, img_height))
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    x = preprocess_input(x) # convert format to VGG19 compatible
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)   

In [None]:
# Pre-process the data for Keras
# Skip if we already have pickled files
train_tensors = paths_to_tensor(train_files).astype('float32')
valid_tensors = paths_to_tensor(valid_files).astype('float32')
test_tensors = paths_to_tensor(test_files).astype('float32')

In [4]:
# Pickle tensors to pick up where I left off in future
# Skip if we already have pickled files

with open('train_tensors.pickle', 'wb') as handle:
    pickle.dump(train_tensors, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('valid_tensors.pickle', 'wb') as handle:
    pickle.dump(valid_tensors, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('test_tensors.pickle', 'wb') as handle:
    pickle.dump(test_tensors, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
# Un-pickle tensors
with open('train_tensors.pickle', 'rb') as handle:
    train_tensors = pickle.load(handle)
    
with open('valid_tensors.pickle', 'rb') as handle:
    valid_tensors = pickle.load(handle)

with open('test_tensors.pickle', 'rb') as handle:
    test_tensors = pickle.load(handle)

Create a new network based on first 2 blocks of VGGNet, plus 1 trainable convolutional block, followed by fully connected layer

In [4]:
from keras import applications
from keras.models import Sequential, Model 
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense

network = Sequential()

network.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', name='block1_conv1', 
                        input_shape=(224, 224, 3)))
network.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', name='block1_conv2'))
network.add(MaxPooling2D(pool_size=2, name='block1_pool'))
network.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', name='block2_conv1'))
network.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', name='block2_conv2'))
network.add(MaxPooling2D(pool_size=2, name='block2_pool'))
network.add(Conv2D(filters=128, kernel_size=3, padding='same', activation='relu', name='block3_conv1'))
network.add(Conv2D(filters=128, kernel_size=3, padding='same', activation='relu', name='block3_conv2'))
network.add(MaxPooling2D(pool_size=2, name='block3_pool'))
network.add(Conv2D(filters=128, kernel_size=3, padding='same', activation='relu', name='block4_conv4'))
network.add(MaxPooling2D(pool_size=2, name='block4_pool'))
network.add(Flatten())
network.add(Dense(256, activation='relu'))
network.add(Dropout(0.1))
network.add(Dense(256, activation='relu'))
network.add(Dropout(0.1))
network.add(Dense(3, activation='softmax'))

network.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 224, 224, 32)      896       
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 32)      9248      
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 32)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 64)      18496     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 64)      36928     
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 64)        0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 128)       73856     
__________

In [5]:
from keras import optimizers
opt = optimizers.rmsprop(lr=0.001, decay=1e-5)
network.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [6]:
layer = network.layers[0]
weights = layer.get_weights()
print(weights[0][0][0][0])

layer = network.layers[1]
weights = layer.get_weights()
print(weights[0][0][1][1])

[-0.08407555  0.0898973  -0.11789246  0.07885088  0.01906516 -0.01534862
  0.06545247  0.08131252 -0.12821057 -0.0716939  -0.11466567  0.1022532
 -0.11136031 -0.03130054 -0.00631005  0.05485012 -0.0221526  -0.11823421
 -0.01435578 -0.03684946 -0.04736159 -0.09313647 -0.05099712  0.00373368
  0.02384506  0.0475778  -0.03917969 -0.09484509 -0.08085485  0.03216121
  0.00047222  0.09072375]
[ 0.08358634  0.03556395  0.05611759 -0.05931219  0.01745763  0.03733291
  0.08405814  0.0496532  -0.0580952  -0.07578161 -0.03508082 -0.04225526
 -0.02258917  0.04271607 -0.08879847 -0.01265575 -0.06838452  0.01660739
 -0.05623283 -0.10075863 -0.03741194 -0.04985768  0.09860714 -0.07436102
 -0.08941995 -0.02911378 -0.07874042 -0.00496401 -0.07723515  0.08905144
  0.09574768 -0.04981188]


In [11]:
from keras.callbacks import ModelCheckpoint, Callback
epochs = 1

checkpointer = ModelCheckpoint(filepath='saved_models/best.weights.hdf5', 
                               verbose=1, save_best_only=True)

network.fit(valid_tensors[:19], valid_labels[:19],
            validation_data=(valid_tensors, valid_labels),
            epochs=epochs,
            batch_size=20,
            callbacks=[checkpointer],
            verbose=1)
                    

Train on 19 samples, validate on 150 samples
Epoch 1/1
Epoch 00000: val_loss improved from inf to 7.73669, saving model to saved_models/best.weights.hdf5


<keras.callbacks.History at 0x1d580384ef0>

In [8]:
layer = network.layers[0]
weights = layer.get_weights()
print(weights[0][0][0][0])

layer = network.layers[1]
weights = layer.get_weights()
print(weights[0][0][1][1])

[-0.08091331  0.08673506 -0.11473022  0.07568864  0.01590291 -0.01218638
  0.06861471  0.08447476 -0.13137282 -0.07485614 -0.11150343  0.09909096
 -0.11452255 -0.02813831 -0.00947229  0.05801236 -0.01899038 -0.12139645
 -0.01751802 -0.0400117  -0.05052383 -0.09629871 -0.05415936  0.00689592
  0.02068282  0.04441556 -0.03601744 -0.09168284 -0.08401709  0.03532345
  0.00363446  0.093886  ]
[ 0.08674857  0.03872617  0.05295535 -0.05614994  0.01429538  0.03417067
  0.0808959   0.05281544 -0.06125744 -0.07261938 -0.03191858 -0.03909305
 -0.02575133  0.03955384 -0.08563669 -0.00949351 -0.06522228  0.01976943
 -0.05307069 -0.0975964  -0.0342497  -0.04669544  0.10176938 -0.0711988
 -0.0862577  -0.03227602 -0.08190266 -0.00180177 -0.07407291  0.0858893
  0.09258544 -0.04664964]


In [10]:
layer = network.layers[0]
weights = layer.get_weights()
print(weights[0][0][0][0])

layer = network.layers[1]
weights = layer.get_weights()
print(weights[0][0][1][1])

[-0.08091331  0.08673506 -0.11473022  0.07568864  0.01590291 -0.01218638
  0.06861471  0.08447476 -0.13137282 -0.07485614 -0.11150343  0.09909096
 -0.11452255 -0.02813831 -0.00947229  0.05801236 -0.01899038 -0.12139645
 -0.01751802 -0.0400117  -0.05052383 -0.09629871 -0.05415936  0.00689592
  0.02068282  0.04441556 -0.03601744 -0.09168284 -0.08401709  0.03532345
  0.00363446  0.093886  ]
[ 0.08674857  0.03872617  0.05295535 -0.05614994  0.01429538  0.03417067
  0.0808959   0.05281544 -0.06125744 -0.07261938 -0.03191858 -0.03909305
 -0.02575133  0.03955384 -0.08563669 -0.00949351 -0.06522228  0.01976943
 -0.05307069 -0.0975964  -0.0342497  -0.04669544  0.10176938 -0.0711988
 -0.0862577  -0.03227602 -0.08190266 -0.00180177 -0.07407291  0.0858893
  0.09258544 -0.04664964]


In [12]:
layer = network.layers[0]
weights = layer.get_weights()
print(weights[0][0][0][0])

layer = network.layers[1]
weights = layer.get_weights()
print(weights[0][0][1][1])

[-0.08091331  0.08673506 -0.11473022  0.07568864  0.01590291 -0.01218638
  0.06861471  0.08447476 -0.13137282 -0.07485614 -0.11150343  0.09909096
 -0.11452255 -0.02813831 -0.00947229  0.05801236 -0.01899038 -0.12139645
 -0.01751802 -0.0400117  -0.05052383 -0.09629871 -0.05415936  0.00689592
  0.02068282  0.04441556 -0.03601744 -0.09168284 -0.08401709  0.03532345
  0.00363446  0.093886  ]
[ 0.08674857  0.03872617  0.05295535 -0.05614994  0.01429538  0.03417067
  0.0808959   0.05281544 -0.06125744 -0.07261938 -0.03191858 -0.03909305
 -0.02575133  0.03955384 -0.08563669 -0.00949351 -0.06522228  0.01976943
 -0.05307069 -0.0975964  -0.0342497  -0.04669544  0.10176938 -0.0711988
 -0.0862577  -0.03227602 -0.08190266 -0.00180177 -0.07407291  0.0858893
  0.09258544 -0.04664964]


## Create output excel

In [9]:
# Load the model weights with the best validation loss.

network.load_weights('saved_models/best.weights.hdf5')

In [10]:
import pandas as pd

y_pred = pd.DataFrame(columns=["task_1", "task_2"])

for ii in tqdm(range(len(test_files))):
    path = test_files[ii]
    prediction = np.argmax(network.predict(np.expand_dims(test_tensors[ii], axis=0)))
    if prediction == 0:
        y_pred.loc[path] = [1, 0]
    if prediction == 2:
        y_pred.loc[path] = [0, 1]
    else:
        y_pred.loc[path] = [0, 0]

y_pred.to_csv("predictions.csv")

100%|███████████████████████████████████████████████████████████████████████████████| 600/600 [00:04<00:00, 143.42it/s]
