Load Dataset

In [1]:
import timeit
start_time = timeit.default_timer()

def print_elapsed_time():
    start_time = timeit.default_timer()
    
def start_timer():
    elapsed = (timeit.default_timer() - start_time)/60
    print(elapsed)

In [2]:
from sklearn.datasets import load_files   
from keras.utils import np_utils
import numpy as np
from glob import glob




def load_dataset(data_path, shuffle=None):
    kwargs = {}
    if shuffle != None:
        kwargs['shuffle'] = shuffle
    data = load_files(data_path, **kwargs)
    img_files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), 3)
    return img_files, targets



start_timer()
train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test', shuffle=False)

# load lables
label_name = [item[11:-1] for item in sorted(glob("data/train/*/"))]

print_elapsed_time()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


0.3811088810837933


In [3]:
start_timer()

print('train_files size: {}'.format(len(train_files)))
print('train_files shape: {}'.format(train_files.shape))
print('target shape: {}'.format(train_targets.shape))
print(label_name)

start_time = timeit.default_timer()
elapsed = (timeit.default_timer() - start_time)/60
print(elapsed)


print_elapsed_time()


5.190188388953173
train_files size: 2000
train_files shape: (2000,)
target shape: (2000, 3)
['melanoma', 'nevus', 'seborrheic_keratosis']
7.765273693394192e-07


In [4]:
start_timer()
from keras.preprocessing import image
from keras.applications.inception_resnet_v2 import preprocess_input
from tqdm import tqdm


def path_to_tensor(img_path):
    img = image.load_img(img_path, target_size=(384, 256))
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)

def paths_to_tensor(image_paths):
    return np.vstack([path_to_tensor(path) for path in image_paths])

print_elapsed_time()


0.0003094760232234724


Transfer Learning and putting images into tensor.

In [5]:
start_timer()

train_tensors = paths_to_tensor(tqdm(train_files))
valid_tensors = paths_to_tensor(tqdm(valid_files))
test_tensors = paths_to_tensor(tqdm(test_files))

print(train_tensors.shape)

print_elapsed_time()


0.0024611650982516646


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [06:17<00:00,  5.30it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 150/150 [00:52<00:00,  2.84it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [05:25<00:00,  1.84it/s]


(2000, 384, 256, 3)


Flip Images

In [6]:
start_timer()


import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator


apply_train_image_transform = False

if apply_train_image_transform:
    # Caution: Doesn't guarantee prevention of duplication.
    datagen_train = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True)
    
    datagen_train.fit(train_tensors)
    shape = (train_tensors.shape[0] * 2,) + train_tensors.shape[1:]
    generated = np.ndarray(shape=shape)
    for i, image in tqdm(enumerate(train_tensors)):
        generated[i] = datagen_train.random_transform(image)
    
    train_tensors = np.concatenate((train_tensors, generated))
    train_targets = train_targets.repeat(2, axis=0)
    
print_elapsed_time()


12.656683204139657


Transfer learning using Inception Resnet V2¶

In [7]:
start_timer()

train_imgs_preprocess = preprocess_input(train_tensors)
valid_imgs_preprocess = preprocess_input(valid_tensors)
test_imgs_preprocess = preprocess_input(test_tensors)
del train_tensors, valid_tensors, test_tensors

print_elapsed_time()


12.681337163063922


In [8]:
start_timer()

from keras.applications.inception_resnet_v2 import InceptionResNetV2
transfer_model = InceptionResNetV2(include_top=False)

train_data = transfer_model.predict(train_imgs_preprocess)
valid_data = transfer_model.predict(valid_imgs_preprocess)
test_data = transfer_model.predict(test_imgs_preprocess)

del train_imgs_preprocess, valid_imgs_preprocess, test_imgs_preprocess
print(train_data.shape)

print_elapsed_time()


12.714697982052833
(2000, 10, 6, 1536)


In [9]:
start_timer()

from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

my_model = Sequential()

my_model.add(GlobalAveragePooling2D(input_shape=train_data.shape[1:]))
my_model.add(Dropout(0.2))
my_model.add(Dense(1024, activation='relu'))
my_model.add(Dropout(0.2))
my_model.add(Dense(3, activation='softmax'))
my_model.summary()

print_elapsed_time()


67.14202191513837
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_1 ( (None, 1536)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              1573888   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 3075      
Total params: 1,576,963
Trainable params: 1,576,963
Non-trainable params: 0
_________________________________________________________________


In [10]:
start_timer()

my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print_elapsed_time()


67.14523349580675


In [11]:
start_timer()

from keras.callbacks import ModelCheckpoint


checkpoint_filepath = 'saved_models/weights.best.my.hdf5'

my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_data, train_targets, 
          validation_data=(valid_data, valid_targets),
          epochs=70, batch_size=200, callbacks=[my_checkpointer], verbose=1)

print_elapsed_time()


67.14736855354285
Train on 2000 samples, validate on 150 samples
Epoch 1/70

Epoch 00001: val_loss improved from inf to 5.62745, saving model to saved_models/weights.best.my.hdf5
Epoch 2/70

Epoch 00002: val_loss improved from 5.62745 to 4.82141, saving model to saved_models/weights.best.my.hdf5
Epoch 3/70

Epoch 00003: val_loss improved from 4.82141 to 1.00587, saving model to saved_models/weights.best.my.hdf5
Epoch 4/70

Epoch 00004: val_loss improved from 1.00587 to 0.95643, saving model to saved_models/weights.best.my.hdf5
Epoch 5/70

Epoch 00005: val_loss did not improve from 0.95643
Epoch 6/70

Epoch 00006: val_loss improved from 0.95643 to 0.90168, saving model to saved_models/weights.best.my.hdf5
Epoch 7/70

Epoch 00007: val_loss improved from 0.90168 to 0.87918, saving model to saved_models/weights.best.my.hdf5
Epoch 8/70

Epoch 00008: val_loss improved from 0.87918 to 0.85217, saving model to saved_models/weights.best.my.hdf5
Epoch 9/70

Epoch 00009: val_loss improved from 0.


Epoch 00015: val_loss improved from 0.78672 to 0.75301, saving model to saved_models/weights.best.my.hdf5
Epoch 16/70

Epoch 00016: val_loss improved from 0.75301 to 0.74670, saving model to saved_models/weights.best.my.hdf5
Epoch 17/70

Epoch 00017: val_loss improved from 0.74670 to 0.73232, saving model to saved_models/weights.best.my.hdf5
Epoch 18/70

Epoch 00018: val_loss improved from 0.73232 to 0.72821, saving model to saved_models/weights.best.my.hdf5
Epoch 19/70

Epoch 00019: val_loss did not improve from 0.72821
Epoch 20/70

Epoch 00020: val_loss did not improve from 0.72821
Epoch 21/70

Epoch 00021: val_loss did not improve from 0.72821
Epoch 22/70

Epoch 00022: val_loss did not improve from 0.72821
Epoch 23/70

Epoch 00023: val_loss improved from 0.72821 to 0.72723, saving model to saved_models/weights.best.my.hdf5
Epoch 24/70

Epoch 00024: val_loss did not improve from 0.72723
Epoch 25/70

Epoch 00025: val_loss did not improve from 0.72723
Epoch 26/70

Epoch 00026: val_los


Epoch 00029: val_loss did not improve from 0.68532
Epoch 30/70

Epoch 00030: val_loss did not improve from 0.68532
Epoch 31/70

Epoch 00031: val_loss did not improve from 0.68532
Epoch 32/70

Epoch 00032: val_loss did not improve from 0.68532
Epoch 33/70

Epoch 00033: val_loss did not improve from 0.68532
Epoch 34/70

Epoch 00034: val_loss did not improve from 0.68532
Epoch 35/70

Epoch 00035: val_loss did not improve from 0.68532
Epoch 36/70

Epoch 00036: val_loss did not improve from 0.68532
Epoch 37/70

Epoch 00037: val_loss did not improve from 0.68532
Epoch 38/70

Epoch 00038: val_loss did not improve from 0.68532
Epoch 39/70

Epoch 00039: val_loss did not improve from 0.68532
Epoch 40/70

Epoch 00040: val_loss did not improve from 0.68532
Epoch 41/70

Epoch 00041: val_loss did not improve from 0.68532
Epoch 42/70

Epoch 00042: val_loss improved from 0.68532 to 0.67479, saving model to saved_models/weights.best.my.hdf5
Epoch 43/70

Epoch 00043: val_loss did not improve from 0.674


Epoch 00045: val_loss did not improve from 0.67479
Epoch 46/70

Epoch 00046: val_loss did not improve from 0.67479
Epoch 47/70

Epoch 00047: val_loss did not improve from 0.67479
Epoch 48/70

Epoch 00048: val_loss did not improve from 0.67479
Epoch 49/70

Epoch 00049: val_loss did not improve from 0.67479
Epoch 50/70

Epoch 00050: val_loss did not improve from 0.67479
Epoch 51/70

Epoch 00051: val_loss did not improve from 0.67479
Epoch 52/70

Epoch 00052: val_loss did not improve from 0.67479
Epoch 53/70

Epoch 00053: val_loss did not improve from 0.67479
Epoch 54/70

Epoch 00054: val_loss did not improve from 0.67479
Epoch 55/70

Epoch 00055: val_loss did not improve from 0.67479
Epoch 56/70

Epoch 00056: val_loss did not improve from 0.67479
Epoch 57/70

Epoch 00057: val_loss did not improve from 0.67479
Epoch 58/70

Epoch 00058: val_loss did not improve from 0.67479
Epoch 59/70

Epoch 00059: val_loss did not improve from 0.67479
Epoch 60/70



Epoch 00060: val_loss did not improve from 0.67479
Epoch 61/70

Epoch 00061: val_loss did not improve from 0.67479
Epoch 62/70

Epoch 00062: val_loss did not improve from 0.67479
Epoch 63/70

Epoch 00063: val_loss improved from 0.67479 to 0.66946, saving model to saved_models/weights.best.my.hdf5
Epoch 64/70

Epoch 00064: val_loss did not improve from 0.66946
Epoch 65/70

Epoch 00065: val_loss did not improve from 0.66946
Epoch 66/70

Epoch 00066: val_loss did not improve from 0.66946
Epoch 67/70

Epoch 00067: val_loss did not improve from 0.66946
Epoch 68/70

Epoch 00068: val_loss did not improve from 0.66946
Epoch 69/70

Epoch 00069: val_loss did not improve from 0.66946
Epoch 70/70

Epoch 00070: val_loss did not improve from 0.66946


In [12]:
start_timer()

my_model.load_weights(checkpoint_filepath)

print_elapsed_time()


68.97895079205792


Evaluavate 

In [13]:
start_timer()

import csv


my_predictions = [my_model.predict(np.expand_dims(feature, axis=0)) for feature in test_data]

# test_accuracy = 100 * np.sum(np.array(my_predictions)==np.argmax(test_targets, axis=1)) / len(my_predictions)
# print('Test accuracy: %.4f%%' % test_accuracy)

with open('my_transfer.csv', 'w', newline='') as csvfile:
    result_writger = csv.writer(csvfile)
    result_writger.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        result_writger.writerow([test_filepath, test_prediction[0][0], test_prediction[0][2]])
        
print_elapsed_time()


68.99812342347877


Category 1 Score: 0.526
Category 2 Score: 0.606
Category 3 Score: 0.566

![ROC Curves](images/figure_1.png)

Without transfer learning, loading images into tensor

In [14]:
start_timer()
train_tensors = paths_to_tensor(tqdm(train_files))
train_tensors = train_tensors.astype('float32') / 255

valid_tensors = paths_to_tensor(tqdm(valid_files))
valid_tensors = valid_tensors.astype('float32') / 255

test_tensors = paths_to_tensor(tqdm(test_files))
test_tensors = test_tensors.astype('float32') / 255

print(train_tensors.shape)

print_elapsed_time()

69.04965888261978


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [06:33<00:00,  5.08it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 150/150 [00:54<00:00,  2.74it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [05:33<00:00,  1.80it/s]


(2000, 384, 256, 3)


In [15]:
start_timer()
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

my_model = Sequential()

my_model.add(Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', 
                        input_shape=train_tensors.shape[1:]))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=1024, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.1))

my_model.add(Conv2D(filters=2048, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.1))


my_model.add(GlobalAveragePooling2D())

my_model.add(Dense(3, activation='softmax'))

my_model.summary()

print_elapsed_time()

82.24814145427362
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_204 (Conv2D)          (None, 384, 256, 16)      448       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 192, 128, 16)      0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 192, 128, 16)      0         
_________________________________________________________________
conv2d_205 (Conv2D)          (None, 192, 128, 64)      9280      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 96, 64, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 96, 64, 64)        0         
_________________________________________________________________
conv2d_206 (Conv2D)          (None, 96, 64, 256)       147

In [16]:
start_timer()
my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print_elapsed_time()

82.25484451532746


In [17]:
start_timer()


from keras.callbacks import ModelCheckpoint
import os


checkpoint_filepath = 'saved_models/weights.best.my.hdf5'

my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_tensors, train_targets, 
          validation_data=(valid_tensors, valid_targets),
          epochs=40, batch_size=100, callbacks=[my_checkpointer], verbose=1)


print_elapsed_time()

82.25640431134822
Train on 2000 samples, validate on 150 samples
Epoch 1/40

Epoch 00001: val_loss improved from inf to 7.73669, saving model to saved_models/weights.best.my.hdf5
Epoch 2/40

Epoch 00002: val_loss did not improve from 7.73669
Epoch 3/40

Epoch 00003: val_loss did not improve from 7.73669
Epoch 4/40

Epoch 00004: val_loss did not improve from 7.73669
Epoch 5/40

Epoch 00005: val_loss did not improve from 7.73669
Epoch 6/40

Epoch 00006: val_loss did not improve from 7.73669
Epoch 7/40

Epoch 00007: val_loss did not improve from 7.73669
Epoch 8/40

Epoch 00008: val_loss did not improve from 7.73669
Epoch 9/40



Epoch 00009: val_loss did not improve from 7.73669
Epoch 10/40

Epoch 00010: val_loss did not improve from 7.73669
Epoch 11/40

Epoch 00011: val_loss did not improve from 7.73669
Epoch 12/40

Epoch 00012: val_loss did not improve from 7.73669
Epoch 13/40

Epoch 00013: val_loss did not improve from 7.73669
Epoch 14/40

Epoch 00014: val_loss did not improve from 7.73669
Epoch 15/40

Epoch 00015: val_loss did not improve from 7.73669
Epoch 16/40

Epoch 00016: val_loss did not improve from 7.73669
Epoch 17/40



Epoch 00017: val_loss did not improve from 7.73669
Epoch 18/40

Epoch 00018: val_loss did not improve from 7.73669
Epoch 19/40

Epoch 00019: val_loss did not improve from 7.73669
Epoch 20/40

Epoch 00020: val_loss did not improve from 7.73669
Epoch 21/40

Epoch 00021: val_loss did not improve from 7.73669
Epoch 22/40

Epoch 00022: val_loss did not improve from 7.73669
Epoch 23/40

Epoch 00023: val_loss did not improve from 7.73669
Epoch 24/40

Epoch 00024: val_loss did not improve from 7.73669
Epoch 25/40



Epoch 00025: val_loss did not improve from 7.73669
Epoch 26/40

Epoch 00026: val_loss did not improve from 7.73669
Epoch 27/40

Epoch 00027: val_loss did not improve from 7.73669
Epoch 28/40

Epoch 00028: val_loss did not improve from 7.73669
Epoch 29/40

Epoch 00029: val_loss did not improve from 7.73669
Epoch 30/40

Epoch 00030: val_loss did not improve from 7.73669
Epoch 31/40

Epoch 00031: val_loss did not improve from 7.73669
Epoch 32/40

Epoch 00032: val_loss did not improve from 7.73669
Epoch 33/40



Epoch 00033: val_loss did not improve from 7.73669
Epoch 34/40

Epoch 00034: val_loss did not improve from 7.73669
Epoch 35/40

Epoch 00035: val_loss did not improve from 7.73669
Epoch 36/40

Epoch 00036: val_loss did not improve from 7.73669
Epoch 37/40

Epoch 00037: val_loss did not improve from 7.73669
Epoch 38/40

Epoch 00038: val_loss did not improve from 7.73669
Epoch 39/40

Epoch 00039: val_loss did not improve from 7.73669
Epoch 40/40

Epoch 00040: val_loss did not improve from 7.73669


In [18]:
start_timer()

my_model.load_weights(checkpoint_filepath)

print_elapsed_time()

1185.5332182366174


In [19]:
start_timer()


import csv


my_predictions = [my_model.predict(np.expand_dims(feature, axis=0)) for feature in test_tensors]

# test_accuracy = 100 * np.sum(np.array(my_predictions)==np.argmax(test_targets, axis=1)) / len(my_predictions)
# print('Test accuracy: %.4f%%' % test_accuracy)

with open('my_cnn.csv', 'w', newline='') as csvfile:
    result_writger = csv.writer(csvfile)
    result_writger.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        result_writger.writerow([test_filepath, test_prediction[0][0], test_prediction[0][2]])
        

print_elapsed_time()

1185.5665660679726
