Load Dataset

In [36]:
import timeit
start_time = timeit.default_timer()

def print_elapsed_time():
    start_time = timeit.default_timer()
    
def start_timer():
    elapsed = (timeit.default_timer() - start_time)/60
    print(elapsed)

In [19]:
from sklearn.datasets import load_files   
from keras.utils import np_utils
import numpy as np
from glob import glob




def load_dataset(data_path, shuffle=None):
    kwargs = {}
    if shuffle != None:
        kwargs['shuffle'] = shuffle
    data = load_files(data_path, **kwargs)
    img_files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), 3)
    return img_files, targets



start_timer()
train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test', shuffle=False)

# load lables
label_name = [item[11:-1] for item in sorted(glob("data/train/*/"))]

print_elapsed_time()

4.838409904550645e-06


In [20]:
start_timer()

print('train_files size: {}'.format(len(train_files)))
print('train_files shape: {}'.format(train_files.shape))
print('target shape: {}'.format(train_targets.shape))
print(label_name)

start_time = timeit.default_timer()
elapsed = (timeit.default_timer() - start_time)/60
print(elapsed)


print_elapsed_time()


4.641631641528268
train_files size: 2000
train_files shape: (2000,)
target shape: (2000, 3)
['melanoma', 'nevus', 'seborrheic_keratosis']
9.130685384661774e-07


In [21]:
start_timer()
from keras.preprocessing import image
from keras.applications.inception_resnet_v2 import preprocess_input
from tqdm import tqdm


def path_to_tensor(img_path):
    img = image.load_img(img_path, target_size=(384, 256))
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)

def paths_to_tensor(image_paths):
    return np.vstack([path_to_tensor(path) for path in image_paths])

print_elapsed_time()


0.0010503104177056837


Transfer Learning and putting images into tensor.

In [22]:
start_timer()

train_tensors = paths_to_tensor(tqdm(train_files))
valid_tensors = paths_to_tensor(tqdm(valid_files))
test_tensors = paths_to_tensor(tqdm(test_files))

print(train_tensors.shape)

print_elapsed_time()


0.0014982516017501741


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [07:03<00:00,  4.72it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 150/150 [00:58<00:00,  2.57it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [05:40<00:00,  1.76it/s]


(2000, 384, 256, 3)


Flip Images

In [23]:
start_timer()


import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator


apply_train_image_transform = False

if apply_train_image_transform:
    # Caution: Doesn't guarantee prevention of duplication.
    datagen_train = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True)
    
    datagen_train.fit(train_tensors)
    shape = (train_tensors.shape[0] * 2,) + train_tensors.shape[1:]
    generated = np.ndarray(shape=shape)
    for i, image in tqdm(enumerate(train_tensors)):
        generated[i] = datagen_train.random_transform(image)
    
    train_tensors = np.concatenate((train_tensors, generated))
    train_targets = train_targets.repeat(2, axis=0)
    
print_elapsed_time()


13.792365996098898


Transfer learning using Inception Resnet V2¶

In [24]:
start_timer()

train_imgs_preprocess = preprocess_input(train_tensors)
valid_imgs_preprocess = preprocess_input(valid_tensors)
test_imgs_preprocess = preprocess_input(test_tensors)
del train_tensors, valid_tensors, test_tensors

print_elapsed_time()


13.793053152706186


In [25]:
start_timer()

from keras.applications.inception_resnet_v2 import InceptionResNetV2
transfer_model = InceptionResNetV2(include_top=False)

train_data = transfer_model.predict(train_imgs_preprocess)
valid_data = transfer_model.predict(valid_imgs_preprocess)
test_data = transfer_model.predict(test_imgs_preprocess)

del train_imgs_preprocess, valid_imgs_preprocess, test_imgs_preprocess
print(train_data.shape)

print_elapsed_time()


13.832828193098809
(2000, 10, 6, 1536)


In [26]:
start_timer()

from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

my_model = Sequential()

my_model.add(GlobalAveragePooling2D(input_shape=train_data.shape[1:]))
my_model.add(Dropout(0.2))
my_model.add(Dense(1024, activation='relu'))
my_model.add(Dropout(0.2))
my_model.add(Dense(3, activation='softmax'))
my_model.summary()

print_elapsed_time()


67.63998663335929
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_2 ( (None, 1536)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              1573888   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 3075      
Total params: 1,576,963
Trainable params: 1,576,963
Non-trainable params: 0
_________________________________________________________________


In [27]:
start_timer()

my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print_elapsed_time()


67.64254146739158


In [28]:
start_timer()

from keras.callbacks import ModelCheckpoint


checkpoint_filepath = 'saved_models/weights.best.my.hdf5'

my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_data, train_targets, 
          validation_data=(valid_data, valid_targets),
          epochs=70, batch_size=200, callbacks=[my_checkpointer], verbose=1)

print_elapsed_time()


67.64438656557036
Train on 2000 samples, validate on 150 samples
Epoch 1/70

Epoch 00001: val_loss improved from inf to 0.99318, saving model to saved_models/weights.best.my.hdf5
Epoch 2/70

Epoch 00002: val_loss did not improve from 0.99318
Epoch 3/70

Epoch 00003: val_loss improved from 0.99318 to 0.90020, saving model to saved_models/weights.best.my.hdf5
Epoch 4/70

Epoch 00004: val_loss improved from 0.90020 to 0.88822, saving model to saved_models/weights.best.my.hdf5
Epoch 5/70

Epoch 00005: val_loss improved from 0.88822 to 0.88480, saving model to saved_models/weights.best.my.hdf5
Epoch 6/70

Epoch 00006: val_loss improved from 0.88480 to 0.83818, saving model to saved_models/weights.best.my.hdf5
Epoch 7/70

Epoch 00007: val_loss did not improve from 0.83818
Epoch 8/70

Epoch 00008: val_loss improved from 0.83818 to 0.78468, saving model to saved_models/weights.best.my.hdf5
Epoch 9/70

Epoch 00009: val_loss did not improve from 0.78468
Epoch 10/70

Epoch 00010: val_loss did not


Epoch 00015: val_loss did not improve from 0.72248
Epoch 16/70

Epoch 00016: val_loss did not improve from 0.72248
Epoch 17/70

Epoch 00017: val_loss improved from 0.72248 to 0.71319, saving model to saved_models/weights.best.my.hdf5
Epoch 18/70

Epoch 00018: val_loss improved from 0.71319 to 0.70414, saving model to saved_models/weights.best.my.hdf5
Epoch 19/70

Epoch 00019: val_loss improved from 0.70414 to 0.68834, saving model to saved_models/weights.best.my.hdf5
Epoch 20/70

Epoch 00020: val_loss did not improve from 0.68834
Epoch 21/70

Epoch 00021: val_loss did not improve from 0.68834
Epoch 22/70

Epoch 00022: val_loss did not improve from 0.68834
Epoch 23/70

Epoch 00023: val_loss did not improve from 0.68834
Epoch 24/70

Epoch 00024: val_loss did not improve from 0.68834
Epoch 25/70

Epoch 00025: val_loss did not improve from 0.68834
Epoch 26/70

Epoch 00026: val_loss did not improve from 0.68834
Epoch 27/70

Epoch 00027: val_loss improved from 0.68834 to 0.67879, saving mod

Epoch 00029: val_loss improved from 0.67879 to 0.66892, saving model to saved_models/weights.best.my.hdf5
Epoch 30/70

Epoch 00030: val_loss did not improve from 0.66892
Epoch 31/70

Epoch 00031: val_loss did not improve from 0.66892
Epoch 32/70

Epoch 00032: val_loss improved from 0.66892 to 0.66741, saving model to saved_models/weights.best.my.hdf5
Epoch 33/70

Epoch 00033: val_loss did not improve from 0.66741
Epoch 34/70

Epoch 00034: val_loss did not improve from 0.66741
Epoch 35/70

Epoch 00035: val_loss did not improve from 0.66741
Epoch 36/70

Epoch 00036: val_loss did not improve from 0.66741
Epoch 37/70

Epoch 00037: val_loss did not improve from 0.66741
Epoch 38/70

Epoch 00038: val_loss did not improve from 0.66741
Epoch 39/70

Epoch 00039: val_loss improved from 0.66741 to 0.66655, saving model to saved_models/weights.best.my.hdf5
Epoch 40/70

Epoch 00040: val_loss improved from 0.66655 to 0.65706, saving model to saved_models/weights.best.my.hdf5
Epoch 41/70

Epoch 00041:

Epoch 00044: val_loss did not improve from 0.65706
Epoch 45/70

Epoch 00045: val_loss did not improve from 0.65706
Epoch 46/70

Epoch 00046: val_loss improved from 0.65706 to 0.63663, saving model to saved_models/weights.best.my.hdf5
Epoch 47/70

Epoch 00047: val_loss did not improve from 0.63663
Epoch 48/70

Epoch 00048: val_loss did not improve from 0.63663
Epoch 49/70

Epoch 00049: val_loss did not improve from 0.63663
Epoch 50/70

Epoch 00050: val_loss did not improve from 0.63663
Epoch 51/70

Epoch 00051: val_loss did not improve from 0.63663
Epoch 52/70

Epoch 00052: val_loss did not improve from 0.63663
Epoch 53/70

Epoch 00053: val_loss did not improve from 0.63663
Epoch 54/70

Epoch 00054: val_loss did not improve from 0.63663
Epoch 55/70

Epoch 00055: val_loss did not improve from 0.63663
Epoch 56/70

Epoch 00056: val_loss did not improve from 0.63663
Epoch 57/70

Epoch 00057: val_loss did not improve from 0.63663
Epoch 58/70

Epoch 00058: val_loss did not improve from 0.6366


Epoch 00060: val_loss did not improve from 0.63663
Epoch 61/70

Epoch 00061: val_loss did not improve from 0.63663
Epoch 62/70

Epoch 00062: val_loss did not improve from 0.63663
Epoch 63/70

Epoch 00063: val_loss did not improve from 0.63663
Epoch 64/70

Epoch 00064: val_loss did not improve from 0.63663
Epoch 65/70

Epoch 00065: val_loss did not improve from 0.63663
Epoch 66/70

Epoch 00066: val_loss did not improve from 0.63663
Epoch 67/70

Epoch 00067: val_loss did not improve from 0.63663
Epoch 68/70

Epoch 00068: val_loss did not improve from 0.63663
Epoch 69/70

Epoch 00069: val_loss did not improve from 0.63663
Epoch 70/70

Epoch 00070: val_loss did not improve from 0.63663


In [29]:
start_timer()

my_model.load_weights(checkpoint_filepath)

print_elapsed_time()


69.74688307414986


Evaluavate 

In [30]:
start_timer()

import csv


my_predictions = [my_model.predict(np.expand_dims(feature, axis=0)) for feature in test_data]

# test_accuracy = 100 * np.sum(np.array(my_predictions)==np.argmax(test_targets, axis=1)) / len(my_predictions)
# print('Test accuracy: %.4f%%' % test_accuracy)

with open('my_transfer.csv', 'w', newline='') as csvfile:
    result_writger = csv.writer(csvfile)
    result_writger.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        result_writger.writerow([test_filepath, test_prediction[0][0], test_prediction[0][2]])
        
print_elapsed_time()


69.79439494532085


Category 1 Score: 0.526
Category 2 Score: 0.606
Category 3 Score: 0.566

![ROC Curves](images/figure_1.png)

Without transfer learning, loading images into tensor

In [35]:
start_timer()
train_tensors = paths_to_tensor(tqdm(train_files))
train_tensors = train_tensors.astype('float32') / 255

valid_tensors = paths_to_tensor(tqdm(valid_files))
valid_tensors = valid_tensors.astype('float32') / 255

test_tensors = paths_to_tensor(tqdm(test_files))
test_tensors = test_tensors.astype('float32') / 255

print(train_tensors.shape)

print_elapsed_time()

0.10808795256416488


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [07:45<00:00,  4.30it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 150/150 [01:01<00:00,  2.45it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [06:12<00:00,  1.61it/s]


(2000, 384, 256, 3)


In [37]:
start_timer()
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

my_model = Sequential()

my_model.add(Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', 
                        input_shape=train_tensors.shape[1:]))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=1024, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.1))

my_model.add(Conv2D(filters=2048, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.1))


my_model.add(GlobalAveragePooling2D())

my_model.add(Dense(3, activation='softmax'))

my_model.summary()

print_elapsed_time()

5.867575357594311
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_411 (Conv2D)          (None, 384, 256, 16)      448       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 192, 128, 16)      0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 192, 128, 16)      0         
_________________________________________________________________
conv2d_412 (Conv2D)          (None, 192, 128, 64)      9280      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 96, 64, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 96, 64, 64)        0         
_________________________________________________________________
conv2d_413 (Conv2D)          (None, 96, 64, 256)       147

In [38]:
start_timer()
my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print_elapsed_time()

5.94980908334231


In [None]:
start_timer()


from keras.callbacks import ModelCheckpoint
import os


checkpoint_filepath = 'saved_models/weights.best.my.hdf5'

my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_tensors, train_targets, 
          validation_data=(valid_tensors, valid_targets),
          epochs=40, batch_size=100, callbacks=[my_checkpointer], verbose=1)


print_elapsed_time()

6.022846783856888
Train on 2000 samples, validate on 150 samples
Epoch 1/40

In [None]:
start_timer()

my_model.load_weights(checkpoint_filepath)

print_elapsed_time()

In [None]:
start_timer()


import csv


my_predictions = [my_model.predict(np.expand_dims(feature, axis=0)) for feature in test_tensors]

# test_accuracy = 100 * np.sum(np.array(my_predictions)==np.argmax(test_targets, axis=1)) / len(my_predictions)
# print('Test accuracy: %.4f%%' % test_accuracy)

with open('my_cnn.csv', 'w', newline='') as csvfile:
    result_writger = csv.writer(csvfile)
    result_writger.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        result_writger.writerow([test_filepath, test_prediction[0][0], test_prediction[0][2]])
        

print_elapsed_time()