Load Dataset

In [1]:
import timeit
# Shared reference point for the per-cell timers; moved by start_timer().
start_time = timeit.default_timer()


def start_timer():
    """Reset the shared timer so the next report measures from this moment."""
    # `global` is required: a plain assignment would only bind a throwaway
    # local and the module-level reference point would never move.
    global start_time
    start_time = timeit.default_timer()


def print_elapsed_time():
    """Print the number of minutes elapsed since the last start_timer() call.

    Before any start_timer() call, the reference point is the moment this
    cell was executed.
    """
    elapsed = (timeit.default_timer() - start_time) / 60
    print(elapsed)

In [None]:
from sklearn.datasets import load_files   
from keras.utils import np_utils
import numpy as np
from glob import glob




def load_dataset(data_path, shuffle=None, num_classes=3):
    """Collect image file paths and one-hot targets from a class-per-folder tree.

    Args:
        data_path: Directory handed to scikit-learn's ``load_files``.
        shuffle: Forwarded to ``load_files`` when it is not None. When None,
            the argument is omitted so ``load_files`` applies its own default.
        num_classes: Width of the one-hot encoding produced by
            ``to_categorical``. Defaults to 3, the value this notebook uses.

    Returns:
        A tuple ``(img_files, targets)``: a NumPy array built from the
        ``filenames`` entry and the one-hot encoded ``target`` entry.
    """
    kwargs = {}
    if shuffle is not None:
        kwargs['shuffle'] = shuffle
    # Only the paths and targets are used below, so skip reading every file's
    # bytes into memory (load_files does that by default).
    data = load_files(data_path, load_content=False, **kwargs)
    img_files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return img_files, targets



start_timer()

# Train and validation leave `shuffle` unset; the test split pins shuffle=False.
train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test', shuffle=False)

# Load labels: each training sub-directory path with the first 11 characters
# (the length of 'data/train/') and the trailing separator sliced off.
label_name = [class_dir[11:-1] for class_dir in sorted(glob("data/train/*/"))]

print_elapsed_time()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


0.3811088810837933


In [None]:
start_timer()

# Quick sanity check of what the loading cell produced.
print('train_files size: {}'.format(len(train_files)))
print('train_files shape: {}'.format(train_files.shape))
print('target shape: {}'.format(train_targets.shape))
print(label_name)

# No inline timing here: resetting `start_time` in the middle of the cell
# would make the report below measure nothing.
print_elapsed_time()


In [None]:
# Timer hook for this cell (the timer helpers are defined in the notebook's first cell).
start_timer()
# `image` supplies load_img / img_to_array for path_to_tensor in this cell.
from keras.preprocessing import image
# `preprocess_input` is applied to the image tensors in a later cell.
from keras.applications.inception_resnet_v2 import preprocess_input
# `tqdm` wraps the file lists so loading shows a progress bar.
from tqdm import tqdm


def path_to_tensor(img_path, target_size=(384, 256)):
    """Load one image file as a batch-of-one array.

    Args:
        img_path: Path of the image file to read.
        target_size: Forwarded unchanged to ``image.load_img``. Defaults to
            (384, 256), the size this notebook has always used.

    Returns:
        The ``img_to_array`` result with a new leading axis of length 1, so
        results for several images can be stacked along the first axis.
    """
    img = image.load_img(img_path, target_size=target_size)
    pixels = image.img_to_array(img)
    return np.expand_dims(pixels, axis=0)

def paths_to_tensor(image_paths):
    """Convert every path with path_to_tensor and stack the results.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        The per-image arrays joined with ``np.vstack``.
    """
    per_image = []
    for img_path in image_paths:
        per_image.append(path_to_tensor(img_path))
    return np.vstack(per_image)

# Timer hook closing this cell.
print_elapsed_time()


Transfer learning: load the images into tensors.

In [None]:
start_timer()

# Load the splits in order (train, valid, test), one tqdm progress bar per split.
train_tensors, valid_tensors, test_tensors = (
    paths_to_tensor(tqdm(split_files))
    for split_files in (train_files, valid_files, test_files)
)

print(train_tensors.shape)

print_elapsed_time()


Flip Images

In [None]:
start_timer()


import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator


# Set to True to append one randomly flipped copy of every training image.
apply_train_image_transform = False

if apply_train_image_transform:
    # Caution: Doesn't guarantee prevention of duplication (a random flip may
    # return the image unchanged). Re-running this cell with the flag on
    # doubles the training set again.
    datagen_train = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True)

    # No datagen_train.fit(...) here: per the Keras docs, fit is only needed
    # for featurewise centering/normalisation or ZCA whitening, not for flips.

    # Exactly one augmented image per original, with matching shape and dtype,
    # so no rows are left uninitialised.
    generated = np.empty_like(train_tensors)
    # The loop variable is deliberately not called `image`, which would shadow
    # the keras `image` module that path_to_tensor relies on.
    for i, tensor in enumerate(tqdm(train_tensors)):
        generated[i] = datagen_train.random_transform(tensor)

    # Originals first, then the augmented copies; the targets are duplicated
    # in the same block order so every image keeps its own label.
    train_tensors = np.concatenate((train_tensors, generated))
    train_targets = np.concatenate((train_targets, train_targets))

print_elapsed_time()


Transfer learning using InceptionResNetV2

In [None]:
start_timer()

# Apply the inception_resnet_v2 `preprocess_input` to each split, in order.
train_imgs_preprocess, valid_imgs_preprocess, test_imgs_preprocess = (
    preprocess_input(split_tensors)
    for split_tensors in (train_tensors, valid_tensors, test_tensors)
)

# Remove the raw-tensor names from the namespace; only the preprocessed
# names are used from here on.
del train_tensors
del valid_tensors
del test_tensors

print_elapsed_time()


In [None]:
start_timer()

from keras.applications.inception_resnet_v2 import InceptionResNetV2

# Base network built with include_top=False; its outputs are used as features.
transfer_model = InceptionResNetV2(include_top=False)

# Run each preprocessed split through the base network once.
train_data = transfer_model.predict(train_imgs_preprocess)
valid_data = transfer_model.predict(valid_imgs_preprocess)
test_data = transfer_model.predict(test_imgs_preprocess)

# The preprocessed inputs are not referenced again in this pipeline.
del train_imgs_preprocess
del valid_imgs_preprocess
del test_imgs_preprocess

print(train_data.shape)

print_elapsed_time()


In [None]:
start_timer()

from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

# Classifier head trained on the extracted features: global average pooling,
# a 1024-unit ReLU layer with dropout on both sides, and a 3-way softmax.
my_model = Sequential([
    GlobalAveragePooling2D(input_shape=train_data.shape[1:]),
    Dropout(0.2),
    Dense(1024, activation='relu'),
    Dropout(0.2),
    Dense(3, activation='softmax'),
])
my_model.summary()

print_elapsed_time()


In [None]:
start_timer()

# Categorical cross-entropy loss, Adam optimizer, accuracy as the reported metric.
my_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print_elapsed_time()


In [None]:
start_timer()

import os

from keras.callbacks import ModelCheckpoint


# NOTE: the later from-scratch CNN training cell writes to this same path.
checkpoint_filepath = 'saved_models/weights.best.my.hdf5'
# Create the checkpoint directory up front so the first save cannot fail on a
# missing folder (the checkpoint callback may not create it itself).
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)

# save_best_only=True: the file is only overwritten when the monitored
# quantity improves.
my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_data, train_targets, 
          validation_data=(valid_data, valid_targets),
          epochs=70, batch_size=200, callbacks=[my_checkpointer], verbose=1)

print_elapsed_time()


In [None]:
start_timer()

# Restore the weights stored at the checkpoint path into the current model.
my_model.load_weights(filepath=checkpoint_filepath)

print_elapsed_time()


Evaluate

In [None]:
start_timer()

import csv


# One batched forward pass instead of one predict() call per sample.
# `my_predictions` holds one row of class outputs per test feature.
my_predictions = my_model.predict(test_data)

# To report test accuracy, compare np.argmax(my_predictions, axis=1) with
# np.argmax(test_targets, axis=1).

with open('my_transfer.csv', 'w', newline='') as csvfile:
    result_writer = csv.writer(csvfile)
    result_writer.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        # task_1 and task_2 are the model outputs at class indices 0 and 2.
        result_writer.writerow([test_filepath, test_prediction[0], test_prediction[2]])

print_elapsed_time()


Category 1 Score: 0.526
Category 2 Score: 0.606
Category 3 Score: 0.566

![ROC Curves](images/figure_1.png)

Without transfer learning: load the images into tensors

In [None]:
start_timer()

# Reload each split, cast to float32 and divide by 255
# (presumably mapping 8-bit pixel values into [0, 1]).
train_tensors = paths_to_tensor(tqdm(train_files)).astype('float32') / 255
valid_tensors = paths_to_tensor(tqdm(valid_files)).astype('float32') / 255
test_tensors = paths_to_tensor(tqdm(test_files)).astype('float32') / 255

print(train_tensors.shape)

print_elapsed_time()

In [None]:
start_timer()
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

# Five Conv2D -> MaxPooling2D -> Dropout stages (16, 64, 256, 1024, 2048
# filters), followed by global average pooling and a 3-way softmax layer.
my_model = Sequential([
    Conv2D(filters=16, kernel_size=3, padding='same', activation='relu',
           input_shape=train_tensors.shape[1:]),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),

    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),

    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),

    Conv2D(filters=1024, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.1),

    Conv2D(filters=2048, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.1),

    GlobalAveragePooling2D(),

    Dense(3, activation='softmax'),
])

my_model.summary()

print_elapsed_time()

In [None]:
start_timer()
# Categorical cross-entropy loss, Adam optimizer, accuracy as the reported metric.
my_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
print_elapsed_time()

In [None]:
start_timer()


from keras.callbacks import ModelCheckpoint
import os


# NOTE: this is the same path the transfer-learning training cell uses, so
# this run overwrites that checkpoint file.
checkpoint_filepath = 'saved_models/weights.best.my.hdf5'
# Create the checkpoint directory up front so the first save cannot fail on a
# missing folder (the checkpoint callback may not create it itself).
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)

# save_best_only=True: the file is only overwritten when the monitored
# quantity improves.
my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_tensors, train_targets, 
          validation_data=(valid_tensors, valid_targets),
          epochs=40, batch_size=100, callbacks=[my_checkpointer], verbose=1)


print_elapsed_time()

In [None]:
start_timer()

# Restore the weights stored at the checkpoint path into the current model.
my_model.load_weights(filepath=checkpoint_filepath)

print_elapsed_time()

In [None]:
start_timer()


import csv


# One batched forward pass instead of one predict() call per sample.
# `my_predictions` holds one row of class outputs per test image.
my_predictions = my_model.predict(test_tensors)

# To report test accuracy, compare np.argmax(my_predictions, axis=1) with
# np.argmax(test_targets, axis=1).

with open('my_cnn.csv', 'w', newline='') as csvfile:
    result_writer = csv.writer(csvfile)
    result_writer.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        # task_1 and task_2 are the model outputs at class indices 0 and 2.
        result_writer.writerow([test_filepath, test_prediction[0], test_prediction[2]])


print_elapsed_time()