In [None]:
!pip install sklearn
!pip install tensorflow
!pip install keras
!pip install tqdm
!pip install matplotlib


In [6]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np 
from glob import glob 

In [7]:
def load_dataset(data_path, shuffle=None, num_classes=3):
    """Collect file paths and one-hot labels from a one-folder-per-class directory.

    Parameters
    ----------
    data_path : str
        Directory handed to ``load_files``.
    shuffle : bool or None
        Forwarded to ``load_files`` only when it is not None; with None,
        ``load_files`` applies its own default.
    num_classes : int
        Width of the one-hot encoding. Defaults to 3, the value previously
        hard-coded here.

    Returns
    -------
    (img_files, targets)
        ``img_files`` is a NumPy array of the file names reported by
        ``load_files``; ``targets`` is the one-hot encoding of its ``target``
        entry produced by ``np_utils.to_categorical``.
    """
    kwargs = {}
    if shuffle is not None:
        kwargs['shuffle'] = shuffle
    # Only 'filenames' and 'target' are read below, so do not make load_files
    # pull the raw bytes of every image into memory.
    data = load_files(data_path, load_content=False, **kwargs)
    img_files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return img_files, targets

import os

# Load the three splits; the test split is requested unshuffled.
train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test', shuffle=False)

# Load labels: the class names are the sub-directory names of data/train, in
# sorted order. Taking the basename avoids slicing with offsets that depend on
# the exact length of the 'data/train/' prefix.
label_name = [os.path.basename(os.path.normpath(class_dir))
              for class_dir in sorted(glob("data/train/*/"))]

print('train_files size: {}'.format(len(train_files)))
print('train_files shape: {}'.format(train_files.shape))
print('target shape: {}'.format(train_targets.shape))
print(label_name)

train_files size: 2000
train_files shape: (2000,)
target shape: (2000, 3)
['melanoma', 'nevus', 'seborrheic_keratosis']


In [8]:
from keras.preprocessing import image
from keras.applications.inception_resnet_v2 import preprocess_input
from tqdm import tqdm 
def path_to_tensor(img_path, target_size=(384, 256)):
    """Load one image file as an array with a leading batch axis of length 1.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.
    target_size : tuple
        Size passed to ``image.load_img``; Keras interprets it as
        (height, width). Defaults to (384, 256), the size this notebook
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The image array from ``image.img_to_array`` with one extra axis
        inserted at position 0, so several results can be stacked into a batch.
    """
    img = image.load_img(img_path, target_size=target_size)
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)
def paths_to_tensor(image_paths):
    """Load every path in `image_paths` and stack the results along axis 0.

    Each path is converted with ``path_to_tensor``; the per-image arrays are
    then joined with ``np.vstack`` into a single batch array.
    """
    per_image = []
    for img_path in image_paths:
        per_image.append(path_to_tensor(img_path))
    return np.vstack(per_image)


In [9]:
# Load the images of each split into one tensor per split. The splits are
# processed in train / valid / test order, each with its own tqdm progress bar.
train_tensors, valid_tensors, test_tensors = [
    paths_to_tensor(tqdm(split_files))
    for split_files in (train_files, valid_files, test_files)
]

print(train_tensors.shape)

100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [06:19<00:00,  5.28it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 150/150 [00:35<00:00,  4.23it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [03:24<00:00,  2.93it/s]


(2000, 384, 256, 3)


In [10]:
import matplotlib.pyplot as plt 
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
# Optional flip augmentation of the training set (disabled by default),
# followed by the InceptionResNetV2 input preprocessing of all three splits.
apply_train_image_tranform = False
if apply_train_image_tranform:
    datagen_train = ImageDataGenerator(horizontal_flip=True,
                                       vertical_flip=True)
    # No datagen_train.fit(...) here: per the Keras documentation, fit() is only
    # required for featurewise centering / std-normalisation / ZCA whitening,
    # none of which are enabled for this generator.

    # Exactly one augmented copy per original image, allocated with the same
    # shape and dtype as the originals.
    generated = np.empty_like(train_tensors)
    # The loop variable is deliberately not called `image`: that name is the
    # keras.preprocessing module that path_to_tensor relies on.
    for i, original in enumerate(tqdm(train_tensors)):
        generated[i] = datagen_train.random_transform(original)
    # Extend the data once, after the loop. Images are laid out as
    # [originals..., augmented...], so the labels are appended in that same
    # order (a flipped image keeps the label of its source image).
    train_tensors = np.concatenate((train_tensors, generated))
    train_targets = np.concatenate((train_targets, train_targets))
    del generated
train_imgs_preprocess = preprocess_input(train_tensors)
valid_imgs_preprocess = preprocess_input(valid_tensors)
test_imgs_preprocess = preprocess_input(test_tensors)
# The raw tensors are not referenced again in this cell; drop the names so the
# memory can be reclaimed.
del train_tensors, valid_tensors, test_tensors

    

In [11]:
from keras.applications.inception_resnet_v2 import InceptionResNetV2
# Use InceptionResNetV2 without its classification top purely as a feature
# extractor: the model is only ever asked to predict, never trained here.
transfer_model = InceptionResNetV2(include_top=False)

# Run the three preprocessed splits through the network in train / valid / test order.
train_data, valid_data, test_data = [
    transfer_model.predict(split_images)
    for split_images in (train_imgs_preprocess,
                         valid_imgs_preprocess,
                         test_imgs_preprocess)
]

# The preprocessed images are no longer needed once the features exist.
del train_imgs_preprocess, valid_imgs_preprocess, test_imgs_preprocess

print(train_data.shape)


(2000, 10, 6, 1536)


In [12]:
# Force a full garbage-collection pass now that the large image arrays have been
# deleted by the preceding cells. gc.collect() returns the number of unreachable
# objects it found; as the last expression of the cell, that count is what the
# notebook displays.
import gc
gc.collect()


2157

In [23]:
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

# Classifier head trained on the pre-extracted feature maps:
# global average pooling -> dropout -> 2048-unit ReLU layer -> dropout -> 3-way softmax.
BCD = Sequential([
    GlobalAveragePooling2D(input_shape=train_data.shape[1:]),
    Dropout(0.2),
    Dense(2048, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax'),
])
BCD.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_1 ( (None, 1536)              0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 2048)              3147776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 6147      
Total params: 3,153,923
Trainable params: 3,153,923
Non-trainable params: 0
_________________________________________________________________


In [24]:

# Configure training: Adam optimiser, categorical cross-entropy on the one-hot
# targets, and accuracy reported as the metric.
BCD.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [25]:
from keras.callbacks import ModelCheckpoint
import os 
# File that receives the checkpoint; with save_best_only=True it is only
# rewritten when the monitored quantity improves.
checkpoint_filepath = 'weights.hdf5'
checkpointer = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    verbose=1,
)

# Train the head on the extracted features, validating on the validation
# features after every epoch. The returned History object is the cell's output.
BCD.fit(
    train_data,
    train_targets,
    batch_size=1,
    epochs=60,
    verbose=1,
    validation_data=(valid_data, valid_targets),
    callbacks=[checkpointer],
)

Epoch 1/60
Epoch 00001: val_loss improved from inf to 1.40694, saving model to weights.hdf5
Epoch 2/60
Epoch 00002: val_loss improved from 1.40694 to 1.29560, saving model to weights.hdf5
Epoch 3/60
Epoch 00003: val_loss improved from 1.29560 to 0.80606, saving model to weights.hdf5
Epoch 4/60
Epoch 00004: val_loss improved from 0.80606 to 0.77050, saving model to weights.hdf5
Epoch 5/60
Epoch 00005: val_loss did not improve from 0.77050
Epoch 6/60
Epoch 00006: val_loss improved from 0.77050 to 0.75609, saving model to weights.hdf5
Epoch 7/60
Epoch 00007: val_loss did not improve from 0.75609
Epoch 8/60
Epoch 00008: val_loss did not improve from 0.75609
Epoch 9/60
Epoch 00009: val_loss did not improve from 0.75609
Epoch 10/60
Epoch 00010: val_loss did not improve from 0.75609
Epoch 11/60
Epoch 00011: val_loss did not improve from 0.75609
Epoch 12/60
Epoch 00012: val_loss did not improve from 0.75609
Epoch 13/60
Epoch 00013: val_loss did not improve from 0.75609
Epoch 14/60
Epoch 00014:

Epoch 29/60
Epoch 00029: val_loss did not improve from 0.64359
Epoch 30/60
Epoch 00030: val_loss did not improve from 0.64359
Epoch 31/60
Epoch 00031: val_loss did not improve from 0.64359
Epoch 32/60
Epoch 00032: val_loss did not improve from 0.64359
Epoch 33/60
Epoch 00033: val_loss did not improve from 0.64359
Epoch 34/60
Epoch 00034: val_loss did not improve from 0.64359
Epoch 35/60
Epoch 00035: val_loss did not improve from 0.64359
Epoch 36/60
Epoch 00036: val_loss did not improve from 0.64359
Epoch 37/60
Epoch 00037: val_loss did not improve from 0.64359
Epoch 38/60
Epoch 00038: val_loss did not improve from 0.64359
Epoch 39/60
Epoch 00039: val_loss did not improve from 0.64359
Epoch 40/60
Epoch 00040: val_loss did not improve from 0.64359
Epoch 41/60
Epoch 00041: val_loss did not improve from 0.64359
Epoch 42/60
Epoch 00042: val_loss did not improve from 0.64359
Epoch 43/60
Epoch 00043: val_loss did not improve from 0.64359
Epoch 44/60
Epoch 00044: val_loss did not improve from 

Epoch 58/60
Epoch 00058: val_loss did not improve from 0.64359
Epoch 59/60
Epoch 00059: val_loss did not improve from 0.64359
Epoch 60/60
Epoch 00060: val_loss did not improve from 0.64359


<tensorflow.python.keras.callbacks.History at 0x1c802e555c0>

In [27]:
# Restore the weights from the checkpoint file written during training
# (ModelCheckpoint was created with save_best_only=True), so that what follows
# uses the checkpointed model rather than the state after the final epoch.
BCD.load_weights(checkpoint_filepath)

In [28]:
import csv 
# Predict the whole test set in a single batched call instead of invoking
# BCD.predict once per sample.
test_probabilities = BCD.predict(test_data)

# `my_predictions` keeps its previous layout: a list with one array of shape
# (1, n_classes) per test sample.
my_predictions = [sample_probs[np.newaxis, :] for sample_probs in test_probabilities]

# zip() would silently drop rows on a length mismatch, so check it explicitly.
assert len(test_files) == len(test_probabilities), \
    'number of test files and number of predictions differ'

# One CSV row per test image: the file path, then the predicted probabilities
# in columns 0 and 2 of the softmax output.
# NOTE(review): the label list printed earlier shows index 0 = melanoma and
# index 2 = seborrheic_keratosis; presumably the targets use that same
# ordering. Confirm before submitting.
with open('my_transfer.csv', 'w', newline='') as csvfile:
    result_writer = csv.writer(csvfile)
    result_writer.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, class_probs in zip(test_files, test_probabilities):
        result_writer.writerow([test_filepath, class_probs[0], class_probs[2]])