In [None]:
import pandas as pd
import os
import tensorflow as tf

from keras_preprocessing.image import ImageDataGenerator
% matplotlib inline
from sklearn.model_selection import train_test_split
from PIL import ImageFile
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Switch on Pillow's module-level LOAD_TRUNCATED_IMAGES flag before any image
# is read (presumably because some dataset files are incomplete — confirm).
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
# Read the lesion metadata table into a DataFrame and preview its first rows.
# NOTE(review): the path carries no file extension — confirm the file on disk
# is not actually named 'HAM10000_metadata.csv'.
data_pd = pd.read_csv('data/HAM10000_metadata')
data_pd.head()


In [None]:
# Target folders for the train and test images, both located under 'HAM10000'.
train_dir, test_dir = (
    os.path.join('HAM10000', folder) for folder in ('train_dir', 'test_dir')
)

In [None]:
# Count the rows recorded for each lesion_id; count() yields, per group, the
# number of non-null entries in every remaining column.
rows_per_lesion = data_pd.groupby('lesion_id').count()

# Keep only the lesions whose 'dx' count is exactly 1 and turn lesion_id back
# into an ordinary column, because later cells read df_count['lesion_id'].
# reset_index() returns a new frame, so re-running this cell is safe.
df_count = rows_per_lesion[rows_per_lesion['dx'] == 1].reset_index()

# Preview as the last expression of the cell so that it is actually rendered.
df_count.head()


In [None]:
def duplicates(x, unique_ids=None):
    """Label a lesion id as 'no' (not duplicated) or 'duplicates'.

    Parameters
    ----------
    x
        The lesion id to classify.
    unique_ids : container, optional
        Lesion ids that must be labelled 'no'. Pass a precomputed set when
        labelling many ids so it is not rebuilt on every call. When omitted,
        the ids are taken from the notebook-level ``df_count['lesion_id']``
        at call time, exactly as before.

    Returns
    -------
    str
        'no' when ``x`` is contained in ``unique_ids``, otherwise 'duplicates'.
    """
    if unique_ids is None:
        # Backward-compatible default: look the ids up in the global frame.
        unique_ids = set(df_count['lesion_id'])
    return 'no' if x in unique_ids else 'duplicates'

In [None]:
# Flag each metadata row: 'no' when its lesion_id is listed in
# df_count['lesion_id'], otherwise 'duplicates'.
# isin() does the membership test for the whole column at once instead of
# rebuilding a Python set for every single row.
listed_in_df_count = data_pd['lesion_id'].isin(df_count['lesion_id'])
data_pd['is_duplicate'] = listed_in_df_count.map({True: 'no', False: 'duplicates'})
data_pd.head()

In [None]:
# Keep only the metadata rows flagged 'no' in the is_duplicate column.
# Note: this rebinds df_count, which up to here held the per-lesion counts.
not_duplicated = data_pd['is_duplicate'] == 'no'
df_count = data_pd[not_duplicated]

In [None]:
# Hold out 15% of df_count as the test set, stratified on the 'dx' column.
# random_state is fixed so a Restart & Run All reproduces the same split;
# without it the test images (and every later result) change on each run.
train, test_df = train_test_split(df_count,
                                  test_size=0.15,
                                  stratify=df_count['dx'],
                                  random_state=42)

In [None]:
def identify_trainOrtest(x, test_ids=None):
    """Return 'test' when an image id belongs to the test split, else 'train'.

    Parameters
    ----------
    x
        The image id to classify; it is converted with ``str()`` before the
        lookup.
    test_ids : container, optional
        Image ids of the test split. Pass a precomputed set when labelling
        many ids so it is not rebuilt on every call. When omitted, the ids are
        taken from the notebook-level ``test_df['image_id']`` at call time,
        exactly as before.

    Returns
    -------
    str
        'test' when ``str(x)`` is contained in ``test_ids``, otherwise 'train'.
    """
    if test_ids is None:
        # Backward-compatible default: look the ids up in the global frame.
        test_ids = set(test_df['image_id'])
    return 'test' if str(x) in test_ids else 'train'

In [None]:
# Creating train_df: an image is labelled 'test' when its id (as a string)
# appears in test_df['image_id'] and 'train' otherwise; train_df is every row
# labelled 'train'.
# isin() checks the whole column at once instead of rebuilding a set per row.
in_test_split = data_pd['image_id'].astype(str).isin(test_df['image_id'])
data_pd['train_test_split'] = in_test_split.map({True: 'test', False: 'train'})
train_df = data_pd[data_pd['train_test_split'] == 'train']

# A bare expression is only rendered when it is the last statement of a cell,
# so the train preview was previously lost. display() (available in IPython
# notebook sessions) renders both frames.
display(train_df.head())
print("\n")
display(test_df.head())

In [None]:
# Image ids of the train and test images
train_list = list(train_df['image_id'])
test_list = list(test_df['image_id'])

# Print each length next to its label. Previously len(test_list) was a bare
# mid-cell expression, so the test length was never shown.
print('Test list length =', len(test_list))
print("\n Train list length =", len(train_list))


In [None]:
# Set the image_id as the index in data_pd.
# Guarded so the cell can be re-run: once image_id has become the index the
# column no longer exists and an unconditional set_index raises KeyError.
if data_pd.index.name != 'image_id':
    data_pd.set_index('image_id', inplace=True)

In [None]:
# Create the train and test folders. makedirs also creates the missing
# 'HAM10000' parent, and exist_ok=True keeps a re-run from failing with
# FileExistsError when the folders are already there.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

In [None]:
# Project-local helper; its implementation is not visible in this notebook.
from TrainTestGenerator import generateTrainTestDir

# Hand over both target folders, the image_id-indexed metadata frame and the
# two image-id lists. Presumably this fills train_dir / test_dir with one
# sub-folder per class — confirm in TrainTestGenerator.
generateTrainTestDir(train_dir, test_dir, data_pd, train_list, test_list)

In [None]:
# Project-local helper; its implementation is not visible in this notebook.
from DataAugmentation import startAugmentation

# Called without arguments, so any paths or settings it uses are defined
# inside DataAugmentation. Presumably it adds augmented images to the
# training folders — confirm which directory it writes to.
startAugmentation()

In [None]:
# Reuse the folders defined (and populated) earlier in the notebook rather
# than spelling the paths out a second time, so the generators can never read
# from a different location than the one the images were written to.
train_path = train_dir
test_path = test_dir

# Number of images per batch for both directory iterators.
batch_size = 16

In [None]:
# Single generator shared by the train and test iterators; every image is
# passed through the Inception-ResNet-v2 preprocess_input function.
# NOTE(review): the model module and checkpoint file are named "ResNet" —
# confirm that this preprocessing matches the backbone built by create_model().
datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.inception_resnet_v2.preprocess_input)

In [None]:
# Side length (pixels) every image is resized to.
image_size = 224

# Options common to both directory iterators.
flow_options = dict(target_size=(image_size, image_size), batch_size=batch_size)

print("\nTrain Batches: ")
# Training images are drawn in shuffled order.
train_batches = datagen.flow_from_directory(directory=train_path,
                                            shuffle=True,
                                            **flow_options)

print("\nTest Batches: ")
# shuffle=False keeps the test images in a fixed order.
test_batches = datagen.flow_from_directory(test_path,
                                           shuffle=False,
                                           **flow_options)

In [None]:
# Project-local model factory; the architecture it builds is not visible here.
from ModelResNet import create_model

# Build the network that the following cells compile and train.
model = create_model()

In [None]:
# Adam optimizer with learning_rate=0.01 and epsilon=0.1.
opt1 = tf.keras.optimizers.Adam(epsilon=0.1, learning_rate=0.01)

# Categorical cross-entropy loss; the 'accuracy' metric is what later surfaces
# as 'val_accuracy', the quantity the ModelCheckpoint callback monitors.
compile_options = dict(
    optimizer=opt1,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

In [None]:
# Loss weight per class label; 'akiec' counts five times as much as the rest.
label_weights = {
    'bkl': 1.0,
    'nv': 1.0,
    'mel': 1.0,
    'bcc': 1.0,
    'akiec': 5.0,
}

# flow_from_directory numbers the classes in alphanumeric folder order, so the
# integer keys must come from train_batches.class_indices. Hard-coding
# 0..4 against label comments put the 5.0 weight on whichever class happened
# to receive index 4.
# NOTE(review): assumes the class folders are named after these labels.
missing_labels = sorted(set(label_weights) - set(train_batches.class_indices))
if missing_labels:
    # Fail loudly rather than train with a weight attached to the wrong class.
    raise ValueError(
        'No class folder found for weighted label(s) %s; folders present: %s'
        % (missing_labels, sorted(train_batches.class_indices))
    )

# Any class folder without an explicit entry keeps the neutral weight 1.0.
class_weights = {
    index: label_weights.get(label, 1.0)
    for label, index in train_batches.class_indices.items()
}

# Save only the weights, and only when val_accuracy improves.
checkpoint = ModelCheckpoint(filepath='ResNet152.hdf5', monitor='val_accuracy', save_best_only=True,
                             save_weights_only=True)
# Stop once val_loss has not dropped by at least 0.001 for 40 epochs.
Earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=40, min_delta=0.001)

In [None]:
# Keras documents steps_per_epoch / validation_steps as integers, but plain
# "/" produced floats. -(-a // b) is ceiling division, so a final partial
# batch is still counted.
# The training step count previously divided by a literal 10 although the
# iterators use batch_size; it now uses batch_size like the validation count.
train_steps = -(-len(train_df) // batch_size)
validation_steps = -(-len(test_df) // batch_size)

history = model.fit(train_batches,
                    steps_per_epoch=train_steps,
                    epochs=300,
                    verbose=1,
                    validation_data=test_batches,
                    validation_steps=validation_steps,
                    callbacks=[checkpoint, Earlystop],
                    class_weight=class_weights)


In [None]:
# Load the weights from the file the ModelCheckpoint callback writes
# (same 'ResNet152.hdf5' path), replacing the weights left by the last epoch.
model.load_weights("ResNet152.hdf5")

In [None]:
# Project-local helper; its implementation is not visible in this notebook.
from PredictAndEvaluate import evaluatemodel

# Pass the model, the unshuffled test iterator, the test metadata frame and the
# batch size; the return value is kept in `predicted` (presumably the model's
# predictions for the test images — confirm in PredictAndEvaluate).
predicted = evaluatemodel(model, test_batches, test_df, batch_size)
