In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
np.random.seed(123)
from sklearn.preprocessing import label_binarize
from sklearn.metrics import confusion_matrix
import itertools
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping,ModelCheckpoint
from tensorflow.keras.layers import Input, Conv2D,MaxPool2D, Dense, Flatten, Dropout
import tensorflow as tf
import os 
import shutil 
from tensorflow.keras.models import Sequential

In [3]:
def my_train_test_split_aug(X, Y, test_size=0.2, random_state=1):
    """Split samples and labels into a training part and a hold-out part.

    The arrays are returned exactly as produced by ``train_test_split``:
    no augmentation and no rescaling is applied to them. Earlier versions of
    this function also built two ``ImageDataGenerator`` objects and called
    ``fit`` on them, but the generators were discarded without ever being
    used to produce batches, so they only cost time and memory. Callers that
    want augmentation have to feed the model from a generator themselves.

    Parameters
    ----------
    X : array-like
        Samples, first axis indexes the individual samples.
    Y : array-like
        Labels aligned with ``X`` along the first axis.
    test_size : float, default 0.2
        Fraction of the samples put into the hold-out part.
    random_state : int, default 1
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, Y_train, Y_test

In [4]:
# Root folder that contains the input datasets.
base_skin_dir = os.path.join('..', 'input')

# Map image id (file name without extension) -> file path for every JPEG found
# one directory level below the dataset folder, so the images of both
# HAM10000_images_part1 and HAM10000_images_part2 end up in a single dictionary.
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in glob(os.path.join(base_skin_dir, "skin-cancer-mnist-ham10000/", '*', '*.jpg'))
)

# Short diagnosis code -> human readable lesion name.
lesion_type_dict = dict(
    nv='Melanocytic nevi (nv)',
    mel='Melanoma (mel)',
    bkl='Benign keratosis-like lesions (bkl)',
    bcc='Basal cell carcinoma (bcc)',
    akiec='Actinic keratoses (akiec)',
    vasc='Vascular lesions (vasc)',
    df='Dermatofibroma (df)',
)

# Integer class id -> diagnosis code; the position in the list is the class id.
label_mapping = dict(enumerate(['nv', 'mel', 'bkl', 'bcc', 'akiec', 'vasc', 'df']))

# Diagnosis code -> integer class id (inverse of label_mapping).
reverse_label_mapping = {code: class_id for class_id, code in label_mapping.items()}

In [5]:
# Read the HAM10000 metadata table that sits next to the image folders.
metadata_csv = os.path.join(base_skin_dir, "skin-cancer-mnist-ham10000/", 'HAM10000_metadata.csv')
data = pd.read_csv(metadata_csv)
data

In [6]:
# Attach the readable lesion name and the image file path to every row.
# Codes / image ids without an entry in the lookup dictionaries map to None.
data = data.assign(
    cell_type=data['dx'].map(lesion_type_dict.get),
    path=data['image_id'].map(imageid_path_dict.get),
)
data.head(10)

In [7]:
def load_image_pixel(path, size=(28, 28)):
    """Load one image file as a pixel array resized to ``size``.

    The file is opened in a ``with`` block so its handle is released as soon
    as the pixels have been read. The image is converted to RGB first so that
    every returned array has three channels, which is what the model's
    ``input_shape=(28, 28, 3)`` expects; for images that already are RGB the
    conversion does not change the pixel values.

    Parameters
    ----------
    path : str
        Location of the image file.
    size : tuple of int, default (28, 28)
        Target ``(width, height)`` passed to ``Image.resize``.

    Returns
    -------
    numpy.ndarray
        The resized image as an array.
    """
    with Image.open(path) as img:
        return np.asarray(img.convert('RGB').resize(size))


# One pixel array per metadata row, read from the file in the 'path' column.
data['image_pixel'] = data['path'].map(load_image_pixel)
data

In [8]:
# Encode the diagnosis code as an integer class id, order the rows by class and
# renumber them from 0. reset_index() keeps the previous row labels in a new
# leading 'index' column. 'class' stays the last column, which the later cells
# that pick the labels with iloc[:, -1:] depend on.
data = (
    data
    .assign(**{'class': data['dx'].map(reverse_label_mapping.get)})
    .sort_values('class')
    .reset_index()
)
data

In [9]:
# Oversample classes 1..6 by repeating their rows. `frames` starts with the
# complete original table; for each class one extra frame is added that holds
# the class rows once plus `extra_copies` further repetitions (4 for class 1,
# 8 for class 2, ... 24 for class 6). Class 0 is not repeated.
frames = [data]
for class_id, extra_copies in enumerate([4, 8, 12, 16, 20, 24], start=1):
    # Select by label value instead of slicing between the smallest and largest
    # row position, so the result does not depend on the rows being contiguous.
    class_rows = data[data['class'] == class_id]
    # DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
    frames.append(pd.concat([class_rows] * (extra_copies + 1), ignore_index=True))

In [10]:
# Number of frames to concatenate: the original table plus one oversampled
# frame for each of the six classes handled in the loop above (7 in total).
len(frames)


In [11]:
# Stack the original table and the oversampled frames into one table, then
# show the row/column counts before and after oversampling.
final_data = pd.concat(frames)
print(data.shape, final_data.shape, sep='\n')

In [12]:
# Original (non-oversampled) data, used for testing.
# Stack the per-row pixel arrays into one array with the sample axis first.
X_orig = np.stack(data['image_pixel'].to_numpy(), axis=0)
# Select the label column by name rather than "last column", so adding another
# column to `data` cannot silently change what is used as the label. The double
# brackets keep the (n_samples, 1) shape the positional slice produced.
Y_orig = data[['class']].to_numpy()
print(X_orig.shape)
print(Y_orig.shape)

In [13]:
# Oversampled data, used for training.
# Stack the per-row pixel arrays into one array with the sample axis first.
X_aug = np.stack(final_data['image_pixel'].to_numpy(), axis=0)
# Select the label column by name rather than "last column", so adding another
# column to `final_data` cannot silently change what is used as the label. The
# double brackets keep the (n_samples, 1) shape the positional slice produced.
Y_aug = final_data[['class']].to_numpy()
print(X_aug.shape)
print(Y_aug.shape)

In [14]:
# Split the oversampled data into a training part and a validation part.
# NOTE(review): final_data contains repeated copies of the same rows and the
# split is random, so copies of one image can land in both parts; validation
# scores computed on X_test_aug are therefore likely optimistic. Splitting
# before oversampling would avoid this.
X_train_aug, X_test_aug, Y_train_aug, Y_test_aug = my_train_test_split_aug(X_aug, Y_aug)

In [15]:
# Turn the four split arrays into TensorFlow constants; the type is printed
# before and after the conversion to make the change visible.
print(type(X_train_aug))
X_train_aug, X_test_aug, Y_train_aug, Y_test_aug = map(
    tf.constant,
    (X_train_aug, X_test_aug, Y_train_aug, Y_test_aug),
)
print(type(X_train_aug))

In [16]:
# Small CNN for 28x28 RGB images: four conv + max-pool stages with a doubling
# number of filters, followed by two dense layers and a 7-way softmax (one
# output per lesion class).
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), input_shape=(28, 28, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2), padding='same'),

    Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2), padding='same'),

    Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2), padding='same'),

    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(7, activation='softmax'),
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# The labels are integer class ids, hence the sparse variant of the loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
model.summary()

In [17]:
# Stop once val_loss has not improved for 10 epochs and roll the model back to
# the weights of its best epoch, so that what gets saved afterwards is the best
# model seen rather than one trained 10 epochs past it.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1,
                           mode='auto', restore_best_weights=True)

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3,
                              verbose=1, mode='auto')

# Keep the returned History object so the per-epoch loss/accuracy values stay
# available after training instead of being lost as an unnamed cell output.
history = model.fit(X_train_aug, Y_train_aug,
                    epochs=50,
                    validation_data=(X_test_aug, Y_test_aug),
                    callbacks=[reduce_lr, early_stop])


In [18]:
# Persist the complete trained model under the path 'HAM10000'.
# NOTE(review): the path has no file extension; which on-disk format is written
# (and whether an extension is required) depends on the installed Keras
# version — confirm against the environment this notebook runs in.
model.save('HAM10000')


In [19]:
# Additionally store only the model weights in a separate file.
# NOTE(review): accepted file suffixes for save_weights differ between Keras
# versions — confirm ".hdf5" is valid for the installed one.
model.save_weights("Skin_Cancer.hdf5")

In [49]:
# Put 99% of the original (non-oversampled) samples into X_test / y_test for
# the accuracy check further down; the remaining 1% is not used.
# NOTE(review): the original rows are also part of final_data, which was split
# 80/20 for training, so most of these samples were seen during training.
# Accuracy measured on X_test is not an estimate for unseen images.
X_train,X_test,y_train,y_test = train_test_split(X_orig, Y_orig, test_size=0.99, random_state = 42)

In [63]:
# Spot check on a single sample: print the class probabilities, the true label
# and the predicted class id for sample 100 of X_test.
sample_batch = tf.expand_dims(tf.constant(X_test[100]), axis=0)  # add batch axis
class_probabilities = model.predict(sample_batch)
print(class_probabilities)
print(y_test[100])
pred = np.argmax(class_probabilities, axis=1)
print(pred)

In [78]:
# Accuracy (in percent) of the model on X_test / y_test. This is a single
# evaluation pass over one split, not k-fold cross validation.
from tensorflow import keras  # not used in this cell; kept in case later cells rely on the name

# Predict all samples in one batched call instead of one predict() call per
# image; the per-sample results are the same but the run is far faster.
pred = model.predict(X_test)
predictions = np.argmax(pred, axis=1)

# y_test holds one label per sample, possibly wrapped in a length-1 axis;
# flatten it so it lines up element-wise with `predictions`.
labels = np.asarray(y_test).reshape(-1)
correct = int(np.sum(predictions == labels))
print((correct / len(labels)) * 100)