<a href="https://www.kaggle.com/code/abdulrahmanamukhlif/skin-cancer-classification?scriptVersionId=105830274" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# Importing the libraries
import os
import shutil
import glob
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import cv2
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random
from random import seed
#from livelossplot import PlotLossesKeras
import math
from tensorflow.keras.metrics import Recall,Precision,AUC
from keras.models import load_model
from keras.layers import GlobalAveragePooling2D, Dense

In [5]:
# Single place for every tunable setting used by the phase #1 run
class config:
    """Paths and hyper-parameters for the phase #1 training run."""

    # Directories handed to ImageDataGenerator.flow_from_directory
    TRAIN_IMAGES_FOLDER = '../input/skin-cancer-dataset-source/skin cancer dataset source after delete 2016-2017-2018/train/'
    TEST_IMAGES_FOLDER = '../input/skin-cancer-dataset-source/skin cancer dataset source after delete 2016-2017-2018/test/'

    # Data pre-processing inputs
    TARGET_NAME, SEED = "target", 42

    # Geometry of the images fed to the network
    IMAGE_HEIGHT = IMAGE_WIDTH = 299
    NO_CHANNELS = 3

    # Training-loop settings
    BATCH_SIZE = 64
    EPOCHS = 40
    PATIENCE = 8  # consumed by the early-stopping callback

In [6]:
# Preparing the data for phase#1
#
# Both generators rescale pixel values by 1/255, resize every image to
# (IMAGE_HEIGHT, IMAGE_WIDTH) and yield labels with class_mode="categorical".

train_datagen = ImageDataGenerator(
    rescale=1./255,
    dtype=tf.float32
)

train_generator = train_datagen.flow_from_directory(
    directory=config.TRAIN_IMAGES_FOLDER,
    target_size=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH),
    color_mode="rgb",
    batch_size=config.BATCH_SIZE,
    class_mode="categorical",
    shuffle=True,
    seed=config.SEED,  # seed the shuffle so the batch order is reproducible
)


# Creating a test generator for test data
test_datagen = ImageDataGenerator(
    rescale=1./255,
    dtype=tf.float32
)

test_generator = test_datagen.flow_from_directory(
    directory=config.TEST_IMAGES_FOLDER,
    target_size=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH),
    color_mode="rgb",
    batch_size=config.BATCH_SIZE,
    class_mode="categorical",
    shuffle=False,  # keep file order fixed so predictions can be matched to labels
    seed=config.SEED,
)

In [7]:
# Callbacks for phase #1: checkpoint the best model and stop early on a plateau

# Saves the full model (not only the weights) to "save_epochx.h5", checked at
# the end of every epoch and written only when val_accuracy improves.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="save_epochx.h5",
    monitor='val_accuracy',
    mode='auto',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
)

# Stops training after config.PATIENCE epochs without a val_accuracy
# improvement and restores the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=config.PATIENCE,
    restore_best_weights=True,
)

In [9]:
# Calculating the different step size for the model while training
# Training runs over full batches only (floor division).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
# Validation should cover every test image, so round up to include the final
# partial batch; floor division dropped up to batch_size - 1 samples.
STEP_SIZE_test = math.ceil(test_generator.n / test_generator.batch_size)

In [None]:
# Building a transfer learning model (Xception) using Keras

# ImageNet-pretrained Xception backbone without its classification head
xception = keras.applications.Xception(
    weights="imagenet",
    include_top=False,
    input_shape=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH, config.NO_CHANNELS),
)

# Layers with index below 115 are frozen; layers from 115 onwards are trainable
for index, layer in enumerate(xception.layers):
    layer.trainable = index >= 115

# Classification head: global average pooling followed by a 6-unit softmax layer
x = keras.layers.GlobalAveragePooling2D()(xception.output)
predictions = keras.layers.Dense(6, activation='softmax')(x)

xcept_model = keras.Model(xception.input, predictions)
   

In [None]:
# Print the layer-by-layer summary of the phase #1 model
xcept_model.summary()

In [None]:
# List each layer's index, name and trainable flag to check the freeze boundary
for position, current_layer in enumerate(xcept_model.layers):
    print(position, current_layer.name, current_layer.trainable)

In [None]:
# Compiling the transfer learning model
# The output layer already applies softmax, so the loss must be told it is
# receiving probabilities (from_logits=False). With from_logits=True the loss
# would apply softmax a second time and distort both loss values and gradients.
xcept_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.000001),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', Precision(), Recall()],
)

In [None]:
# Training the CNN model
# Model.fit accepts generators directly; fit_generator is deprecated.
# The epoch budget comes from config so it is defined in one place only.
xcept_history = xcept_model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=config.EPOCHS,
    validation_data=test_generator,
    validation_steps=STEP_SIZE_test,
    callbacks=[early_stopping_cb, checkpoint_cb],
)
# Persist the model as it stands when training ends
xcept_model.save('save_epoch.h5')

In [None]:
# Settings for phase #2; this re-declares `config`, replacing the phase #1 values
class config:
    """Paths and hyper-parameters for the phase #2 training run."""

    # Directories handed to ImageDataGenerator.flow_from_directory
    TRAIN_IMAGES_FOLDER = '../input/balance/split/train/'
    TEST_IMAGES_FOLDER = '../input/balance/split/test/'

    # Data pre-processing inputs
    TARGET_NAME, SEED = "target", 42

    # Geometry of the images fed to the network
    IMAGE_HEIGHT = IMAGE_WIDTH = 299
    NO_CHANNELS = 3

    # Training-loop settings
    BATCH_SIZE = 64
    EPOCHS = 30
    PATIENCE = 8  # consumed by the early-stopping callback

In [None]:
# Preparing the data for Phase#2
#
# Same preprocessing as phase #1 (1/255 rescale, resize to the configured
# image size), but labels are produced with class_mode="binary".

train_datagen = ImageDataGenerator(
    rescale=1./255,
    dtype=tf.float32
)

train_generator = train_datagen.flow_from_directory(
    directory=config.TRAIN_IMAGES_FOLDER,
    target_size=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH),
    color_mode="rgb",
    batch_size=config.BATCH_SIZE,
    class_mode="binary",
    shuffle=True,
    seed=config.SEED,  # seed the shuffle so the batch order is reproducible
)

# Creating a test generator for test data
test_datagen = ImageDataGenerator(
    rescale=1./255,
    dtype=tf.float32
)

test_generator = test_datagen.flow_from_directory(
    directory=config.TEST_IMAGES_FOLDER,
    target_size=(config.IMAGE_HEIGHT, config.IMAGE_WIDTH),
    color_mode="rgb",
    batch_size=config.BATCH_SIZE,
    class_mode="binary",
    shuffle=False,  # keep file order fixed so predictions can be matched to labels
    seed=config.SEED,
)

In [None]:
# Callbacks for phase #2: checkpoint the best model and stop early on a plateau

# Saves the full model (not only the weights) to "save_epochxx.h5", checked at
# the end of every epoch and written only when val_accuracy improves.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="save_epochxx.h5",
    monitor='val_accuracy',
    mode='auto',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
)

# Stops training after config.PATIENCE epochs without a val_accuracy
# improvement and restores the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=config.PATIENCE,
    restore_best_weights=True,
)

In [16]:

# Building a transfer learning model (Xception)for phase#2
# Loads a previously saved model, cuts it at the 4th layer from the end and
# writes the truncated network to 'aaa.h5' for the next cell to reload.

xception= load_model('../input/xception-model-tri-3/save_epoch.h5')  # or ('../save_epoch.h5')
# NOTE(review): include_top is normally a constructor argument of the
# keras.applications builders; assigning it on an already-loaded model
# presumably has no effect on the graph — confirm and consider removing.
xception.include_top=False

# New output = output of the 4th layer from the end, i.e. the last three layers
# are discarded.
# NOTE(review): if the loaded file is the phase #1 model (backbone + pooling +
# dense), this also drops the backbone's final layer — verify that is intended.
x = xception.layers[-4].output
xcept_model = keras.models.Model(inputs=xception.input, outputs=x)

# Persist the truncated model to disk
xcept_model.save('aaa.h5')

In [17]:
# Reload the truncated backbone and attach a single-unit sigmoid head
xception = load_model('./aaa.h5')

# Layers with index below 126 are frozen; layers from 126 onwards are trainable
for index, layer in enumerate(xception.layers):
    layer.trainable = index >= 126

# Head: global average pooling -> Dense(256, relu) -> Dropout(0.5) -> Dense(1, sigmoid)
x = keras.layers.GlobalAveragePooling2D()(xception.layers[-1].output)
x = keras.layers.Dense(256, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(1, activation="sigmoid")(x)

xcept_model = keras.models.Model(inputs=xception.input, outputs=x)

In [18]:
# Print the layer-by-layer summary of the phase #2 model
xcept_model.summary()

In [19]:
# List each layer's index, name and trainable flag to check the freeze boundary
for position, current_layer in enumerate(xcept_model.layers):
    print(position, current_layer.name, current_layer.trainable)

In [20]:
# Compiling the transfer learning model
# The output layer already applies a sigmoid, so the loss must be told it is
# receiving probabilities (from_logits=False). With from_logits=True the loss
# would apply a sigmoid a second time and distort both loss values and gradients.
xcept_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.000001),
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy', Precision(), Recall()],
)



In [21]:
# Training the CNN model
# Step counts are derived here from the *current* generators: the STEP_SIZE_*
# globals were computed from the phase #1 generators and do not describe the
# phase #2 dataset.
phase2_train_steps = train_generator.n // train_generator.batch_size
# Round up so the final partial batch of the test set is evaluated as well
phase2_val_steps = math.ceil(test_generator.n / test_generator.batch_size)

# Model.fit accepts generators directly; fit_generator is deprecated.
xcept_history = xcept_model.fit(
    train_generator,
    steps_per_epoch=phase2_train_steps,
    epochs=config.EPOCHS,
    validation_data=test_generator,
    validation_steps=phase2_val_steps,
    callbacks=[early_stopping_cb, checkpoint_cb],
)
# Persist the model as it stands when training ends
xcept_model.save('target_xception_model.h5')

In [22]:
# Plot the loss and accuracy curves recorded during training.
# Uses the `plt` alias imported at the top of the notebook and a 1x2 grid, so
# no half-empty 2x2 figure is produced.
history = xcept_history.history
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: training vs. validation loss
loss_ax.plot(history['loss'], label='Loss')
loss_ax.plot(history['val_loss'], label='Val_Loss')
loss_ax.set(title='Loss Evolution', xlabel='Epoch', ylabel='Loss')
loss_ax.legend()

# Right panel: training vs. validation accuracy
acc_ax.plot(history['accuracy'], label='Accuracy')
acc_ax.plot(history['val_accuracy'], label='Val_Accuracy')
acc_ax.set(title='Accuracy Evolution', xlabel='Epoch', ylabel='Accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()  # render the figure without echoing the last artist's repr

In [23]:
# Predicting output on the test dataset
# test_generator was built with shuffle=False; the cells below rely on the rows
# of y_pred following the order of test_generator.classes — presumably true for
# an unshuffled generator, verify if the generator is changed.
y_pred = xcept_model.predict(test_generator)
# Echo the raw model outputs as the cell result
y_pred

In [None]:
#Computing the TPR and FPR values from the roc curve
from sklearn.metrics import roc_curve

# Ground truth comes from the unshuffled test generator; the previously used
# `y_test` was never defined anywhere and raised a NameError.
# The scores are flattened to 1-D because the model emits one column per sample.
fpr, tpr, thresholds = roc_curve(test_generator.classes, np.ravel(y_pred))

In [None]:
# Plotting the ROC curve
def plot_roc_curve (fpr, tpr, label = None):
    """Draw a ROC curve on the current matplotlib axes.

    Parameters
    ----------
    fpr : array-like
        False positive rates (x axis).
    tpr : array-like
        True positive rates (y axis).
    label : str, optional
        Legend entry for the curve. A legend is drawn only when a label is
        given, so calls without a label render exactly as before.
    """
    plt.plot(fpr, tpr, linewidth = 2, label = label)
    plt.plot([0,1], [0,1], 'k--') # Dashed diagonal
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate (Recall)")
    plt.grid()
    if label is not None:
        # Without a legend the label passed to plot() is never shown
        plt.legend()

plot_roc_curve(fpr, tpr)
plt.show()

In [24]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support

# Threshold the model scores at 0.5 to obtain hard 0/1 predictions
Y_pred = (np.ravel(y_pred) > 0.5).astype(int).tolist()

true_labels = test_generator.classes
class_names = ['benign', 'malignant']

cm = confusion_matrix(true_labels, Y_pred)

# Wrap the matrix in a labelled DataFrame so the heatmap axes show class names
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

plt.figure(figsize=(4, 3))
sns.heatmap(cm_df, annot=True, fmt="d")
overall_accuracy = accuracy_score(true_labels, Y_pred)
plt.title('confusion_matrix \nAccuracy:{0:.3f}'.format(overall_accuracy))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

In [25]:
# Per-class (one-vs-rest) counts derived from the confusion matrix `cm`,
# whose rows are true labels and whose columns are predicted labels.
TP = np.diag(cm).astype(float)
FP = cm.sum(axis=0).astype(float) - TP   # predicted as the class, but wrong
FN = cm.sum(axis=1).astype(float) - TP   # belong to the class, but missed
TN = float(cm.sum()) - (FP + FN + TP)    # everything else

# Rates computed element-wise, one value per class
TPR = TP/(TP+FN)   # sensitivity / recall / hit rate
TNR = TN/(TN+FP)   # specificity
PPV = TP/(TP+FP)   # precision
NPV = TN/(TN+FN)   # negative predictive value
FPR = FP/(FP+TN)   # fall-out
FNR = FN/(TP+FN)   # miss rate
FDR = FP/(TP+FP)   # false discovery rate
ACC = (TP+TN)/(TP+FP+FN+TN)   # accuracy per class
F1=2*(PPV*TPR)/(PPV+TPR)      # harmonic mean of precision and recall

for caption, values in (
    ('Accuracy : ', ACC),
    ('Precision : ', PPV),
    ('Recall (Sensitivity) : ', TPR),
    ('Specificity : ', TNR),
    ('F1-score : ', F1),
):
    print(caption, values)

In [None]:
from sklearn.metrics import classification_report

# Build the text report for the thresholded predictions
report = classification_report(
    y_true=test_generator.classes,
    y_pred=Y_pred,
    target_names=['benign', 'malignant'],
)

print(report)