In [None]:
import os,glob
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import time


from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import Callback,EarlyStopping
from tensorflow.keras import Sequential
from sklearn import metrics
#pretrained backbones (ResNet50V2, MobileNetV2, InceptionResNetV2) and the ResNet50 preprocessing helper
from tensorflow.keras.applications import ResNet50V2, MobileNetV2, InceptionResNetV2
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.metrics import classification_report
#for Xception
from tf_explain.core.activations import ExtractActivations
from tensorflow.keras.applications.xception import decode_predictions

In [None]:
#root folder of the training data; each immediate sub-folder holds the images of one class
file_path='data'
#class names are the sub-folder names (benign / malignant according to the original note).
#plain files that may sit next to the class folders (e.g. .DS_Store) are skipped, and the
#result is sorted because os.listdir returns entries in arbitrary order
name_class=sorted(
    entry for entry in os.listdir(file_path)
    if os.path.isdir(os.path.join(file_path, entry))
)
name_class

In [None]:
#collect the path of every image file; glob is called without recursive=True, so '**'
#matches exactly one directory level (<file_path>/<class folder>/<name>.<ext>).
#sorted so the listing does not depend on filesystem enumeration order
filepaths=sorted(glob.glob(file_path+'/**/*.*'))
#the label of an image is the name of the folder that directly contains it
labels=[os.path.basename(os.path.dirname(path)) for path in filepaths]
#preview only: displaying the full list would flood the cell output
labels[:5]

In [None]:
#one row per image: 'Filepath' (string path) and 'Label' (class folder name)
filepath= pd.Series(filepaths, name='Filepath').astype(str)
labels=pd.Series(labels, name='Label')
data=pd.concat([filepath, labels],axis=1)
#shuffle the rows with a fixed random_state so a fresh kernel reproduces the same order.
#NOTE: after the shuffle `labels` is still in its pre-shuffle order and is no longer
#row-aligned with `data`; use data['Label'] whenever labels must match the rows of `data`
data=data.sample(frac=1, random_state=42).reset_index(drop=True)
data.head(5)

In [None]:
#class balance: number of images per label, shown as a bar chart
counts = data['Label'].value_counts()
sb.barplot(y=counts, x=counts.index)
plt.gca().set_xlabel('Type')
#class names can be long, so the tick labels are drawn vertically
plt.xticks(rotation=90)

In [None]:
#hold out 20% of the rows as the test split (test_size=0.20), keeping the class
#proportions the same in both parts.
#stratify must be row-aligned with the array being split: `data` was shuffled after the
#`labels` Series was built, so the labels are taken from data['Label'] rather than `labels`
train, test= train_test_split(
    data,
    test_size=0.20,
    stratify=data['Label'],
    random_state=53
)

In [None]:
#training images: pixel values scaled by 1/255 plus random geometric augmentation
#(rotation, zoom, shifts, shear, horizontal flip); pixels exposed by a transform are
#filled from the nearest edge.
#NOTE(review): the InceptionResNetV2 ImageNet weights used further down presumably expect
#inception_resnet_v2.preprocess_input scaling rather than 1/255 - confirm this is intended
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    horizontal_flip=True,
    rotation_range=20,
    shear_range=0.15,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode="nearest",
)

#evaluation images: same 1/255 scaling, no augmentation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
#arguments shared by all three generators: column names of the frames, the 224x224
#input size, one-hot ('categorical') labels and the batch size
flow_kwargs = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(224,224),
    class_mode='categorical',
    batch_size=256
)

#training batches: augmented and reshuffled, seeded for repeatability
train_gen = train_datagen.flow_from_dataframe(
    dataframe=train,
    shuffle=True,
    seed=42,
    **flow_kwargs
)
#validation batches: built from test_datagen (rescale only) so the validation metrics
#are measured on unmodified images instead of randomly augmented ones.
#NOTE: validation and test both read the `test` frame, exactly as in the original cell
valid_gen = test_datagen.flow_from_dataframe(
    dataframe=test,
    shuffle=False,
    **flow_kwargs
)
#test batches: unshuffled so predictions stay in the row order of `test`
test_gen = test_datagen.flow_from_dataframe(
    dataframe=test,
    shuffle=False,
    **flow_kwargs
)

In [None]:
#InceptionResNetV2 convolutional base with ImageNet weights and without its original
#classification top, for 224x224 RGB input
pretrained_model= InceptionResNetV2(
    input_shape=(224,224,3),
    include_top=False,
    weights='imagenet'
)
#freeze the backbone so only the layers stacked on top of it are trained.
#the attribute must be spelled `trainable`; a misspelled name only creates an unused
#attribute and silently leaves every backbone weight trainable
pretrained_model.trainable= False

In [None]:
#classifier: pretrained backbone -> flatten -> 1024-unit ReLU layer -> dropout -> 2-class output
model = Sequential([
    pretrained_model,
    Flatten(name="flatten"),
    Dense(1024, activation='relu', name='hidden_layer'),
    Dropout(0.5),
    #the two classes are mutually exclusive and the labels are one-hot
    #(class_mode='categorical'), so the output is a softmax distribution instead of two
    #independent sigmoid scores. For a 2-way one-hot target, binary cross-entropy on a
    #softmax output equals categorical cross-entropy, so either loss works with this head
    Dense(2, activation='softmax', name='output')
])

In [None]:
#training setup: Adam optimizer, binary cross-entropy loss, accuracy reported per epoch
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',  #other optimizers can be tried here
)

In [None]:
#callback list for training: stop once validation accuracy (higher is better) has gone
#2 epochs without improving; min_delta=0 means any increase counts as an improvement
my_callbacks = [
    EarlyStopping(mode='max', monitor='val_accuracy', patience=2, min_delta=0),
]


In [None]:
#wall-clock start, read again after training to report the elapsed time
start = time.time()
#train for at most 30 epochs; `my_callbacks` (early stopping on val_accuracy) has to be
#passed here, otherwise the callback is never invoked and all 30 epochs always run
history= model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=30,
    callbacks=my_callbacks
)

In [None]:
#write the trained network to disk (HDF5 file), then show the seconds elapsed since `start`
model.save( "inception-resnet.h5" )
time.time() - start

In [None]:
#learning curves: training vs validation, one figure per metric
history_df = pd.DataFrame(history.history)
for curve_cols, curve_title in (
    (['accuracy', 'val_accuracy'], "Accuracy"),
    (['loss', 'val_loss'], "Loss"),
):
    history_df[curve_cols].plot()
    plt.title(curve_title)
    plt.show()


In [None]:
#evaluate on the held-out generator; results[0] is the loss and results[1] the accuracy
#metric requested in compile()
results= model.evaluate(test_gen, verbose=0)
#'.5f' / '.2f' set the number of decimals; the former '5f' / '2f' only set a minimum
#field width and therefore always printed six decimals
print("\tTest Loss: {:.5f}".format(results[0]))
print("test Accuracy: {:.2f}%".format(results[1]*100))

In [None]:
#class-index -> class-name lookup, inverted from the training generator's name -> index map
labels = {index: name for name, index in train_gen.class_indices.items()}

#for every test image take the output column with the highest score and translate that
#column index into its class name
pred = [labels[index] for index in np.argmax(model.predict(test_gen), axis=1)]


In [None]:
#true class names in the row order of `test`, compared against the predicted names
y_test = test['Label'].tolist()
print(classification_report(y_test, pred))

In [None]:
#confusion matrix
#axis order is fixed explicitly: the sorted class names seen in the true or predicted
#labels are passed to confusion_matrix via `labels` and reused as tick labels, so the
#plot shows the real class names instead of the placeholders False/True and is not
#limited to two classes
class_names = sorted(set(y_test) | set(pred))
confusion_matrix = metrics.confusion_matrix(y_test, pred, labels=class_names)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = class_names)
cm_display.plot()
plt.show()