# Importing libraries

In [None]:
import os
from PIL import Image 
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import EfficientNetB3

import warnings
warnings.filterwarnings("ignore")

# Reading Data

In [1]:
def train_df(tr_path):
    classes = []
    class_paths = []
    files = os.listdir(tr_path)
    for file in files:
        label_dir = os.path.join(tr_path, file)
        label = os.listdir(label_dir)
        for image in label:
            image_path = os.path.join(label_dir, image)
            class_paths.append(image_path)
            classes.append(file)
    image_classes = pd.Series(classes, name='Class')
    image_paths = pd.Series(class_paths, name='Class Path')
    tr_df = pd.concat([image_paths, image_classes], axis=1)
    return tr_df

def test_df(test_path):
    classes = []
    class_paths = []
    files = os.listdir(test_path)
    for file in files:
        label_dir = os.path.join(test_path, file)
        label = os.listdir(label_dir)
        for image in label:
            image_path = os.path.join(label_dir, image)
            class_paths.append(image_path)
            classes.append(file)
    image_classes = pd.Series(classes, name='Class')
    image_paths = pd.Series(class_paths, name='Class Path')
    ts_df = pd.concat([image_paths, image_classes], axis=1)
    return ts_df

In [None]:
# Load the path/label tables for the training and test folders of the dataset.
tr_df, ts_df = (
    train_df('/kaggle/input/skin-cancer-malignant-vs-benign/train'),
    test_df('/kaggle/input/skin-cancer-malignant-vs-benign/test'),
)

In [None]:
# Split the held-out data evenly into validation and test sets.
# Stratifying on the label keeps the class proportions the same in both halves,
# so validation and test metrics are measured on comparable class mixes.
valid_df, ts_df = train_test_split(ts_df, train_size=0.5, shuffle=True, random_state=20,
                                   stratify=ts_df['Class'])

# Preprocessing

In [None]:
# Input configuration shared by the data generators and the model.
batch_size = 16
img_size = (224, 224)
channels = 3
img_shape = (*img_size, channels)

# Generator created with no arguments, i.e. no augmentation or rescaling options are set here.
gen = ImageDataGenerator()

# Keyword arguments common to the three dataframe-backed generators.
flow_kwargs = dict(
    x_col='Class Path',
    y_col='Class',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    batch_size=batch_size,
)

tr_gen = gen.flow_from_dataframe(tr_df, shuffle=True, **flow_kwargs)

valid_gen = gen.flow_from_dataframe(valid_df, shuffle=True, **flow_kwargs)

# The test generator is not shuffled, so its batches follow the order of ts_df.
ts_gen = gen.flow_from_dataframe(ts_df, shuffle=False, **flow_kwargs)

In [None]:
# Preview one training batch in a 4x4 grid, each image titled with its class name.
g_dict = tr_gen.class_indices      # mapping of class name -> label index
classes = list(g_dict.keys())      # class names; assumes key order matches label index
images, labels = next(tr_gen)      # one batch of images with their label vectors

plt.figure(figsize= (20, 20))

# The grid has 16 cells, but a batch can hold fewer images (smaller batch_size or
# a small dataset); cap the loop so indexing never runs past the batch.
for i in range(min(16, len(images))):
    plt.subplot(4, 4, i + 1)
    image = images[i] / 255       # scale pixel values into [0, 1] for imshow
    plt.imshow(image)
    index = np.argmax(labels[i])  # position of the largest entry in the label vector
    class_name = classes[index]
    plt.title(class_name, color= 'blue', fontsize= 12)
    plt.axis('off')
plt.show()

# Building model

In [None]:
# EfficientNetB3 backbone with ImageNet weights, created without its
# classification top and with max pooling requested for its output.
base_model = EfficientNetB3(
    input_shape=img_shape,
    include_top=False,
    weights="imagenet",
    pooling='max',
)

# Classification head stacked on the backbone: dense layer, dropout, 2-way softmax.
model = Sequential()
model.add(base_model)
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.40, seed=20))
model.add(Dense(2, activation='softmax'))

model.compile(
    optimizer=Adamax(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train on the training generator for a fixed number of epochs,
# using the validation generator as validation data.
epochs = 25

history = model.fit(
    tr_gen,
    validation_data=valid_gen,
    validation_steps=None,
    epochs=epochs,
    verbose=1,
)

# Testing and Validation

In [None]:
# Evaluate the model on each split, in order: train, validation, test.
# Each result is read below as [loss, accuracy].
train_score, valid_score, test_score = (
    model.evaluate(split_gen, verbose=1) for split_gen in (tr_gen, valid_gen, ts_gen)
)

summary_rows = (
    ("Train Loss: ", "Train Accuracy: ", train_score),
    ("Validation Loss: ", "Validation Accuracy: ", valid_score),
    ("Test Loss: ", "Test Accuracy: ", test_score),
)
for row_idx, (loss_label, acc_label, score) in enumerate(summary_rows):
    if row_idx:
        # Separator line between consecutive splits (none before the first).
        print('-' * 20)
    print(loss_label, score[0])
    print(acc_label, score[1])

In [None]:
# Predict class scores for the test set. Model.predict accepts generators directly;
# predict_generator is deprecated and is not available in recent Keras releases.
preds = model.predict(ts_gen)
# Predicted class = index of the highest score in each row.
y_pred = np.argmax(preds, axis=1)

In [None]:
# Per-class precision/recall/F1 on the test set, labelled with the class names.
report = classification_report(ts_gen.classes, y_pred, target_names=classes)
print(report)

In [None]:
# Plot the test-set confusion matrix as an annotated heat map.
g_dict = ts_gen.class_indices      # mapping of class name -> label index
classes = list(g_dict.keys())      # class names used as tick labels

# Rows are passed as true labels, columns as predicted labels.
cm = confusion_matrix(ts_gen.classes, y_pred)

plt.figure(figsize= (10, 10))
plt.imshow(cm, interpolation= 'nearest', cmap= plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()

tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation= 45)
plt.yticks(tick_marks, classes)

# Cells above half of the maximum count get white text so the number stays
# readable against the darker background; the rest get black text.
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j], horizontalalignment= 'center', color= 'white' if cm[i, j] > thresh else 'black')

plt.ylabel('True Label')
plt.xlabel('Predicted Label')
# Run the layout pass only after the axis labels exist so they are included in it.
plt.tight_layout()

plt.show()