# importing needed libraries

In [1]:
#OS libs
import os
import shutil
import itertools
import pathlib


#Data handling tools
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix , classification_report

#Deep learning libs
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D , MaxPooling2D , Flatten , Activation , Dense , Dropout , BatchNormalization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam , Adamax
from tensorflow.keras import regularizers

# loading train dataset and making dictionary of images and labels

In [2]:
train_data_dir = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train'
filepaths = []
labels = []
folds = os.listdir(train_data_dir)
for fold in folds:
    foldpath = os.path.join(train_data_dir, fold)
    filelist = os.listdir(foldpath)
    for file in filelist:
        fpath = os.path.join(foldpath, file)
        
        filepaths.append(fpath)
        labels.append(fold)

# Concatenate data paths with labels into one dataframe
Fseries = pd.Series(filepaths, name= 'filepaths')
Lseries = pd.Series(labels, name='labels')
train_df = pd.concat([Fseries, Lseries], axis= 1)

# training dataframe presentation 

In [4]:
train_df.head()

Unnamed: 0,filepaths,labels
0,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
1,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
2,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
3,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
4,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign


# loading test dataset and making dictionary of images and labels

In [5]:
test_data_dir = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test'
filepaths = []
labels = []

folds = os.listdir(test_data_dir)
for fold in folds:
    foldpath = os.path.join(test_data_dir, fold)
    filelist = os.listdir(foldpath)
    for file in filelist:
        fpath = os.path.join(foldpath, file)
        
        filepaths.append(fpath)
        labels.append(fold)

# Concatenate data paths with labels into one dataframe
Fseries = pd.Series(filepaths, name= 'filepaths')
Lseries = pd.Series(labels, name='labels')
test_df = pd.concat([Fseries, Lseries], axis= 1)

# testing dataframe presentation

In [7]:
test_df.head()

Unnamed: 0,filepaths,labels
0,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
1,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
2,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
3,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign
4,/kaggle/input/melanoma-skin-cancer-dataset-of-...,benign


# creating image data generator

In [8]:
# crobed image size
batch_size = 16
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

tr_gen = ImageDataGenerator()
ts_gen = ImageDataGenerator()

train_gen = tr_gen.flow_from_dataframe( dataframe=train_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                    color_mode= 'rgb', shuffle= True, batch_size= batch_size)

test_gen = ts_gen.flow_from_dataframe( dataframe=test_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                    color_mode= 'rgb', shuffle= False, batch_size= batch_size)

Found 9605 validated image filenames belonging to 2 classes.
Found 1000 validated image filenames belonging to 2 classes.


# finetuning MobileNetV2 on our dataset

In [9]:
# Create Model Structure
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
class_count = len(list(train_gen.class_indices.keys())) # to define number of classes in dense layer


# Load the MobileNetV2 model with pre-trained weights, excluding the top fully connected layers
base_model = MobileNetV2(include_top= False, weights= "imagenet", input_shape= img_shape, pooling= 'max')

# Freeze the base model layers
base_model.trainable = False

# Build the model
model = Sequential([
    base_model,
    BatchNormalization(axis= -1, momentum= 0.99, epsilon= 0.001),
    Dense(512, activation='relu'),  # Add a dense layer
    Dropout(0.5),  # Add dropout for regularization
    Dense(class_count, activation='softmax')  # Output layer for binary classification (benign vs malignant)
])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:

model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # Use categorical_crossentropy for multi-class classification
              metrics=['accuracy'])
history = model.fit(x= train_gen, epochs= 10, verbose= 1, validation_data= test_gen, 
                    validation_steps= None, shuffle= False)

Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1730104725.159644     168 service.cc:145] XLA service 0x7dd10c0104e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730104725.159706     168 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1730104725.159710     168 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m  3/601[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m43s[0m 72ms/step - accuracy: 0.6528 - loss: 1.0807   

I0000 00:00:1730104731.566337     168 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 145ms/step - accuracy: 0.7839 - loss: 0.6451 - val_accuracy: 0.8840 - val_loss: 0.3696
Epoch 2/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 34ms/step - accuracy: 0.8499 - loss: 0.3734 - val_accuracy: 0.8580 - val_loss: 0.4156
Epoch 3/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 34ms/step - accuracy: 0.8636 - loss: 0.3381 - val_accuracy: 0.8820 - val_loss: 0.3504
Epoch 4/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 34ms/step - accuracy: 0.8768 - loss: 0.3057 - val_accuracy: 0.8910 - val_loss: 0.3642
Epoch 5/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 34ms/step - accuracy: 0.8790 - loss: 0.2925 - val_accuracy: 0.8840 - val_loss: 0.3160
Epoch 6/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 34ms/step - accuracy: 0.8837 - loss: 0.2741 - val_accuracy: 0.8760 - val_loss: 0.3903
Epoch 7/10
[1m601/601[0m

# model evaluation

In [15]:
ts_length = len(test_df)
test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
test_steps = ts_length // test_batch_size

train_score = model.evaluate(train_gen, steps= test_steps, verbose= 1)
test_score = model.evaluate(test_gen, steps= test_steps, verbose= 1)

print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

Train Loss:  0.140945702791214
Train Accuracy:  0.940625011920929
--------------------
Test Loss:  0.22315578162670135
Test Accuracy:  0.9468749761581421
