## AIH301m - Melanoma Skin Cancer Detection
##### Họ và tên: Lê Minh Quân
##### MSSV : HE191289

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from glob import glob
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

In [None]:
# Root of the HAM10000 dataset as mounted on Kaggle. The metadata CSV and the
# image lookup are both derived from this single absolute path, so the cell does
# not depend on the notebook's current working directory.
dataset_path = '/kaggle/input/skin-cancer-mnist-ham10000'
df = pd.read_csv(os.path.join(dataset_path, 'HAM10000_metadata.csv'))

# Map each image id (file name without its extension) to the file's path.
# Images are searched exactly one directory level below the dataset root.
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x
                     for x in glob(os.path.join(dataset_path, '*', '*.jpg'))}

In [None]:
# Attach the on-disk image path to every metadata row by looking up its image_id.
# Ids with no matching file come back as None from dict.get.
df['path'] = df['image_id'].map(imageid_path_dict.get)

# Fail fast with a clear message if any row has no image file, instead of
# letting the missing value surface later inside the Keras data generators.
missing_paths = int(df['path'].isna().sum())
assert missing_paths == 0, f"{missing_paths} metadata rows have no matching image file"

In [None]:
# Display the metadata frame to inspect the newly added `path` column.
df

In [None]:
# Build the binary target as strings: '1' where the diagnosis code `dx` is
# 'mel', '0' for every other value.
is_melanoma = df['dx'].eq('mel')
df['labels'] = is_melanoma.map({True: '1', False: '0'})

In [None]:
# Display the frame again to inspect the new binary `labels` column.
df

In [None]:
# Split the data 70 / 15 / 15 into train / validation / test.
# Step 1 holds out 30% of the rows; step 2 halves that hold-out.
# Both steps stratify on the binary label and share the same random seed.
split_seed = 42
df_train, df_temp = train_test_split(
    df,
    test_size=0.3,
    stratify=df['labels'],
    random_state=split_seed,
)
df_val, df_test = train_test_split(
    df_temp,
    test_size=0.5,
    stratify=df_temp['labels'],
    random_state=split_seed,
)

In [None]:
# Data augmentation and preprocessing.
# ImageDataGenerator comes from the notebook's import cell; it is not
# re-imported here.

img_size = (224, 224)

# Training generator: scale pixels to [0, 1] and apply light augmentation.
datagen = ImageDataGenerator(
    rescale=1./255,  # normalise pixel values
    rotation_range=10,  # random rotation (degrees)
    horizontal_flip=True,  # random left-right flip
    zoom_range=0.05,  # random zoom
    width_shift_range=0.05,  # random horizontal shift
    height_shift_range=0.05,  # random vertical shift
    fill_mode='nearest',  # how pixels exposed by a transform are filled
)
train_data = datagen.flow_from_dataframe(
    df_train,
    x_col='path',
    y_col='labels',
    target_size=img_size,
    batch_size=32,
    shuffle=True,
    seed=42,  # same seed as the train/val/test split, for reproducible shuffling
    class_mode='binary',
)

# Validation generator: rescaling only, no augmentation, fixed row order.
valdatagen = ImageDataGenerator(rescale=1./255)
val_data = valdatagen.flow_from_dataframe(
    df_val,
    x_col='path',
    y_col='labels',
    target_size=img_size,
    batch_size=32,
    shuffle=False,
    class_mode='binary'
)


In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Estimate the "balanced" class weights from the training split only, so that
# validation and test rows do not influence anything used during training.
labels = df_train['labels'].values
label_values = np.unique(labels)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=label_values,
    y=labels
)

# Keras expects {class_index: weight}. Key each weight by the integer index the
# training generator assigned to that label string, rather than relying on the
# two orderings happening to coincide.
class_weights_dict = {
    train_data.class_indices[label]: float(weight)
    for label, weight in zip(label_values, class_weights)
}
print("Class weights:", class_weights_dict)

In [None]:
from tensorflow.keras.applications import EfficientNetV2B0, ResNet50, VGG16, MobileNetV2

# Registry of backbone constructors keyed by name; only ResNet50 is registered.
models_to_build = dict(ResNet50=ResNet50)


In [None]:
# def build_model(model_name, input=(224, 224, 3)):
#     if model_name not in models_to_build:
#         raise ValueError(f"Model {model_name} is not supported. Choose from {list(models_to_build.keys())}.")
    
#     base_model = models_to_build[model_name](input_shape=input, include_top=False, weights='imagenet')
#     base_model.trainable = False  # Freeze the base model for fine-tuning
    
#     model = models.Sequential([
#         base_model,
#         layers.GlobalAveragePooling2D(),
#         layers.Dense(128, activation='relu'),
#         layers.Dropout(0.3),
#         layers.Dense(64, activation='relu'),
#         layers.Dropout(0.3),
#         layers.Dense(1, activation='sigmoid')  # Assuming 2 classes for melanoma detection
#     ])
    
#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
#     return model

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classification head.
base_model = ResNet50(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze the backbone; only the new head is trained

# The data generators in this notebook feed RGB images scaled to [0, 1]
# (rescale=1./255). The ImageNet ResNet50 weights instead expect 0-255 pixels
# passed through resnet50.preprocess_input (RGB -> BGR, per-channel ImageNet
# mean subtraction). Undo the rescale and apply that preprocessing inside the
# model so the frozen backbone sees the input distribution it was trained on.
inputs = tf.keras.Input(shape=(224, 224, 3))
features = layers.Rescaling(255.0)(inputs)
features = tf.keras.applications.resnet50.preprocess_input(features)

# training=False keeps the frozen backbone (incl. batch norm) in inference mode.
features = base_model(features, training=False)

# Classification head: pooled features -> two dense layers with dropout ->
# a single sigmoid unit giving the probability of the positive class.
features = layers.GlobalAveragePooling2D()(features)
features = layers.Dense(128, activation='relu')(features)
features = layers.Dropout(0.3)(features)
features = layers.Dense(64, activation='relu')(features)
features = layers.Dropout(0.3)(features)
outputs = layers.Dense(1, activation='sigmoid')(features)

model = models.Model(inputs, outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.AUC()])


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop when validation loss has not improved for 3 consecutive epochs and roll
# the model back to the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

# Train the model.
# - batch_size is not passed: the generators already yield batches, and Keras
#   does not accept batch_size for generator/Sequence input.
# - class_weight must be a {class_index: weight} dict, so the dict built in the
#   class-weight cell is used rather than the raw NumPy array of weights.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=20,
    callbacks=[early_stopping],
    verbose=1,
    class_weight=class_weights_dict
)
# Save the model
model.save('abce_melanoma_detection.h5')

In [None]:
# Test generator: rescaling only (no augmentation) and no shuffling, so the
# batches follow the iterator's fixed sample order.
testdatagen = ImageDataGenerator(rescale=1./255)
test_flow_options = dict(
    x_col='path',
    y_col='labels',
    target_size=img_size,
    batch_size=32,
    shuffle=False,
    class_mode='binary',
)
test_data = testdatagen.flow_from_dataframe(df_test, **test_flow_options)

In [None]:
# Evaluate on the held-out test generator and print whatever model.evaluate
# returns (the loss and the metrics configured at compile time).
result = model.evaluate(test_data)
print(result)

# Predicted probabilities, thresholded at 0.5 into integer class labels.
y_pred_probs = model.predict(test_data)
y_pred_label = np.greater(y_pred_probs, 0.5).astype(int)

# Ground-truth class indices and class names, both taken from the generator.
y_true = test_data.classes
class_names = list(test_data.class_indices)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision / recall / F1 on the test set.
print("Model Classification Report:")
report_text = classification_report(y_true, y_pred_label, target_names=class_names)
print(report_text)

# Confusion matrix drawn as an annotated heatmap on an explicit Axes.
cm_alt = confusion_matrix(y_true, y_pred_label)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm_alt,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix - Alternative Model')
plt.show()