<a href="https://colab.research.google.com/github/Yanis52/Skin-Cancer-Detection/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install kagglehub




In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
from pathlib import Path
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import pandas as pd
import seaborn as sns
import shutil
import os
from pathlib import Path


In [3]:

drive.mount('/content/drive')
train_dir = Path("/content/drive/MyDrive/CBAM_project/augmented_data/Train")
test_dir = Path("/content/drive/MyDrive/CBAM_project/augmented_data/Test")



Mounted at /content/drive


In [4]:
# dst_train = "/content/dataset/Train"
# dst_test = "/content/dataset/Test"

# if not os.path.exists(dst_test):
#     shutil.copytree(test_dir, dst_test)
    # print("✅ Données de test copiées.")

In [5]:
dst_train = "/content/dataset/Train"
# dst_test = "/content/dataset/Test"

if not os.path.exists(dst_train):
    shutil.copytree(train_dir, dst_train)
    print("✅ Données de train copiées.")

✅ Données de train copiées.


**Définition du module CBAM**

In [44]:
class AttentionBlock(tf.keras.layers.Layer):
    def __init__(self, filters, ratio=16, name="AttentionBlock", **kwargs):
        super().__init__(name=name, **kwargs)
        self.filters = filters
        self.ratio = ratio

    def build(self, input_shape):
        initializer = tf.keras.initializers.HeNormal()

        # Channel-wise attention
        self.avg_pool = layers.GlobalAveragePooling2D()
        self.max_pool = layers.GlobalMaxPooling2D()
        self.mlp = tf.keras.Sequential([
            layers.Dense(self.filters // self.ratio, activation='relu', use_bias=False, kernel_initializer=initializer),
            layers.Dense(self.filters, activation='sigmoid', use_bias=False, kernel_initializer=initializer)
        ])

        # Spatial attention block
        self.spatial = tf.keras.Sequential([
            layers.Conv2D(1, kernel_size=5, strides=1, padding='same', activation='sigmoid'),
            layers.BatchNormalization()
        ])



    def call(self, inputs,training=False):
        # Channel attention
        avg_out = self.avg_pool(inputs)
        max_out = self.max_pool(inputs)
        avg_weight = self.mlp(avg_out)
        max_weight = self.mlp(max_out)
        scale = tf.nn.sigmoid(avg_weight + max_weight)
        scale = tf.reshape(scale, [-1, 1, 1, self.filters])
        x = inputs * scale


        # Spatial attention
        avg_map = tf.reduce_mean(x, axis=-1, keepdims=True)
        max_map = tf.reduce_max(x, axis=-1, keepdims=True)
        combined = tf.concat([avg_map, max_map], axis=-1)
        attn_map = self.spatial(combined)

        return x * attn_map


Construction du **modèle**

In [45]:
def build_cbam_model(input_shape=(128, 128, 3), num_classes=9):
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Rescaling(1./255)(inputs)

    for i, filters in enumerate([64, 128, 256, 512]):
        x = layers.Conv2D(filters, 3, padding="same", use_bias=False, name=f"conv_{i}")(x)
        x = layers.BatchNormalization(name=f"bn_{i}")(x)
        x = layers.LeakyReLU(name=f"leakyrelu_{i}")(x)
        x = AttentionBlock(filters, name=f"cbam_block_{i}")(x)
        x = layers.MaxPooling2D(name=f"maxpool_{i}")(x)

    x = layers.GlobalAveragePooling2D(name="gap")(x)
    x = layers.Dense(512, activation='leaky_relu', name="dense_1")(x)
    outputs = layers.Dense(num_classes, activation='softmax', name="output")(x)

    return tf.keras.Model(inputs, outputs, name="CBAM_Model")


Chargement du dataset depuis les images augmentées

In [46]:
# img_height = 128
# img_width = 128
# batch_size = 32
# num_classes = 9



# train_ds = tf.keras.preprocessing.image_dataset_from_directory(
#     train_dir,
#     seed=123,
#     image_size=(img_height, img_width),
#     batch_size=batch_size
# )

# val_ds = tf.keras.preprocessing.image_dataset_from_directory(
#     test_dir,
#     seed=123,
#     image_size=(img_height, img_width),
#     batch_size=batch_size
# )


In [47]:
# split avec tensorflow

In [48]:
img_height = 128
img_width = 128
batch_size = 32
num_classes = 9

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=0.8,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size
     )

val_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size
      )


Found 54000 files belonging to 9 classes.
Using 10800 files for training.
Found 54000 files belonging to 9 classes.
Using 10800 files for validation.


compilation

In [49]:
model = build_cbam_model(input_shape=(img_height, img_width, 3), num_classes=num_classes)
# optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


callbacks

In [50]:
checkpoint_path = "/content/drive/MyDrive/CBAM_project/models/model3.keras"
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    # tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)
]


In [None]:
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=callbacks
)


Epoch 1/100
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 228ms/step - accuracy: 0.2552 - loss: 2.0795 - val_accuracy: 0.1102 - val_loss: 3.1815 - learning_rate: 0.0010
Epoch 2/100
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 85ms/step - accuracy: 0.3775 - loss: 1.5700 - val_accuracy: 0.2711 - val_loss: 1.9697 - learning_rate: 0.0010
Epoch 3/100
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 86ms/step - accuracy: 0.4222 - loss: 1.4593 - val_accuracy: 0.3781 - val_loss: 1.5813 - learning_rate: 0.0010
Epoch 4/100
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 85ms/step - accuracy: 0.4564 - loss: 1.3913 - val_accuracy: 0.4504 - val_loss: 1.4486 - learning_rate: 0.0010
Epoch 5/100
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 84ms/step - accuracy: 0.4855 - loss: 1.3051 - val_accuracy: 0.2236 - val_loss: 3.5147 - learning_rate: 0.0010
Epoch 6/100
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━

Évolution de la précision et de la perte par epoch

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, acc, label='Train accuracy')
plt.plot(epochs, val_acc, label='Val accuracy')
plt.title('Accuracy over epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, loss, label='Train loss')
plt.plot(epochs, val_loss, label='Val loss')
plt.title('Loss over epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()


matrice de confusion

In [None]:


y_true = []
y_pred = []

class_names = train_ds.class_names

for images, labels in val_ds:
    preds = model.predict(images, verbose=0)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

# Matrice de confusion
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
plt.figure(figsize=(10, 8))
disp.plot(xticks_rotation=90, cmap="Blues")
plt.title("Matrice de confusion")
plt.show()


 Distribution des prédictions (par classe)

In [None]:


# Distribution des prédictions
df_pred = pd.DataFrame({'true': y_true, 'pred': y_pred})
df_pred['true_class'] = df_pred['true'].map(lambda i: class_names[i])
df_pred['pred_class'] = df_pred['pred'].map(lambda i: class_names[i])

plt.figure(figsize=(12, 5))
sns.countplot(x='pred_class', data=df_pred, order=class_names)
plt.title("Distribution des prédictions par classe")
plt.xticks(rotation=45)
plt.show()


Graphe des erreurs par classe

In [None]:
# Pourcentage d'erreurs par classe
error_by_class = (df_pred['true'] != df_pred['pred']).groupby(df_pred['true_class']).mean()

error_by_class.plot(kind='bar', figsize=(10, 5), color='salmon')
plt.title("Taux d'erreur par classe")
plt.ylabel("Taux d'erreur")
plt.xticks(rotation=45)
plt.ylim(0, 1)
plt.show()
