In [2]:
import pandas as pd
import numpy as np 
from plotly import graph_objects as go
import matplotlib.pyplot as plt
from pathlib import Path

import sklearn
from sklearn.model_selection import train_test_split
import tensorflow as tf 

import json
import os
from random import randint
import random
import shutil
from numpy import array
from numpy import argmax
from numpy import array_equal

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping


Preprocessing

In [3]:
# Chargement des ensembles
BATCH_SIZE = 64
#img_size = (256, 256)   # taille originelle du dataset
img_size = (128, 128)  # pour faire une réduction de taille : de 256,256 à 128,128
seed = 42

In [4]:
# Fonction pour convertir les images en niveaux de gris en images RGB
def grayscale_to_rgb(images):
    return np.repeat(images, 3, axis=-1)


In [5]:
# Fonction pour charger et prétraiter les images
def load_and_preprocess_image(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=1)  # Charger en niveaux de gris
    image = tf.image.resize(image, img_size)
    image = image / 255.0  # Normaliser
    return image, label

In [7]:
# Fonction pour créer un dataset à partir des chemins d'images et des labels
def create_dataset(file_paths, labels, batch_size, seed):
    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(grayscale_to_rgb, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1000, seed=seed)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

# Chemin vers les données
data_dir = "../../../../Main_dataset_Sample_Binaire"

# Dictionnaire pour mapper les labels de chaînes de caractères aux valeurs numériques
label_map = {"Normal": 1, "Malades": 0}

# Lister les fichiers et les labels
file_paths = []
labels = []
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png"):
            file_paths.append(os.path.join(root, file))
            labels.append(int(root.split('/')[-1]))  # Supposons que les noms de dossiers sont les labels : ne marche PAS !!

# Convertir les labels en tenseurs
labels = np.array(labels)

# Créer les datasets d'entraînement et de validation
train_size = int(0.8 * len(file_paths))
train_file_paths = file_paths[:train_size]
train_labels = labels[:train_size]
val_file_paths = file_paths[train_size:]
val_labels = labels[train_size:]

train_dataset = create_dataset(train_file_paths, train_labels, BATCH_SIZE, seed)
val_dataset = create_dataset(val_file_paths, val_labels, BATCH_SIZE, seed)

ValueError: invalid literal for int() with base 10: 'Normal'

In [5]:
# Définir les poids de classe
from sklearn.utils.class_weight import compute_class_weight
class_indices = img_generator_flow_train.class_indices
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(list(class_indices.values())),
    y=img_generator_flow_train.classes
)
class_weights = dict(enumerate(class_weights))

# code donné par Malika :
# classes = np.unique(y_train) 
# class_weights = compute_class_weight('balanced', classes=classes, y=y_train)

# # Convertir en dict pour Keras
# class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}
# print("Class Weights:", class_weight_dict)

Model Inception v3

Fit without fine-tuning

In [7]:
base_model = tf.keras.applications.InceptionV3(input_shape=(128,128,3),
                                               include_top=False,
                                               weights = "imagenet"
                                               )
base_model.trainable = False

2025-01-23 15:09:54.910573: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [8]:
model_binary_simple = tf.keras.Sequential([
    base_model,
    #tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model_binary_simple.summary()

In [None]:
# Définir un callback d'early stopping
early_stopping = EarlyStopping(
    monitor='val_loss',  # Surveiller la perte de validation
    patience=3,          # Nombre d'époques sans amélioration avant d'arrêter
    restore_best_weights=True  # Rétablir les poids du meilleur modèle
)

In [None]:
# Let's create a learning rate schedule to decrease the learning rate as we train the model.
initial_learning_rate = 0.001

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=1000,
    decay_rate=0.96,
    staircase=True)

# Créer un optimiseur avec le planning du taux d'apprentissage
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

In [9]:
model_binary_simple.compile(
              #optimizer=tf.keras.optimizers.Adam(learning_rate = 0.001),
              optimizer=optimizer
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = [tf.keras.metrics.BinaryAccuracy()])

In [10]:
import time
starttime = time.time

# Entraîner le modèle avec early stopping
history = model_binary_simple.fit(
    rgb_train_generator,
    validation_data=rgb_val_generator,
    epochs=10,  # Plus d'époques pour laisser l'early stopping décider
    steps_per_epoch=len(rgb_train_generator),
    validation_steps=len(rgb_val_generator),
    class_weight=class_weights,
    callbacks=[early_stopping]  #  callback 
)

stoptime = time.time
print(stoptime - starttime)

Epoch 1/10


   1462/Unknown [1m991s[0m 673ms/step - binary_accuracy: 0.9481 - loss: 0.1693

In [None]:
#model_binary_simple.save("./Binary_Inception_woFineTun.keras")

Fit with fine-tuning

In [7]:
len(base_model.layers)

311

In [None]:
base_model = tf.keras.applications.InceptionV3(input_shape=(128,128,3),
                                               include_top=False,
                                               weights = "imagenet"
                                               )

base_model.trainable = True

fine_tune_at = len(base_model.layers) - 30
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable = False
for layer in base_model.layers[fine_tune_at:]:
  layer.trainable = True

In [None]:
model_binary_finetuned = tf.keras.Sequential([
    base_model,
    #tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model_binary_finetuned.summary()

In [None]:
model_binary_finetuned.compile(optimizer=tf.keras.optimizers.Adam(learning_rate = 0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = [tf.keras.metrics.BinaryAccuracy()])

In [None]:
import time
starttime = time.time
model_binary_finetuned.fit(img_generator_flow_train, validation_data=img_generator_flow_valid, epochs=10, # steps_per_epoch=20,
                class_weight=d_cat)
# par défaut, steps_per_epoch= taille du dataset / taille des batchs, en arrondissant à l'inférieur
stoptime = time.time
print(stoptime - starttime)

In [None]:
#model_binary_finetuned.save("./Binary_Inception_withFineTun.keras")

Interpretation with GradCam

In [None]:
from IPython.display import Image
import matplotlib.pyplot as plt
import matplotlib.cm as cm

imgs, targets = next(iter(img_generator_flow_valid))

In [None]:
classifier_layer_names = [layer.name for layer in model_binary_finetuned.layers][1:]
print(classifier_layer_names)

last_conv_layer_name = base_model.layers[-1].name
print(last_conv_layer_name)

In [None]:
from tensorflow import keras
# Display
from IPython.display import Image
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [None]:
# The Grad-CAM algorithm
def get_img_array(img_path, size):
    # `img` is a PIL image of size 299x299
    img = keras.preprocessing.image.load_img(img_path, target_size=size)
    # `array` is a float32 Numpy array of shape (299, 299, 3)
    array = keras.preprocessing.image.img_to_array(img)
    # We add a dimension to transform our array into a "batch"
    # of size (1, 299, 299, 3)
    array = np.expand_dims(array, axis=0)
    return array


def make_gradcam_heatmap(
    img_array, base_model, model, last_conv_layer_name, classifier_layer_names):
    # First, we create a model that maps the input image to the activations
    # of the last conv layer
    last_conv_layer = base_model.get_layer(last_conv_layer_name)
    last_conv_layer_model = tf.keras.Model(base_model.inputs, last_conv_layer.output)

    # Second, we create a model that maps the activations of the last conv
    # layer to the final class predictions
    classifier_input = tf.keras.Input(shape=last_conv_layer.output.shape[1:])
    x = classifier_input
    for layer_name in classifier_layer_names:
        x = model.get_layer(layer_name)(x)
    classifier_model = tf.keras.Model(classifier_input, x)

    # Then, we compute the gradient of the top predicted class for our input image
    # with respect to the activations of the last conv layer
    with tf.GradientTape() as tape:
        # Compute activations of the last conv layer and make the tape watch it
        last_conv_layer_output = last_conv_layer_model(img_array)
        tape.watch(last_conv_layer_output)
        # Compute class predictions
        preds = classifier_model(last_conv_layer_output)
        top_pred_index = tf.argmax(preds[0])
        top_class_channel = preds[:, top_pred_index]

    # This is the gradient of the top predicted class with regard to
    # the output feature map of the last conv layer
    grads = tape.gradient(top_class_channel, last_conv_layer_output)

    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    last_conv_layer_output = last_conv_layer_output.numpy()[0]
    pooled_grads = pooled_grads.numpy()
    for i in range(pooled_grads.shape[-1]):
        last_conv_layer_output[:, :, i] *= pooled_grads[i]

    # The channel-wise mean of the resulting feature map
    # is our heatmap of class activation
    heatmap = np.mean(last_conv_layer_output, axis=-1)

    # For visualization purpose, we will also normalize the heatmap between 0 & 1
    heatmap = np.maximum(heatmap, 0) / np.max(heatmap)
    return heatmap

In [None]:
preds = model_binary_finetuned.predict(imgs)
print(preds)

In [None]:
# à modifier
pred_labels = tf.argmax(preds, axis=-1)
print(pred_labels)

In [None]:
  # Generate class activation heatmap
heatmaps = []
for img in imgs:
    heatmap = make_gradcam_heatmap(
      tf.expand_dims(img,axis=0), base_model, model_binary_finetuned, last_conv_layer_name, classifier_layer_names
    )
    heatmaps.append(heatmap)

# Display heatmap
plt.matshow(heatmaps[0])
plt.show()

In [None]:
num_rows = 4
num_cols = 8
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 15))

#Parcours des images / labels / heatmaps
for idx, (img, pred_label, true_label, heatmap) in enumerate(
    zip(imgs, pred_labels, labels, heatmaps)
):
    # Normalisation de la heatmap
    heatmap = np.uint8(255 * heatmap)

#Application de la cmap "jet"
    jet = plt.colormaps.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Redimensionnement de la heatmap
    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

    # Superposition
    superimposed_img = jet_heatmap * 0.003 + img
    superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)

    # Récup et affichage de pred et true
    pred_file_path = np.argmax(img_generator_flow_valid.labels == pred_label)
    pred_label_name = Path(img_generator_flow_valid.filepaths[pred_file_path]).parent.name
    true_file_path = np.argmax(img_generator_flow_valid.labels == tf.argmax(true_label))
    true_label_name = Path(img_generator_flow_valid.filepaths[true_file_path]).parent.name

    # Positionnement de l'image dans la grille
    ax = axes[idx // num_cols, idx % num_cols]

#Affichage
    ax.imshow(superimposed_img)
    ax.axis("off")
    ax.set_title(f"Pred: {pred_label_name}\nTrue: {true_label_name}", fontsize=8)

plt.tight_layout()
plt.show()