# **PHASE 1 : TRAITEMENT DES IMAGES**

# **1 - Importation des bibliothèques**

In [None]:
import cv2 # pour afficher les images
import pandas as pd # pour manipuler les fichiers csv
import numpy as np # bibliothèque de l'algèbre linéaire
import matplotlib.pyplot as plt # pour le traçage des graphes
from matplotlib.colors import ListedColormap

from sklearn.model_selection import train_test_split # pour fractionner les données
from tensorflow.keras.preprocessing.image import ImageDataGenerator # pour traiter les images

# **2 - Préparation des données**

**Visualisation du fichier csv**

In [None]:
# Locations of the dataset on disk: the metadata CSV plus the train/test image folders.
# The shared prefixes are factored out so each path is spelled only once.
DATASET_ROOT = '../input/coronahack-chest-xraydataset/'
IMAGES_ROOT = DATASET_ROOT + 'Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/'

csv_path = DATASET_ROOT + 'Chest_xray_Corona_Metadata.csv'
train_path = IMAGES_ROOT + 'train/'
test_path = IMAGES_ROOT + 'test/'

In [None]:
# Load the dataset metadata CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=csv_path)
# Bare last expression: the notebook renders the frame as the cell output.
data

Il y a des informations inutiles pour notre objectif. De plus, la répartition des images est non équilibrée.

Je vais me contenter uniquement des noms des images et de leur classe (label).

**Fractionnement et équilibrage des données**

In [None]:
# Keep only the image file name and its label, separately for the TRAIN and TEST rows.
kept_columns = ['X_ray_image_name', 'Label']
is_train_row = data['Dataset_type'] == 'TRAIN'
is_test_row = data['Dataset_type'] == 'TEST'

Train = data.loc[is_train_row, kept_columns].copy()  # training rows
Test = data.loc[is_test_row, kept_columns].copy()    # rows held back for prediction

# Carve 20% of the training rows out as a validation set. The split is
# stratified on the label and uses a fixed seed so it is repeatable.
Train, Valid = train_test_split(
    Train,
    test_size=0.2,
    stratify=Train['Label'],
    random_state=38,
)

In [None]:
# Print the number of rows per label for each of the three subsets.
subsets = (
    ('Train Data :', Train),
    ('Validation Data :', Valid),
    ('Test Data :', Test),
)
for position, (heading, frame) in enumerate(subsets):
    # Every heading except the first is preceded by a blank line.
    print(heading if position == 0 else '\n' + heading)
    print(frame['Label'].value_counts())

**Visualisation des images**

In [None]:
# Show two sample X-rays side by side, each titled with its label from the metadata.
# NOTE(review): both files are looked up under train_path, which assumes rows
# 1250 and 1966 of the metadata are TRAIN images - confirm.

def _load_rgb(image_file):
    """Read an image with OpenCV and return it in RGB channel order.

    Raises FileNotFoundError when OpenCV cannot read the file.
    """
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None, not by raising.
        raise FileNotFoundError(f"Could not read image: {image_file}")
    # OpenCV decodes colour images as BGR, while plt.imshow interprets
    # 3-channel arrays as RGB, so swap the channels before plotting.
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


img1 = _load_rgb(train_path + data['X_ray_image_name'][1250])
img2 = _load_rgb(train_path + data['X_ray_image_name'][1966])

plt.style.use('dark_background')  # applies to every later figure as well
plt.figure(figsize=(18,12))

plt.subplot(1,2,1)
plt.title(data['Label'][1250])
plt.imshow(img1)

plt.subplot(1,2,2)
plt.title(data['Label'][1966])
plt.imshow(img2)

plt.show()

> Les tailles des images sont différentes. Il faut les redimensionner à une taille commune et normaliser les valeurs des pixels.

**Normalisation des images**

In [None]:
# Generator_1 only rescales pixel values by 1/255 (used for validation and test).
# Generator_2 applies the same rescaling plus random shifts, zoom and brightness
# changes (used for training only).
Generator_1 = ImageDataGenerator(rescale=1/255.)
Generator_2 = ImageDataGenerator( rescale = 1./255,
                                width_shift_range = 0.15,
                                height_shift_range = 0.15,
                                zoom_range = [0.9, 1.25],
                                brightness_range = [0.5, 1.5]
                                )
targetSize = (224, 224) # every image is resized to 224x224
batchSize = 32 # images are served in batches of 32
classMode = 'binary' # two classes, encoded as 0 and 1
colorMode = 'rgb'  # three colour channels (red/green/blue)


def _flow(generator, dataframe, directory, shuffle):
    """Build a batch iterator over the images listed in `dataframe`.

    generator -- the ImageDataGenerator that preprocesses the images
    dataframe -- frame with 'X_ray_image_name' and 'Label' columns
    directory -- folder containing the image files
    shuffle   -- whether the iterator reorders the images
    """
    return generator.flow_from_dataframe(
        dataframe = dataframe,
        directory = directory,
        x_col = 'X_ray_image_name',
        y_col = 'Label',
        target_size = targetSize,
        batch_size = batchSize,
        class_mode = classMode,
        color_mode = colorMode,
        shuffle = shuffle
        )


# Training batches are shuffled.
Final_Train = _flow(Generator_2, Train, train_path, shuffle = True)

# Validation and test batches keep the dataframe order. Labels and predictions
# are read from Final_Test in separate passes later on, and they only line up
# reliably when the iteration order is fixed.
Final_Valid = _flow(Generator_1, Valid, train_path, shuffle = False)
Final_Test = _flow(Generator_1, Test, test_path, shuffle = False)

> Final_Test : itérateur qui contient 20 blocs ; chaque bloc est un tuple formé d'une matrice de taille 32x224x224x3 (images) et d'un vecteur de 32 labels (le dernier bloc peut contenir moins de 32 images).
...

# **PHASE 2 : ENTRAINEMENT**

# **1 - Importation des bibliothèques**

In [None]:
# Pour l'apprentissage profond
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.optimizers import Adam
import tensorflow_hub as tf_hub
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# **2 - Le choix des métriques**

In [None]:
# Metrics tracked by the model: accuracy and precision.
metrics = [
    'accuracy',
    keras.metrics.Precision(name='precision'),
]

# Stop training when val_loss has not improved for 2 epochs in a row,
# then restore the weights from the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)
callbacks = [early_stopping]

# **3 - Le modèle : DenseNet 121**

In [None]:
# Fix TensorFlow's global seed before any layer is created.
tf.random.set_seed(38)

# DenseNet121 without its classification head, with all of its weights frozen.
backbone = DenseNet121(include_top = False)
backbone.trainable = False

# Classifier: frozen backbone -> global max pooling -> single sigmoid unit.
input_tensor = keras.layers.Input(shape=(224, 224, 3), name = 'input_layer')
feature_maps = backbone(input_tensor, training = False)  # backbone is always called in inference mode
pooled = keras.layers.GlobalMaxPooling2D()(feature_maps)
probability = keras.layers.Dense(1, activation = 'sigmoid', name = 'output_layer')(pooled)
densenet_model = keras.Model(input_tensor, probability)

densenet_model.compile(
    loss = 'binary_crossentropy',
    optimizer = Adam(learning_rate = 5e-5),
    metrics = metrics,
)
densenet_model.summary()

In [None]:
# Train for at most 30 epochs, with one full pass over each generator per epoch.
# The callbacks list is passed through unchanged.
densenet_history = densenet_model.fit(
    Final_Train,
    validation_data = Final_Valid,
    epochs = 30,
    steps_per_epoch = len(Final_Train),
    validation_steps = len(Final_Valid),
    callbacks = callbacks,
)

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
densenet_model.save(filepath="DenseNet_121.h5")

# **PHASE 3 : EVALUATION ET PREDICTION**

# **1 - La courbe d'apprentissage**

In [None]:
# Plot training and validation loss against the epoch number (1-based).
history_log = densenet_history.history
loss = history_log.get('loss')
val_loss = history_log.get('val_loss')
epochs = np.arange(1, len(loss) + 1)

for series, colour in ((loss, '#a9007a'), (val_loss, '#7900a5')):
    plt.plot(epochs, series, color = colour)

plt.legend(("DenseNet 121 train", "DenseNet 121 val"))
plt.title('Loss')
plt.show()

# **2 - La matrice de confusion**

In [None]:
# Collect the labels of the test set by walking through the generator once.
# The generator yields batches indefinitely, so stop explicitly after
# len(Final_Test) batches.
y_true = []
for batch_number, batch in enumerate(Final_Test, start=1):
    # batch[1] holds the labels of the current batch.
    y_true.extend(batch[1])
    if batch_number >= len(Final_Test):
        break

In [None]:
# Rewind the generator so prediction starts from the first batch.
Final_Test.reset()

labels = ["Normal", "Pneumonia"]

# Predicted values above 0.5 become class 1, everything else class 0.
densenet_predictions = densenet_model.predict(Final_Test)
y_pred = (densenet_predictions.ravel() > 0.5).astype(int).tolist()

# Confusion matrix of the collected labels against the thresholded predictions.
densenet_matrix = confusion_matrix(y_true, y_pred)
densenet_disp = ConfusionMatrixDisplay(
    confusion_matrix=densenet_matrix,
    display_labels=labels,
)
densenet_disp.plot(cmap=plt.cm.Purples)
plt.show()

# **3 - Les scores**

In [None]:
# Rewind the generator, then evaluate the model on the test set.
Final_Test.reset()
score = densenet_model.evaluate(Final_Test)

# The first two entries of the result are reported as loss and accuracy.
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)