In [4]:
!pip install kaggle tensorflow plotly -q

In [None]:
# Download the dataset archive with the Kaggle CLI, then extract it into the
# working directory. `unzip -n` keeps files that are already present instead
# of overwriting them.
!kaggle datasets download -d ghostbat101/lung-x-ray-image-clinical-text-dataset
!unzip -n lung-x-ray-image-clinical-text-dataset.zip

In [None]:
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import os
import tensorflow as tf
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

# Démarrage du tracking MLFLOW

In [None]:
# --- Run configuration -------------------------------------------------------
MLFLOW_SERVER_URI = "https://david-rem-jedha-final-project-mlops.hf.space"
EXPERIMENT_NAME = "multi"   # either 'binary' or 'multi'
TRAINER = "david"           # first name of the person who ran the training
MODEL_TYPE = "baseline"     # kind of model being trained
EPOCHS = 1
# NOTE(review): this path contains 'binary' while EXPERIMENT_NAME is 'multi' —
# confirm that this is intended.
IMAGE_PATH = "jedha-final-project/tests/baseline/binary/Main dataset"

# --- MLflow tracking ---------------------------------------------------------
# Point the client at the tracking server, select the experiment and turn on
# TensorFlow autologging before any run is opened.
mlflow.set_tracking_uri(MLFLOW_SERVER_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
mlflow.tensorflow.autolog()

# Open the run explicitly inside the selected experiment; it stays active
# until mlflow.end_run() is called.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
mlflow.start_run(experiment_id=experiment.experiment_id)

# Preprocessing

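Mettre ici la préparation des données. Les cellules d'évaluation plus bas utilisent un générateur de validation nommé `val_generator` (attributs `classes` et `class_indices`) : il doit être créé dans cette section.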

# Entrainement

In [11]:
# Number of leading entries of the classification report whose metrics are
# logged individually to MLflow (see the loop over `report.items()` below).
CLASSES = 1

# Sauvegarde du tracking MLFLOW

In [None]:
# Record the run configuration as MLflow parameters.
for param_name, param_value in (
    ("trainer", TRAINER),
    ("epochs", EPOCHS),
    ("model_type", MODEL_TYPE),
):
    mlflow.log_param(param_name, param_value)

# Store the trained model as a run artifact under "model".
mlflow.keras.log_model(model, "model")

# Replay the training curves into MLflow, one step per epoch.
history = model.history
epoch_count = len(history.history['loss'])
for epoch in range(epoch_count):
    for metric_name in ('loss', 'accuracy', 'val_loss', 'val_accuracy'):
        mlflow.log_metric(metric_name, history.history[metric_name][epoch], step=epoch)


In [None]:
# Ground-truth labels as exposed by the validation generator.
# NOTE(review): this only lines up with the predictions if val_generator
# yields samples in a fixed order (e.g. shuffle disabled) — confirm.
y_true = val_generator.classes

# Model outputs for the validation data; the predicted class is the index of
# the largest value in each row.
# NOTE(review): with a single-unit output, argmax is always 0 — confirm that
# the model emits one score per class.
predictions = model.predict(val_generator)
y_pred = np.argmax(predictions, axis=1)

In [None]:
# Build the classification report as a nested dict.
# scikit-learn's signature is classification_report(y_true, y_pred); passing
# the arrays the other way round swaps precision and recall (and reports the
# support of the predictions instead of the ground truth).
report = classification_report(y_true, y_pred, output_dict=True)

mlflow.log_metric('global_accuracy', report['accuracy'])

# Log the aggregate rows as '<average>_<statistic>', e.g. 'macro_avg_f1_score'.
for average in ('macro avg', 'weighted avg'):
    prefix = average.replace(' ', '_')
    for stat in ('precision', 'recall', 'f1-score', 'support'):
        mlflow.log_metric(f"{prefix}_{stat.replace('-', '_')}", report[average][stat])

In [None]:
# Log precision / recall / f1-score / support for the first CLASSES labels,
# using the integer class label as the MLflow step.
# Besides one entry per label, the report dict also holds aggregate entries
# ('accuracy' is a plain float, 'macro avg' / 'weighted avg' are not integer
# labels). They are filtered out first so that a CLASSES value larger than the
# number of labels cannot crash on them.
class_entries = [
    (label, stats)
    for label, stats in report.items()
    if isinstance(stats, dict) and label.isdigit()
]
for label, stats in class_entries[:CLASSES]:
    for m_name, m_value in stats.items():
        mlflow.log_metric(m_name, m_value, step=int(label))

In [None]:
# Class names and their integer ids, in the order given by the generator.
# NOTE(review): assumes class_indices maps class name -> integer label as used
# in y_true / y_pred — confirm against how val_generator is built.
class_names = list(val_generator.class_indices.keys())
class_ids = list(val_generator.class_indices.values())

# Pin the label set so the matrix always has one row/column per class, even
# when a class is absent from both y_true and y_pred; otherwise the matrix can
# be smaller than the list of display labels and plotting fails.
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot()

# Use the axes/figure created by the display rather than pyplot's implicit
# "current" figure, then attach the image to the run.
disp.ax_.set_title("Matrice de Confusion")
disp.figure_.savefig("confusion_matrix.png")
mlflow.log_artifact("confusion_matrix.png", artifact_path='model')

In [None]:
# Close the MLflow run that was opened with mlflow.start_run() earlier in the notebook.
mlflow.end_run()