In [7]:
import os
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!pip install mlflow
!pip install boto3 awscli
!pip install mlflow optuna xgboost
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import mlflow
import mlflow.sklearn
!aws configure
# =========================
# General configuration
# =========================
# Tracking server and experiment that receive the evaluation run.
# NOTE(review): the tracking URI is plain HTTP; confirm this is acceptable
# for the network the notebook runs on.
MLFLOW_TRACKING_URI = "http://ec2-18-207-206-140.compute-1.amazonaws.com:5000"
MLFLOW_EXPERIMENT_NAME = "HDFS"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# =========================
# Data loading
# =========================
# CSV holding the test split of the HDFS event-occurrence matrix.
test_file = "/content/drive/MyDrive/ProjetEts/HDFS_results/Event_occurence_matrix_HDFS_test.csv"

df_test = pd.read_csv(test_file)

# Columns that are removed before prediction; every remaining column is
# passed to the model as a feature.
NON_FEATURE_COLUMNS = ["BlockId", "Label", "Type", "Time", "Date"]
X_test = df_test.drop(columns=NON_FEATURE_COLUMNS)

# Binary target: 1 where Label is exactly "Fail", 0 for anything else.
# A vectorized comparison replaces the row-by-row .apply(lambda ...).
y_test = (df_test["Label"] == "Fail").astype(int)

# =========================
# Model loading
# =========================
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files that come from a trusted source.
model_path = "/content/drive/MyDrive/ProjetEts/models/random_forest.pkl"  # Adjust if needed
model = joblib.load(model_path)

# =========================
# Evaluation
# =========================
# Predict labels for the test feature matrix; compared against y_test below.
y_pred = model.predict(X_test)

# Headline scores comparing the predictions with the ground-truth labels.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (precision_score, recall_score, f1_score)
)

# Print a short summary, then the per-class report.
print("📊 Résultats de l’évaluation :")
for score_label, score_value in (
    ("Accuracy :", accuracy),
    ("Precision:", precision),
    ("Recall   :", recall),
    ("F1 Score :", f1),
):
    print(score_label, score_value)
print("\n📄 Rapport de classification :")
print(classification_report(y_test, y_pred))

# =========================
# Log to MLflow
# =========================
# Metric name -> value, logged one by one inside the run below.
metrics_to_log = {
    "accuracy": accuracy,
    "precision": precision,
    "recall": recall,
    "f1_score": f1,
}

with mlflow.start_run(run_name="model_evaluation"):
    mlflow.log_param("model", "RandomForest")
    for metric_name, metric_value in metrics_to_log.items():
        mlflow.log_metric(metric_name, metric_value)
    # Attach the evaluated model to the same run as its metrics.
    mlflow.sklearn.log_model(model, artifact_path="model_rf_eval")


Mounted at /content/drive
AWS Access Key ID [********************]: [REDACTED]
AWS Secret Access Key [********************]: [REDACTED]
Default region name [us-east-1]: us-east-1
Default output format [None]: 
📊 Résultats de l’évaluation :
Accuracy : 0.9740249670466
Precision: 0.39352428393524286
Recall   : 0.0699269749944678
F1 Score : 0.11875234874107478

📄 Rapport de classification :
              precision    recall  f1-score   support

           0       0.98      1.00      0.99    176039
           1       0.39      0.07      0.12      4519

    accuracy                           0.97    180558
   macro avg       0.69      0.53      0.55    180558
weighted avg       0.96      0.97      0.97    180558





🏃 View run model_evaluation at: http://ec2-18-207-206-140.compute-1.amazonaws.com:5000/#/experiments/904462445519544081/runs/bac1df0f64c34fdca76f6145cd374dd4
🧪 View experiment at: http://ec2-18-207-206-140.compute-1.amazonaws.com:5000/#/experiments/904462445519544081
