In [18]:
# Imports

from pathlib import Path
import json

import numpy as np
import joblib
import mlflow
import mlflow.sklearn

from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    classification_report,
    precision_score,
    recall_score,
    f1_score
)

import matplotlib.pyplot as plt


In [22]:
# Paths & load data
#
# Resolve the project folders, point MLflow at the local tracking store, and load
# the persisted train/test splits together with the fitted pipeline.

# Force the tracking store to <project root>/mlruns.
# PROJECT_ROOT is the grandparent of the kernel's working directory.
# NOTE(review): this only lands on the repository root when the notebook is run
# from a folder two levels below it — confirm before running from elsewhere.
PROJECT_ROOT = Path.cwd().parents[1]
MLRUNS_DIR = PROJECT_ROOT / "mlruns"
ARTIFACTS_DIR = PROJECT_ROOT / "artifacts"

ARTIFACTS_DIR.mkdir(exist_ok=True)

# Path.as_uri() yields a well-formed, percent-encoded file URI on every OS.
# Hand-building it with "file:///" + as_posix() produces four slashes on POSIX
# and leaves characters such as '#', '?' or spaces unescaped.
mlflow.set_tracking_uri(MLRUNS_DIR.as_uri())

print("âœ… Project root:", PROJECT_ROOT)
print("âœ… Tracking URI:", mlflow.get_tracking_uri())
print("âœ… Artifacts dir:", ARTIFACTS_DIR)


DATA_PATH = PROJECT_ROOT / "data" / "processed"

# NOTE(review): joblib files are pickles, so loading them can execute arbitrary
# code. Only load files produced by this project's own pipeline.
X_train = joblib.load(DATA_PATH / "X_train.joblib")
X_test  = joblib.load(DATA_PATH / "X_test.joblib")
y_train = joblib.load(DATA_PATH / "y_train.joblib")
y_test  = joblib.load(DATA_PATH / "y_test.joblib")

# Adjust this path to the actual location of your final model.
LOGREG_PIPELINE_PATH = PROJECT_ROOT / "notebooks" / "models" / "logreg_baseline_pipeline.joblib"
logreg_pipeline = joblib.load(LOGREG_PIPELINE_PATH)

# Probability cut-off used to turn predicted churn probabilities into 0/1 labels.
THRESHOLD = 0.40

print("âœ… Loaded splits:", X_train.shape, X_test.shape)
print("âœ… Loaded model:", LOGREG_PIPELINE_PATH)

âœ… Project root: C:\Users\Anna\PycharmProjects\churn-mlops-telco
âœ… Tracking URI: file:///C:/Users/Anna/PycharmProjects/churn-mlops-telco/mlruns
âœ… Artifacts dir: C:\Users\Anna\PycharmProjects\churn-mlops-telco\artifacts
âœ… Loaded splits: (5634, 21) (1409, 21)
âœ… Loaded model: C:\Users\Anna\PycharmProjects\churn-mlops-telco\notebooks\models\logreg_baseline_pipeline.joblib


In [26]:
# Evaluation & Artifacts logging
#
# Score the loaded pipeline on the test split at the chosen threshold, print the
# headline metrics, and write three files into ARTIFACTS_DIR: the text
# classification report, the confusion-matrix image, and the threshold as JSON.

THRESHOLD = 0.40  # validated business threshold

# Column 1 of predict_proba is taken as the positive-class score; a sample is
# labelled 1 when that score reaches THRESHOLD.
y_proba = logreg_pipeline.predict_proba(X_test)[:, 1]
y_pred = (y_proba >= THRESHOLD).astype(int)

# Threshold-independent metric first, then the metrics for the positive class (1).
roc_auc = roc_auc_score(y_test, y_proba)
recall_churn = recall_score(y_test, y_pred, pos_label=1)
precision_churn = precision_score(y_test, y_pred, pos_label=1)
f1_churn = f1_score(y_test, y_pred, pos_label=1)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=3)

# Scalar metrics are echoed rounded to four decimals; the matrix is printed raw.
for metric_label, metric_value in (
    ("ROC-AUC:", roc_auc),
    ("Recall churn:", recall_churn),
    ("Precision churn:", precision_churn),
    ("F1 churn:", f1_churn),
):
    print(metric_label, round(metric_value, 4))
print("CM:\n", cm)

# Save artifacts
report_path = ARTIFACTS_DIR / "classification_report.txt"
cm_path = ARTIFACTS_DIR / "confusion_matrix.png"
threshold_path = ARTIFACTS_DIR / "threshold.json"

report_path.write_text(report)

# Draw the confusion matrix on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.imshow(cm)
ax.set_title(f"Confusion Matrix - Logistic Regression (threshold={THRESHOLD})")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_xticks([0, 1])
ax.set_yticks([0, 1])

# Write each cell's count at its centre; x is the column index, y the row index.
for (row, col), count in np.ndenumerate(cm):
    ax.text(col, row, str(count), ha="center", va="center")

fig.savefig(cm_path, bbox_inches="tight")
plt.close(fig)  # the figure is only needed on disk, so it is not displayed inline

threshold_payload = json.dumps({"threshold": THRESHOLD}, indent=2)
threshold_path.write_text(threshold_payload)

print("âœ… Artifacts saved:", report_path.name, cm_path.name, threshold_path.name)


ROC-AUC: 0.8414
Recall churn: 0.8636
Precision churn: 0.4647
F1 churn: 0.6043
CM:
 [[663 372]
 [ 51 323]]
âœ… Artifacts saved: classification_report.txt confusion_matrix.png threshold.json


In [28]:
# MLflow logging
#
# Record the evaluated pipeline as a single run in the "telco-churn-final"
# experiment: parameters, metrics, the saved artifact files, and the model itself.

mlflow.set_experiment("telco-churn-final")

# Build the run name from THRESHOLD so it cannot drift from the logged parameter
# when the threshold is tuned (renders as "logreg_final_threshold_0.40" for 0.40).
run_name = f"logreg_final_threshold_{THRESHOLD:.2f}"

with mlflow.start_run(run_name=run_name):
    # Params
    mlflow.log_params({
        "model_type": "LogisticRegression",
        "threshold": THRESHOLD,
        "pipeline": "preprocessor + logreg",
        "model_file": LOGREG_PIPELINE_PATH.name,
    })

    # Metrics (cast to built-in float before logging)
    mlflow.log_metrics({
        "roc_auc": float(roc_auc),
        "recall_churn": float(recall_churn),
        "precision_churn": float(precision_churn),
        "f1_churn": float(f1_churn),
    })

    # Artifacts
    for artifact_file in (report_path, cm_path, threshold_path):
        mlflow.log_artifact(str(artifact_file))

    # Model
    mlflow.sklearn.log_model(logreg_pipeline, artifact_path="model")

print("âœ… MLflow run logged. Refresh the UI.")


âœ… MLflow run logged. Refresh the UI.
