## MLflow Configuration

In [1]:
#!pip install mlflow
#!pip install --upgrade jinja2
#!pip install --upgrade Flask
#!pip install setuptools

In [2]:
## Import de librairie
import pandas as pd
import mlflow
from mlflow import MlflowClient
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from pprint import pprint
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score , classification_report, confusion_matrix

In [3]:
## Connecxion au serveur MLFlow
client= MlflowClient(tracking_uri="http://127.0.0.1:8080")
## Configuration 
mlflow.set_tracking_uri("http://127.0.0.1:8080")


In [4]:
# Récupération de toutes les expériences MLflow
all_experiments = client.search_experiments()
# Affichage des expériences 
print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/606383372813198707', creation_time=1760806668542, experiment_id='606383372813198707', last_update_time=1760806668542, lifecycle_stage='active', name='TREE_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='mlflow-artifacts:/389458555972686615', creation_time=1760804935652, experiment_id='389458555972686615', last_update_time=1760804935652, lifecycle_stage='active', name='RLG_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='mlflow-artifacts:/668826699846372727', creation_time=1760799130123, experiment_id='668826699846372727', last_update_time=1760799130123, lifecycle_stage='active', name='RDF_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1760798921200, experiment_id='0', last_update_time=1760798921200, lifecycle_stage='active', name='Default', 

In [5]:
# Recherche des expériences Mlflow Spécifique.
client.search_experiments(filter_string="name = 'TREE_Models'")

[<Experiment: artifact_location='mlflow-artifacts:/606383372813198707', creation_time=1760806668542, experiment_id='606383372813198707', last_update_time=1760806668542, lifecycle_stage='active', name='TREE_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>]

## Chargement du fichier 

In [6]:
# Définition du chemin vers le fichier de  données 
data_dir = r"C:\Users\j_aka\Desktop\mlops\credit_risk_dataset.csv"
# Chargement de la données 
df = pd.read_csv(data_dir, delimiter=",") 
#Affichage des 3 premières lignes 
print(df.head(3).to_string())

   person_age  person_income person_home_ownership  person_emp_length loan_intent loan_grade  loan_amnt  loan_int_rate  loan_status  loan_percent_income cb_person_default_on_file  cb_person_cred_hist_length
0          22          59000                  RENT              123.0    PERSONAL          D      35000          16.02            1                 0.59                         Y                           3
1          21           9600                   OWN                5.0   EDUCATION          B       1000          11.14            0                 0.10                         N                           2
2          25           9600              MORTGAGE                1.0     MEDICAL          C       5500          12.87            1                 0.57                         N                           3


## Décision tree model

In [None]:
# Définition des Features et de la cible 
target_col = 'loan_status'
X = df.drop(columns=['loan_status'])    
y = df['loan_status']

# Split des données 
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Colonne numérique et colonne catégorielle 
num_features = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

#Encoder et Gérer les données Nan 
preprocessor = ColumnTransformer(
    transformers= [
        ("num_features", SimpleImputer(strategy="mean"), num_features),
        ("cat_features", OneHotEncoder(handle_unknown="ignore"),cat_features)
    ]
)


# Hyperparamètre du modèle
params = {
    "criterion": "gini",
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "random_state": 42
}
# Nom du modèle : 
mlflow.set_experiment("TREE_Models")
# Nom du run pour cette itération d'entraînement
run_name = "TREE_Run"
# les artefacts du modèle
artifact_path = "TREE_artefacts"
# Fermer tout run précédent
#mlflow.end_run()

with mlflow.start_run(run_name=run_name):
    # Tag de version du modèle
    mlflow.set_tag("version", "v1.0")

# Encodage et entrainement du modèle 
    pipeline_fr = Pipeline(steps=[
    ("processor", preprocessor),
    ("model", DecisionTreeClassifier(**params))
   ])
    
# Entrainement du modèle
    pipeline_fr.fit(X_train,y_train)

# Prédiction du modèle
    y_proba = pipeline_fr.predict_proba(X_val)[:, 1]
    y_pred = (y_proba >= 0.3).astype(int)



# Calcul des métriques
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1
    }

    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model= pipeline_fr, 
        input_example=X_val, 
        artifact_path= artifact_path
    ) 

    print("✅ Modèle entraîné et logué dans MLflow avec succès.")
    print("\nRapport de classification :\n", classification_report(y_val, y_pred))
    print("\nMatrice de confusion :\n", confusion_matrix(y_val, y_pred))
    print("📊 Métriques :", metrics)



✅ Modèle entraîné et logué dans MLflow avec succès.

Rapport de classification :
               precision    recall  f1-score   support

           0       0.92      0.98      0.95      5072
           1       0.91      0.72      0.80      1445

    accuracy                           0.92      6517
   macro avg       0.92      0.85      0.88      6517
weighted avg       0.92      0.92      0.92      6517


Matrice de confusion :
 [[4972  100]
 [ 410 1035]]
📊 Métriques : {'accuracy': 0.921743133343563, 'precision': 0.9118942731277533, 'recall': 0.7162629757785467, 'f1': 0.8023255813953488}
🏃 View run TREE_Run at: http://127.0.0.1:8080/#/experiments/606383372813198707/runs/401b7b68908840948320077326fc2bef
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/606383372813198707


In [8]:
import mlflow
print(mlflow.__version__)

3.5.0


In [9]:
import mlflow

experiments = mlflow.search_experiments()
for exp in experiments:
    print(f"Nom : {exp.name} | ID : {exp.experiment_id}")

Nom : TREE_Models | ID : 606383372813198707
Nom : RLG_Models | ID : 389458555972686615
Nom : RDF_Models | ID : 668826699846372727
Nom : Default | ID : 0


In [10]:
from mlflow.tracking import MlflowClient

client = MlflowClient()
runs = client.search_runs(
    experiment_ids=["606383372813198707"],
    filter_string="tags.version = 'v1.0'"
)

for run in runs:
    print("Run ID :", run.info.run_id)
    print("Artefacts :", run.info.artifact_uri)


Run ID : 401b7b68908840948320077326fc2bef
Artefacts : mlflow-artifacts:/606383372813198707/401b7b68908840948320077326fc2bef/artifacts
Run ID : f55fcd44eb874d41a9bc1a8bccdddcdc
Artefacts : mlflow-artifacts:/606383372813198707/f55fcd44eb874d41a9bc1a8bccdddcdc/artifacts
Run ID : 990f13fdba71400f8bc519d45326c5c4
Artefacts : mlflow-artifacts:/606383372813198707/990f13fdba71400f8bc519d45326c5c4/artifacts
Run ID : b7ad840cf8e648bab020b034954d92f4
Artefacts : mlflow-artifacts:/606383372813198707/b7ad840cf8e648bab020b034954d92f4/artifacts
Run ID : 3b3a105dba904beabd5e18a267368ccc
Artefacts : mlflow-artifacts:/606383372813198707/3b3a105dba904beabd5e18a267368ccc/artifacts
Run ID : 4ed00a71bbb94973b62828662e438dc4
Artefacts : mlflow-artifacts:/606383372813198707/4ed00a71bbb94973b62828662e438dc4/artifacts
Run ID : b83c4cd06ad04d17a8f485050210adde
Artefacts : mlflow-artifacts:/606383372813198707/b83c4cd06ad04d17a8f485050210adde/artifacts


In [11]:
from mlflow.tracking import MlflowClient

artifacts = client.list_artifacts("b7ad840cf8e648bab020b034954d92f4", path="TREE_artefacts")

for artifact in artifacts:
    print("📦", artifact.path)

📦 TREE_artefacts/MLmodel
📦 TREE_artefacts/conda.yaml
📦 TREE_artefacts/input_example.json
📦 TREE_artefacts/model.pkl
📦 TREE_artefacts/python_env.yaml
📦 TREE_artefacts/requirements.txt
📦 TREE_artefacts/serving_input_example.json


In [12]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Prédictions
y_train_pred = pipeline_fr.predict(X_train)
y_val_pred = pipeline_fr.predict(X_val)

# Métriques
train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))


train_r2 = r2_score(y_train, y_train_pred)
val_r2 = r2_score(y_val, y_val_pred)

print(train_r2)
print(val_r2)

0.6151228957754777
0.5838532740986542
