## MLflow Configuration

In [13]:
#!pip install mlflow
#!pip install --upgrade jinja2
#!pip install --upgrade Flask
#!pip install setuptools

In [14]:
## Import de librairie
import pandas as pd
import mlflow
from mlflow import MlflowClient
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from pprint import pprint
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score , classification_report, confusion_matrix

In [None]:
## Connection au serveur MLFlow
client= MlflowClient(tracking_uri="http://127.0.0.1:8080")
## Configuration 
mlflow.set_tracking_uri("http://127.0.0.1:8080")


In [16]:
# R√©cup√©ration de toutes les exp√©riences MLflow
all_experiments = client.search_experiments()
# Affichage des exp√©riences 
print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/606383372813198707', creation_time=1760806668542, experiment_id='606383372813198707', last_update_time=1760806668542, lifecycle_stage='active', name='TREE_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='mlflow-artifacts:/389458555972686615', creation_time=1760804935652, experiment_id='389458555972686615', last_update_time=1760804935652, lifecycle_stage='active', name='RLG_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='mlflow-artifacts:/668826699846372727', creation_time=1760799130123, experiment_id='668826699846372727', last_update_time=1760799130123, lifecycle_stage='active', name='RDF_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1760798921200, experiment_id='0', last_update_time=1760798921200, lifecycle_stage='active', name='Default', 

In [17]:
# Recherche des exp√©riences Mlflow Sp√©cifique.
client.search_experiments(filter_string="name = 'TREE_Models'")

[<Experiment: artifact_location='mlflow-artifacts:/606383372813198707', creation_time=1760806668542, experiment_id='606383372813198707', last_update_time=1760806668542, lifecycle_stage='active', name='TREE_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>]

## Chargement du fichier 

In [18]:
# D√©finition du chemin vers le fichier de  donn√©es 
data_dir = r"C:\Users\j_aka\Desktop\mlops\credit_risk_dataset.csv"
# Chargement de la donn√©es 
df = pd.read_csv(data_dir, delimiter=",") 
#Affichage des 3 premi√®res lignes 
print(df.head(3).to_string())

   person_age  person_income person_home_ownership  person_emp_length loan_intent loan_grade  loan_amnt  loan_int_rate  loan_status  loan_percent_income cb_person_default_on_file  cb_person_cred_hist_length
0          22          59000                  RENT              123.0    PERSONAL          D      35000          16.02            1                 0.59                         Y                           3
1          21           9600                   OWN                5.0   EDUCATION          B       1000          11.14            0                 0.10                         N                           2
2          25           9600              MORTGAGE                1.0     MEDICAL          C       5500          12.87            1                 0.57                         N                           3


## TREE_Model

In [19]:
# D√©finition des Features et de la cible 
target_col = 'loan_status'
X = df.drop(columns=['loan_status'])    
y = df['loan_status']

# Split des donn√©es 
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Colonne num√©rique et colonne cat√©gorielle 
num_features = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

# Conversion des colonnes num√©riques en float64
#X[num_features] = X[num_features].astype("float64")


#Encoder et G√©rer les donn√©es Nan 
preprocessor = ColumnTransformer(
    transformers= [
        ("num_features", SimpleImputer(strategy="mean"), num_features),
        ("cat_features", OneHotEncoder(handle_unknown="ignore"),cat_features)
    ]
)


# Hyperparam√®tre du mod√®le
params = {
    "criterion": "gini",
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "random_state": 42
}
# Nom du mod√®le : 
mlflow.set_experiment("TREE_Models")
# Nom du run pour cette it√©ration d'entra√Ænement
run_name = "TREE_Run"
# les artefacts du mod√®le
artifact_path = "TREE_artefacts"
# Fermer tout run pr√©c√©dent
#mlflow.end_run()

with mlflow.start_run(run_name=run_name):
    # Tag de version du mod√®le
    mlflow.set_tag("version", "v1.0")

# Encodage et entrainement du mod√®le 
    pipeline_fr = Pipeline(steps=[
    ("processor", preprocessor),
    ("model", DecisionTreeClassifier(**params))
   ])
    
# Entrainement du mod√®le
    pipeline_fr.fit(X_train,y_train)

# Pr√©diction du mod√®le
    y_proba = pipeline_fr.predict_proba(X_val)[:, 1]
    y_pred = (y_proba >= 0.3).astype(int)



# Calcul des m√©triques
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1
    }

    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model= pipeline_fr, 
        input_example=X_val, 
        name = artifact_path
    ) 

    print("‚úÖ Mod√®le entra√Æn√© et logu√© dans MLflow avec succ√®s.")
    print("\nRapport de classification :\n", classification_report(y_val, y_pred))
    print("\nMatrice de confusion :\n", confusion_matrix(y_val, y_pred))
    print("üìä M√©triques :", metrics)



‚úÖ Mod√®le entra√Æn√© et logu√© dans MLflow avec succ√®s.

Rapport de classification :
               precision    recall  f1-score   support

           0       0.92      0.98      0.95      5072
           1       0.91      0.72      0.80      1445

    accuracy                           0.92      6517
   macro avg       0.92      0.85      0.88      6517
weighted avg       0.92      0.92      0.92      6517


Matrice de confusion :
 [[4972  100]
 [ 410 1035]]
üìä M√©triques : {'accuracy': 0.921743133343563, 'precision': 0.9118942731277533, 'recall': 0.7162629757785467, 'f1': 0.8023255813953488}
üèÉ View run TREE_Run at: http://127.0.0.1:8080/#/experiments/606383372813198707/runs/a96f7d8998d14182849e50e948dedf88
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/606383372813198707


In [20]:
import mlflow
print(mlflow.__version__)

3.5.0


In [21]:
import mlflow

experiments = mlflow.search_experiments()
for exp in experiments:
    print(f"Nom : {exp.name} | ID : {exp.experiment_id}")

Nom : TREE_Models | ID : 606383372813198707
Nom : RLG_Models | ID : 389458555972686615
Nom : RDF_Models | ID : 668826699846372727
Nom : Default | ID : 0


In [22]:
from mlflow.tracking import MlflowClient

client = MlflowClient()
runs = client.search_runs(
    experiment_ids=["606383372813198707"],
    filter_string="tags.version = 'v1.0'"
)

for run in runs:
    print("Run ID :", run.info.run_id)
    print("Artefacts :", run.info.artifact_uri)


Run ID : a96f7d8998d14182849e50e948dedf88
Artefacts : mlflow-artifacts:/606383372813198707/a96f7d8998d14182849e50e948dedf88/artifacts
Run ID : 9eb4e0e4c0614bb7b44332ac3c286bdc
Artefacts : mlflow-artifacts:/606383372813198707/9eb4e0e4c0614bb7b44332ac3c286bdc/artifacts
Run ID : d08d123bde804594aa6b871afa33a590
Artefacts : mlflow-artifacts:/606383372813198707/d08d123bde804594aa6b871afa33a590/artifacts
Run ID : 613b30c2963644408b06b8170d1f3477
Artefacts : mlflow-artifacts:/606383372813198707/613b30c2963644408b06b8170d1f3477/artifacts
Run ID : 401b7b68908840948320077326fc2bef
Artefacts : mlflow-artifacts:/606383372813198707/401b7b68908840948320077326fc2bef/artifacts
Run ID : b7ad840cf8e648bab020b034954d92f4
Artefacts : mlflow-artifacts:/606383372813198707/b7ad840cf8e648bab020b034954d92f4/artifacts
Run ID : 3b3a105dba904beabd5e18a267368ccc
Artefacts : mlflow-artifacts:/606383372813198707/3b3a105dba904beabd5e18a267368ccc/artifacts
Run ID : 4ed00a71bbb94973b62828662e438dc4
Artefacts : mlflow-a

In [23]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Pr√©dictions
y_train_pred = pipeline_fr.predict(X_train)
y_val_pred = pipeline_fr.predict(X_val)

# M√©triques
train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))


train_r2 = r2_score(y_train, y_train_pred)
val_r2 = r2_score(y_val, y_val_pred)

print(train_r2)
print(val_r2)

0.6151228957754777
0.5838532740986542
