## MLflow Configuration

In [1]:
#!pip install mlflow
#!pip install --upgrade jinja2
#!pip install --upgrade Flask
#!pip install setuptools

In [2]:
## Import de librairie
import pandas as pd
import mlflow
from mlflow import MlflowClient
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from pprint import pprint
import joblib
from sklearn import pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [3]:
# Configuration de mon Mlflow sur un server local 
# à exéuter dans le terminal
#!mlflow server --host 127.0.0.1 --port 8080

In [4]:
## Connecxion au serveur MLFlow
client= MlflowClient(tracking_uri="http://127.0.0.1:8080")
## Configuration 
mlflow.set_tracking_uri("http://127.0.0.1:8080")



In [5]:
# Récupération de toutes les expériences MLflow
all_experiments = client.search_experiments()
# Affichage des expériences 
print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/952359116459871595', creation_time=1760986536774, experiment_id='952359116459871595', last_update_time=1760986536774, lifecycle_stage='active', name='TREE_Models', tags={'mlflow.experimentKind': 'custom_model_development'}>, <Experiment: artifact_location='file:///C:/Users/j_aka/Desktop/mlops/my_app/mlruns/0', creation_time=1760877394979, experiment_id='0', last_update_time=1760877394979, lifecycle_stage='active', name='Default', tags={}>]


In [6]:
# Recherche des expériences Mlflow Spécifique.
client.search_experiments(filter_string="name = 'RDF_Models'")

[]

## Chargement du fichier

In [7]:
# Définition du chemin vers le fichier de  données 
data_dir = r"C:\Users\j_aka\Desktop\mlops\credit_risk_dataset.csv"
# Chargement de la données 
df = pd.read_csv(data_dir, delimiter=",") 
#Affichage des 3 premières lignes 
print(df.head(3).to_string())

   person_age  person_income person_home_ownership  person_emp_length loan_intent loan_grade  loan_amnt  loan_int_rate  loan_status  loan_percent_income cb_person_default_on_file  cb_person_cred_hist_length
0          22          59000                  RENT              123.0    PERSONAL          D      35000          16.02            1                 0.59                         Y                           3
1          21           9600                   OWN                5.0   EDUCATION          B       1000          11.14            0                 0.10                         N                           2
2          25           9600              MORTGAGE                1.0     MEDICAL          C       5500          12.87            1                 0.57                         N                           3


## Random Forest model

In [9]:
# Définition des Features et de la cible 
target_col = 'loan_status'
X = df.drop(columns=['loan_status'])    
y = df['loan_status']

# Split des données 
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


# Colonne numérique et colonne catégorielle 
num_features = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

# Conversion des colonnes numériques en float64
X[num_features] = X[num_features].astype("float64")

#Encoder et Gérer les données Nan 
preprocessor = ColumnTransformer(
    transformers= [
        ("num_features", "passthrough",num_features),
        ("cat_features", OneHotEncoder(handle_unknown="ignore"),cat_features)
    ]
)


# Hyperparamètre du modèle
params = {
    "n_estimators": 100,
    "max_depth": 6,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "bootstrap": True,
    "oob_score": False,
    "random_state": 888,
}
# Nom du modèle : 
mlflow.set_experiment("RDF_Models")
# Nom du run pour cette itération d'entraînement
run_name = "RDF_Run"
# les artefacts du modèle
artifact_path = "RDF_artefacts"
# Fermer tout run précédent
#mlflow.end_run()

with mlflow.start_run(run_name=run_name):
    # Tag de version du modèle
    mlflow.set_tag("version", "v1.0")

# Encodage et entrainement du modèle 
    pipeline_fr = Pipeline(steps=[
    ("processor", preprocessor),
    ("model", RandomForestClassifier(**params))
   ])
    
# Entrainement du modèle
    pipeline_fr.fit(X_train,y_train)

# Prédiction du modèle
    y_proba = pipeline_fr.predict_proba(X_val)[:, 1]
    y_pred = (y_proba >= 0.3).astype(int)

# Calcul des métriques
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1
    }

    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model= pipeline_fr, 
        input_example=X_val, 
        name = artifact_path
    ) 

    print("Modèle entraîné et logué dans MLflow avec succès.")
    print("\nRapport de classification :\n", classification_report(y_val, y_pred))
    print("\nMatrice de confusion :\n", confusion_matrix(y_val, y_pred))
    print("Métriques :", metrics)

2025/10/20 21:08:46 INFO mlflow.tracking.fluent: Experiment with name 'RDF_Models' does not exist. Creating a new experiment.


Modèle entraîné et logué dans MLflow avec succès.

Rapport de classification :
               precision    recall  f1-score   support

           0       0.93      0.93      0.93      5072
           1       0.76      0.75      0.75      1445

    accuracy                           0.89      6517
   macro avg       0.85      0.84      0.84      6517
weighted avg       0.89      0.89      0.89      6517


Matrice de confusion :
 [[4739  333]
 [ 368 1077]]
Métriques : {'accuracy': 0.8924351695565444, 'precision': 0.7638297872340426, 'recall': 0.7453287197231834, 'f1': 0.7544658493870403}
🏃 View run RDF_Run at: http://127.0.0.1:8080/#/experiments/244462297279921565/runs/3cebcdf2d5194a50b049c96d8252a1b4
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/244462297279921565


In [None]:
mlflow.active_run()
