## MLflow Configuration

In [None]:
#!pip install mlflow
#!pip install --upgrade jinja2
#!pip install --upgrade Flask
#!pip install setuptools

In [None]:
## Import des librairies
import pandas as pd
import mlflow
from mlflow import MlflowClient
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from pprint import pprint
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [None]:
# Point the fluent mlflow.* API at the tracking server on the loopback address.
mlflow.set_tracking_uri("http://127.0.0.1:8080")
# Low-level client bound explicitly to the same tracking server.
client = MlflowClient(tracking_uri="http://127.0.0.1:8080")


In [None]:
# Query the tracking server for experiments (no filter is applied).
all_experiments = client.search_experiments()
# Print the returned experiments.
print(all_experiments)

In [None]:
# Look up the MLflow experiment whose name equals 'RLG_Models'.
# The result is not assigned; presumably it is meant to be shown as the
# notebook cell's output.
client.search_experiments(filter_string="name = 'RLG_Models'")

## Chargement du fichier 

In [None]:
# Path of the credit-risk CSV file to load.
data_dir = r"C:\Users\j_aka\Desktop\mlops\credit_risk_dataset.csv"
# Read the comma-separated file into a DataFrame.
df = pd.read_csv(data_dir, sep=",")
# Print the first three rows, rendered in full through to_string().
print(df.iloc[:3].to_string())

## Logistic regression model

In [None]:
# Features and target: every column except the label is used as a feature.
target_col = 'loan_status'
X = df.drop(columns=[target_col])
y = df[target_col]

# Numeric and categorical feature columns, detected from the dtypes.
num_features = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

# Cast the numeric columns to float64 BEFORE splitting: train_test_split
# returns new frames, so a cast applied to X afterwards never reaches
# X_train / X_val.
X[num_features] = X[num_features].astype("float64")

# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Mean-impute missing numeric values and one-hot encode the categorical
# columns; categories not seen during fit are ignored at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ("num_features", SimpleImputer(strategy="mean"), num_features),
        ("cat_features", OneHotEncoder(handle_unknown="ignore"), cat_features),
    ]
)

# Model hyperparameters: one dict feeds both the estimator and the MLflow
# log, so the tracked values cannot drift away from the ones actually used.
params = {"max_iter": 3000}
# Probability cut-off at or above which a row is predicted as the positive class.
decision_threshold = 0.3

# Experiment that receives the run.
mlflow.set_experiment("RLG_Models")
# Name of the run for this training iteration.
run_name = "RLG_Run"
# Name under which the model is logged.
artifact_path = "RLG_artefacts"

with mlflow.start_run(run_name=run_name):
    # Model version tag.
    mlflow.set_tag("version", "v1.0")

    # Preprocessing and classifier chained in a single pipeline, so the same
    # transformations are applied at fit and predict time.
    pipeline_fr = Pipeline(steps=[
        ("processor", preprocessor),
        ("model", LogisticRegression(**params)),
    ])

    # Train the model.
    pipeline_fr.fit(X_train, y_train)

    # Probability of the second class (presumably loan_status == 1 — confirm),
    # turned into hard labels with the custom threshold.
    y_proba = pipeline_fr.predict_proba(X_val)[:, 1]
    y_pred = (y_proba >= decision_threshold).astype(int)

    # Validation metrics.
    metrics = {
        "accuracy": accuracy_score(y_val, y_pred),
        "precision": precision_score(y_val, y_pred),
        "recall": recall_score(y_val, y_pred),
        "f1": f1_score(y_val, y_pred),
    }

    # Log the hyperparameters that were really used, plus the threshold the
    # metrics depend on.
    mlflow.log_params(params)
    mlflow.log_param("decision_threshold", decision_threshold)
    mlflow.log_metrics(metrics)
    # A handful of rows is enough as an input example; passing the whole
    # validation set would store all of it alongside the model.
    mlflow.sklearn.log_model(
        sk_model=pipeline_fr,
        input_example=X_val.head(5),
        name=artifact_path,
    )

    print("‚úÖ Mod√®le entra√Æn√© et logu√© dans MLflow avec succ√®s.")
    print("\nRapport de classification :\n", classification_report(y_val, y_pred))
    print("\nMatrice de confusion :\n", confusion_matrix(y_val, y_pred))
    print("üìä M√©triques :", metrics)