# Tests MLflow : génération de quelques runs avec l'autolog

> http://parivision.heuzef.com:5000

In [172]:
# Imports librairies et secrets
import mlflow
from mlflow import MlflowClient
from mlflow.server import get_app_client
from mlflow.exceptions import MlflowException
import requests
import setuptools
import os
from dotenv import load_dotenv
# Load the .env file located in the parent directory, then read the MLflow
# credentials from the environment (presumably defined in that file; both
# values are None when the variables are missing).
load_dotenv("../.env")
username = os.getenv('MLFLOW_TRACKING_USERNAME')
password = os.getenv('MLFLOW_TRACKING_PASSWORD')
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np

In [2]:
# Connect to the MLflow tracking server
# NOTE(review): basic-auth credentials travel over plain HTTP here; confirm
# whether an HTTPS endpoint is available for this server.
mlflow_uri = "http://parivision.heuzef.com:5000"  # Address of the MLflow server
client = get_app_client("basic-auth", tracking_uri=mlflow_uri)

# Look the account up once (single round-trip) using the user name read from
# the environment rather than a hard-coded account name.
current_user = client.get_user(username)
print(f"Utilisateur : {current_user.username}")
print(f"Est admin : {current_user.is_admin}")

mlflow.set_tracking_uri(mlflow_uri)

# Check that the server answers; the timeout keeps the cell from hanging
# forever when the server is unreachable.
response = requests.get(
    f"{mlflow_uri}/",
    auth=(username, password),
    timeout=10,
)

if response.status_code == 200:
    print("Le serveur de tracking MLflow est disponible ! Youpi ! ", mlflow_uri)
    print("Message :", response.text)
else:
    print(f"Échec de la connexion. Code d'erreur : {response.status_code}")
    print("Message :", response.text)

Utilisateur : parivision
Est admin : True
Le serveur de tracking MLflow est disponible ! Youpi !  http://parivision.heuzef.com:5000
Message : <!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"/><link rel="shortcut icon" href="./static-files/favicon.ico"/><meta name="theme-color" content="#000000"/><link rel="manifest" href="./static-files/manifest.json" crossorigin="use-credentials"/><title>MLflow</title><script defer="defer" src="static-files/static/js/main.366ca744.js"></script><link href="static-files/static/css/main.17c4184f.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div><div id="modal"></div></body></html>


In [3]:
# Create (or reuse) the "test" experiment and make it the active one
mlflow.set_experiment("test")

<Experiment: artifact_location='mlflow-artifacts:/155739054227684976', creation_time=1741858116895, experiment_id='155739054227684976', last_update_time=1741858116895, lifecycle_stage='active', name='test', tags={}>

In [5]:
# Récupération d'un projet
!git clone https://github.com/DataScientest/MLflow_Course.git

Clonage dans 'MLflow_Course'...
remote: Enumerating objects: 66, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 66 (delta 25), reused 54 (delta 21), pack-reused 1 (from 1)[K
Réception d'objets: 100% (66/66), 67.28 Kio | 3.06 Mio/s, fait.
Résolution des deltas: 100% (25/25), fait.


In [10]:
# Load the dataset, separate features from target and build the split
data = pd.read_csv("MLflow_Course/data/fake_data.csv")
y = data["demand"]
X = data.drop(columns=["date", "demand"]).astype('float')

# 80/20 train/validation split with a fixed seed
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Show the shape of each part of the split, one output per object
display(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(800, 7)

(200, 7)

(800,)

(200,)

# Entraînement d'un modèle et envoi des métriques en autolog

In [211]:
mlflow.autolog()

# Train 10 random forests with random hyper-parameters, one MLflow run each
for _ in range(10):
    with mlflow.start_run():
        # Hyper-parameters are drawn at random on purpose so that the runs differ
        params = {
            "n_estimators": np.random.randint(1, 100),
            "max_depth": np.random.randint(1, 100),
            "random_state": np.random.randint(1, 100),
        }

        rf = RandomForestRegressor(**params)
        rf.fit(X_train, y_train)

        # Evaluate the model on the validation split
        y_pred = rf.predict(X_val)
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_val, y_pred)
        metrics = {"mae": mae, "mse": mse, "rmse": rmse, "r2": r2}

        # The validation metrics used to be computed and then dropped; log
        # them explicitly on the active run so they can be compared in MLflow.
        mlflow.log_metrics(metrics)

2025/03/13 13:56:18 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


🏃 View run welcoming-shad-86 at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976/runs/1c1e2282cb0746958f43ffb597288168
🧪 View experiment at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976
🏃 View run angry-ray-885 at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976/runs/0354119511b245b0a8a591618664e55a
🧪 View experiment at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976
🏃 View run bold-yak-281 at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976/runs/37196b0daa58457287619b331320df05
🧪 View experiment at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976
🏃 View run awesome-eel-358 at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976/runs/16bf7877389b4032a1b99151c58fa6eb
🧪 View experiment at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976
🏃 View run clumsy-stork-445 at: http://parivision.heuzef.com:5000/#/experiments/155739054227684976/runs

In [212]:
# Fetch the runs of the "test" experiment as a DataFrame, oldest first
runs = mlflow.search_runs(order_by=["start_time ASC"], experiment_names=["test"])
display(runs.head(3))

# Keep only the runs that finished successfully
runs = runs.loc[runs["status"].eq("FINISHED")]

print("Nombre total de run : ", len(runs))

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.training_mean_absolute_error,metrics.training_mean_squared_error,metrics.training_r2_score,metrics.training_score,...,params.criterion,params.n_estimators,params.random_state,tags.mlflow.log-model.history,tags.mlflow.source.name,tags.mlflow.source.type,tags.estimator_class,tags.estimator_name,tags.mlflow.runName,tags.mlflow.user
0,1434f6c19b974e7f926d6ba71b4a5ea8,155739054227684976,FINISHED,mlflow-artifacts:/155739054227684976/1434f6c19...,2025-03-13 09:45:34.545000+00:00,2025-03-13 09:45:41.585000+00:00,20.736414,749.241954,0.978088,0.978088,...,squared_error,14,18,"[{""run_id"": ""1434f6c19b974e7f926d6ba71b4a5ea8""...",/home/heuzef/GIT/DEC24_MLOPS_PARIS_SPORTIFS/.v...,LOCAL,sklearn.ensemble._forest.RandomForestRegressor,RandomForestRegressor,bold-squirrel-452,parivision
1,186f36b4a44940128d31140ed484ac14,155739054227684976,FINISHED,mlflow-artifacts:/155739054227684976/186f36b4a...,2025-03-13 09:49:04.237000+00:00,2025-03-13 09:49:10.640000+00:00,19.061543,587.218585,0.982826,0.982826,...,squared_error,55,77,"[{""run_id"": ""186f36b4a44940128d31140ed484ac14""...",/home/heuzef/GIT/DEC24_MLOPS_PARIS_SPORTIFS/.v...,LOCAL,sklearn.ensemble._forest.RandomForestRegressor,RandomForestRegressor,wise-ant-503,parivision
2,12a8e751057547f089c3be9e16147d70,155739054227684976,FINISHED,mlflow-artifacts:/155739054227684976/12a8e7510...,2025-03-13 09:49:11.193000+00:00,2025-03-13 09:49:17.632000+00:00,25.516734,1011.288151,0.970424,0.970424,...,squared_error,71,92,"[{""run_id"": ""12a8e751057547f089c3be9e16147d70""...",/home/heuzef/GIT/DEC24_MLOPS_PARIS_SPORTIFS/.v...,LOCAL,sklearn.ensemble._forest.RandomForestRegressor,RandomForestRegressor,sneaky-fish-708,parivision


Nombre total de run :  31


In [215]:
# Pick the champion run: the row whose training_score metric is the highest
best_index = runs['metrics.training_score'].idxmax()
run_champion = runs.loc[best_index]
display(run_champion)

run_id                                                       b60f1e7648c44497bb86d7587514b000
experiment_id                                                              155739054227684976
status                                                                               FINISHED
artifact_uri                                mlflow-artifacts:/155739054227684976/b60f1e764...
start_time                                                   2025-03-13 10:20:43.991000+00:00
end_time                                                     2025-03-13 10:20:50.408000+00:00
metrics.training_mean_absolute_error                                                18.741581
metrics.training_mean_squared_error                                                551.169059
metrics.training_r2_score                                                            0.983881
metrics.training_score                                                               0.983881
metrics.training_root_mean_squared_error                    

In [None]:
def register_model_champion(mlflow_uri, model_name, experiment_name=None):
    """
    Promote the best finished run of an experiment to the "champion" alias.

    The best run is the FINISHED run with the highest
    ``metrics.training_score``. If it is not the run behind the model version
    currently aliased "champion" (or if there is no champion yet), the run's
    model is registered as a new version of ``model_name`` and the "champion"
    alias is moved to that version. Otherwise nothing is changed.

    Parameters
    ----------
    mlflow_uri : str
        Tracking URI used to build the ``MlflowClient`` that reads and sets
        the alias.
    model_name : str
        Name of the registered model.
    experiment_name : str, optional
        Experiment whose runs are searched. Defaults to ``model_name``, which
        is what the function always used before this parameter existed.

    Returns
    -------
    None
        Progress is reported with ``print``.

    Notes
    -----
    ``mlflow.search_runs`` and ``mlflow.register_model`` use the globally
    configured tracking URI, not ``mlflow_uri``; the caller is expected to
    have called ``mlflow.set_tracking_uri`` with the same address.
    """
    score_column = "metrics.training_score"
    if experiment_name is None:
        experiment_name = model_name

    # Client configuration
    mlflow_client = MlflowClient(tracking_uri=mlflow_uri)

    # Fetch the runs as a DataFrame and keep the scores of finished runs only
    runs = mlflow.search_runs(experiment_names=[experiment_name], order_by=["start_time ASC"])
    if score_column not in runs.columns:
        print("Aucune run avec la métrique training_score : rien à enregistrer.")
        return
    scores = runs.loc[runs["status"] == "FINISHED", score_column].dropna()
    if scores.empty:
        print("Aucune run terminée avec la métrique training_score : rien à enregistrer.")
        return

    # Best run = highest training_score (idxmax returns a label of `runs`)
    run_champion = runs.loc[scores.idxmax()]

    # Run id behind the current "champion" alias. Asking the registry avoids
    # downloading the whole model just to read its run id, and a missing
    # model/alias is treated as "no champion yet" instead of crashing.
    # Any other failure (network, auth) resurfaces on the registry calls below.
    try:
        tenant_run_id = mlflow_client.get_model_version_by_alias(model_name, "champion").run_id
    except MlflowException:
        tenant_run_id = None

    if run_champion.run_id == tenant_run_id:
        print("Pas de nouveau champion.")
        return

    # Register a new model version for the new champion
    print("NOUVEAU CHAMPION !")
    print(run_champion['tags.mlflow.runName'])
    print("---------------------------------")
    print("Création d'une nouvelle version")
    model_uri = f"runs:/{run_champion.run_id}/model"
    new_version = mlflow.register_model(model_uri=model_uri, name=model_name)
    print("---------------------------------")

    # Use the version number reported by the registry rather than guessing
    # "latest + 1", which breaks if another version is created concurrently
    # or if the search result is not ordered as expected.
    mlflow_client.set_registered_model_alias(model_name, "champion", version=new_version.version)

# First call: compare the best run of "test" with the current champion and
# register a new version if the best run differs
register_model_champion(mlflow_uri=mlflow_uri, model_name="test")

NOUVEAU CHAMPION !
lyrical-hen-549
---------------------------------
Création d'une nouvelle version


Registered model 'test' already exists. Creating a new version of this model...
2025/03/13 14:07:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: test, version 2
Created version '2' of model 'test'.


---------------------------------


In [218]:
# Second call without any new run in between: the champion is expected to be
# unchanged, so no new version should be created
register_model_champion(mlflow_uri=mlflow_uri, model_name="test")

Pas de nouveau champion.


# Charger le modèle le plus performant

In [219]:
# Load the model version that currently carries the "champion" alias
champion = mlflow.pyfunc.load_model("models:/test@champion")

champion

mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.sklearn
  run_id: b60f1e7648c44497bb86d7587514b000

In [220]:
# Predict on the validation features with the champion model and show the
# first ten predictions
y_pred = champion.predict(X_val)
display(y_pred[:10])

array([ 968.8753053 , 1473.50852144,  900.93331674, 1278.02932773,
        980.34849773,  904.58644434, 1191.53962639, 1224.32342824,
        914.41162025,  978.96272046])