In [18]:
# Install the required packages (run once; `%pip` installs into the kernel's environment)
# %pip install mlflow boto3 psycopg2-binary

In [2]:
import mlflow
import os
import boto3

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [6]:
# Named AWS profile to use for credentials -- fill in with your own profile.
# More info: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
os.environ["AWS_PROFILE"] = "mlflow-user"

# Public DNS of the EC2 instance that hosts the MLflow tracking server -- fill in with your own.
TRACKING_SERVER_HOST = "ec2-51-20-3-129.eu-north-1.compute.amazonaws.com"
# Port the tracking server is reached on (kept as a named constant instead of an inline literal).
TRACKING_SERVER_PORT = 5000

# All subsequent fluent `mlflow.*` calls in this notebook go to this URI.
mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}")

In [7]:
# Show the tracking URI currently configured in MLflow
print(f"Tracking URI: '{mlflow.get_tracking_uri()}'")

Tracking URI: 'http://ec2-51-20-3-129.eu-north-1.compute.amazonaws.com:5000'


In [8]:
# List the experiments returned by the tracking server
mlflow.search_experiments()

[<Experiment: artifact_location='s3://mlflow-artifact-remote-eff/1', creation_time=1720452889580, experiment_id='1', last_update_time=1720452889580, lifecycle_stage='active', name='my-experiment-1', tags={}>,
 <Experiment: artifact_location='s3://mlflow-artifact-remote-eff/0', creation_time=1720452252445, experiment_id='0', last_update_time=1720452252445, lifecycle_stage='active', name='Default', tags={}>]

In [9]:
# Create a boto3 session. No profile is passed explicitly here; the notebook sets
# the AWS_PROFILE environment variable earlier, and boto3 resolves credentials from it.
session = boto3.Session()

# S3 client built from that session (used below to check bucket access)
s3 = session.client(service_name="s3")

In [11]:
# Liste des buckets pour vérifier la configuration
response = s3.list_buckets()
print("Buckets:", [bucket['Name'] for bucket in response['Buckets']])

Buckets: ['mlflow-artifact-remote-eff']


In [10]:
# Log the run below under the "my-experiment-1" experiment
mlflow.set_experiment("my-experiment-1")

with mlflow.start_run():

    # Iris features and labels
    X, y = load_iris(return_X_y=True)

    # Hyperparameters are logged first, then passed unchanged to the estimator
    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params)
    lr.fit(X, y)

    # NOTE(review): accuracy is computed on the same data the model was fitted on,
    # so it is a training accuracy, not a held-out estimate.
    y_pred = lr.predict(X)
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Store the fitted model under the "models" artifact path of this run
    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

default artifacts URI: 's3://mlflow-artifact-remote-eff/1/20839fadd1204d97af31541f948605f0/artifacts'


In [12]:
# List the experiments again, now that a run has been logged
mlflow.search_experiments()

[<Experiment: artifact_location='s3://mlflow-artifact-remote-eff/1', creation_time=1720452889580, experiment_id='1', last_update_time=1720452889580, lifecycle_stage='active', name='my-experiment-1', tags={}>,
 <Experiment: artifact_location='s3://mlflow-artifact-remote-eff/0', creation_time=1720452252445, experiment_id='0', last_update_time=1720452252445, lifecycle_stage='active', name='Default', tags={}>]

## Interagir avec le registre de modèles

In [13]:
from mlflow.tracking import MlflowClient

In [14]:
# Point the client at the tracking URI already configured via mlflow.set_tracking_uri,
# instead of rebuilding the URL by hand (keeps scheme, host and port defined in one place).
client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())

In [15]:
# List the models currently present in the model registry
client.search_registered_models()

[]

In [17]:
# Experiment whose most recent run is registered, and the registry name to use
EXPERIMENT_NAME = "my-experiment-1"
REGISTERED_MODEL_NAME = "iris-classifier"

# Experiment IDs are assigned by the tracking server, so resolve the ID from the
# experiment name rather than hard-coding it.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

if experiment is None:
    print(f"Experiment '{EXPERIMENT_NAME}' not found on the tracking server")
else:
    experiment_id = experiment.experiment_id

    # Fetch only the most recently started run of that experiment
    runs = client.search_runs(
        experiment_ids=[experiment_id],
        order_by=["attribute.start_time desc"],
        max_results=1
    )

    if runs:
        run_id = runs[0].info.run_id

        # Register the model logged by that run. The "models" sub-path must match
        # the artifact_path used when the model was logged.
        model_version = mlflow.register_model(
            model_uri=f"runs:/{run_id}/models",
            name=REGISTERED_MODEL_NAME
        )
    else:
        print("No runs found for the specified experiment_id")

Successfully registered model 'iris-classifier'.
2024/07/09 09:34:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifier, version 1
Created version '1' of model 'iris-classifier'.
