Scenario 3: Multiple data scientists working on multiple ML models
MLflow setup:

Tracking server: yes, remote server (EC2).
Backend store: postgresql database.
Artifacts store: s3 bucket.
The experiments can be explored by accessing the remote server.

The example uses AWS to host a remote server. In order to run the example you'll need an AWS account. Follow the steps described in the file `mlflow_on_aws.md` to create a new AWS account and launch the tracking server.

In [4]:
import mlflow
import os

# Name of the AWS profile to use for this session. Fill this in with your own
# profile; when left empty the AWS_PROFILE environment variable is not touched.
# NOTE(review): presumably the profile must be able to read/write the S3
# artifact bucket -- confirm against mlflow_on_aws.md.
AWS_PROFILE_NAME = ""
if AWS_PROFILE_NAME:
    os.environ["AWS_PROFILE"] = AWS_PROFILE_NAME

# Public DNS of the EC2 instance hosting the tracking server; replace it with
# the address of your own instance.
TRACKING_SERVER_HOST = "ec2-35-171-161-207.compute-1.amazonaws.com"
mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:5000")

In [5]:
# Show which tracking server the MLflow client is currently pointed at.
tracking_uri = mlflow.get_tracking_uri()
print(f"tracking uri: {tracking_uri}")

tracking uri: http://ec2-35-171-161-207.compute-1.amazonaws.com:5000


In [6]:
# List the experiments known to the tracking server before anything is logged.
mlflow.search_experiments()

[<Experiment: artifact_location='s3://mlflow-datacamp/0', creation_time=1717530568967, experiment_id='0', last_update_time=1717530568967, lifecycle_stage='active', name='Default', tags={}>]

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Select the "my_exp" experiment for the run started below.
mlflow.set_experiment("my_exp")

with mlflow.start_run():
    # Iris features and labels, returned as an (X, y) pair.
    X, y = load_iris(return_X_y=True)

    # Log the hyperparameters on the run before fitting.
    params = dict(C=0.1, random_state=42)
    mlflow.log_params(params)

    lr = LogisticRegression(**params)
    lr.fit(X, y)

    # NOTE: accuracy is computed on the same data the model was fitted on.
    y_pred = lr.predict(X)
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Log the fitted model under the run's "models" artifact path.
    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}")



default artifacts URI: 's3://mlflow-datacamp/1/4d4a244dda9d456fb24e5f487b9dca7f/artifacts


In [11]:
# List the experiments again now that a run has been logged to "my_exp".
mlflow.search_experiments()

[<Experiment: artifact_location='s3://mlflow-datacamp/1', creation_time=1717531609811, experiment_id='1', last_update_time=1717531609811, lifecycle_stage='active', name='my_exp', tags={}>,
 <Experiment: artifact_location='s3://mlflow-datacamp/0', creation_time=1717530568967, experiment_id='0', last_update_time=1717530568967, lifecycle_stage='active', name='Default', tags={}>]

In [12]:
from mlflow.tracking import MlflowClient

In [13]:
# Reuse the tracking URI already configured through mlflow.set_tracking_uri,
# so the client and the fluent mlflow API always target the same server
# instead of rebuilding the URL from host and port a second time.
client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())

In [14]:
# Resolve the experiment by name rather than hard-coding its ID, so the cell
# keeps working if the tracking server assigns a different ID.
experiment = client.get_experiment_by_name("my_exp")
if experiment is None:
    raise RuntimeError("Experiment 'my_exp' was not found on the tracking server")

# Ask explicitly for the most recent run instead of relying on default ordering.
runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=1,
)
if not runs:
    raise RuntimeError("Experiment 'my_exp' has no runs to register a model from")

run_info = runs[0]
print(run_info)
run_id = run_info.info.run_id

# "models" is the artifact path the model was logged under in the training run.
# Kept as the last expression so the resulting ModelVersion is displayed.
mlflow.register_model(
    model_uri=f"runs:/{run_id}/models",
    name='iris-classifier'
)

<Run: data=<RunData: metrics={'accuracy': 0.96}, params={'C': '0.1', 'random_state': '42'}, tags={'mlflow.log-model.history': '[{"run_id": "4d4a244dda9d456fb24e5f487b9dca7f", '
                             '"artifact_path": "models", "utc_time_created": '
                             '"2024-06-04 20:20:33.406816", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"predict_fn": "predict", "loader_module": '
                             '"mlflow.sklearn", "python_version": "3.10.13", '
                             '"env": {"conda": "conda.yaml", "virtualenv": '
                             '"python_env.yaml"}}, "sklearn": '
                             '{"pickled_model": "model.pkl", '
                             '"sklearn_version": "1.4.1.post1", '
                             '"serialization_format": "cloudpickle", "code": '
                             'null}}, "model_uuid": '
                             '"01e2

Successfully registered model 'iris-classifier'.
2024/06/04 20:24:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifier, version 1
Created version '1' of model 'iris-classifier'.


<ModelVersion: aliases=[], creation_timestamp=1717532669487, current_stage='None', description='', last_updated_timestamp=1717532669487, name='iris-classifier', run_id='4d4a244dda9d456fb24e5f487b9dca7f', run_link='', source='s3://mlflow-datacamp/1/4d4a244dda9d456fb24e5f487b9dca7f/artifacts/models', status='READY', status_message='', tags={}, user_id='', version='1'>