In [1]:
import mlflow

In [2]:
import boto3

In [3]:
TRACKING_SERVER_HOST = "ec2-34-227-160-154.compute-1.amazonaws.com" # fill in with the public DNS of the EC2 instance
mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:5000")

In [4]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'http://ec2-34-227-160-154.compute-1.amazonaws.com:5000'


In [5]:
from mlflow.tracking import MlflowClient

In [9]:
client = MlflowClient()
experiments = (
    client.search_experiments()
) 

In [10]:
experiments

[<Experiment: artifact_location='s3://mlflow-artifacts-remote-41/1', creation_time=1690272477690, experiment_id='1', last_update_time=1690272477690, lifecycle_stage='active', name='my-experiment-1', tags={}>,
 <Experiment: artifact_location='s3://mlflow-artifacts-remote-41/0', creation_time=1690019174127, experiment_id='0', last_update_time=1690019174127, lifecycle_stage='active', name='Default', tags={}>]

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

mlflow.set_experiment("my-experiment-1")

with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

Exception: Run with UUID e5779458e0814beead50264ed5528f71 is already active. To start a new run, first end the current run with mlflow.end_run(). To start a nested run, call start_run with nested=True

In [49]:
mlflow.end_run()

In [50]:
with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

default artifacts URI: 's3://mlflow-artifacts-remote-41/1/6a725572f4e549839b5969de370f3ec0/artifacts'


## Interacting with the model registry

In [16]:
import mlflow.sklearn
from mlflow.models import infer_signature

In [14]:
ap = mlflow.get_artifact_uri()

In [15]:
ap

's3://mlflow-artifacts-remote-41/1/e5779458e0814beead50264ed5528f71/artifacts'

In [17]:
signature = infer_signature(X, y_pred)

In [18]:
mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path=ap,
        signature=signature,
        registered_model_name="sk-learn-linear-regression-reg-model",
    )



Successfully registered model 'sk-learn-linear-regression-reg-model'.
2023/07/25 03:33:40 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-linear-regression-reg-model, version 1
Created version '1' of model 'sk-learn-linear-regression-reg-model'.


<mlflow.models.model.ModelInfo at 0x16a7a8d00>

In [19]:
from pprint import pprint

client = MlflowClient()
for rm in client.search_registered_models():
    pprint(dict(rm), indent=4)

{   'aliases': {},
    'creation_timestamp': 1690274020538,
    'description': '',
    'last_updated_timestamp': 1690274020873,
    'latest_versions': [   <ModelVersion: aliases=[], creation_timestamp=1690274020873, current_stage='None', description='', last_updated_timestamp=1690274020873, name='sk-learn-linear-regression-reg-model', run_id='e5779458e0814beead50264ed5528f71', run_link='', source='s3://mlflow-artifacts-remote-41/1/e5779458e0814beead50264ed5528f71/artifacts/s3://mlflow-artifacts-remote-41/1/e5779458e0814beead50264ed5528f71/artifacts', status='READY', status_message='', tags={}, user_id='', version='1'>],
    'name': 'sk-learn-linear-regression-reg-model',
    'tags': {}}


In [22]:
run_id = "e5779458e0814beead50264ed5528f71"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Successfully registered model 'nyc-taxi-regressor'.
2023/07/25 03:54:31 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: nyc-taxi-regressor, version 1
Created version '1' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1690275271378, current_stage='None', description='', last_updated_timestamp=1690275271378, name='nyc-taxi-regressor', run_id='e5779458e0814beead50264ed5528f71', run_link='', source='s3://mlflow-artifacts-remote-41/1/e5779458e0814beead50264ed5528f71/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [24]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse < 7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [26]:
runs = client.search_runs(
    experiment_ids='1'
)

In [38]:
runs[1].info.run_id

'a61f62f64a174de987bbf5f86799bcd3'

In [48]:
for run in runs[1:]:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['accuracy']:.4f}")
    #print(f"run id: {run.info.run_id}")

run id: a61f62f64a174de987bbf5f86799bcd3, rmse: 0.9600
run id: c31f55f2c1654346b7062112e8077f9f, rmse: 0.9600


In [46]:
runs[0].data.metrics['accuracy']

KeyError: 'accuracy'