# MLflow's Model Registry

In [2]:
from mlflow.tracking import MlflowClient

# Tracking store URI: a SQLite database file named mlflow.db. The path is relative,
# so it resolves against the directory the notebook is run from.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

## Interacting with the MLflow tracking server
The MlflowClient object allows us to interact with:

* an MLflow Tracking Server that creates and manages experiments and runs.
* an MLflow Registry Server that creates and manages registered models and model versions.

To instantiate it, we need to pass a tracking URI and/or a registry URI.

In [6]:
# Client bound to the store identified by MLFLOW_TRACKING_URI; reused by later cells.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
experiments = client.search_experiments()

# Print the name of every experiment the search returned.
for experiment in experiments:
    print(experiment.name)


nyc-taxi-experiment
Default


In [9]:
from mlflow.entities import ViewType

# Query experiment '1' for active runs whose logged `rmse` metric is below 7,
# sorted by ascending rmse and capped at 5 results.
runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse < 7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [11]:
# Show each returned run's id next to its rmse, formatted to four decimals.
for run in runs:
    print(f"run_id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run_id: 06c9b9b0a5734731bfc12155302cb104, rmse: 6.3174
run_id: 76c13515842643e395e733a010902a90, rmse: 6.3263
run_id: 06bf9ccce5704986be6f3c9423112050, rmse: 6.3358
run_id: a8b8440fc65c4a60a1281fd540410a72, rmse: 6.3394
run_id: 905daf30a5434a9090004c3681d56612, rmse: 6.3488


## Interacting with the Model Registry
In this section we will use the MlflowClient instance to:

1. Register a new version of the model nyc-taxi-regressor.
2. Retrieve the latest versions of the model nyc-taxi-regressor and check that a new version was created (version 3 in the recorded output below).
3. Transition a model version to "Staging" (version 2 in the code below) and add annotations to it.

In [12]:
import mlflow

# Point the module-level mlflow API at the same store URI the client was created with.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [15]:
# Hard-coded id of the run whose logged model is registered; also reused later
# to download that run's preprocessor artifact.
run_id = "a9331c27a4b746f2a4844bd2e65adf86"
# URI addressing the "models_mlflow" artifact path of that run.
model_uri = f"runs:/{run_id}/models_mlflow"
# Register the artifact under "nyc-taxi-regressor"; per the recorded output, an
# existing registered model of that name receives a new version.
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1727688645761, current_stage='None', description=None, last_updated_timestamp=1727688645761, name='nyc-taxi-regressor', run_id='a9331c27a4b746f2a4844bd2e65adf86', run_link=None, source='/workspaces/mlops-zc/02-experiment-tracking/mlruns/1/a9331c27a4b746f2a4844bd2e65adf86/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [17]:
# Fetch the registered model's metadata; the displayed repr includes its latest versions.
client.get_registered_model(name="nyc-taxi-regressor")

<RegisteredModel: aliases={}, creation_timestamp=1727687802728, description=None, last_updated_timestamp=1727688645761, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1727688645761, current_stage='None', description=None, last_updated_timestamp=1727688645761, name='nyc-taxi-regressor', run_id='a9331c27a4b746f2a4844bd2e65adf86', run_link=None, source='/workspaces/mlops-zc/02-experiment-tracking/mlruns/1/a9331c27a4b746f2a4844bd2e65adf86/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=3>], name='nyc-taxi-regressor', tags={}>

In [18]:
# Registered model name shared by the remaining registry cells.
model_name = "nyc-taxi-regressor"
# NOTE(review): the truncated warning echoed in this cell's output suggests
# get_latest_versions is deprecated in the installed MLflow — confirm before relying on it.
latest_versions = client.get_latest_versions(name=model_name)

# Print the version number and current stage of each returned model version.
for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 3, stage: None


  latest_versions = client.get_latest_versions(name=model_name)


In [19]:
# NOTE(review): the registration cell's output reports version 3 as the newly created
# version, while version 2 is the one promoted here — confirm this is the intended version.
model_version = 2
new_stage = "Staging"
# Move the chosen version into `new_stage`. archive_existing_versions=False
# presumably leaves versions already in that stage untouched — verify against the MLflow docs.
# NOTE(review): the truncated warning in this cell's output suggests this call is
# deprecated in the installed MLflow — confirm.
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1727688523982, current_stage='Staging', description=None, last_updated_timestamp=1727689493493, name='nyc-taxi-regressor', run_id='a9331c27a4b746f2a4844bd2e65adf86', run_link=None, source='/workspaces/mlops-zc/02-experiment-tracking/mlruns/1/a9331c27a4b746f2a4844bd2e65adf86/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

Bad pipe message: %s [b'"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";']
Bad pipe message: %s [b'"129"\r\nsec-ch-ua-mobile: ?0\r\nsec-ch-ua-platform: "Windows"\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Wi', b'ows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36\r\nAccept: text/']
Bad pipe message: %s [b'ml,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/s']
Bad pipe message: %s [b'ol: max-age=0\r\nsec-ch-ua: "Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"\r\nsec-ch-ua-mobile: ?0\r\ns']
Bad pipe message: %s [b'-ch-ua-platform: "Windows"\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; ', b'n64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36\r\nAccep', b' text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signe']


In [20]:
from datetime import datetime

# Today's local calendar date (time component dropped), embedded in the description below.
date = datetime.today().date()
# Annotate the model version with a description recording its stage transition.
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
)

<ModelVersion: aliases=[], creation_timestamp=1727688523982, current_stage='Staging', description='The model version 2 was transitioned to Staging on 2024-09-30', last_updated_timestamp=1727690497025, name='nyc-taxi-regressor', run_id='a9331c27a4b746f2a4844bd2e65adf86', run_link=None, source='/workspaces/mlops-zc/02-experiment-tracking/mlruns/1/a9331c27a4b746f2a4844bd2e65adf86/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

----------------------------------------------------------------------------------------------------------------------------------------------------------

In [21]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for duration modelling.

    Steps performed:
      1. Parse the ``lpep_pickup_datetime`` / ``lpep_dropoff_datetime`` columns
         as datetimes.
      2. Add a ``duration`` column holding the trip length in minutes.
      3. Keep only trips lasting between 1 and 60 minutes (inclusive).
      4. Cast ``PULocationID`` and ``DOLocationID`` to strings.

    Args:
        filename: Path or URL handed to ``pd.read_parquet``.

    Returns:
        A new DataFrame with the filtered rows and the extra ``duration`` column.
        The original row index is kept (it is not reset after filtering).
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised conversion of the timedelta column to minutes; equivalent to
    # calling total_seconds() on every row but without the Python-level loop.
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # .copy() detaches the filtered rows from the unfiltered frame so the column
    # assignment below is not a chained assignment on a slice
    # (which triggers SettingWithCopyWarning on pandas 2.x).
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df


def preprocess(df, dv):
    """Turn a trip DataFrame into the feature matrix produced by ``dv``.

    A combined pickup/drop-off key column ``PU_DO`` is written onto ``df`` in
    place; that column and ``trip_distance`` are then converted to one dict per
    row and passed to ``dv.transform``.

    Args:
        df: Frame with ``PULocationID``, ``DOLocationID`` and ``trip_distance``
            columns. It gains a ``PU_DO`` column as a side effect.
        dv: Object exposing ``transform(records)``.

    Returns:
        Whatever ``dv.transform`` returns for the row records.
    """
    pickup = df['PULocationID']
    dropoff = df['DOLocationID']
    df['PU_DO'] = pickup + '_' + dropoff

    feature_columns = ['PU_DO', 'trip_distance']
    records = df[feature_columns].to_dict(orient='records')
    return dv.transform(records)


def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [22]:
# Load and filter the green-taxi trip records for 2021-03 (per the file name in the URL).
df = read_dataframe('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-03.parquet')


In [23]:
# Download the run's 'preprocessor' artifact into the current working directory;
# the call returns the local path (shown in the cell output).
client.download_artifacts(run_id=run_id, path='preprocessor', dst_path='.')

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/workspaces/mlops-zc/02-experiment-tracking/preprocessor'

In [24]:
import pickle

# Load the pickled preprocessor from the downloaded artifact directory; it is later
# passed to preprocess() as `dv`.
# NOTE: pickle.load can execute arbitrary code embedded in the file — only load
# artifacts that come from a tracking store you trust.
with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [25]:
# Build the feature matrix with the loaded preprocessor. preprocess() also adds a
# 'PU_DO' column to `df` as a side effect.
X_test = preprocess(df, dv)

In [26]:
# Ground-truth trip durations (minutes, as computed in read_dataframe) as a NumPy array.
target = "duration"
y_test = df[target].values

In [29]:
# Time loading and scoring the registered model. stage=None is interpolated by
# test_model's f-string as the literal text "None" in the model URI.
%time test_model(name=model_name, stage=None, X_test=X_test, y_test=y_test)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


CPU times: user 3.01 s, sys: 294 ms, total: 3.3 s
Wall time: 2.86 s




{'rmse': np.float64(6.414347681138584)}