In [1]:
from utils import *

In [2]:
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

List out the existing experiments

In [3]:
client.search_experiments()

[<Experiment: artifact_location='/Users/isham993/Desktop/Programming-Tutorials/2023-Data-Science/mlflow-experiment-intermediate-level/mlruns/2', creation_time=1698735250201, experiment_id='2', last_update_time=1698735250201, lifecycle_stage='active', name='my-new-experiment', tags={}>,
 <Experiment: artifact_location='/Users/isham993/Desktop/Programming-Tutorials/2023-Data-Science/mlflow-experiment-intermediate-level/mlruns/1', creation_time=1696819671257, experiment_id='1', last_update_time=1696819671257, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1696819623362, experiment_id='0', last_update_time=1696819623362, lifecycle_stage='active', name='Default', tags={}>]

Create new experiment

In [6]:
client.create_experiment(name="my-new-experiment")

'2'

Inspecting Experiment Runs on Jupyter Notebook

In [4]:
runs = client.search_runs(
    experiment_ids="1",
    filter_string="metrics.rmse < 6.4",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=10,
    order_by=["metrics.rmse ASC"],
)

In [5]:
for run in runs:
    print(f"run_id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run_id: 8e1de85dc458416c98007748971d4ac0, rmse: 6.3182
run_id: ca2fff84b7674c8d9aa798802593304f, rmse: 6.3182
run_id: 9df74d0eac084f7a9449175793baf19b, rmse: 6.3182
run_id: a72db6a16c704f518a39dd6f58c9ada9, rmse: 6.3531
run_id: ceb093659092406e9fa46067396e522a, rmse: 6.3557


In [6]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

Registering new model

In [7]:
run_id = "8e1de85dc458416c98007748971d4ac0"
model_uri = f"run:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1698778875325, current_stage='None', description=None, last_updated_timestamp=1698778875325, name='nyc-taxi-regressor', run_id=None, run_link=None, source='run:/8e1de85dc458416c98007748971d4ac0/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

Listing out all the registered models

In [8]:
client.search_registered_models()

[<RegisteredModel: aliases={}, creation_timestamp=1698737756737, description=None, last_updated_timestamp=1698778875325, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1698737756764, current_stage='Staging', description=None, last_updated_timestamp=1698778463332, name='nyc-taxi-regressor', run_id=None, run_link=None, source='run:/8e1de85dc458416c98007748971d4ac0/model', status='READY', status_message=None, tags={'model': 'xgboost_1'}, user_id=None, version=1>,
  <ModelVersion: aliases=[], creation_timestamp=1698757029509, current_stage='Production', description='', last_updated_timestamp=1698778453583, name='nyc-taxi-regressor', run_id='8e1de85dc458416c98007748971d4ac0', run_link='', source='/Users/isham993/Desktop/Programming-Tutorials/2023-Data-Science/mlflow-experiment-intermediate-level/mlruns/1/8e1de85dc458416c98007748971d4ac0/artifacts/models_mlflow', status='READY', status_message=None, tags={'model': 'xgboost_2'}, user_id=None, version=2>,
  <ModelVersion: alias

Getting the latest versions of the models

In [11]:
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)


In [35]:
for latest_version in latest_versions:
    print(f"version: {latest_version.version}, stage: {latest_version.current_stage}")

version: 1, stage: Staging
version: 2, stage: Production
version: 3, stage: None


Transitioning the current stage from None to Staging

In [17]:
model_version = 3
new_stage = "Staging"

client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False,
)

<ModelVersion: aliases=[], creation_timestamp=1698778875325, current_stage='Staging', description=None, last_updated_timestamp=1698781671705, name='nyc-taxi-regressor', run_id=None, run_link=None, source='run:/8e1de85dc458416c98007748971d4ac0/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [16]:
from datetime import datetime

In [18]:
date = datetime.today().date()
date

datetime.date(2023, 10, 31)

In [20]:
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}",
)

<ModelVersion: aliases=[], creation_timestamp=1698778875325, current_stage='Staging', description='The model version 3 was transitioned to Staging on 2023-10-31', last_updated_timestamp=1698781915382, name='nyc-taxi-regressor', run_id=None, run_link=None, source='run:/8e1de85dc458416c98007748971d4ac0/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [23]:
df = read_dataframe("data/green_tripdata_2022-03.parquet")

In [25]:
run_id

'8e1de85dc458416c98007748971d4ac0'

In [26]:
client.download_artifacts(
    run_id=run_id,
    path="preprocessor",
    dst_path=".",
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/Users/isham993/Desktop/Programming-Tutorials/2023-Data-Science/mlflow-experiment-intermediate-level/preprocessor'

In [27]:
import pickle

with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [28]:
X_test = preprocess(df, dv)

In [29]:
target = "duration"
y_test = df[target].values

In [34]:
%time test_model(name=model_name, stage="Production", X_test=X_test, y_test=y_test)



CPU times: user 6.28 s, sys: 169 ms, total: 6.45 s
Wall time: 1.13 s


{'rmse': 6.271229087600399}

In [32]:
%time test_model(name=model_name, stage="Staging", X_test=X_test, y_test=y_test)

MlflowException: Could not find a registered artifact repository for: run:/8e1de85dc458416c98007748971d4ac0/model. Currently registered schemes are: ['', 'file', 's3', 'r2', 'gs', 'wasbs', 'ftp', 'sftp', 'dbfs', 'hdfs', 'viewfs', 'runs', 'models', 'http', 'https', 'mlflow-artifacts']