In [66]:
import mlflow
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [67]:
client.list_experiments()

[<Experiment: artifact_location='./mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>]

In [68]:
EXPERIMENT_NAME="my-cool-experiment"
client.create_experiment(EXPERIMENT_NAME)

MlflowException: Experiment(name=my-cool-experiment) already exists. Error: (raised as a result of Query-invoked autoflush; consider using a session.no_autoflush block if this flush is occurring prematurely)
(sqlite3.IntegrityError) UNIQUE constraint failed: experiments.name
[SQL: INSERT INTO experiments (name, artifact_location, lifecycle_stage) VALUES (?, ?, ?)]
[parameters: ('my-cool-experiment', None, 'active')]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [69]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids=['1'],
    filter_string="metrics.rmse < 7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=10,
    order_by=["metrics.rmse ASC"],
)
runs = list(runs)
runs2 = client.search_runs(
    experiment_ids=['1'],
    filter_string="metrics.RMSE < 7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=10,
    order_by=["metrics.RMSE ASC"],
)
runs2 = list(runs2)
all_rmse_runs = runs + runs2

In [70]:
print(type(all_rmse_runs))
print()
print()
all_rmse_runs

<class 'list'>




[<Run: data=<RunData: metrics={'rmse': 6.742303328497425,
  'training_mae': 4.047813167088583,
  'training_mse': 34.561167991285814,
  'training_r2_score': 0.7415116414882994,
  'training_rmse': 5.878874721516509,
  'training_score': 0.7415116414882994}, params={'alpha': '0.9',
  'ccp_alpha': '0.0',
  'criterion': 'friedman_mse',
  'init': 'None',
  'learning_rate': '0.1',
  'loss': 'squared_error',
  'max_depth': '3',
  'max_features': 'None',
  'max_leaf_nodes': 'None',
  'min_impurity_decrease': '0.0',
  'min_samples_leaf': '1',
  'min_samples_split': '2',
  'min_weight_fraction_leaf': '0.0',
  'n_estimators': '100',
  'n_iter_no_change': 'None',
  'random_state': 'None',
  'subsample': '1.0',
  'tol': '0.0001',
  'train-data-path': './data/green_tripdata_2021-01.csv',
  'valid-data-path': './data/green_tripdata_2021-02.csv',
  'validation_fraction': '0.1',
  'verbose': '0',
  'warm_start': 'False'}, tags={'estimator_class': 'sklearn.ensemble._gb.GradientBoostingRegressor',
  'estim

In [71]:
for run in all_rmse_runs:
    print(f"Run ID: {run.info.run_id}")
    # print(f"Tags: {run.data.tags}")
    try:
        rmse = run.data.metrics['rmse']
    except KeyError:
        rmse = run.data.metrics['RMSE']
    print(f"RMSE: {rmse}")

Run ID: 7e1bbc9ea46741ddb27270bd6bab851e
RMSE: 6.742303328497425
Run ID: e893c918d629437fb2b741668fdd7347
RMSE: 6.908208299715754
Run ID: c0fd08f0fd0f4b50aa2a369615e61756
RMSE: 6.918372193230343
Run ID: b9c9e4ad456640799fe8910334c40142
RMSE: 6.290375221281626


In [72]:
client.get_run("e893c918d629437fb2b741668fdd7347")

<Run: data=<RunData: metrics={'rmse': 6.908208299715754,
 'training_mae': 1.6018148734920705,
 'training_mse': 5.791228064455477,
 'training_r2_score': 0.9566865032881576,
 'training_rmse': 2.406497052658797,
 'training_score': 0.9566865032881576}, params={'bootstrap': 'True',
 'ccp_alpha': '0.0',
 'criterion': 'squared_error',
 'max_depth': 'None',
 'max_features': 'auto',
 'max_leaf_nodes': 'None',
 'max_samples': 'None',
 'min_impurity_decrease': '0.0',
 'min_samples_leaf': '1',
 'min_samples_split': '2',
 'min_weight_fraction_leaf': '0.0',
 'n_estimators': '100',
 'n_jobs': 'None',
 'oob_score': 'False',
 'random_state': 'None',
 'train-data-path': './data/green_tripdata_2021-01.csv',
 'valid-data-path': './data/green_tripdata_2021-02.csv',
 'verbose': '0',
 'warm_start': 'False'}, tags={'estimator_class': 'sklearn.ensemble._forest.RandomForestRegressor',
 'estimator_name': 'RandomForestRegressor',
 'mlflow.log-model.history': '[{"run_id": "e893c918d629437fb2b741668fdd7347", '
    

Get the latest version of the named registered model

In [73]:
client.get_registered_model('nyc-taxi-regressor')

<RegisteredModel: creation_timestamp=1757348876080, description='NYC Taxi Regressor for Duration', last_updated_timestamp=1757371072414, latest_versions=[<ModelVersion: creation_timestamp=1757348944034, current_stage='Staging', description='', last_updated_timestamp=1757357994631, name='nyc-taxi-regressor', run_id='7e1bbc9ea46741ddb27270bd6bab851e', run_link='', source='./mlruns/1/7e1bbc9ea46741ddb27270bd6bab851e/artifacts/model', status='READY', status_message=None, tags={'model': 'gradientboostingregressor'}, user_id=None, version=2>,
 <ModelVersion: creation_timestamp=1757371072414, current_stage='None', description=None, last_updated_timestamp=1757371072414, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>], name='nyc-taxi-regressor', tags={}>

In [74]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [80]:
run_id = "e893c918d629437fb2b741668fdd7347"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2025/09/08 15:54:04 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 6
Created version '6' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1757372044268, current_stage='None', description=None, last_updated_timestamp=1757372044268, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=6>

In [98]:
client.list_registered_models()

[<RegisteredModel: creation_timestamp=1757348876080, description='NYC Taxi Regressor for Duration', last_updated_timestamp=1757435602856, latest_versions=[<ModelVersion: creation_timestamp=1757371072414, current_stage='Staging', description='The model version 4 was transitioned to Staging on 2025-09-09', last_updated_timestamp=1757435431118, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>,
  <ModelVersion: creation_timestamp=1757348944034, current_stage='Production', description='', last_updated_timestamp=1757435602856, name='nyc-taxi-regressor', run_id='7e1bbc9ea46741ddb27270bd6bab851e', run_link='', source='./mlruns/1/7e1bbc9ea46741ddb27270bd6bab851e/artifacts/model', status='READY', status_message=None, tags={'model': 'gradientboostingregressor'}, user_id=None, version=2>,
  <ModelVersion: creation_timestamp

In [99]:
model_name = 'nyc-taxi-regressor'
latest_versions = client.get_latest_versions(model_name)

for version in latest_versions:
    print(f"run_id: {version.run_id}, version: {version.version}, stage: {version.current_stage}")

run_id: e893c918d629437fb2b741668fdd7347, version: 4, stage: Staging
run_id: 7e1bbc9ea46741ddb27270bd6bab851e, version: 2, stage: Production
run_id: e893c918d629437fb2b741668fdd7347, version: 6, stage: None


In [100]:
model_version = "4"
model_stage = "Staging"
client.transition_model_version_stage(name=model_name, 
                                      version=model_version, 
                                      stage=model_stage)

<ModelVersion: creation_timestamp=1757371072414, current_stage='Staging', description='The model version 4 was transitioned to Staging on 2025-09-09', last_updated_timestamp=1757435653304, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [101]:
from datetime import datetime
dt = datetime.today().date()
client.update_model_version(name=model_name,
                            version=model_version,
                            description=f"The model version {model_version} was transitioned to {model_stage} on {dt}")

<ModelVersion: creation_timestamp=1757371072414, current_stage='Staging', description='The model version 4 was transitioned to Staging on 2025-09-09', last_updated_timestamp=1757435655637, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [97]:
from sklearn.metrics import mean_squared_error
import pandas as pd
import pickle

def read_dataframe(filename):
    df = pd.read_parquet(filename)
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)
    df = df[(df.duration >= 1) & (df.duration <= 60)]
    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    return df

def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO'] #['PULocationID', 'DOLocationID']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)

def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {f"rmse: {mean_squared_error(y_pred, y_test, squared=False)}"}

In [None]:
df = read_dataframe("data/green_tripdata_2021-03.parquet")

xgboost_id = "b9c9e4ad456640799fe8910334c40142"
extratrees_id = "c0fd08f0fd0f4b50aa2a369615e61756"
randomforest_id = "e893c918d629437fb2b741668fdd7347"
gradientboosting_id = "7e1bbc9ea46741ddb27270bd6bab851e"

run_ids = [xgboost_id, extratrees_id, randomforest_id, gradientboosting_id]

preprocessor = client.download_artifacts(run_id=run_id, path="preprocessor", dst_path=".")
with open("preprocessor/preprocessor.b", "rb",) as f_in:
    dv = pickle.load(f_in)
X_test = preprocess(df, dv)
target = 'duration'
y_test = df[target].values

# pp_xgboost = client.download_artifacts(run_id=xgboost_id)
# pp_extratrees = client.download_artifacts(run_id=extratrees_id)
# pp_randomforest = client.download_artifacts(run_id=randomforest_id)
# pp_gradientboosting = client.download_artifacts(run_id=gradientboosting_id)


In [103]:
%time test_model("nyc-taxi-regressor", "Production", X_test, y_test)

CPU times: user 358 ms, sys: 96.6 ms, total: 454 ms
Wall time: 3.56 s


{'rmse: 6.659623830022514'}

In [104]:
%time test_model("nyc-taxi-regressor", "Staging", X_test, y_test)

CPU times: user 10 s, sys: 244 ms, total: 10.3 s
Wall time: 10.4 s


{'rmse: 6.884093464270582'}

In [107]:
client.transition_model_version_stage("nyc-taxi-regressor",
                                      "4",
                                      "Production",
                                      archive_existing_versions=True)

<ModelVersion: creation_timestamp=1757371072414, current_stage='Production', description='The model version 4 was transitioned to Production on 2025-09-09', last_updated_timestamp=1757442400996, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [108]:
model_version = "4"
model_stage = "Production"
dt = datetime.today().date()
client.update_model_version(name=model_name,
                            version=model_version,
                            description=f"The model version {model_version} was transitioned to {model_stage} on {dt}")

<ModelVersion: creation_timestamp=1757371072414, current_stage='Production', description='The model version 4 was transitioned to Production on 2025-09-09', last_updated_timestamp=1757442402117, name='nyc-taxi-regressor', run_id='e893c918d629437fb2b741668fdd7347', run_link=None, source='./mlruns/1/e893c918d629437fb2b741668fdd7347/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>