In [1]:
from mlflow.tracking import MlflowClient

In [2]:
mlflow_tracking_uri = "http://127.0.0.1:5000"

In [3]:
client = MlflowClient(tracking_uri=mlflow_tracking_uri)

In [5]:
client.create_experiment('my-trial-experiment')

'3'

In [4]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='2',
    filter_string='metrics.rmse <= 6.1478',
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [5]:
runs

[<Run: data=<RunData: metrics={'rmse': 6.1477211725611305}, params={'learning_rate': '0.07755880320609634',
  'max_depth': '10',
  'min_child_weight': '3.7126270987128547',
  'objective': 'reg:linear',
  'reg_alpha': '0.028088122108329037',
  'reg_lambda': '0.013125534079775656',
  'seed': '42'}, tags={'mlflow.log-model.history': '[{"run_id": "d031ab694f7f4a76988537cd5f7594ce", '
                              '"artifact_path": "models_mlflow", '
                              '"utc_time_created": "2024-05-25 '
                              '01:27:29.975196", "flavors": {"python_function": '
                              '{"loader_module": "mlflow.xgboost", '
                              '"python_version": "3.12.3", "data": "model.xgb", '
                              '"env": {"conda": "conda.yaml", "virtualenv": '
                              '"python_env.yaml"}}, "xgboost": {"xgb_version": '
                              '"2.0.3", "data": "model.xgb", "model_class": '
               

In [45]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}" )
    

run id: d031ab694f7f4a76988537cd5f7594ce, rmse: 6.1477
run id: 8d528c7f662549438fda7ceacd003690, rmse: 6.1477
run id: c35283e3b91e4d47a6f494dc03cac887, rmse: 6.1477
run id: 1187a6ae17f14217a1f643daf7254028, rmse: 6.1477


In [17]:
import mlflow
mlflow.set_tracking_uri(mlflow_tracking_uri)

In [24]:
run_id = "d031ab694f7f4a76988537cd5f7594ce"
model_uri = f"runs:/{run_id}/model"
model_uri

'runs:/d031ab694f7f4a76988537cd5f7594ce/model'

In [25]:
mlflow.register_model(model_uri=model_uri, name = "xgboost18")

Successfully registered model 'xgboost18'.
2024/05/25 04:22:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xgboost18, version 1
Created version '1' of model 'xgboost18'.


<ModelVersion: aliases=[], creation_timestamp=1716610922932, current_stage='None', description='', last_updated_timestamp=1716610922932, name='xgboost18', run_id='d031ab694f7f4a76988537cd5f7594ce', run_link='', source='mlflow-artifacts:/2/d031ab694f7f4a76988537cd5f7594ce/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [46]:
model_name = 'xgboost1'
latest_versions = client.get_latest_versions(model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Staging


  latest_versions = client.get_latest_versions(model_name)


In [47]:
client.transition_model_version_stage(
    name=model_name,
    version=1,
    stage='Staging',
    archive_existing_versions=False
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1716603396796, current_stage='Staging', description='', last_updated_timestamp=1716612195769, name='xgboost1', run_id='d031ab694f7f4a76988537cd5f7594ce', run_link='', source='mlflow-artifacts:/2/d031ab694f7f4a76988537cd5f7594ce/artifacts/models_mlflow', status='READY', status_message='', tags={}, user_id='', version='1'>

In [48]:
from datetime import datetime

dt = datetime.today().date()
dt

datetime.date(2024, 5, 25)

In [49]:

client.update_model_version(
    name=model_name,
    version=1,
    description = f"The model version 4 was transitioned to Staging on {dt}"
    
)

<ModelVersion: aliases=[], creation_timestamp=1716603396796, current_stage='Staging', description='The model version 4 was transitioned to Staging on 2024-05-25', last_updated_timestamp=1716612200892, name='xgboost1', run_id='d031ab694f7f4a76988537cd5f7594ce', run_link='', source='mlflow-artifacts:/2/d031ab694f7f4a76988537cd5f7594ce/artifacts/models_mlflow', status='READY', status_message='', tags={}, user_id='', version='1'>

In [62]:
def read_dataFrame(filename):
    df = pd.read_parquet(filename)
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds()/60)
    df = df[(df.duration >=1) & (df.duration <=60)]
    categorical = ['PULocationID','DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

def preprocessor(df,dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)

def test_model(name,stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": root_mean_squared_error(y_test, y_pred)}

In [21]:
import pandas as pd
df_test = read_dataFrame('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-03.parquet')

In [51]:
client.download_artifacts(
    run_id = run_id,
    path ='preprocessor',
    dst_path = '.'
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/workspaces/mlops-zoomcamp-arun/01-intro/03-training/experiment-tracking/preprocessor'

In [59]:
import pickle
from sklearn.metrics import root_mean_squared_error

with open('preprocessor/preprocessor.b', 'rb') as f_in:
    dv = pickle.load(f_in)

In [53]:
X_test = preprocessor(df_test,dv)

In [54]:
target = 'duration'
y_test = df_test[target].values

In [63]:
%time test_model(name=model_name, stage='Staging', X_test=X_test, y_test=y_test)

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]



CPU times: user 7.56 s, sys: 72.5 ms, total: 7.63 s
Wall time: 4.66 s


{'rmse': 6.856105855294579}