In [2]:
from mlflow.tracking import MlflowClient

# SQLite database file used as the MLflow tracking store for this notebook.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client bound to that store; used below for experiment, run and registry calls.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [7]:
# Query the tracking store for experiments, using the client's default search arguments.
client.search_experiments()

[<Experiment: artifact_location='/workspaces/mlops-camp/notebook/03-experiment-tracking/mlruns/1', creation_time=1716229855130, experiment_id='1', last_update_time=1716229855130, lifecycle_stage='active', name='my-first-experiment', tags={}>]

In [8]:
# Creating an experiment under a name that already exists raises an
# MlflowException, so look the name up first to keep this cell re-runnable.
experiment_name = "my_cool_experiment"
existing_experiment = client.get_experiment_by_name(experiment_name)

experiment_id = (
    existing_experiment.experiment_id
    if existing_experiment is not None
    else client.create_experiment(name=experiment_name)
)
# Last expression of the cell: displays the experiment id, as before.
experiment_id

'2'

In [9]:
from mlflow.entities import ViewType

# Fetch at most five active runs of experiment "1", ordered by ascending RMSE.
# `experiment_ids` is documented as a list of experiment IDs, so pass a list
# rather than a bare string.
runs = client.search_runs(
    experiment_ids=["1"],
    filter_string="",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"],
)

In [10]:
# Print one line per run: its id and its RMSE rounded to four decimals.
# (`:4f` would mean "minimum width 4, default six decimals"; `.4f` sets the precision.)
for run in runs:
    print(f"run id: {run.info.run_id},rmse: {run.data.metrics['rmse']:.4f}")

run id: b7eed26e9ff2495f9b5e92a6deab969a,rmse: 4.965459
run id: 12d40f85651c4413974cbd05fef76db3,rmse: 4.965459
run id: 13273f90f92e4a4ba02a9518d975ac9a,rmse: 4.965459
run id: 20f2ffe1584241d7a76e8e06c54ac3b4,rmse: 4.966782
run id: d139c463f53b4287b793d96a81c36266,rmse: 4.976702


In [11]:
import mlflow

# Point the module-level mlflow API at the same tracking URI the client was created with.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)


In [13]:
# Register the "model" artifact of one specific run as a new version of
# the registered model "first_model_xgboost".
run_id = '13273f90f92e4a4ba02a9518d975ac9a'  # hard-coded run id
model_uri = f"runs:/{run_id}/model"

mlflow.register_model(
    name="first_model_xgboost",
    model_uri=model_uri,
)

Registered model 'first_model_xgboost' already exists. Creating a new version of this model...
Created version '1' of model 'first_model_xgboost'.


<ModelVersion: aliases=[], creation_timestamp=1716748174771, current_stage='None', description=None, last_updated_timestamp=1716748174771, name='first_model_xgboost', run_id='13273f90f92e4a4ba02a9518d975ac9a', run_link=None, source='/workspaces/mlops-camp/notebook/03-experiment-tracking/mlruns/1/13273f90f92e4a4ba02a9518d975ac9a/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [15]:
# Query the model registry for registered models, using the default search arguments.
client.search_registered_models()

[<RegisteredModel: aliases={}, creation_timestamp=1716748158161, description=None, last_updated_timestamp=1716748327372, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1716748327372, current_stage='None', description='', last_updated_timestamp=1716748327372, name='first_model_xgboost', run_id='b7eed26e9ff2495f9b5e92a6deab969a', run_link='', source='/workspaces/mlops-camp/notebook/03-experiment-tracking/mlruns/1/b7eed26e9ff2495f9b5e92a6deab969a/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>], name='first_model_xgboost', tags={}>]

In [22]:
model_name = "first_model_xgboost"

# The filter must be a *string* in MLflow's search syntax, e.g. name='...'.
# A Python comparison such as 'name' == model_name evaluates to False before
# the call is made, so no name filter would be applied at all.
latest_versions = client.search_model_versions(
    filter_string=f"name='{model_name}'"
)

# Show each version of the model together with its current registry stage.
for version in latest_versions:
    print(f"version:{version.version}, stage:{version.current_stage}")



version:2, stage:None
version:1, stage:None


In [24]:
# Move version 2 of `model_name` into the 'Staging' stage.
# archive_existing_versions=True asks MLflow to archive versions already in that stage.
# NOTE(review): the recorded output shows a warning for this call; model stages
# appear to be deprecated in recent MLflow releases — confirm and consider aliases.
client.transition_model_version_stage(
    name=model_name,
    version=2,
    stage='Staging',
    archive_existing_versions=True

)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1716748327372, current_stage='Staging', description='', last_updated_timestamp=1716750321652, name='first_model_xgboost', run_id='b7eed26e9ff2495f9b5e92a6deab969a', run_link='', source='/workspaces/mlops-camp/notebook/03-experiment-tracking/mlruns/1/b7eed26e9ff2495f9b5e92a6deab969a/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [27]:
from datetime import datetime
# Attach a free-text description to the model version noting the stage it was
# moved to and today's date.
model_version = 2
new_stage = 'staging'
date = datetime.today().date()

client.update_model_version(
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}",
    version=model_version,
    name=model_name,
)

<ModelVersion: aliases=[], creation_timestamp=1716748327372, current_stage='Staging', description='The model version 2 was transitioned to staging on 2024-05-26', last_updated_timestamp=1716751349270, name='first_model_xgboost', run_id='b7eed26e9ff2495f9b5e92a6deab969a', run_link='', source='/workspaces/mlops-camp/notebook/03-experiment-tracking/mlruns/1/b7eed26e9ff2495f9b5e92a6deab969a/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [3]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    """Load a trip parquet file and prepare it for modelling.

    Steps:
      * parse the pickup/dropoff columns as datetimes,
      * add a ``duration`` column holding the trip length in minutes,
      * keep only trips lasting between 1 and 60 minutes (inclusive),
      * cast ``PULocationID`` and ``DOLocationID`` to strings.

    Parameters
    ----------
    filename : path or file-like accepted by ``pd.read_parquet``

    Returns
    -------
    pandas.DataFrame
        The filtered frame, independent of the frame that was read.
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes conversion instead of a per-row lambda.
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # Explicit copy: the column assignment below must not write into a slice
    # of the unfiltered frame (avoids SettingWithCopy problems).
    df = df[df['duration'].between(1, 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df


def preprocess(df, dv):
    """Turn a trips frame into the feature matrix produced by ``dv``.

    Adds a ``PU_DO`` column to ``df`` in place (pickup id, underscore,
    dropoff id), converts the ``PU_DO`` and ``trip_distance`` columns to a
    list of per-row dicts, and returns ``dv.transform`` of that list.
    """
    # Note: this writes the combined column into the caller's frame.
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']

    feature_columns = ['PU_DO', 'trip_distance']
    records = df[feature_columns].to_dict(orient='records')

    return dv.transform(records)


def test_model(name, stage, X_test, y_test):
    """Score the registered model ``name`` at ``stage`` on a test set.

    Loads ``models:/{name}/{stage}`` through ``mlflow.pyfunc``, predicts on
    ``X_test`` and returns ``{"rmse": <root mean squared error>}`` computed
    against ``y_test``.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)

    # `squared=False` is deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version and gives the
    # same value for a single-output target.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    return {"rmse": rmse}

In [4]:
# Load and clean the trips file via read_dataframe (path is relative, not absolute).
df = read_dataframe("data/green_tripdata_2023-03.parquet")

In [5]:
# Display the cleaned frame as the cell output.
df

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,...,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,duration
0,2,2023-03-01 00:25:10,2023-03-01 00:35:47,N,1.0,82,196,1.0,2.36,13.50,...,0.5,0.00,0.00,,1.0,16.00,2.0,1.0,0.00,10.616667
1,2,2023-03-01 00:14:29,2023-03-01 00:25:04,N,1.0,7,7,1.0,0.78,-6.50,...,-0.5,0.00,0.00,,-1.0,-9.00,3.0,1.0,0.00,10.583333
2,2,2023-03-01 00:14:29,2023-03-01 00:25:04,N,1.0,7,7,1.0,0.78,6.50,...,0.5,0.00,0.00,,1.0,9.00,3.0,1.0,0.00,10.583333
3,2,2023-02-28 22:59:46,2023-02-28 23:08:38,N,1.0,166,74,1.0,1.66,11.40,...,0.5,2.78,0.00,,1.0,16.68,1.0,1.0,0.00,8.866667
4,2,2023-03-01 00:54:03,2023-03-01 01:03:14,N,1.0,236,229,1.0,3.14,15.60,...,0.5,4.17,0.00,,1.0,25.02,1.0,1.0,2.75,9.183333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72039,2,2023-03-31 23:33:00,2023-03-31 23:45:00,,,25,144,,2.93,16.46,...,0.0,4.04,0.00,,1.0,24.25,,,,12.000000
72040,2,2023-03-31 23:27:00,2023-03-31 23:53:00,,,36,236,,8.49,34.66,...,0.0,7.68,0.00,,1.0,46.09,,,,26.000000
72041,2,2023-03-31 23:09:00,2023-03-31 23:40:00,,,42,225,,12.27,41.79,...,0.0,7.40,6.55,,1.0,56.74,,,,31.000000
72042,2,2023-03-31 23:39:00,2023-04-01 00:01:00,,,80,189,,4.42,17.82,...,0.0,1.94,0.00,,1.0,20.76,,,,22.000000
