In [38]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType
import mlflow

import pandas as pd

from sklearn.metrics import mean_squared_error
from sklearn.feature_extraction import DictVectorizer

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [7]:
client.create_experiment("my-experiment")

'2'

In [24]:
runs = client.search_runs(
  experiment_ids=["1"],
  filter_string="",
  run_view_type=ViewType.ACTIVE_ONLY,
  max_results=5,
  order_by=["metrics.rmse ASC"])

In [25]:
runs[-1].info.run_id

'bcf573a33d044be69956d57596b80fd9'

In [27]:
runs[-1].data.metrics["rmse"]

6.436693422952134

In [20]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [29]:
model_uri = f"runs:/{runs[-1].info.run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '2' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1716932798435, current_stage='None', description=None, last_updated_timestamp=1716932798435, name='nyc-taxi-regressor', run_id='bcf573a33d044be69956d57596b80fd9', run_link=None, source='/Users/bastienwinant/Desktop/projects/mlops-zoomcamp/experiment-tracking/mlruns/1/bcf573a33d044be69956d57596b80fd9/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [31]:
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

  latest_versions = client.get_latest_versions(name=model_name)


In [33]:
client.transition_model_version_stage(
  name=model_name,
  version=latest_versions[0].version,
  stage="Staging",
  archive_existing_versions=False)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1716932798435, current_stage='Staging', description=None, last_updated_timestamp=1716932962609, name='nyc-taxi-regressor', run_id='bcf573a33d044be69956d57596b80fd9', run_link=None, source='/Users/bastienwinant/Desktop/projects/mlops-zoomcamp/experiment-tracking/mlruns/1/bcf573a33d044be69956d57596b80fd9/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [35]:
def read_dataframe(filename):
  df = pd.read_parquet(filename)

  df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
  df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

  df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
  df.duration = df.duration.dt.total_seconds() / 60

  df = df.loc[(df.duration >= 1) & (df.duration <= 60)]

  categorical = ['PULocationID', 'DOLocationID']
  df[categorical] = df[categorical].astype('category')

  return df

def preprocess(df, dv):
  df['PU_DO'] = df['PULocationID'].astype(str) + "_" + df['DOLocationID'].astype(str)
  categorical = ['PULocationID', 'DOLocationID', 'PU_DO']
  numerical = ['trip_distance']

  train_dicts = df[categorical + numerical].to_dict(orient='records')
  return dv.transform(train_dicts)

def test_model(stage, X_test, y_test, name="nyc-taxi-regressor"):
  model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
  predictions = model.predict(X_test)
  rmse = mean_squared_error(y_test, predictions, squared=False)
  return {"rmse": rmse}

In [37]:
ead_dataframe('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,...,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,duration
0,2,2024-01-01 00:46:55,2024-01-01 00:58:25,N,1.0,236,239,1.0,1.98,12.80,...,0.5,3.61,0.0,,1.0,21.66,1.0,1.0,2.75,11.500000
1,2,2024-01-01 00:31:42,2024-01-01 00:52:34,N,1.0,65,170,5.0,6.54,30.30,...,0.5,7.11,0.0,,1.0,42.66,1.0,1.0,2.75,20.866667
2,2,2024-01-01 00:30:21,2024-01-01 00:49:23,N,1.0,74,262,1.0,3.08,19.80,...,0.5,3.00,0.0,,1.0,28.05,1.0,1.0,2.75,19.033333
3,1,2024-01-01 00:30:20,2024-01-01 00:42:12,N,1.0,74,116,1.0,2.40,14.20,...,1.5,0.00,0.0,,1.0,16.70,2.0,1.0,0.00,11.866667
4,2,2024-01-01 00:32:38,2024-01-01 00:43:37,N,1.0,74,243,1.0,5.14,22.60,...,0.5,6.28,0.0,,1.0,31.38,1.0,1.0,0.00,10.983333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56546,2,2024-01-31 20:46:00,2024-01-31 20:55:00,,,33,25,,0.00,11.58,...,0.0,3.14,0.0,,1.0,15.72,,,,9.000000
56547,2,2024-01-31 21:06:00,2024-01-31 21:11:00,,,72,72,,0.49,11.58,...,0.0,0.00,0.0,,1.0,12.58,,,,5.000000
56548,2,2024-01-31 21:36:00,2024-01-31 21:40:00,,,72,72,,0.52,11.58,...,0.0,2.52,0.0,,1.0,15.10,,,,4.000000
56549,2,2024-01-31 22:45:00,2024-01-31 22:51:00,,,41,42,,1.17,14.22,...,0.0,0.00,0.0,,1.0,15.22,,,,6.000000
