In [39]:
!python -V

Python 3.9.21


In [10]:
!pip uninstall scikit-learn -y
!pip install scikit-learn==1.6.1

Found existing installation: scikit-learn 1.6.1
Uninstalling scikit-learn-1.6.1:
  Successfully uninstalled scikit-learn-1.6.1
Collecting scikit-learn==1.6.1
  Using cached scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl.metadata (31 kB)
Using cached scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl (12.1 MB)
Installing collected packages: scikit-learn
Successfully installed scikit-learn-1.6.1


In [1]:
import pandas as pd

In [2]:
import pickle

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import root_mean_squared_error

In [5]:
import sklearn
import sys
print("sklearn version:", sklearn.__version__)
print("python executable:", sys.executable)

sklearn version: 1.6.1
python executable: /Users/zihaozhou/.conda/envs/exp-tracking-env/bin/python


In [6]:
from sklearn.metrics import mean_squared_error
import inspect

print(mean_squared_error)
print(inspect.getfile(mean_squared_error))


<function mean_squared_error at 0x14c8c7c10>
/Users/zihaozhou/.conda/envs/exp-tracking-env/lib/python3.9/site-packages/sklearn/utils/_param_validation.py


In [1]:
import mlflow


mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1743464550778, experiment_id='1', last_update_time=1743464550778, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [2]:
print(mlflow.__version__)

2.21.3


In [8]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [21]:
df_train = read_dataframe('data/green_tripdata_2023-01.parquet')
df_val = read_dataframe('data/green_tripdata_2023-02.parquet')

In [22]:
len(df_train), len(df_val)

(65946, 62574)

In [23]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [24]:
categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
numerical = ['trip_distance']

dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [25]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [27]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

mse = mean_squared_error(y_val, y_pred)**0.5
mse

6.037585607307625

In [28]:
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [30]:
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", "data/green_tripdata_2023-01.parquet")
    mlflow.log_param("valid-data-path", "data/green_tripdata_2023-02.parquet")

    alpha = 0.01
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = mean_squared_error(y_val, y_pred)**0.5
    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")

In [31]:
import xgboost as xgb

In [33]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [32]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [37]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred)**0.5
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [38]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

[0]	validation-rmse:7.78817                           
[1]	validation-rmse:6.81558                           
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[2]	validation-rmse:6.22256                           
[3]	validation-rmse:5.86902                           
[4]	validation-rmse:5.66049                           
[5]	validation-rmse:5.54098                           
[6]	validation-rmse:5.46869                           
[7]	validation-rmse:5.42044                           
[8]	validation-rmse:5.39151                           
[9]	validation-rmse:5.36926                           
[10]	validation-rmse:5.35458                          
[11]	validation-rmse:5.34420                          
[12]	validation-rmse:5.33642                          
[13]	validation-rmse:5.32968                          
[14]	validation-rmse:5.32611                          
[15]	validation-rmse:5.32057                          
[16]	validation-rmse:5.31684                          
[17]	validation-rmse:5.31311                          
[18]	validation-rmse:5.30948                          
[19]	validation-rmse:5.30819                          
[20]	valid




[0]	validation-rmse:5.85340                                                    
[1]	validation-rmse:5.45193                                                    
[2]	validation-rmse:5.39195                                                    
[3]	validation-rmse:5.38008                                                    
[4]	validation-rmse:5.36756                                                    
[5]	validation-rmse:5.36670                                                    
[6]	validation-rmse:5.36663                                                    
[7]	validation-rmse:5.35902                                                    
[8]	validation-rmse:5.35670                                                    
[9]	validation-rmse:5.35448                                                    
[10]	validation-rmse:5.34931                                                   
[11]	validation-rmse:5.34756                                                   
[12]	validation-rmse:5.33260            




[0]	validation-rmse:6.14895                                                    
[1]	validation-rmse:5.39926                                                    
[2]	validation-rmse:5.24579                                                    
[3]	validation-rmse:5.21091                                                    
[4]	validation-rmse:5.19524                                                    
[5]	validation-rmse:5.18790                                                    
[6]	validation-rmse:5.18970                                                    
[7]	validation-rmse:5.18547                                                    
[8]	validation-rmse:5.18343                                                    
[9]	validation-rmse:5.18053                                                    
[10]	validation-rmse:5.17986                                                   
[11]	validation-rmse:5.17886                                                   
[12]	validation-rmse:5.17920            




[0]	validation-rmse:7.43975                                                    
[1]	validation-rmse:6.40168                                                    
[2]	validation-rmse:5.88062                                                    
[3]	validation-rmse:5.61331                                                    
[4]	validation-rmse:5.46371                                                    
[5]	validation-rmse:5.39683                                                    
[6]	validation-rmse:5.35779                                                    
[7]	validation-rmse:5.32972                                                    
[8]	validation-rmse:5.31703                                                    
[9]	validation-rmse:5.30415                                                    
[10]	validation-rmse:5.29852                                                   
[11]	validation-rmse:5.29712                                                   
[12]	validation-rmse:5.29302            




[0]	validation-rmse:6.46189                                                    
[1]	validation-rmse:5.55834                                                    
[2]	validation-rmse:5.30971                                                    
[3]	validation-rmse:5.22657                                                    
[4]	validation-rmse:5.19807                                                    
[5]	validation-rmse:5.18567                                                    
[6]	validation-rmse:5.18183                                                    
[7]	validation-rmse:5.17781                                                    
[8]	validation-rmse:5.17866                                                    
[9]	validation-rmse:5.17708                                                    
[10]	validation-rmse:5.17739                                                   
[11]	validation-rmse:5.17641                                                   
[12]	validation-rmse:5.17662            




[7]	validation-rmse:5.99425                                                    
[8]	validation-rmse:5.87027                                                    
[9]	validation-rmse:5.77516                                                    
[10]	validation-rmse:5.70316                                                   
[11]	validation-rmse:5.64537                                                   
[12]	validation-rmse:5.60082                                                   
[13]	validation-rmse:5.56478                                                   
[14]	validation-rmse:5.53795                                                   
[15]	validation-rmse:5.51662                                                   
[16]	validation-rmse:5.49692                                                   
[17]	validation-rmse:5.48224                                                   
[18]	validation-rmse:5.46936                                                   
[19]	validation-rmse:5.45903            




[0]	validation-rmse:5.61417                                                    
[1]	validation-rmse:5.39884                                                    
[2]	validation-rmse:5.37947                                                    
[3]	validation-rmse:5.37240                                                    
[4]	validation-rmse:5.35907                                                    
[5]	validation-rmse:5.35259                                                    
[6]	validation-rmse:5.35081                                                    
[7]	validation-rmse:5.35348                                                    
[8]	validation-rmse:5.35206                                                    
[9]	validation-rmse:5.35281                                                    
[10]	validation-rmse:5.35077                                                   
[11]	validation-rmse:5.35228                                                   
[12]	validation-rmse:5.35332            




[0]	validation-rmse:8.82195                                                    
[1]	validation-rmse:8.37727                                                    
[2]	validation-rmse:7.98448                                                    
[3]	validation-rmse:7.63522                                                    
[4]	validation-rmse:7.33258                                                    
[5]	validation-rmse:7.05864                                                    
[6]	validation-rmse:6.82636                                                    
[7]	validation-rmse:6.61396                                                    
[8]	validation-rmse:6.43654                                                    
[9]	validation-rmse:6.28326                                                    
[10]	validation-rmse:6.14767                                                   
[11]	validation-rmse:6.03355                                                   
[12]	validation-rmse:5.92638            




[0]	validation-rmse:8.13889                                                    
[1]	validation-rmse:7.26726                                                    
[2]	validation-rmse:6.63912                                                    
[3]	validation-rmse:6.19529                                                    
[4]	validation-rmse:5.88652                                                    
[5]	validation-rmse:5.67336                                                    
[6]	validation-rmse:5.52804                                                    
[7]	validation-rmse:5.42904                                                    
[8]	validation-rmse:5.35926                                                    
[9]	validation-rmse:5.31023                                                    
[10]	validation-rmse:5.27668                                                   
[11]	validation-rmse:5.25342                                                   
[12]	validation-rmse:5.23737            




[0]	validation-rmse:7.53471                                                    
[1]	validation-rmse:6.51087                                                    
[2]	validation-rmse:5.94355                                                    
[3]	validation-rmse:5.63865                                                    
[4]	validation-rmse:5.46609                                                    
[5]	validation-rmse:5.38641                                                    
[6]	validation-rmse:5.33586                                                    
[7]	validation-rmse:5.29748                                                    
[8]	validation-rmse:5.28532                                                    
[9]	validation-rmse:5.28057                                                    
[10]	validation-rmse:5.27487                                                   
[11]	validation-rmse:5.26819                                                   
[12]	validation-rmse:5.26313            




[0]	validation-rmse:8.54184                                                     
[1]	validation-rmse:7.90033                                                     
[2]	validation-rmse:7.37583                                                     
[3]	validation-rmse:6.94867                                                     
[4]	validation-rmse:6.60893                                                     
[5]	validation-rmse:6.33356                                                     
[6]	validation-rmse:6.11315                                                     
[7]	validation-rmse:5.94370                                                     
[8]	validation-rmse:5.80920                                                     
[9]	validation-rmse:5.69548                                                     
[10]	validation-rmse:5.61365                                                    
[11]	validation-rmse:5.55113                                                    
[12]	validation-rmse:5.50098




[1]	validation-rmse:5.34416                                                     
[2]	validation-rmse:5.26620                                                     
[3]	validation-rmse:5.25112                                                     
[4]	validation-rmse:5.23836                                                     
[5]	validation-rmse:5.23062                                                     
[6]	validation-rmse:5.22699                                                     
[7]	validation-rmse:5.22154                                                     
[8]	validation-rmse:5.21697                                                     
[9]	validation-rmse:5.21237                                                     
[10]	validation-rmse:5.20993                                                    
[11]	validation-rmse:5.20690                                                    
[12]	validation-rmse:5.20020                                                    
[13]	validation-rmse:5.19916




[0]	validation-rmse:8.92408                                                     
[1]	validation-rmse:8.56174                                                     
[2]	validation-rmse:8.23308                                                     
[3]	validation-rmse:7.93241                                                     
[4]	validation-rmse:7.66485                                                     
[5]	validation-rmse:7.41753                                                     
[6]	validation-rmse:7.19987                                                     
[7]	validation-rmse:6.99260                                                     
[8]	validation-rmse:6.81695                                                     
[9]	validation-rmse:6.65816                                                     
[10]	validation-rmse:6.50806                                                    
[11]	validation-rmse:6.37899                                                    
[12]	validation-rmse:6.26497




[0]	validation-rmse:8.92781                                                     
[1]	validation-rmse:8.56631                                                     
[2]	validation-rmse:8.23541                                                     
[3]	validation-rmse:7.93298                                                     
[4]	validation-rmse:7.65770                                                     
[5]	validation-rmse:7.40718                                                     
[6]	validation-rmse:7.17979                                                     
[7]	validation-rmse:6.97394                                                     
[8]	validation-rmse:6.78746                                                     
[9]	validation-rmse:6.61933                                                     
[10]	validation-rmse:6.46800                                                    
[11]	validation-rmse:6.33140                                                    
[12]	validation-rmse:6.20880




[0]	validation-rmse:6.14663                                                      
[1]	validation-rmse:5.40671                                                      
[2]	validation-rmse:5.25199                                                      
[3]	validation-rmse:5.20689                                                      
[4]	validation-rmse:5.18733                                                      
[5]	validation-rmse:5.18698                                                      
[6]	validation-rmse:5.18947                                                      
[7]	validation-rmse:5.18975                                                      
[8]	validation-rmse:5.18957                                                      
[9]	validation-rmse:5.19029                                                      
[10]	validation-rmse:5.18771                                                     
[11]	validation-rmse:5.19020                                                     
[12]	validation-




[3]	validation-rmse:5.47601                                                      
[4]	validation-rmse:5.39693                                                      
[5]	validation-rmse:5.36786                                                      
[6]	validation-rmse:5.34306                                                      
[7]	validation-rmse:5.33516                                                      
[8]	validation-rmse:5.33088                                                      
[9]	validation-rmse:5.32874                                                      
[10]	validation-rmse:5.32616                                                     
[11]	validation-rmse:5.31982                                                     
[12]	validation-rmse:5.31471                                                     
[13]	validation-rmse:5.31064                                                     
[14]	validation-rmse:5.30457                                                     
[15]	validation-




[0]	validation-rmse:5.30068                                                      
[1]	validation-rmse:5.24450                                                      
[2]	validation-rmse:5.25420                                                      
[3]	validation-rmse:5.24920                                                      
[4]	validation-rmse:5.24486                                                      
[5]	validation-rmse:5.23780                                                      
[6]	validation-rmse:5.23206                                                      
[7]	validation-rmse:5.22692                                                      
[8]	validation-rmse:5.22853                                                      
[9]	validation-rmse:5.23812                                                      
[10]	validation-rmse:5.23959                                                     
[11]	validation-rmse:5.23487                                                     
[12]	validation-




[1]	validation-rmse:6.96965                                                      
[2]	validation-rmse:6.34256                                                      
[3]	validation-rmse:5.94002                                                      
[4]	validation-rmse:5.68841                                                      
[5]	validation-rmse:5.53002                                                      
[6]	validation-rmse:5.42690                                                      
[7]	validation-rmse:5.36127                                                      
[8]	validation-rmse:5.31902                                                      
[9]	validation-rmse:5.28964                                                      
[10]	validation-rmse:5.26832                                                     
[11]	validation-rmse:5.24991                                                     
[12]	validation-rmse:5.23969                                                     
[13]	validation-




[0]	validation-rmse:8.60614                                                      
[1]	validation-rmse:8.00768                                                      
[2]	validation-rmse:7.50785                                                      
[3]	validation-rmse:7.09311                                                      
[4]	validation-rmse:6.75018                                                      
[5]	validation-rmse:6.47520                                                      
[6]	validation-rmse:6.23396                                                      
[7]	validation-rmse:6.05703                                                      
[8]	validation-rmse:5.90400                                                      
[9]	validation-rmse:5.78263                                                      
[10]	validation-rmse:5.68320                                                     
[11]	validation-rmse:5.59898                                                     
[12]	validation-




[0]	validation-rmse:6.50863                                                      
[1]	validation-rmse:5.64697                                                      
[2]	validation-rmse:5.39830                                                      
[3]	validation-rmse:5.32505                                                      
[4]	validation-rmse:5.26912                                                      
[5]	validation-rmse:5.26219                                                      
[6]	validation-rmse:5.25777                                                      
[7]	validation-rmse:5.25331                                                      
[8]	validation-rmse:5.25434                                                      
[9]	validation-rmse:5.24952                                                      
[10]	validation-rmse:5.25133                                                     
[11]	validation-rmse:5.25027                                                     
[12]	validation-




[0]	validation-rmse:8.29402                                                      
[1]	validation-rmse:7.49783                                                      
[2]	validation-rmse:6.89166                                                      
[3]	validation-rmse:6.43707                                                      
[4]	validation-rmse:6.10163                                                      
[5]	validation-rmse:5.85626                                                      
[6]	validation-rmse:5.67911                                                      
[7]	validation-rmse:5.55112                                                      
[8]	validation-rmse:5.45855                                                      
[9]	validation-rmse:5.39208                                                      
[10]	validation-rmse:5.34393                                                     
[11]	validation-rmse:5.30566                                                     
[12]	validation-




[0]	validation-rmse:8.99961                                                      
[1]	validation-rmse:8.69962                                                      
[2]	validation-rmse:8.42087                                                      
[3]	validation-rmse:8.16209                                                      
[4]	validation-rmse:7.92176                                                      
[5]	validation-rmse:7.69917                                                      
[6]	validation-rmse:7.49329                                                      
[7]	validation-rmse:7.30347                                                      
[8]	validation-rmse:7.12823                                                      
[9]	validation-rmse:6.96689                                                      
[10]	validation-rmse:6.81834                                                     
[11]	validation-rmse:6.68221                                                     
[12]	validation-




[0]	validation-rmse:8.79796                                                      
[1]	validation-rmse:8.33411                                                      
[2]	validation-rmse:7.92404                                                      
[3]	validation-rmse:7.56371                                                      
[4]	validation-rmse:7.24734                                                      
[5]	validation-rmse:6.97166                                                      
[6]	validation-rmse:6.73132                                                      
[7]	validation-rmse:6.52317                                                      
[8]	validation-rmse:6.34325                                                      
[9]	validation-rmse:6.18823                                                      
[10]	validation-rmse:6.05477                                                     
[11]	validation-rmse:5.94049                                                     
[12]	validation-




[1]	validation-rmse:7.42947                                                      
[2]	validation-rmse:6.83357                                                      
[3]	validation-rmse:6.40254                                                      
[4]	validation-rmse:6.09477                                                      
[5]	validation-rmse:5.87885                                                      
[6]	validation-rmse:5.72621                                                      
[7]	validation-rmse:5.61751                                                      
[8]	validation-rmse:5.54349                                                      
[9]	validation-rmse:5.48797                                                      
[10]	validation-rmse:5.45025                                                     
[11]	validation-rmse:5.41987                                                     
[12]	validation-rmse:5.39822                                                     
[13]	validation-




[0]	validation-rmse:8.76587                                                      
[1]	validation-rmse:8.27527                                                      
[2]	validation-rmse:7.84488                                                      
[3]	validation-rmse:7.46792                                                      
[4]	validation-rmse:7.13928                                                      
[5]	validation-rmse:6.85402                                                      
[6]	validation-rmse:6.60689                                                      
[7]	validation-rmse:6.39428                                                      
[8]	validation-rmse:6.21203                                                      
[9]	validation-rmse:6.05630                                                      
[10]	validation-rmse:5.92379                                                     
[11]	validation-rmse:5.81011                                                     
[12]	validation-




[0]	validation-rmse:8.81077                                                     
[1]	validation-rmse:8.35503                                                     
[2]	validation-rmse:7.94957                                                     
[3]	validation-rmse:7.59041                                                     
[4]	validation-rmse:7.27377                                                     
[5]	validation-rmse:6.99532                                                     
[6]	validation-rmse:6.75136                                                     
[7]	validation-rmse:6.53841                                                     
[8]	validation-rmse:6.35326                                                     
[9]	validation-rmse:6.19146                                                     
[10]	validation-rmse:6.05081                                                    
[11]	validation-rmse:5.92887                                                    
[12]	validation-rmse:5.82369




[0]	validation-rmse:9.00520                                                     
[1]	validation-rmse:8.70971                                                     
[2]	validation-rmse:8.43453                                                     
[3]	validation-rmse:8.17877                                                     
[4]	validation-rmse:7.94097                                                     
[5]	validation-rmse:7.71996                                                     
[6]	validation-rmse:7.51546                                                     
[7]	validation-rmse:7.32555                                                     
[8]	validation-rmse:7.14980                                                     
[9]	validation-rmse:6.98743                                                     
[10]	validation-rmse:6.83766                                                    
[11]	validation-rmse:6.69946                                                    
[12]	validation-rmse:6.57252




[1]	validation-rmse:8.53670                                                     
[2]	validation-rmse:8.19793                                                     
[3]	validation-rmse:7.89163                                                     
[4]	validation-rmse:7.61554                                                     
[5]	validation-rmse:7.36680                                                     
[6]	validation-rmse:7.14367                                                     
[7]	validation-rmse:6.94373                                                     
[8]	validation-rmse:6.76471                                                     
[9]	validation-rmse:6.60486                                                     
[10]	validation-rmse:6.46249                                                    
[11]	validation-rmse:6.33589                                                    
[12]	validation-rmse:6.22350                                                    
[13]	validation-rmse:6.12347




[0]	validation-rmse:8.66720                                                     
[1]	validation-rmse:8.10783                                                     
[2]	validation-rmse:7.63343                                                     
[3]	validation-rmse:7.23321                                                     
[4]	validation-rmse:6.89659                                                     
[5]	validation-rmse:6.61610                                                     
[6]	validation-rmse:6.38304                                                     
[7]	validation-rmse:6.18979                                                     
[8]	validation-rmse:6.03019                                                     
[9]	validation-rmse:5.89799                                                     
[10]	validation-rmse:5.78959                                                    
[11]	validation-rmse:5.70202                                                    
[12]	validation-rmse:5.63001




[0]	validation-rmse:8.73391                                                     
[1]	validation-rmse:8.22111                                                     
[2]	validation-rmse:7.77521                                                     
[3]	validation-rmse:7.38974                                                     
[4]	validation-rmse:7.05776                                                     
[5]	validation-rmse:6.77380                                                     
[6]	validation-rmse:6.53180                                                     
[7]	validation-rmse:6.32612                                                     
[8]	validation-rmse:6.15232                                                     
[9]	validation-rmse:6.00552                                                     
[10]	validation-rmse:5.88201                                                    
[11]	validation-rmse:5.77821                                                    
[12]	validation-rmse:5.69079




[2]	validation-rmse:8.32061                                                     
[3]	validation-rmse:8.04152                                                     
[4]	validation-rmse:7.78661                                                     
[5]	validation-rmse:7.55446                                                     
[6]	validation-rmse:7.34305                                                     
[7]	validation-rmse:7.15114                                                     
[8]	validation-rmse:6.97706                                                     
[9]	validation-rmse:6.81942                                                     
[10]	validation-rmse:6.67691                                                    
[11]	validation-rmse:6.54804                                                    
[12]	validation-rmse:6.43203                                                    
[13]	validation-rmse:6.32768                                                    
[14]	validation-rmse:6.23397




[0]	validation-rmse:8.34762                                                     
[1]	validation-rmse:7.57717                                                     
[2]	validation-rmse:6.97674                                                     
[3]	validation-rmse:6.51551                                                     
[4]	validation-rmse:6.16747                                                     
[5]	validation-rmse:5.90730                                                     
[6]	validation-rmse:5.71234                                                     
[7]	validation-rmse:5.57081                                                     
[8]	validation-rmse:5.46590                                                     
[9]	validation-rmse:5.38785                                                     
[10]	validation-rmse:5.33036                                                    
[11]	validation-rmse:5.28970                                                    
[12]	validation-rmse:5.25882




[0]	validation-rmse:8.87073                                                     
[1]	validation-rmse:8.46227                                                     
[2]	validation-rmse:8.09342                                                     
[3]	validation-rmse:7.76199                                                     
[4]	validation-rmse:7.46419                                                     
[5]	validation-rmse:7.19694                                                     
[6]	validation-rmse:6.95875                                                     
[7]	validation-rmse:6.74582                                                     
[8]	validation-rmse:6.55677                                                     
[9]	validation-rmse:6.38938                                                     
[10]	validation-rmse:6.24101                                                    
[11]	validation-rmse:6.11012                                                    
[12]	validation-rmse:5.99489




[0]	validation-rmse:8.70034                                                     
[1]	validation-rmse:8.16357                                                     
[2]	validation-rmse:7.69928                                                     
[3]	validation-rmse:7.30354                                                     
[4]	validation-rmse:6.96159                                                     
[5]	validation-rmse:6.67349                                                     
[6]	validation-rmse:6.43192                                                     
[7]	validation-rmse:6.22250                                                     
[8]	validation-rmse:6.05262                                                     
[9]	validation-rmse:5.90428                                                     
[10]	validation-rmse:5.78278                                                    
[11]	validation-rmse:5.68329                                                    
[12]	validation-rmse:5.59791




[1]	validation-rmse:6.98693                                                     
[2]	validation-rmse:6.35637                                                     
[3]	validation-rmse:5.94964                                                     
[4]	validation-rmse:5.68803                                                     
[5]	validation-rmse:5.52183                                                     
[6]	validation-rmse:5.41704                                                     
[7]	validation-rmse:5.34835                                                     
[8]	validation-rmse:5.30211                                                     
[9]	validation-rmse:5.27094                                                     
[10]	validation-rmse:5.25196                                                    
[11]	validation-rmse:5.23560                                                    
[12]	validation-rmse:5.22333                                                    
[13]	validation-rmse:5.21604




[0]	validation-rmse:8.86158                                                     
[1]	validation-rmse:8.44751                                                     
[2]	validation-rmse:8.07623                                                     
[3]	validation-rmse:7.74329                                                     
[4]	validation-rmse:7.44422                                                     
[5]	validation-rmse:7.17802                                                     
[6]	validation-rmse:6.94148                                                     
[7]	validation-rmse:6.72921                                                     
[8]	validation-rmse:6.54344                                                     
[9]	validation-rmse:6.37700                                                     
[10]	validation-rmse:6.23074                                                    
[11]	validation-rmse:6.10270                                                    
[12]	validation-rmse:5.98743




[0]	validation-rmse:8.99869                                                     
[1]	validation-rmse:8.69774                                                     
[2]	validation-rmse:8.41917                                                     
[3]	validation-rmse:8.16018                                                     
[4]	validation-rmse:7.91886                                                     
[5]	validation-rmse:7.69569                                                     
[6]	validation-rmse:7.48864                                                     
[7]	validation-rmse:7.29774                                                     
[8]	validation-rmse:7.12159                                                     
[9]	validation-rmse:6.95766                                                     
[10]	validation-rmse:6.80956                                                    
[11]	validation-rmse:6.67160                                                    
[12]	validation-rmse:6.54315




[0]	validation-rmse:8.48691                                                     
[1]	validation-rmse:7.80037                                                     
[2]	validation-rmse:7.24200                                                     
[3]	validation-rmse:6.79286                                                     
[4]	validation-rmse:6.43385                                                     
[5]	validation-rmse:6.15108                                                     
[6]	validation-rmse:5.92855                                                     
[7]	validation-rmse:5.75489                                                     
[8]	validation-rmse:5.62028                                                     
[9]	validation-rmse:5.51757                                                     
[10]	validation-rmse:5.43927                                                    
[11]	validation-rmse:5.37705                                                    
[12]	validation-rmse:5.32990




[1]	validation-rmse:6.63522                                                     
[2]	validation-rmse:6.04797                                                     
[3]	validation-rmse:5.71642                                                     
[4]	validation-rmse:5.53495                                                     
[5]	validation-rmse:5.42997                                                     
[6]	validation-rmse:5.37173                                                     
[7]	validation-rmse:5.33337                                                     
[8]	validation-rmse:5.31020                                                     
[9]	validation-rmse:5.29630                                                     
[10]	validation-rmse:5.28470                                                    
[11]	validation-rmse:5.27621                                                    
[12]	validation-rmse:5.27007                                                    
[13]	validation-rmse:5.26291




[4]	validation-rmse:5.45813                                                     
[5]	validation-rmse:5.40563                                                     
[6]	validation-rmse:5.37843                                                     
[7]	validation-rmse:5.36365                                                     
[8]	validation-rmse:5.35275                                                     
[9]	validation-rmse:5.34709                                                     
[10]	validation-rmse:5.34130                                                    
[11]	validation-rmse:5.33646                                                    
[12]	validation-rmse:5.32955                                                    
[13]	validation-rmse:5.32606                                                    
[14]	validation-rmse:5.31815                                                    
[15]	validation-rmse:5.31676                                                    
[16]	validation-rmse:5.30960




[0]	validation-rmse:8.42713                                                     
[1]	validation-rmse:7.71961                                                     
[2]	validation-rmse:7.16338                                                     
[3]	validation-rmse:6.71992                                                     
[4]	validation-rmse:6.38591                                                     
[5]	validation-rmse:6.12313                                                     
[6]	validation-rmse:5.92355                                                     
[7]	validation-rmse:5.76891                                                     
[8]	validation-rmse:5.65760                                                     
[9]	validation-rmse:5.57226                                                     
[10]	validation-rmse:5.50135                                                    
[11]	validation-rmse:5.45208                                                    
[12]	validation-rmse:5.41458




[0]	validation-rmse:8.09369                                                     
[1]	validation-rmse:7.20339                                                     
[2]	validation-rmse:6.57405                                                     
[3]	validation-rmse:6.13877                                                     
[4]	validation-rmse:5.84410                                                     
[5]	validation-rmse:5.64452                                                     
[6]	validation-rmse:5.51192                                                     
[7]	validation-rmse:5.42368                                                     
[8]	validation-rmse:5.35936                                                     
[9]	validation-rmse:5.31648                                                     
[10]	validation-rmse:5.28790                                                    
[11]	validation-rmse:5.26759                                                    
[12]	validation-rmse:5.25302




[0]	validation-rmse:8.59573                                                     
[1]	validation-rmse:7.98580                                                     
[2]	validation-rmse:7.47397                                                     
[3]	validation-rmse:7.04915                                                     
[4]	validation-rmse:6.69859                                                     
[5]	validation-rmse:6.40777                                                     
[6]	validation-rmse:6.17676                                                     
[7]	validation-rmse:5.98379                                                     
[8]	validation-rmse:5.82717                                                     
[9]	validation-rmse:5.70257                                                     
[10]	validation-rmse:5.60102                                                    
[11]	validation-rmse:5.51915                                                    
[12]	validation-rmse:5.45895




[0]	validation-rmse:8.96149                                                     
[1]	validation-rmse:8.62728                                                     
[2]	validation-rmse:8.31914                                                     
[3]	validation-rmse:8.03502                                                     
[4]	validation-rmse:7.77361                                                     
[5]	validation-rmse:7.53356                                                     
[6]	validation-rmse:7.31346                                                     
[7]	validation-rmse:7.11176                                                     
[8]	validation-rmse:6.92713                                                     
[9]	validation-rmse:6.75936                                                     
[10]	validation-rmse:6.60579                                                    
[11]	validation-rmse:6.46718                                                    
[12]	validation-rmse:6.34040




[2]	validation-rmse:8.18430                                                     
[3]	validation-rmse:7.87697                                                     
[4]	validation-rmse:7.60109                                                     
[5]	validation-rmse:7.35382                                                     
[6]	validation-rmse:7.13293                                                     
[7]	validation-rmse:6.93580                                                     
[8]	validation-rmse:6.76038                                                     
[9]	validation-rmse:6.60461                                                     
[10]	validation-rmse:6.46592                                                    
[11]	validation-rmse:6.34376                                                    
[12]	validation-rmse:6.23540                                                    
[13]	validation-rmse:6.13976                                                    
[14]	validation-rmse:6.05539




[0]	validation-rmse:8.75190                                                     
[1]	validation-rmse:8.25251                                                     
[2]	validation-rmse:7.81542                                                     
[3]	validation-rmse:7.43602                                                     
[4]	validation-rmse:7.10675                                                     
[5]	validation-rmse:6.82331                                                     
[6]	validation-rmse:6.57866                                                     
[7]	validation-rmse:6.37034                                                     
[8]	validation-rmse:6.19239                                                     
[9]	validation-rmse:6.04067                                                     
[10]	validation-rmse:5.91083                                                    
[11]	validation-rmse:5.80076                                                    
[12]	validation-rmse:5.70727




[3]	validation-rmse:5.94839                                                    
[4]	validation-rmse:5.73745                                                    
[5]	validation-rmse:5.61409                                                    
[6]	validation-rmse:5.53558                                                    
[7]	validation-rmse:5.49057                                                    
[8]	validation-rmse:5.46416                                                    
[9]	validation-rmse:5.44357                                                    
[10]	validation-rmse:5.42967                                                   
[11]	validation-rmse:5.41967                                                   
[12]	validation-rmse:5.41551                                                   
[13]	validation-rmse:5.41147                                                   
[14]	validation-rmse:5.40869                                                   
[15]	validation-rmse:5.40694            




[1]	validation-rmse:7.95272                                                    
[2]	validation-rmse:7.43779                                                    
[3]	validation-rmse:7.01481                                                    
[4]	validation-rmse:6.67103                                                    
[5]	validation-rmse:6.39203                                                    
[6]	validation-rmse:6.16629                                                    
[7]	validation-rmse:5.98500                                                    
[8]	validation-rmse:5.83836                                                    
[9]	validation-rmse:5.72225                                                    
[10]	validation-rmse:5.62765                                                   
[11]	validation-rmse:5.55328                                                   
[12]	validation-rmse:5.49443                                                   
[13]	validation-rmse:5.44658            




[1]	validation-rmse:7.53923                                                    
[2]	validation-rmse:6.94862                                                    
[3]	validation-rmse:6.50632                                                    
[4]	validation-rmse:6.17614                                                    
[5]	validation-rmse:5.93231                                                    
[6]	validation-rmse:5.75273                                                    
[7]	validation-rmse:5.62167                                                    
[8]	validation-rmse:5.52731                                                    
[9]	validation-rmse:5.45591                                                    
[10]	validation-rmse:5.40296                                                   
[11]	validation-rmse:5.36007                                                   
[12]	validation-rmse:5.33040                                                   
[13]	validation-rmse:5.30789            




[2]	validation-rmse:5.85879                                                    
[3]	validation-rmse:5.60857                                                    
[4]	validation-rmse:5.47860                                                    
[5]	validation-rmse:5.40802                                                    
[6]	validation-rmse:5.37134                                                    
[7]	validation-rmse:5.34965                                                    
[8]	validation-rmse:5.33747                                                    
[9]	validation-rmse:5.32974                                                    
[10]	validation-rmse:5.32394                                                   
[11]	validation-rmse:5.32005                                                   
[12]	validation-rmse:5.31743                                                   
[13]	validation-rmse:5.31531                                                   
[14]	validation-rmse:5.31330            

In [47]:
mlflow.xgboost.autolog(disable=True)

In [53]:
with mlflow.start_run(nested=True):
    
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params= {
       "learning_rate":0.0923004677969142,
       "max_depth":43,
       "min_child_weight":3.158825903573778,
       "objective": "reg:linear",
       "reg_alpha": 0.020676403448522406,
       "reg_lambda": 0.2007432700080768,
       "seed": 42
    }

    # mlflow.xgboost.autolog()
    
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    rmse = mean_squared_error(y_val, y_pred)**0.5
    mlflow.log_metric("rmse", rmse)


with open("models/preprocessor.b", "wb") as f_out:
    pickle.dump(dv, f_out)  # store the dv for future reuse
    
mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



# mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



[0]	validation-rmse:8.75190
[1]	validation-rmse:8.25251
[2]	validation-rmse:7.81542
[3]	validation-rmse:7.43602
[4]	validation-rmse:7.10675
[5]	validation-rmse:6.82331
[6]	validation-rmse:6.57866
[7]	validation-rmse:6.37034
[8]	validation-rmse:6.19239
[9]	validation-rmse:6.04067
[10]	validation-rmse:5.91083
[11]	validation-rmse:5.80076
[12]	validation-rmse:5.70727
[13]	validation-rmse:5.62761
[14]	validation-rmse:5.56129
[15]	validation-rmse:5.50389
[16]	validation-rmse:5.45596
[17]	validation-rmse:5.41620
[18]	validation-rmse:5.38238
[19]	validation-rmse:5.35287
[20]	validation-rmse:5.32736
[21]	validation-rmse:5.30604
[22]	validation-rmse:5.28679
[23]	validation-rmse:5.27148
[24]	validation-rmse:5.25761
[25]	validation-rmse:5.24528
[26]	validation-rmse:5.23584
[27]	validation-rmse:5.22704
[28]	validation-rmse:5.22049
[29]	validation-rmse:5.21424
[30]	validation-rmse:5.20864
[31]	validation-rmse:5.20435
[32]	validation-rmse:5.20088
[33]	validation-rmse:5.19687
[34]	validation-rmse:5.1



TypeError: cannot pickle 'module' object

In [70]:
with mlflow.start_run():
    

   best_params= {"objective": 'reg:squarederror',
                 "n_estimators" :100,
                 "learning_rate":0.0923004677969142,
                 "max_depth":40,
                 "min_child_weight":3.158825903573778,
                 "objective": "reg:linear",
                 "reg_alpha": 0.020676403448522406,
                 "reg_lambda": 0.2007432700080768,
                 "seed": 42
                 }


   xgb_regressor = xgb.XGBRegressor(**best_params)

   # mlflow.xgboost.autolog()
   
   xgb_regressor.fit(X_train, y_train)
   y_pred = xgb_regressor.predict(X_val)

   # Evaluate the model

   rmse = mean_squared_error(y_val, y_pred)**0.5
   mlflow.log_metric("rmse", rmse)
   with open("models/xgb.bin", "wb") as f_out:
      pickle.dump((dv,xgb_regressor), f_out)  

mlflow.log_artifact("models/xgb.bin", artifact_path="models_pickle")




In [59]:
#load model from saved artifact 
logged_model ='runs:/c5fd346b28394d5faf39d881a163b7b4/models_mlflow'
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [60]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: c5fd346b28394d5faf39d881a163b7b4

In [72]:
xgboost_model = mlflow.xgboost.load_model(logged_model)
xgboost_model

<xgboost.core.Booster at 0x15ff72820>

In [88]:
y_pred= xgboost_model.predict(valid)



In [89]:
y_pred[:3]

array([22.335491, 20.831606, 24.71197 ], dtype=float32)

## Options 2 

In [None]:
#if we use artifact instead
artifact_path = "models_pickle/xgb.bin"
run_id = "50215fbe793a49f7bc383d7c5a347586"
local_path = mlflow.artifacts.download_artifacts(
    run_id=run_id,
    artifact_path=artifact_path
    )

In [80]:
# Load with XGBoost
with open(local_path, "rb") as f:
    model = pickle.load(f)

In [85]:
dv, xgb_model = model 

In [91]:

y_pred_bin= xgb_model.predict(X_val)

In [92]:
y_pred_bin[:3]

array([21.646782, 21.360643, 24.527328], dtype=float32)

In [79]:
import os
print("File path:", local_path)
print("File size:", os.path.getsize(local_path), "bytes")

File path: /Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1/50215fbe793a49f7bc383d7c5a347586/artifacts/models_pickle/xgb.bin
File size: 4387188 bytes


##  Log serveral models  for  prepairation of model registry 

In [153]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

mlflow.sklearn.autolog()
dv = DictVectorizer()
train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "data/green_tripdata_2023-01.parquet")
        mlflow.log_param("valid-data-path", "data/green_tripdata_2023-02.parquet")
    

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = mean_squared_error(y_val, y_pred)**0.5
        mlflow.log_metric("rmse", rmse)
        with open("models/preprocessor.b", "wb") as f_out:
            pickle.dump((dv,mlmodel), f_out)  

        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

 - mlflow (current: 2.21.3, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
 - mlflow (current: 2.21.3, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
 - mlflow (current: 2.21.3, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
 - mlflow (current: 2.21.3, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [97]:
from mlflow.tracking import MlflowClient
mlflow_tracking_uri = "sqlite:///mlflow.db"
client = MlflowClient(tracking_uri=mlflow_tracking_uri)

In [100]:
experiments = client.search_experiments()
for exp in experiments:
    print (f'Nane: {exp.name} , ID:{exp.experiment_id}, artifact location :{exp.artifact_location}')

Nane: nyc-taxi-experiment , ID:1, artifact location :/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1
Nane: Default , ID:0, artifact location :mlflow-artifacts:/0


In [105]:
from mlflow.entities import ViewType
runs = client.search_runs(experiment_ids='1',
                          filter_string='',
                          run_view_type= ViewType.ACTIVE_ONLY,
                          max_results=5,
                          order_by=['metrics.rmse ASC']
                          )
for run in runs:
    print(f"run id: {run.info.run_id}, rmse : {run.data.metrics['rmse']:.4f}")

run id: 23b3fda0ae8a483c9cd0f59389d7a4e4, rmse : 5.1147
run id: f10256a9713c443e977523ac4a695a1d, rmse : 5.1147
run id: 1cbcfc48cbd24c7ab4afab6656c2c242, rmse : 5.1147
run id: f9610d3a40c64bdfadaab62e54ba8eb0, rmse : 5.1147
run id: 9388a15b64f443c695cd88f8fb136afa, rmse : 5.1147


In [107]:
from mlflow.entities import ViewType
runs = client.search_runs(experiment_ids='1',
                          filter_string='metric.rmse < 5.12',
                          run_view_type= ViewType.ACTIVE_ONLY,
                          max_results=10,
                          order_by=['metrics.rmse ASC']
                          )
for run in runs:
    print(f"run id: {run.info.run_id}, rmse : {run.data.metrics['rmse']:.4f}")

run id: 23b3fda0ae8a483c9cd0f59389d7a4e4, rmse : 5.1147
run id: f10256a9713c443e977523ac4a695a1d, rmse : 5.1147
run id: 1cbcfc48cbd24c7ab4afab6656c2c242, rmse : 5.1147
run id: f9610d3a40c64bdfadaab62e54ba8eb0, rmse : 5.1147
run id: 9388a15b64f443c695cd88f8fb136afa, rmse : 5.1147
run id: 97fbf6c3cba14d4e9a64df93b2c47593, rmse : 5.1157


In [122]:
# next to find models hving lowest rmse and duration 
run_infos = []
runs = client.search_runs(experiment_ids='1',
                          filter_string='',
                          run_view_type= ViewType.ACTIVE_ONLY,
                          max_results=10,
                          order_by=['metrics.rmse ASC']
                          )
for run in runs:
    rmse = run.data.metrics.get("rmse")
    start = run.info.start_time
    end = run.info.end_time
    if rmse is not None and start is not None and end is not None:
        duration = (end - start) / 1000  # Convert ms to seconds
        run_infos.append({
            "run_id": run.info.run_id,
            "rmse": rmse,
            "duration_sec": duration
        })

run_infos

[{'run_id': '23b3fda0ae8a483c9cd0f59389d7a4e4',
  'rmse': 5.11472819026649,
  'duration_sec': 33.625},
 {'run_id': 'f10256a9713c443e977523ac4a695a1d',
  'rmse': 5.11472819026649,
  'duration_sec': 33.245},
 {'run_id': '1cbcfc48cbd24c7ab4afab6656c2c242',
  'rmse': 5.11472819026649,
  'duration_sec': 43.873},
 {'run_id': 'f9610d3a40c64bdfadaab62e54ba8eb0',
  'rmse': 5.11472819026649,
  'duration_sec': 36.964},
 {'run_id': '9388a15b64f443c695cd88f8fb136afa',
  'rmse': 5.11472819026649,
  'duration_sec': 262.577},
 {'run_id': '97fbf6c3cba14d4e9a64df93b2c47593',
  'rmse': 5.115681014129676,
  'duration_sec': 18.229},
 {'run_id': '4b784513b8e746489ae29d9e632dd551',
  'rmse': 5.12187644499111,
  'duration_sec': 13.135},
 {'run_id': 'd0a87225223f4dc88837b3c9e74289b7',
  'rmse': 5.126511500125477,
  'duration_sec': 26.849},
 {'run_id': 'f39a48180fa74a54a0652fe896686c32',
  'rmse': 5.128890829962695,
  'duration_sec': 25.31},
 {'run_id': 'b9c4d8c6330a4f7aa10338363e9fd7de',
  'rmse': 5.1293643771

In [125]:
#sort 
sorted_runs = sorted(
    run_infos,
    key=lambda x: (x["rmse"], x["duration_sec"]))[:3]  # First by rmse, then duration


In [126]:
sorted_runs 

[{'run_id': 'f10256a9713c443e977523ac4a695a1d',
  'rmse': 5.11472819026649,
  'duration_sec': 33.245},
 {'run_id': '23b3fda0ae8a483c9cd0f59389d7a4e4',
  'rmse': 5.11472819026649,
  'duration_sec': 33.625},
 {'run_id': 'f9610d3a40c64bdfadaab62e54ba8eb0',
  'rmse': 5.11472819026649,
  'duration_sec': 36.964}]

In [130]:
# I would register the 3 models with least rsme and duration 
for i in sorted_runs:
    model_uri = f"runs:/{i.get('run_id')}/model"
    mlflow.register_model(model_uri=model_uri,)name='nyc-taxi-regiessor'

Registered model 'nyc-taxi-regiessor' already exists. Creating a new version of this model...
Created version '3' of model 'nyc-taxi-regiessor'.
Registered model 'nyc-taxi-regiessor' already exists. Creating a new version of this model...
Created version '4' of model 'nyc-taxi-regiessor'.
Registered model 'nyc-taxi-regiessor' already exists. Creating a new version of this model...
Created version '5' of model 'nyc-taxi-regiessor'.


In [137]:
# let check what models we have under registry
all_versions = client.search_model_versions(filter_string= "name='nyc-taxi-regiessor'")

for v in all_versions:
    print(f"Version: {v.version}, Stage: {v.current_stage}, Status: {v.status}, Aliases: {v.aliases}")

Version: 5, Stage: None, Status: READY, Aliases: []
Version: 4, Stage: None, Status: READY, Aliases: []
Version: 3, Stage: None, Status: READY, Aliases: []
Version: 2, Stage: None, Status: READY, Aliases: []
Version: 1, Stage: None, Status: READY, Aliases: []


In [142]:
client.transition_model_version_stage(name= 'nyc-taxi-regiessor',
                                      version=3,
                                      stage='production',
                                      archive_existing_versions=False)


  client.transition_model_version_stage(name= 'nyc-taxi-regiessor',


<ModelVersion: aliases=[], creation_timestamp=1743718988470, current_stage='Production', description=None, last_updated_timestamp=1743721981238, name='nyc-taxi-regiessor', run_id='f10256a9713c443e977523ac4a695a1d', run_link=None, source='/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1/f10256a9713c443e977523ac4a695a1d/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [143]:
client.transition_model_version_stage(name= 'nyc-taxi-regiessor',
                                      version=4,
                                      stage='staging',
                                      archive_existing_versions=False)

  client.transition_model_version_stage(name= 'nyc-taxi-regiessor',


<ModelVersion: aliases=[], creation_timestamp=1743718988487, current_stage='Staging', description=None, last_updated_timestamp=1743722134451, name='nyc-taxi-regiessor', run_id='23b3fda0ae8a483c9cd0f59389d7a4e4', run_link=None, source='/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1/23b3fda0ae8a483c9cd0f59389d7a4e4/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [144]:
# we can also update the description 
client.update_registered_model(name='nyc-taxi-regiessor',
                               description='This is the registry for ny taxi fare project'
                               )

<RegisteredModel: aliases={'challenger': 1, 'champion': 2}, creation_timestamp=1743650476176, description='This is the registry for ny taxi fare project', last_updated_timestamp=1743722673960, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1743718988498, current_stage='None', description=None, last_updated_timestamp=1743718988498, name='nyc-taxi-regiessor', run_id='f9610d3a40c64bdfadaab62e54ba8eb0', run_link=None, source='/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1/f9610d3a40c64bdfadaab62e54ba8eb0/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=5>,
 <ModelVersion: aliases=[], creation_timestamp=1743718988470, current_stage='Production', description=None, last_updated_timestamp=1743721981238, name='nyc-taxi-regiessor', run_id='f10256a9713c443e977523ac4a695a1d', run_link=None, source='/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1/f10256a9713c443e977523ac4a695a1d/artifacts/model', s

In [145]:
#we can also update the description for each version of model 
client.update_model_version(name='nyc-taxi-regiessor',
                            version= 3,
                            description='The model is transit to production'
                            )

<ModelVersion: aliases=[], creation_timestamp=1743718988470, current_stage='Production', description='The model is transit to production', last_updated_timestamp=1743722789499, name='nyc-taxi-regiessor', run_id='f10256a9713c443e977523ac4a695a1d', run_link=None, source='/Users/zihaozhou/VS_code/mlops-zoomcamp/02-experiment-tracking/mlruns/1/f10256a9713c443e977523ac4a695a1d/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

### Comparing versions and selecting the new "Production" model

In the last section, we will retrieve models registered in the model registry and compare their performance on an unseen test set. The idea is to simulate the scenario in which a deployment engineer has to interact with the model registry to decide whether to update the model version that is in production or not.

These are the steps:

1. Load the test dataset, which corresponds to the NYC Green Taxi data from the month of March 2021.
2. Download the `DictVectorizer` that was fitted using the training data and saved to MLflow as an artifact, and load it with pickle.
3. Preprocess the test set using the `DictVectorizer` so we can properly feed the regressors.
4. Make predictions on the test set using the model versions that are currently in the "Staging" and "Production" stages, and compare their performance.
5. Based on the results, update the "Production" model version accordingly.


**Note: the model registry doesn't actually deploy the model to production when you transition a model to the "Production" stage, it just assign a label to that model version. You should complement the registry with some CI/CD code that does the actual deployment.**

In [147]:
from sklearn.metrics import root_mean_squared_error
import pandas as pd


def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df


def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)


def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": root_mean_squared_error(y_test, y_pred)}

In [148]:
df = read_dataframe('data/green_tripdata_2023-03.parquet')

In [154]:
with open('models/preprocessor.b','rb') as f:
    dv,model = pickle.load(f)

In [155]:
X_test = preprocess(df,dv)

In [156]:
target = "duration"
y_test = df[target].values

In [157]:
%time test_model(name='nyc-taxi-regiessor', stage="Production", X_test=X_test, y_test=y_test)

 - mlflow (current: 2.21.3, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


CPU times: user 16.5 s, sys: 391 ms, total: 16.9 s
Wall time: 1.89 s


{'rmse': 5.344813683865913}

In [158]:
%time test_model(name='nyc-taxi-regiessor', stage="Staging", X_test=X_test, y_test=y_test)

 - mlflow (current: 2.21.3, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


CPU times: user 15.8 s, sys: 292 ms, total: 16.1 s
Wall time: 1.78 s


{'rmse': 5.344813683865913}