In [2]:
!python -V

Python 3.9.19


In [3]:
import pandas as pd

In [4]:
import os
import pickle

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error

In [7]:
import mlflow

# Use the SQLite database file "mlflow.db" as the MLflow tracking store and
# make "nyc-taxi-experiment" the active experiment for the runs started below.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# Kept as the last expression of the cell so the returned Experiment object is displayed.
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-Experiment-tracking/mlruns/1', creation_time=1726570807518, experiment_id='1', last_update_time=1726570807518, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [8]:
def read_dataframe(filename):
    """Load a green-taxi trip CSV and prepare it for duration modelling.

    Steps:
      1. Read the CSV and parse the pickup/dropoff columns as datetimes.
      2. Add a ``duration`` column holding the trip length in minutes.
      3. Keep only trips lasting between 1 and 60 minutes (inclusive).
      4. Cast the pickup/dropoff location IDs to strings so they can be used
         as categorical features.

    Args:
        filename: Path (or any source accepted by ``pd.read_csv``) of a CSV
            with the columns ``lpep_pickup_datetime``,
            ``lpep_dropoff_datetime``, ``PULocationID`` and ``DOLocationID``.

    Returns:
        A new, independent ``pd.DataFrame`` with the filtered trips.
    """
    # low_memory=False parses each column in a single pass.
    # NOTE(review): added because the notebook output after loading looks like
    # pandas' mixed-dtype warning for read_csv -- confirm against the raw data.
    df = pd.read_csv(filename, low_memory=False)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorized timedelta -> minutes conversion (no per-row Python lambda).
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # .copy() detaches the filtered rows from the original frame so the column
    # assignment below writes to a real DataFrame rather than a slice
    # (avoids SettingWithCopyWarning and ambiguous view/copy semantics).
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [9]:
# January 2023 trips are the training set; February 2023 trips are the validation set.
df_train, df_val = (
    read_dataframe(csv_path)
    for csv_path in (
        './data/green_tripdata_2023-01.csv',
        './data/green_tripdata_2023-02.csv',
    )
)

  df = pd.read_csv(filename)


In [10]:
# Row counts of the (training, validation) frames after filtering.
tuple(len(frame) for frame in (df_train, df_val))

(65946, 62574)

In [11]:
# Combine pickup and dropoff zone into a single "<pickup>_<dropoff>" feature
# on both frames (the columns are added in place).
for frame in (df_train, df_val):
    frame['PU_DO'] = frame['PULocationID'] + '_' + frame['DOLocationID']

In [12]:
# The combined PU_DO pair is used in place of the separate
# 'PULocationID' / 'DOLocationID' columns.
categorical = ['PU_DO']
numerical = ['trip_distance']
feature_columns = categorical + numerical

# One dict per trip, which is the input format DictVectorizer consumes.
train_dicts = df_train[feature_columns].to_dict(orient='records')
val_dicts = df_val[feature_columns].to_dict(orient='records')

# Fit the vectorizer on the training records only, then reuse it unchanged
# for the validation records.
dv = DictVectorizer()
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [13]:
# Regression target: trip duration in minutes, extracted as plain arrays.
target = 'duration'
y_train, y_val = (frame[target].values for frame in (df_train, df_val))

In [14]:
# Baseline: ordinary least squares on the vectorized features.
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

# Validation RMSE. The `squared=False` argument of mean_squared_error is
# deprecated in scikit-learn 1.4 and removed in 1.6, so take the square root
# of the MSE explicitly; this works on every scikit-learn version.
mean_squared_error(y_val, y_pred) ** 0.5



np.float64(6.037803764245777)

In [15]:
# Persist the fitted vectorizer together with the model so both can be
# reloaded as a pair. Create the output directory first: on a fresh checkout
# 'models/' may not exist and open() would raise FileNotFoundError.
os.makedirs('models', exist_ok=True)
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [23]:
# Train a Lasso model inside a single MLflow run and record its provenance
# (developer tag, data paths), hyperparameter, validation RMSE and the
# pickled (vectorizer, model) pair.
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2023-01.csv")
    mlflow.log_param("valid-data-path", "./data/green_tripdata_2023-02.csv")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    # NOTE: rebinds the notebook-level name `lr` (previously the
    # LinearRegression baseline) to the Lasso model.
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    # `squared=False` is deprecated in scikit-learn 1.4 and removed in 1.6;
    # take the square root of the MSE explicitly instead.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # Log the model that was trained in THIS run. Previously the run attached
    # models/lin_reg.bin, which holds the earlier LinearRegression baseline
    # and does not match the logged alpha/rmse.
    os.makedirs("models", exist_ok=True)
    model_path = "models/lasso.bin"
    with open(model_path, "wb") as f_out:
        pickle.dump((dv, lr), f_out)
    mlflow.log_artifact(local_path=model_path, artifact_path="models_pickle")



In [17]:
import xgboost as xgb

In [18]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [19]:
# Wrap the vectorized features and their targets in XGBoost DMatrix objects:
# `train` for fitting, `valid` for evaluation during boosting.
train, valid = (
    xgb.DMatrix(features, label=labels)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

In [20]:
def objective(params):
    """Train one XGBoost model for a hyperopt trial and log it to MLflow.

    Each call opens its own MLflow run, tags it with ``model=xgboost``, logs
    the sampled hyperparameters, trains on the notebook-level ``train``
    DMatrix while evaluating on ``valid``, and logs the validation RMSE.

    Args:
        params: Dict of XGBoost training parameters; passed unchanged to
            ``mlflow.log_params`` and ``xgb.train``.

    Returns:
        Dict with ``'loss'`` (the validation RMSE) and ``'status'``
        (``STATUS_OK``), the result format consumed by hyperopt's ``fmin``.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        # `squared=False` is deprecated in scikit-learn 1.4 and removed in
        # 1.6; take the square root of the MSE explicitly instead.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [23]:
# Hyperparameter search space for XGBoost.
# hp.loguniform(label, low, high) samples exp(uniform(low, high)), so the
# bounds below are natural-log exponents (e.g. learning_rate in [e^-3, e^0]).
search_space = {
    # quniform yields floats; scope.int casts to the integer XGBoost expects.
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the
    # documented replacement with the same squared-error loss.
    'objective': 'reg:squarederror',
    'seed': 42
}

# Run 50 TPE-guided trials; each trial calls `objective`, which logs its own
# MLflow run.
# NOTE(review): 'seed' above only seeds XGBoost. fmin is not given an
# `rstate`, so the sampled hyperparameters differ between notebook runs.
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:8.61299                           
[1]	validation-rmse:8.01900                           
[2]	validation-rmse:7.52461                           
[3]	validation-rmse:7.11607                           
[4]	validation-rmse:6.78018                           
[5]	validation-rmse:6.50608                           
[6]	validation-rmse:6.28445                           
[7]	validation-rmse:6.10537                           
[8]	validation-rmse:5.96202                           
[9]	validation-rmse:5.84661                           
[10]	validation-rmse:5.75391                          
[11]	validation-rmse:5.67745                          
[12]	validation-rmse:5.61782                          
[13]	validation-rmse:5.56949                          
[14]	validation-rmse:5.52962                          
[15]	validation-rmse:5.49889                          
[16]	validation-rmse:5.47407                          
[17]	validation-rmse:5.45360                          
[18]	valid





[0]	validation-rmse:8.25517                                                        
[1]	validation-rmse:7.43436                                                        
[2]	validation-rmse:6.81593                                                        
[3]	validation-rmse:6.35783                                                        
[4]	validation-rmse:6.02007                                                        
[5]	validation-rmse:5.77782                                                        
[6]	validation-rmse:5.60235                                                        
[7]	validation-rmse:5.47707                                                        
[8]	validation-rmse:5.38814                                                        
[9]	validation-rmse:5.32723                                                        
[10]	validation-rmse:5.28184                                                       
[11]	validation-rmse:5.25049                                                





[0]	validation-rmse:8.17513                                                      
[1]	validation-rmse:7.33599                                                      
[2]	validation-rmse:6.73746                                                      
[3]	validation-rmse:6.31013                                                      
[4]	validation-rmse:6.01384                                                      
[5]	validation-rmse:5.80466                                                      
[6]	validation-rmse:5.66148                                                      
[7]	validation-rmse:5.56685                                                      
[8]	validation-rmse:5.49489                                                      
[9]	validation-rmse:5.44319                                                      
[10]	validation-rmse:5.41024                                                     
[11]	validation-rmse:5.38628                                                     
[12]	validation-





[0]	validation-rmse:5.61898                                                    
[1]	validation-rmse:5.25227                                                    
[2]	validation-rmse:5.20660                                                    
[3]	validation-rmse:5.19231                                                    
[4]	validation-rmse:5.19135                                                    
[5]	validation-rmse:5.18781                                                    
[6]	validation-rmse:5.18361                                                    
[7]	validation-rmse:5.18273                                                    
[8]	validation-rmse:5.18421                                                    
[9]	validation-rmse:5.18346                                                    
[10]	validation-rmse:5.18608                                                   
[11]	validation-rmse:5.18373                                                   
[12]	validation-rmse:5.18686            





[0]	validation-rmse:6.89815                                                    
[1]	validation-rmse:5.91661                                                    
[2]	validation-rmse:5.53909                                                    
[3]	validation-rmse:5.37273                                                    
[4]	validation-rmse:5.33194                                                    
[5]	validation-rmse:5.31085                                                    
[6]	validation-rmse:5.29216                                                    
[7]	validation-rmse:5.28870                                                    
[8]	validation-rmse:5.28556                                                    
[9]	validation-rmse:5.28178                                                    
[10]	validation-rmse:5.27825                                                   
[11]	validation-rmse:5.27307                                                   
[12]	validation-rmse:5.27206            





[0]	validation-rmse:8.03507                                                    
[1]	validation-rmse:7.14544                                                    
[2]	validation-rmse:6.54685                                                    
[3]	validation-rmse:6.15243                                                    
[4]	validation-rmse:5.89441                                                    
[5]	validation-rmse:5.72944                                                    
[6]	validation-rmse:5.62157                                                    
[7]	validation-rmse:5.55065                                                    
[8]	validation-rmse:5.50286                                                    
[9]	validation-rmse:5.47067                                                    
[10]	validation-rmse:5.44558                                                   
[11]	validation-rmse:5.42642                                                   
[12]	validation-rmse:5.41162            





[0]	validation-rmse:6.87863                                                    
[1]	validation-rmse:5.93934                                                    
[2]	validation-rmse:5.61541                                                    
[3]	validation-rmse:5.49569                                                    
[4]	validation-rmse:5.43477                                                    
[5]	validation-rmse:5.40308                                                    
[6]	validation-rmse:5.38154                                                    
[7]	validation-rmse:5.36294                                                    
[8]	validation-rmse:5.34777                                                    
[9]	validation-rmse:5.34154                                                    
[10]	validation-rmse:5.34006                                                   
[11]	validation-rmse:5.33596                                                   
[12]	validation-rmse:5.33507            





[0]	validation-rmse:5.48092                                                    
[1]	validation-rmse:5.42013                                                    
[2]	validation-rmse:5.41476                                                    
[3]	validation-rmse:5.41053                                                    
[4]	validation-rmse:5.40377                                                    
[5]	validation-rmse:5.39589                                                    
[6]	validation-rmse:5.39137                                                    
[7]	validation-rmse:5.38638                                                    
[8]	validation-rmse:5.38323                                                    
[9]	validation-rmse:5.38180                                                    
[10]	validation-rmse:5.37920                                                   
[11]	validation-rmse:5.36625                                                   
[12]	validation-rmse:5.36048            





[0]	validation-rmse:5.46022                                                    
[1]	validation-rmse:5.40151                                                    
[2]	validation-rmse:5.39057                                                    
[3]	validation-rmse:5.39097                                                    
[4]	validation-rmse:5.36962                                                    
[5]	validation-rmse:5.36686                                                    
[6]	validation-rmse:5.36474                                                    
[7]	validation-rmse:5.36844                                                    
[8]	validation-rmse:5.36805                                                    
[9]	validation-rmse:5.35932                                                    
[10]	validation-rmse:5.36180                                                   
[11]	validation-rmse:5.36457                                                   
[12]	validation-rmse:5.36752            





[0]	validation-rmse:6.51491                                                    
[1]	validation-rmse:5.68422                                                    
[2]	validation-rmse:5.45364                                                    
[3]	validation-rmse:5.37637                                                    
[4]	validation-rmse:5.34385                                                    
[5]	validation-rmse:5.32926                                                    
[6]	validation-rmse:5.31969                                                    
[7]	validation-rmse:5.31248                                                    
[8]	validation-rmse:5.30873                                                    
[9]	validation-rmse:5.30368                                                    
[10]	validation-rmse:5.29874                                                   
[11]	validation-rmse:5.29490                                                   
[12]	validation-rmse:5.29076            




[0]	validation-rmse:9.00393                                                     
[1]	validation-rmse:8.70841                                                     
 20%|██        | 10/50 [06:40<17:36, 26.41s/trial, best loss: 5.138534068751411]




[2]	validation-rmse:8.43538                                                     
[3]	validation-rmse:8.18276                                                     
[4]	validation-rmse:7.95016                                                     
[5]	validation-rmse:7.73548                                                     
[6]	validation-rmse:7.53667                                                     
[7]	validation-rmse:7.35556                                                     
[8]	validation-rmse:7.18742                                                     
[9]	validation-rmse:7.03399                                                     
[10]	validation-rmse:6.89255                                                    
[11]	validation-rmse:6.76396                                                    
[12]	validation-rmse:6.64541                                                    
[13]	validation-rmse:6.53748                                                    
[14]	validation-rmse:6.43943





[0]	validation-rmse:5.51620                                                     
[1]	validation-rmse:5.27598                                                     
[2]	validation-rmse:5.24720                                                     
[3]	validation-rmse:5.22867                                                     
[4]	validation-rmse:5.22178                                                     
[5]	validation-rmse:5.21922                                                     
[6]	validation-rmse:5.20944                                                     
[7]	validation-rmse:5.20394                                                     
[8]	validation-rmse:5.20686                                                     
[9]	validation-rmse:5.19772                                                     
[10]	validation-rmse:5.19645                                                    
[11]	validation-rmse:5.18916                                                    
[12]	validation-rmse:5.19101





[0]	validation-rmse:8.33952                                                     
[1]	validation-rmse:7.57625                                                     
[2]	validation-rmse:6.99172                                                     
[3]	validation-rmse:6.54912                                                     
[4]	validation-rmse:6.21865                                                     
[5]	validation-rmse:5.97353                                                     
[6]	validation-rmse:5.79325                                                     
[7]	validation-rmse:5.66262                                                     
[8]	validation-rmse:5.56657                                                     
[9]	validation-rmse:5.49526                                                     
[10]	validation-rmse:5.43987                                                    
[11]	validation-rmse:5.40173                                                    
[12]	validation-rmse:5.37148





[0]	validation-rmse:8.31350                                                     
[1]	validation-rmse:7.52909                                                     
[2]	validation-rmse:6.92711                                                     
[3]	validation-rmse:6.47350                                                     
[4]	validation-rmse:6.13443                                                     
[5]	validation-rmse:5.88565                                                     
[6]	validation-rmse:5.70366                                                     
[7]	validation-rmse:5.57072                                                     
[8]	validation-rmse:5.47427                                                     
[9]	validation-rmse:5.40506                                                     
[10]	validation-rmse:5.35435                                                    
[11]	validation-rmse:5.31567                                                    
[12]	validation-rmse:5.28596





[0]	validation-rmse:8.54318                                                     
[1]	validation-rmse:7.89784                                                     
[2]	validation-rmse:7.36813                                                     
[3]	validation-rmse:6.93562                                                     
[4]	validation-rmse:6.58731                                                     
[5]	validation-rmse:6.30855                                                     
[6]	validation-rmse:6.08593                                                     
[7]	validation-rmse:5.90910                                                     
[8]	validation-rmse:5.76804                                                     
[9]	validation-rmse:5.65837                                                     
[10]	validation-rmse:5.57043                                                    
[11]	validation-rmse:5.50186                                                    
[12]	validation-rmse:5.44473





[0]	validation-rmse:5.65662                                                     
[1]	validation-rmse:5.43659                                                     
[2]	validation-rmse:5.42511                                                     
[3]	validation-rmse:5.40778                                                     
[4]	validation-rmse:5.40576                                                     
[5]	validation-rmse:5.40600                                                     
[6]	validation-rmse:5.39713                                                     
[7]	validation-rmse:5.40106                                                     
[8]	validation-rmse:5.39379                                                     
[9]	validation-rmse:5.39559                                                     
[10]	validation-rmse:5.39128                                                    
[11]	validation-rmse:5.38648                                                    
[12]	validation-rmse:5.38316





[0]	validation-rmse:8.59769                                                     
[1]	validation-rmse:7.99151                                                     
[2]	validation-rmse:7.48438                                                     
[3]	validation-rmse:7.06547                                                     
[4]	validation-rmse:6.72098                                                     
[5]	validation-rmse:6.44045                                                     
[6]	validation-rmse:6.20996                                                     
[7]	validation-rmse:6.02485                                                     
[8]	validation-rmse:5.87513                                                     
[9]	validation-rmse:5.75172                                                     
[10]	validation-rmse:5.65235                                                    
[11]	validation-rmse:5.57360                                                    
[12]	validation-rmse:5.50920





[0]	validation-rmse:5.31552                                                      
[1]	validation-rmse:5.26597                                                      
[2]	validation-rmse:5.26173                                                      
[3]	validation-rmse:5.25429                                                      
[4]	validation-rmse:5.25287                                                      
[5]	validation-rmse:5.25353                                                      
[6]	validation-rmse:5.24443                                                      
[7]	validation-rmse:5.24082                                                      
[8]	validation-rmse:5.23474                                                      
[9]	validation-rmse:5.22942                                                      
[10]	validation-rmse:5.23056                                                     
[11]	validation-rmse:5.22621                                                     
[12]	validation-





[0]	validation-rmse:7.85619                                                      
[1]	validation-rmse:6.86148                                                      
[2]	validation-rmse:6.21548                                                      
[3]	validation-rmse:5.81071                                                      
[4]	validation-rmse:5.55810                                                      
[5]	validation-rmse:5.40507                                                      
[6]	validation-rmse:5.31406                                                      
[7]	validation-rmse:5.25758                                                      
[8]	validation-rmse:5.22551                                                      
[9]	validation-rmse:5.20483                                                      
[10]	validation-rmse:5.19196                                                     
[11]	validation-rmse:5.18479                                                     
[12]	validation-





[0]	validation-rmse:6.58673                                                      
[1]	validation-rmse:5.75516                                                      
[2]	validation-rmse:5.52904                                                      
[3]	validation-rmse:5.44459                                                      
[4]	validation-rmse:5.39977                                                      
[5]	validation-rmse:5.37737                                                      
[6]	validation-rmse:5.36062                                                      
[7]	validation-rmse:5.34679                                                      
[8]	validation-rmse:5.34365                                                      
[9]	validation-rmse:5.33688                                                      
[10]	validation-rmse:5.33656                                                     
[11]	validation-rmse:5.33251                                                     
[12]	validation-





[0]	validation-rmse:8.86577                                                      
[1]	validation-rmse:8.45496                                                      
[2]	validation-rmse:8.08598                                                      
[3]	validation-rmse:7.75582                                                      
[4]	validation-rmse:7.46028                                                      
[5]	validation-rmse:7.19765                                                      
[6]	validation-rmse:6.96427                                                      
[7]	validation-rmse:6.75728                                                      
[8]	validation-rmse:6.57430                                                      
[9]	validation-rmse:6.41267                                                      
[10]	validation-rmse:6.27075                                                     
[11]	validation-rmse:6.14597                                                     
[12]	validation-





[0]	validation-rmse:8.74437                                                      
[1]	validation-rmse:8.24024                                                      
[2]	validation-rmse:7.80272                                                      
[3]	validation-rmse:7.42306                                                      
[4]	validation-rmse:7.09549                                                      
[5]	validation-rmse:6.81353                                                      
[6]	validation-rmse:6.57466                                                      
[7]	validation-rmse:6.36889                                                      
[8]	validation-rmse:6.19452                                                      
[9]	validation-rmse:6.04608                                                      
[10]	validation-rmse:5.92123                                                     
[11]	validation-rmse:5.81667                                                     
[12]	validation-





[0]	validation-rmse:8.84300                                                      
[1]	validation-rmse:8.41373                                                      
[2]	validation-rmse:8.03039                                                      
[3]	validation-rmse:7.68937                                                      
[4]	validation-rmse:7.38614                                                      
[5]	validation-rmse:7.11855                                                      
[6]	validation-rmse:6.88185                                                      
[7]	validation-rmse:6.67177                                                      
[8]	validation-rmse:6.48693                                                      
[9]	validation-rmse:6.32668                                                      
[10]	validation-rmse:6.18542                                                     
[11]	validation-rmse:6.06264                                                     
[12]	validation-





[0]	validation-rmse:8.79966                                                      
[1]	validation-rmse:8.33907                                                      
[2]	validation-rmse:7.93115                                                      
[3]	validation-rmse:7.57577                                                      
[4]	validation-rmse:7.26203                                                      
[5]	validation-rmse:6.99099                                                      
[6]	validation-rmse:6.75246                                                      
[7]	validation-rmse:6.54765                                                      
[8]	validation-rmse:6.36666                                                      
[9]	validation-rmse:6.21368                                                      
[10]	validation-rmse:6.08015                                                     
[11]	validation-rmse:5.96823                                                     
[12]	validation-




[0]	validation-rmse:9.01664                                                      
[1]	validation-rmse:8.73343                                                      
[2]	validation-rmse:8.47140                                                      
[3]	validation-rmse:8.22875                                                      
 48%|████▊     | 24/50 [18:42<31:16, 72.18s/trial, best loss: 5.1195708485333205]




[4]	validation-rmse:8.00384                                                      
[5]	validation-rmse:7.79741                                                      
[6]	validation-rmse:7.60642                                                      
[7]	validation-rmse:7.43052                                                      
[8]	validation-rmse:7.26872                                                      
[9]	validation-rmse:7.12030                                                      
[10]	validation-rmse:6.98414                                                     
[11]	validation-rmse:6.86025                                                     
[12]	validation-rmse:6.74625                                                     
[13]	validation-rmse:6.64184                                                     
[14]	validation-rmse:6.54608                                                     
[15]	validation-rmse:6.45951                                                     
[16]	validation-





[0]	validation-rmse:8.67576                                                      
[1]	validation-rmse:8.12305                                                      
[2]	validation-rmse:7.65321                                                      
[3]	validation-rmse:7.25675                                                      
[4]	validation-rmse:6.92334                                                      
[5]	validation-rmse:6.64433                                                      
[6]	validation-rmse:6.41275                                                      
[7]	validation-rmse:6.22010                                                      
[8]	validation-rmse:6.06133                                                      
[9]	validation-rmse:5.93038                                                      
[10]	validation-rmse:5.82255                                                     
[11]	validation-rmse:5.73420                                                     
[12]	validation-





[0]	validation-rmse:8.92920                                                      
[1]	validation-rmse:8.56914                                                      
[2]	validation-rmse:8.23995                                                      
[3]	validation-rmse:7.93935                                                      
[4]	validation-rmse:7.66533                                                      
[5]	validation-rmse:7.41617                                                      
[6]	validation-rmse:7.18995                                                      
[7]	validation-rmse:6.98521                                                      
[8]	validation-rmse:6.79965                                                      
[9]	validation-rmse:6.63266                                                      
[10]	validation-rmse:6.48169                                                     
[11]	validation-rmse:6.34575                                                     
[12]	validation-





[0]	validation-rmse:8.91330                                                     
[1]	validation-rmse:8.53968                                                     
[2]	validation-rmse:8.19918                                                     
[3]	validation-rmse:7.89014                                                     
[4]	validation-rmse:7.60924                                                     
[5]	validation-rmse:7.35578                                                     
[6]	validation-rmse:7.12525                                                     
[7]	validation-rmse:6.91898                                                     
[8]	validation-rmse:6.73512                                                     
[9]	validation-rmse:6.56658                                                     
[10]	validation-rmse:6.41606                                                    
[11]	validation-rmse:6.28361                                                    
[12]	validation-rmse:6.16198





[0]	validation-rmse:8.52603                                                     
[1]	validation-rmse:7.86819                                                     
[2]	validation-rmse:7.32932                                                     
[3]	validation-rmse:6.89083                                                     
[4]	validation-rmse:6.53712                                                     
[5]	validation-rmse:6.25510                                                     
[6]	validation-rmse:6.03024                                                     
[7]	validation-rmse:5.85052                                                     
[8]	validation-rmse:5.70788                                                     
[9]	validation-rmse:5.59658                                                     
[10]	validation-rmse:5.50893                                                    
[11]	validation-rmse:5.43891                                                    
[12]	validation-rmse:5.38564





[0]	validation-rmse:7.75230                                                     
[1]	validation-rmse:6.76138                                                     
[2]	validation-rmse:6.15903                                                     
[3]	validation-rmse:5.80143                                                     
[4]	validation-rmse:5.60113                                                     
[5]	validation-rmse:5.47439                                                     
[6]	validation-rmse:5.40863                                                     
[7]	validation-rmse:5.35213                                                     
[8]	validation-rmse:5.32395                                                     
[9]	validation-rmse:5.30421                                                     
[10]	validation-rmse:5.29425                                                    
[11]	validation-rmse:5.28741                                                    
[12]	validation-rmse:5.28149





[0]	validation-rmse:8.91050                                                     
[1]	validation-rmse:8.53367                                                     
[2]	validation-rmse:8.19038                                                     
[3]	validation-rmse:7.87786                                                     
[4]	validation-rmse:7.59428                                                     
[5]	validation-rmse:7.33723                                                     
[6]	validation-rmse:7.10420                                                     
[7]	validation-rmse:6.89488                                                     
[8]	validation-rmse:6.70591                                                     
[9]	validation-rmse:6.53661                                                     
[10]	validation-rmse:6.38467                                                    
[11]	validation-rmse:6.24871                                                    
[12]	validation-rmse:6.12782





[0]	validation-rmse:7.50564                                                     
[1]	validation-rmse:6.48604                                                     
[2]	validation-rmse:5.94479                                                     
[3]	validation-rmse:5.66763                                                     
[4]	validation-rmse:5.51855                                                     
[5]	validation-rmse:5.44353                                                     
[6]	validation-rmse:5.39705                                                     
[7]	validation-rmse:5.36625                                                     
[8]	validation-rmse:5.34543                                                     
[9]	validation-rmse:5.33152                                                     
[10]	validation-rmse:5.32071                                                    
[11]	validation-rmse:5.31515                                                    
[12]	validation-rmse:5.30975





[0]	validation-rmse:8.48846                                                     
[1]	validation-rmse:7.80575                                                     
[2]	validation-rmse:7.25425                                                     
[3]	validation-rmse:6.80833                                                     
[4]	validation-rmse:6.45556                                                     
[5]	validation-rmse:6.17023                                                     
[6]	validation-rmse:5.95473                                                     
[7]	validation-rmse:5.77716                                                     
[8]	validation-rmse:5.64382                                                     
[9]	validation-rmse:5.53962                                                     
[10]	validation-rmse:5.46192                                                    
[11]	validation-rmse:5.39955                                                    
[12]	validation-rmse:5.35221





[0]	validation-rmse:8.72920                                                     
[1]	validation-rmse:8.21357                                                     
[2]	validation-rmse:7.76903                                                     
[3]	validation-rmse:7.38738                                                     
[4]	validation-rmse:7.05782                                                     
[5]	validation-rmse:6.77417                                                     
[6]	validation-rmse:6.54350                                                     
[7]	validation-rmse:6.33404                                                     
[8]	validation-rmse:6.16658                                                     
[9]	validation-rmse:6.02328                                                     
[10]	validation-rmse:5.90945                                                    
[11]	validation-rmse:5.80529                                                    
[12]	validation-rmse:5.72245





[0]	validation-rmse:8.39474                                                     
[1]	validation-rmse:7.65197                                                     
[2]	validation-rmse:7.06419                                                     
[3]	validation-rmse:6.60546                                                     
[4]	validation-rmse:6.25230                                                     
[5]	validation-rmse:5.98272                                                     
[6]	validation-rmse:5.77767                                                     
[7]	validation-rmse:5.62173                                                     
[8]	validation-rmse:5.50525                                                     
[9]	validation-rmse:5.42021                                                     
[10]	validation-rmse:5.35750                                                    
[11]	validation-rmse:5.30959                                                    
[12]	validation-rmse:5.27540





[0]	validation-rmse:8.07757                                                     
[1]	validation-rmse:7.18214                                                     
[2]	validation-rmse:6.55379                                                     
[3]	validation-rmse:6.12221                                                     
[4]	validation-rmse:5.83074                                                     
[5]	validation-rmse:5.63433                                                     
[6]	validation-rmse:5.50543                                                     
[7]	validation-rmse:5.41465                                                     
[8]	validation-rmse:5.35354                                                     
[9]	validation-rmse:5.31073                                                     
[10]	validation-rmse:5.28224                                                    
[11]	validation-rmse:5.26016                                                    
[12]	validation-rmse:5.24542





[0]	validation-rmse:7.52138                                                     
[1]	validation-rmse:6.47786                                                     
[2]	validation-rmse:5.90778                                                     
[3]	validation-rmse:5.60405                                                     
[4]	validation-rmse:5.44433                                                     
[5]	validation-rmse:5.35499                                                     
[6]	validation-rmse:5.30612                                                     
[7]	validation-rmse:5.27257                                                     
[8]	validation-rmse:5.24961                                                     
[9]	validation-rmse:5.23790                                                     
[10]	validation-rmse:5.22880                                                    
[11]	validation-rmse:5.22248                                                    
[12]	validation-rmse:5.21510





[0]	validation-rmse:8.18763                                                     
[1]	validation-rmse:7.35443                                                     
[2]	validation-rmse:6.74183                                                     
[3]	validation-rmse:6.29911                                                     
[4]	validation-rmse:6.00068                                                     
[5]	validation-rmse:5.77103                                                     
[6]	validation-rmse:5.63138                                                     
[7]	validation-rmse:5.52625                                                     
[8]	validation-rmse:5.44694                                                     
[9]	validation-rmse:5.39674                                                     
[10]	validation-rmse:5.36538                                                    
[11]	validation-rmse:5.33252                                                    
[12]	validation-rmse:5.31792





[0]	validation-rmse:8.92655                                                     
[1]	validation-rmse:8.56721                                                     
[2]	validation-rmse:8.24119                                                     
[3]	validation-rmse:7.94632                                                     
[4]	validation-rmse:7.67257                                                     
[5]	validation-rmse:7.43129                                                     
[6]	validation-rmse:7.20584                                                     
[7]	validation-rmse:7.01138                                                     
[8]	validation-rmse:6.83073                                                     
[9]	validation-rmse:6.66976                                                     
[10]	validation-rmse:6.52499                                                    
[11]	validation-rmse:6.38849                                                    
[12]	validation-rmse:6.27526





[0]	validation-rmse:9.00821                                                     
[1]	validation-rmse:8.71627                                                     
[2]	validation-rmse:8.44511                                                     
[3]	validation-rmse:8.19359                                                     
[4]	validation-rmse:7.96054                                                     
[5]	validation-rmse:7.74483                                                     
[6]	validation-rmse:7.54555                                                     
[7]	validation-rmse:7.36155                                                     
[8]	validation-rmse:7.19196                                                     
[9]	validation-rmse:7.03588                                                     
[10]	validation-rmse:6.89224                                                    
[11]	validation-rmse:6.75995                                                    
[12]	validation-rmse:6.63863





[0]	validation-rmse:8.68796                                                      
[1]	validation-rmse:8.14334                                                      
[2]	validation-rmse:7.67873                                                      
[3]	validation-rmse:7.28369                                                      
[4]	validation-rmse:6.95140                                                      
[5]	validation-rmse:6.67047                                                      
[6]	validation-rmse:6.43446                                                      
[7]	validation-rmse:6.23953                                                      
[8]	validation-rmse:6.07607                                                      
[9]	validation-rmse:5.93763                                                      
[10]	validation-rmse:5.82442                                                     
[11]	validation-rmse:5.73009                                                     
[12]	validation-





[0]	validation-rmse:8.43686                                                      
[1]	validation-rmse:7.72313                                                      
[2]	validation-rmse:7.15339                                                      
[3]	validation-rmse:6.70448                                                      
[4]	validation-rmse:6.35227                                                      
[5]	validation-rmse:6.08033                                                      
[6]	validation-rmse:5.86953                                                      
[7]	validation-rmse:5.70795                                                      
[8]	validation-rmse:5.58456                                                      
[9]	validation-rmse:5.49079                                                      
[10]	validation-rmse:5.41837                                                     
[11]	validation-rmse:5.36308                                                     
[12]	validation-





[0]	validation-rmse:8.86151                                                     
[1]	validation-rmse:8.44939                                                     
[2]	validation-rmse:8.08155                                                     
[3]	validation-rmse:7.75408                                                     
[4]	validation-rmse:7.46367                                                     
[5]	validation-rmse:7.20659                                                     
[6]	validation-rmse:6.97991                                                     
[7]	validation-rmse:6.78030                                                     
[8]	validation-rmse:6.60479                                                     
[9]	validation-rmse:6.45089                                                     
[10]	validation-rmse:6.31668                                                    
[11]	validation-rmse:6.19958                                                    
[12]	validation-rmse:6.09781





[0]	validation-rmse:7.14954                                                      
[1]	validation-rmse:6.09000                                                      
[2]	validation-rmse:5.61185                                                      
[3]	validation-rmse:5.39583                                                      
[4]	validation-rmse:5.29955                                                      
[5]	validation-rmse:5.25011                                                      
[6]	validation-rmse:5.22177                                                      
[7]	validation-rmse:5.20144                                                      
[8]	validation-rmse:5.19628                                                      
[9]	validation-rmse:5.19039                                                      
[10]	validation-rmse:5.18948                                                     
[11]	validation-rmse:5.18466                                                     
[12]	validation-





[0]	validation-rmse:8.21957                                                     
[1]	validation-rmse:7.39154                                                     
[2]	validation-rmse:6.78290                                                     
[3]	validation-rmse:6.34345                                                     
[4]	validation-rmse:6.02959                                                     
[5]	validation-rmse:5.80815                                                     
[6]	validation-rmse:5.65335                                                     
[7]	validation-rmse:5.54545                                                     
[8]	validation-rmse:5.46537                                                     
[9]	validation-rmse:5.40931                                                     
[10]	validation-rmse:5.37094                                                    
[11]	validation-rmse:5.34138                                                    
[12]	validation-rmse:5.31986





[0]	validation-rmse:8.97623                                                     
[1]	validation-rmse:8.65719                                                     
[2]	validation-rmse:8.36391                                                     
[3]	validation-rmse:8.09466                                                     
[4]	validation-rmse:7.84715                                                     
[5]	validation-rmse:7.62017                                                     
[6]	validation-rmse:7.41307                                                     
[7]	validation-rmse:7.22323                                                     
[8]	validation-rmse:7.05043                                                     
[9]	validation-rmse:6.89337                                                     
[10]	validation-rmse:6.74982                                                    
[11]	validation-rmse:6.61951                                                    
[12]	validation-rmse:6.50022





[0]	validation-rmse:5.98126                                                     
[1]	validation-rmse:5.35313                                                     
[2]	validation-rmse:5.24703                                                     
[3]	validation-rmse:5.21328                                                     
[4]	validation-rmse:5.20533                                                     
[5]	validation-rmse:5.20176                                                     
[6]	validation-rmse:5.19784                                                     
[7]	validation-rmse:5.19582                                                     
[8]	validation-rmse:5.19548                                                     
[9]	validation-rmse:5.19289                                                     
[10]	validation-rmse:5.19077                                                    
[11]	validation-rmse:5.18922                                                    
[12]	validation-rmse:5.18801





[0]	validation-rmse:8.79048                                                     
[1]	validation-rmse:8.32201                                                     
[2]	validation-rmse:7.91135                                                     
[3]	validation-rmse:7.55263                                                     
[4]	validation-rmse:7.24061                                                     
[5]	validation-rmse:6.97024                                                     
[6]	validation-rmse:6.73585                                                     
[7]	validation-rmse:6.53469                                                     
[8]	validation-rmse:6.36149                                                     
[9]	validation-rmse:6.21273                                                     
[10]	validation-rmse:6.08564                                                    
[11]	validation-rmse:5.97763                                                    
[12]	validation-rmse:5.88546





[0]	validation-rmse:8.62016                                                     
[1]	validation-rmse:8.02645                                                     
[2]	validation-rmse:7.52760                                                     
[3]	validation-rmse:7.11007                                                     
[4]	validation-rmse:6.76464                                                     
[5]	validation-rmse:6.47974                                                     
[6]	validation-rmse:6.24565                                                     
[7]	validation-rmse:6.05600                                                     
[8]	validation-rmse:5.90170                                                     
[9]	validation-rmse:5.77533                                                     
[10]	validation-rmse:5.67318                                                    
[11]	validation-rmse:5.59177                                                    
[12]	validation-rmse:5.52449





[0]	validation-rmse:7.91303                                                     
[1]	validation-rmse:6.95978                                                     
[2]	validation-rmse:6.32727                                                     
[3]	validation-rmse:5.92380                                                     
[4]	validation-rmse:5.66991                                                     
[5]	validation-rmse:5.50473                                                     
[6]	validation-rmse:5.40704                                                     
[7]	validation-rmse:5.33940                                                     
[8]	validation-rmse:5.29346                                                     
[9]	validation-rmse:5.26351                                                     
[10]	validation-rmse:5.24350                                                    
[11]	validation-rmse:5.23107                                                    
[12]	validation-rmse:5.21479




In [21]:
# Switch MLflow's XGBoost autologging off; the run below logs its params,
# metric, preprocessor artifact and model through explicit mlflow calls.
mlflow.xgboost.autolog(disable=True)

In [22]:
import os

# Train one XGBoost booster with the parameter set in `best_params` and log
# everything (params, RMSE, fitted DictVectorizer, model) to a single MLflow run.
with mlflow.start_run():

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params = {
        'learning_rate': 0.06273447100027489,
        'max_depth': 52,
        'min_child_weight': 3.372889853990348,
        # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the
        # current name for the same squared-error regression objective.
        'objective': 'reg:squarederror',
        'reg_alpha': 0.06919062960886975,
        'reg_lambda': 0.024791137697948,
        'seed': 42
    }

    mlflow.log_params(best_params)

    # Up to 1000 boosting rounds; stop once the validation metric has not
    # improved for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the root explicitly gives the same RMSE on every version.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # Make sure the output directory exists so a fresh checkout does not fail
    # with FileNotFoundError when the preprocessor is written.
    os.makedirs("models", exist_ok=True)
    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



[0]	validation-rmse:8.92920
[1]	validation-rmse:8.56914
[2]	validation-rmse:8.23995
[3]	validation-rmse:7.93935
[4]	validation-rmse:7.66533
[5]	validation-rmse:7.41617
[6]	validation-rmse:7.18995
[7]	validation-rmse:6.98521
[8]	validation-rmse:6.79965
[9]	validation-rmse:6.63266
[10]	validation-rmse:6.48169
[11]	validation-rmse:6.34575
[12]	validation-rmse:6.22383
[13]	validation-rmse:6.11404
[14]	validation-rmse:6.01495
[15]	validation-rmse:5.92690
[16]	validation-rmse:5.84817
[17]	validation-rmse:5.77689
[18]	validation-rmse:5.71396
[19]	validation-rmse:5.65699
[20]	validation-rmse:5.60614
[21]	validation-rmse:5.56119
[22]	validation-rmse:5.52026
[23]	validation-rmse:5.48388
[24]	validation-rmse:5.45155
[25]	validation-rmse:5.42243
[26]	validation-rmse:5.39671
[27]	validation-rmse:5.37452
[28]	validation-rmse:5.35457
[29]	validation-rmse:5.33566
[30]	validation-rmse:5.31984
[31]	validation-rmse:5.30507
[32]	validation-rmse:5.29177
[33]	validation-rmse:5.28022
[34]	validation-rmse:5.2



In [25]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

# Let MLflow record estimator parameters and models for the sklearn fits below.
mlflow.sklearn.autolog()

# Fit each regressor with its default hyperparameters in its own MLflow run
# and log the validation RMSE so the runs can be compared side by side.
for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "./data/green_tripdata_2023-01.csv")
        mlflow.log_param("valid-data-path", "./data/green_tripdata_2023-02.csv")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        # All four estimators are stochastic and accept `random_state`;
        # fixing it makes the logged RMSE reproducible across re-runs.
        mlmodel = model_class(random_state=42)
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
        # taking the root explicitly gives the same RMSE on every version.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)
        



KeyboardInterrupt: 

In [26]:

# URI of the model artifact logged under "models_mlflow" in a specific MLflow run.
# NOTE(review): the run id is hard-coded; it must exist in the active tracking store.
logged_model = "runs:/920b8deac31a43cba7d748b5332733f7/models_mlflow"

# Load the artifact through MLflow's generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)


In [28]:
# Show the loaded model's repr (artifact path, flavor and run id) as the cell output.
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: 920b8deac31a43cba7d748b5332733f7