In [1]:
!python -V

Python 3.13.2


# Run MLFlow service with Sqlite backend

In [None]:
!mlflow ui --backend-store-uri sqlite:///mlflow.db

SyntaxError: invalid syntax (2109785657.py, line 1)

# Import and Setup MLFlow library

In [3]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("mlops-experiment")

<Experiment: artifact_location='/Users/sstamkulov/Documents/ml/jupyter_notebooks_archive/datatalks_ml_zoomcamp/module_2_mlflow/mlruns/1', creation_time=1748153971145, experiment_id='1', last_update_time=1748153971145, lifecycle_stage='active', name='mlops-experiment', tags={}>

# Train and Evaluate model

In [4]:
import pandas as pd

In [5]:
import pickle

In [6]:
import seaborn as sns
import matplotlib.pyplot as plt

In [15]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import root_mean_squared_error

In [None]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [27]:
train_file = '../data/green_tripdata_2025-01.parquet'
val_file = '../data/green_tripdata_2025-02.parquet'

df_train = read_dataframe(train_file)
df_val = read_dataframe(val_file)

In [10]:
len(df_train), len(df_val)

(46307, 44218)

In [11]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [12]:
categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
numerical = ['trip_distance']

dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [13]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [None]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

root_mean_squared_error(y_val, y_pred)

6.098871741054109

In [17]:
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [None]:
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", train_file)
    mlflow.log_param("valid-data-path", val_file)

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")

In [20]:
import xgboost as xgb

In [21]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [22]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [25]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [26]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

[0]	validation-rmse:7.36228                           
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.42930                           
[2]	validation-rmse:5.96119                           
[3]	validation-rmse:5.72710                           
[4]	validation-rmse:5.60752                           
[5]	validation-rmse:5.54008                           
[6]	validation-rmse:5.51165                           
[7]	validation-rmse:5.49392                           
[8]	validation-rmse:5.48451                           
[9]	validation-rmse:5.47454                           
[10]	validation-rmse:5.46993                          
[11]	validation-rmse:5.46716                          
[12]	validation-rmse:5.46599                          
[13]	validation-rmse:5.46557                          
[14]	validation-rmse:5.46580                          
[15]	validation-rmse:5.46457                          
[16]	validation-rmse:5.46440                          
[17]	validation-rmse:5.46204                          
[18]	validation-rmse:5.46064                          
[19]	valid

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.67639                                                    
[2]	validation-rmse:7.16716                                                    
[3]	validation-rmse:6.76328                                                    
[4]	validation-rmse:6.44801                                                    
[5]	validation-rmse:6.20255                                                    
[6]	validation-rmse:6.01144                                                    
[7]	validation-rmse:5.86590                                                    
[8]	validation-rmse:5.75407                                                    
[9]	validation-rmse:5.66970                                                    
[10]	validation-rmse:5.60485                                                   
[11]	validation-rmse:5.55757                                                   
[12]	validation-rmse:5.52099                                                   
[13]	validation-rmse:5.49167            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.28902                                                    
[2]	validation-rmse:7.94321                                                    
[3]	validation-rmse:7.63595                                                    
[4]	validation-rmse:7.36260                                                    
[5]	validation-rmse:7.12000                                                    
[6]	validation-rmse:6.90360                                                    
[7]	validation-rmse:6.71418                                                    
[8]	validation-rmse:6.54728                                                    
[9]	validation-rmse:6.40048                                                    
[10]	validation-rmse:6.27141                                                   
[11]	validation-rmse:6.15754                                                   
[12]	validation-rmse:6.05845                                                   
[13]	validation-rmse:5.97110            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.37252                                                    
[1]	validation-rmse:5.70559                                                    
[2]	validation-rmse:5.55838                                                    
[3]	validation-rmse:5.52600                                                    
[4]	validation-rmse:5.50840                                                    
[5]	validation-rmse:5.49966                                                    
[6]	validation-rmse:5.49861                                                    
[7]	validation-rmse:5.48624                                                    
[8]	validation-rmse:5.48578                                                    
[9]	validation-rmse:5.48849                                                    
[10]	validation-rmse:5.48972                                                   
[11]	validation-rmse:5.49115                                                   
[12]	validation-rmse:5.48769            

  self.starting_round = model.num_boosted_rounds()



[20]	validation-rmse:5.60202                                                   
[21]	validation-rmse:5.59729                                                   
[22]	validation-rmse:5.59411                                                   
[23]	validation-rmse:5.59224                                                   
[24]	validation-rmse:5.58808                                                   
[25]	validation-rmse:5.58522                                                   
[26]	validation-rmse:5.58197                                                   
[27]	validation-rmse:5.58076                                                   
[28]	validation-rmse:5.57616                                                   
[29]	validation-rmse:5.57407                                                   
[30]	validation-rmse:5.57299                                                   
[31]	validation-rmse:5.57186                                                   
[32]	validation-rmse:5.57021            

  self.starting_round = model.num_boosted_rounds()



[18]	validation-rmse:6.07240                                                   
[19]	validation-rmse:6.02679                                                   
[20]	validation-rmse:5.98407                                                   
[21]	validation-rmse:5.94685                                                   
[22]	validation-rmse:5.91337                                                   
[23]	validation-rmse:5.88292                                                   
[24]	validation-rmse:5.85656                                                   
[25]	validation-rmse:5.83217                                                   
[26]	validation-rmse:5.81019                                                   
[27]	validation-rmse:5.79111                                                   
[28]	validation-rmse:5.77362                                                   
[29]	validation-rmse:5.75678                                                   
[30]	validation-rmse:5.74263            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.51040                                                    
[1]	validation-rmse:8.00049                                                    
[2]	validation-rmse:7.56602                                                    
[3]	validation-rmse:7.19788                                                    
[4]	validation-rmse:6.88693                                                    
[5]	validation-rmse:6.62707                                                    
[6]	validation-rmse:6.41023                                                    
[7]	validation-rmse:6.23063                                                    
[8]	validation-rmse:6.08071                                                    
[9]	validation-rmse:5.95837                                                    
[10]	validation-rmse:5.85768                                                   
[11]	validation-rmse:5.77442                                                   
[12]	validation-rmse:5.70634            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.36506                                                    
[1]	validation-rmse:7.76245                                                    
[2]	validation-rmse:7.27970                                                    
[3]	validation-rmse:6.89716                                                    
[4]	validation-rmse:6.57704                                                    
[5]	validation-rmse:6.33703                                                    
[6]	validation-rmse:6.14976                                                    
[7]	validation-rmse:5.99834                                                    
[8]	validation-rmse:5.87638                                                    
[9]	validation-rmse:5.79431                                                    
[10]	validation-rmse:5.72253                                                   
[11]	validation-rmse:5.66684                                                   
[12]	validation-rmse:5.61620            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:5.53419                                                    
[2]	validation-rmse:5.49220                                                    
[3]	validation-rmse:5.47699                                                    
[4]	validation-rmse:5.45293                                                    
[5]	validation-rmse:5.44768                                                    
[6]	validation-rmse:5.43145                                                    
[7]	validation-rmse:5.43038                                                    
[8]	validation-rmse:5.42585                                                    
[9]	validation-rmse:5.41997                                                    
[10]	validation-rmse:5.41666                                                   
[11]	validation-rmse:5.41537                                                   
[12]	validation-rmse:5.41366                                                   
[13]	validation-rmse:5.41407            

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:5.53513                                                    
[3]	validation-rmse:5.50493                                                    
[4]	validation-rmse:5.48994                                                    
[5]	validation-rmse:5.47539                                                    
[6]	validation-rmse:5.45924                                                    
[7]	validation-rmse:5.45466                                                    
[8]	validation-rmse:5.45244                                                    
[9]	validation-rmse:5.44940                                                    
[10]	validation-rmse:5.44979                                                   
[11]	validation-rmse:5.44690                                                   
[12]	validation-rmse:5.43413                                                   
[13]	validation-rmse:5.43405                                                   
[14]	validation-rmse:5.42522            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.58004                                                     
[1]	validation-rmse:8.12020                                                     
[2]	validation-rmse:7.72401                                                     
[3]	validation-rmse:7.37938                                                     
[4]	validation-rmse:7.08485                                                     
[5]	validation-rmse:6.83479                                                     
[6]	validation-rmse:6.62121                                                     
[7]	validation-rmse:6.44702                                                     
[8]	validation-rmse:6.29660                                                     
[9]	validation-rmse:6.17069                                                     
[10]	validation-rmse:6.05863                                                    
[11]	validation-rmse:5.96311                                                    
[12]	validation-rmse:5.88418

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:7.14119                                                     
[4]	validation-rmse:6.83696                                                     
[5]	validation-rmse:6.58636                                                     
[6]	validation-rmse:6.38090                                                     
[7]	validation-rmse:6.21296                                                     
[8]	validation-rmse:6.07490                                                     
[9]	validation-rmse:5.96373                                                     
[10]	validation-rmse:5.87405                                                    
[11]	validation-rmse:5.80128                                                    
[12]	validation-rmse:5.74183                                                    
[13]	validation-rmse:5.69392                                                    
[14]	validation-rmse:5.65516                                                    
[15]	validation-rmse:5.62366

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.72550                                                     
[1]	validation-rmse:8.37943                                                     
[2]	validation-rmse:8.06502                                                     
[3]	validation-rmse:7.77996                                                     
[4]	validation-rmse:7.52230                                                     
[5]	validation-rmse:7.29069                                                     
[6]	validation-rmse:7.08098                                                     
[7]	validation-rmse:6.89307                                                     
[8]	validation-rmse:6.72421                                                     
[9]	validation-rmse:6.57345                                                     
[10]	validation-rmse:6.43955                                                    
[11]	validation-rmse:6.31983                                                    
[12]	validation-rmse:6.21247

  self.starting_round = model.num_boosted_rounds()



[25]	validation-rmse:5.58976                                                    
[26]	validation-rmse:5.58731                                                    
[27]	validation-rmse:5.58540                                                    
[28]	validation-rmse:5.58024                                                    
[29]	validation-rmse:5.57880                                                    
[30]	validation-rmse:5.57766                                                    
[31]	validation-rmse:5.57625                                                    
[32]	validation-rmse:5.57475                                                    
[33]	validation-rmse:5.57329                                                    
[34]	validation-rmse:5.57211                                                    
[35]	validation-rmse:5.57132                                                    
[36]	validation-rmse:5.56983                                                    
[37]	validation-rmse:5.56893

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.18851                                                     
[2]	validation-rmse:7.81039                                                     
[3]	validation-rmse:7.47866                                                     
[4]	validation-rmse:7.19108                                                     
[5]	validation-rmse:6.93843                                                     
[6]	validation-rmse:6.72026                                                     
[7]	validation-rmse:6.53095                                                     
[8]	validation-rmse:6.36868                                                     
[9]	validation-rmse:6.22796                                                     
[10]	validation-rmse:6.10710                                                    
[11]	validation-rmse:6.00225                                                    
[12]	validation-rmse:5.91300                                                    
[13]	validation-rmse:5.83563

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:5.80981                                                     
[3]	validation-rmse:5.65299                                                     
[4]	validation-rmse:5.57633                                                     
[5]	validation-rmse:5.54210                                                     
[6]	validation-rmse:5.51854                                                     
[7]	validation-rmse:5.50122                                                     
[8]	validation-rmse:5.49011                                                     
[9]	validation-rmse:5.47269                                                     
[10]	validation-rmse:5.46557                                                    
[11]	validation-rmse:5.46370                                                    
[12]	validation-rmse:5.45197                                                    
[13]	validation-rmse:5.44970                                                    
[14]	validation-rmse:5.44786

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:7.98067                                                     
[4]	validation-rmse:7.75419                                                     
[5]	validation-rmse:7.54639                                                     
[6]	validation-rmse:7.35622                                                     
[7]	validation-rmse:7.18205                                                     
[8]	validation-rmse:7.02200                                                     
[9]	validation-rmse:6.87650                                                     
[10]	validation-rmse:6.74440                                                    
[11]	validation-rmse:6.62268                                                    
[12]	validation-rmse:6.51295                                                    
[13]	validation-rmse:6.41220                                                    
[14]	validation-rmse:6.32147                                                    
[15]	validation-rmse:6.23921

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:5.44328                                                     
[3]	validation-rmse:5.43107                                                     
[4]	validation-rmse:5.43142                                                     
[5]	validation-rmse:5.42441                                                     
[6]	validation-rmse:5.42123                                                     
[7]	validation-rmse:5.41649                                                     
[8]	validation-rmse:5.41069                                                     
[9]	validation-rmse:5.40968                                                     
[10]	validation-rmse:5.41153                                                    
[11]	validation-rmse:5.40851                                                    
[12]	validation-rmse:5.40386                                                    
[13]	validation-rmse:5.40126                                                    
[14]	validation-rmse:5.40205

  self.starting_round = model.num_boosted_rounds()



[7]	validation-rmse:6.39351                                                     
[8]	validation-rmse:6.24230                                                     
[9]	validation-rmse:6.11982                                                     
[10]	validation-rmse:6.01337                                                    
[11]	validation-rmse:5.92466                                                    
[12]	validation-rmse:5.85280                                                    
[13]	validation-rmse:5.79222                                                    
[14]	validation-rmse:5.74084                                                    
[15]	validation-rmse:5.69681                                                    
[16]	validation-rmse:5.66027                                                    
[17]	validation-rmse:5.62898                                                    
[18]	validation-rmse:5.60164                                                    
[19]	validation-rmse:5.57746

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:5.71547                                                      
[5]	validation-rmse:5.65536                                                      
[6]	validation-rmse:5.62172                                                      
[7]	validation-rmse:5.60219                                                      
[8]	validation-rmse:5.59020                                                      
[9]	validation-rmse:5.57360                                                      
[10]	validation-rmse:5.55877                                                     
[11]	validation-rmse:5.54049                                                     
[12]	validation-rmse:5.53009                                                     
[13]	validation-rmse:5.52363                                                     
[14]	validation-rmse:5.51932                                                     
[15]	validation-rmse:5.51723                                                     
[16]	validation-

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:7.80725                                                      
[5]	validation-rmse:7.60519                                                      
[6]	validation-rmse:7.42028                                                      
[7]	validation-rmse:7.24957                                                      
[8]	validation-rmse:7.09365                                                      
[9]	validation-rmse:6.94826                                                      
[10]	validation-rmse:6.81704                                                     
[11]	validation-rmse:6.69572                                                     
[12]	validation-rmse:6.58697                                                     
[13]	validation-rmse:6.48582                                                     
[14]	validation-rmse:6.39428                                                     
[15]	validation-rmse:6.31063                                                     
[16]	validation-

  self.starting_round = model.num_boosted_rounds()



[5]	validation-rmse:7.65255                                                      
[6]	validation-rmse:7.47001                                                      
[7]	validation-rmse:7.30134                                                      
[8]	validation-rmse:7.14525                                                      
[9]	validation-rmse:7.00247                                                      
[10]	validation-rmse:6.87065                                                     
[11]	validation-rmse:6.75119                                                     
[12]	validation-rmse:6.64012                                                     
[13]	validation-rmse:6.53770                                                     
[14]	validation-rmse:6.44415                                                     
[15]	validation-rmse:6.36028                                                     
[16]	validation-rmse:6.28143                                                     
[17]	validation-

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:6.93847                                                      
[3]	validation-rmse:6.53443                                                      
[4]	validation-rmse:6.23640                                                      
[5]	validation-rmse:6.01533                                                      
[6]	validation-rmse:5.85610                                                      
[7]	validation-rmse:5.73967                                                      
[8]	validation-rmse:5.65403                                                      
[9]	validation-rmse:5.58993                                                      
[10]	validation-rmse:5.54366                                                     
[11]	validation-rmse:5.50804                                                     
[12]	validation-rmse:5.48264                                                     
[13]	validation-rmse:5.46182                                                     
[14]	validation-

  self.starting_round = model.num_boosted_rounds()



[6]	validation-rmse:5.76735                                                      
[7]	validation-rmse:5.68117                                                      
[8]	validation-rmse:5.62060                                                      
[9]	validation-rmse:5.57860                                                      
[10]	validation-rmse:5.54696                                                     
[11]	validation-rmse:5.52006                                                     
[12]	validation-rmse:5.50221                                                     
[13]	validation-rmse:5.49034                                                     
[14]	validation-rmse:5.47657                                                     
[15]	validation-rmse:5.46992                                                     
[16]	validation-rmse:5.46650                                                     
[17]	validation-rmse:5.46034                                                     
[18]	validation-

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:7.62942                                                      
[3]	validation-rmse:7.27282                                                      
[4]	validation-rmse:6.96892                                                      
[5]	validation-rmse:6.71265                                                      
[6]	validation-rmse:6.49560                                                      
[7]	validation-rmse:6.31403                                                      
[8]	validation-rmse:6.16083                                                      
[9]	validation-rmse:6.03234                                                      
[10]	validation-rmse:5.92436                                                     
[11]	validation-rmse:5.83607                                                     
[12]	validation-rmse:5.75943                                                     
[13]	validation-rmse:5.69822                                                     
[14]	validation-

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:7.23877                                                      
[9]	validation-rmse:7.09947                                                      
[10]	validation-rmse:6.97281                                                     
[11]	validation-rmse:6.85411                                                     
[12]	validation-rmse:6.74614                                                     
[13]	validation-rmse:6.64692                                                     
[14]	validation-rmse:6.55427                                                     
[15]	validation-rmse:6.47140                                                     
[16]	validation-rmse:6.39455                                                     
[17]	validation-rmse:6.32164                                                     
[18]	validation-rmse:6.25747                                                     
[19]	validation-rmse:6.19819                                                     
[20]	validation-

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.39274                                                      
[2]	validation-rmse:8.08398                                                      
[3]	validation-rmse:7.80415                                                      
[4]	validation-rmse:7.55087                                                      
[5]	validation-rmse:7.32263                                                      
[6]	validation-rmse:7.11653                                                      
[7]	validation-rmse:6.93131                                                      
[8]	validation-rmse:6.76436                                                      
[9]	validation-rmse:6.61549                                                      
[10]	validation-rmse:6.48189                                                     
[11]	validation-rmse:6.36286                                                     
[12]	validation-rmse:6.25679                                                     
[13]	validation-

  self.starting_round = model.num_boosted_rounds()



[16]	validation-rmse:5.51373                                                     
[17]	validation-rmse:5.51121                                                     
[18]	validation-rmse:5.50764                                                     
[19]	validation-rmse:5.50402                                                     
[20]	validation-rmse:5.50295                                                     
[21]	validation-rmse:5.50074                                                     
[22]	validation-rmse:5.49831                                                     
[23]	validation-rmse:5.49679                                                     
[24]	validation-rmse:5.49395                                                     
[25]	validation-rmse:5.49297                                                     
[26]	validation-rmse:5.49184                                                     
[27]	validation-rmse:5.49011                                                     
[28]	validation-

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:5.57415                                                     
[9]	validation-rmse:5.54616                                                     
[10]	validation-rmse:5.52457                                                    
[11]	validation-rmse:5.51043                                                    
[12]	validation-rmse:5.49704                                                    
[13]	validation-rmse:5.49053                                                    
[14]	validation-rmse:5.48628                                                    
[15]	validation-rmse:5.47925                                                    
[16]	validation-rmse:5.47372                                                    
[17]	validation-rmse:5.47040                                                    
[18]	validation-rmse:5.46384                                                    
[19]	validation-rmse:5.46192                                                    
[20]	validation-rmse:5.45955

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:6.21490                                                     
[3]	validation-rmse:5.91398                                                     
[4]	validation-rmse:5.74311                                                     
[5]	validation-rmse:5.65003                                                     
[6]	validation-rmse:5.58003                                                     
[7]	validation-rmse:5.54540                                                     
[8]	validation-rmse:5.51842                                                     
[9]	validation-rmse:5.50355                                                     
[10]	validation-rmse:5.49347                                                    
[11]	validation-rmse:5.48562                                                    
[12]	validation-rmse:5.48148                                                    
[13]	validation-rmse:5.47943                                                    
[14]	validation-rmse:5.47946

  self.starting_round = model.num_boosted_rounds()



[23]	validation-rmse:5.59492
[24]	validation-rmse:5.59194                                                    
[25]	validation-rmse:5.58133                                                    
[26]	validation-rmse:5.57880                                                    
[27]	validation-rmse:5.57556                                                    
[28]	validation-rmse:5.57353                                                    
[29]	validation-rmse:5.56955                                                    
[30]	validation-rmse:5.56735                                                    
[31]	validation-rmse:5.56096                                                    
[32]	validation-rmse:5.55831                                                    
[33]	validation-rmse:5.55721                                                    
[34]	validation-rmse:5.55447                                                    
[35]	validation-rmse:5.55131                                                    

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.19376                                                     
[2]	validation-rmse:6.62574                                                     
[3]	validation-rmse:6.23068                                                     
[4]	validation-rmse:5.95744                                                     
[5]	validation-rmse:5.77254                                                     
[6]	validation-rmse:5.64341                                                     
[7]	validation-rmse:5.55999                                                     
[8]	validation-rmse:5.50226                                                     
[9]	validation-rmse:5.46288                                                     
[10]	validation-rmse:5.43299                                                    
[11]	validation-rmse:5.40902                                                    
[12]	validation-rmse:5.39275                                                    
[13]	validation-rmse:5.38189

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.44406                                                     
[1]	validation-rmse:6.49149                                                     
[2]	validation-rmse:5.97163                                                     
[3]	validation-rmse:5.68847                                                     
[4]	validation-rmse:5.54445                                                     
[5]	validation-rmse:5.46367                                                     
[6]	validation-rmse:5.41782                                                     
[7]	validation-rmse:5.39470                                                     
[8]	validation-rmse:5.38015                                                     
[9]	validation-rmse:5.36677                                                     
[10]	validation-rmse:5.36164                                                    
[11]	validation-rmse:5.35847                                                    
[12]	validation-rmse:5.35506

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.65546                                                     
[2]	validation-rmse:7.15528                                                     
[3]	validation-rmse:6.76537                                                     
[4]	validation-rmse:6.44880                                                     
[5]	validation-rmse:6.21361                                                     
[6]	validation-rmse:6.04046                                                     
[7]	validation-rmse:5.90018                                                     
[8]	validation-rmse:5.79591                                                     
[9]	validation-rmse:5.71815                                                     
[10]	validation-rmse:5.65449                                                    
[11]	validation-rmse:5.61028                                                    
[12]	validation-rmse:5.57993                                                    
[13]	validation-rmse:5.55028

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:5.71428                                                     
[9]	validation-rmse:5.65977                                                     
[10]	validation-rmse:5.62019                                                    
[11]	validation-rmse:5.58725                                                    
[12]	validation-rmse:5.56106                                                    
[13]	validation-rmse:5.54348                                                    
[14]	validation-rmse:5.52806                                                    
[15]	validation-rmse:5.51518                                                    
[16]	validation-rmse:5.50823                                                    
[17]	validation-rmse:5.49678                                                    
[18]	validation-rmse:5.49108                                                    
[19]	validation-rmse:5.48784                                                    
[20]	validation-rmse:5.48347

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.04379                                                     
[2]	validation-rmse:5.66782                                                     
[3]	validation-rmse:5.51231                                                     
[4]	validation-rmse:5.45202                                                     
[5]	validation-rmse:5.42130                                                     
[6]	validation-rmse:5.40403                                                     
[7]	validation-rmse:5.39414                                                     
[8]	validation-rmse:5.39039                                                     
[9]	validation-rmse:5.38365                                                     
[10]	validation-rmse:5.37612                                                    
[11]	validation-rmse:5.36844                                                    
[12]	validation-rmse:5.36643                                                    
[13]	validation-rmse:5.36551

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.91404                                                     
[2]	validation-rmse:7.46163                                                     
[3]	validation-rmse:7.09651                                                     
[4]	validation-rmse:6.78512                                                     
[5]	validation-rmse:6.54510                                                     
[6]	validation-rmse:6.33467                                                     
[7]	validation-rmse:6.17192                                                     
[8]	validation-rmse:6.04042                                                     
[9]	validation-rmse:5.93306                                                     
[10]	validation-rmse:5.84183                                                    
[11]	validation-rmse:5.77464                                                    
[12]	validation-rmse:5.71078                                                    
[13]	validation-rmse:5.66744

  self.starting_round = model.num_boosted_rounds()



[21]	validation-rmse:5.72690                                                    
[22]	validation-rmse:5.71983                                                    
[23]	validation-rmse:5.71520                                                    
[24]	validation-rmse:5.71082                                                    
[25]	validation-rmse:5.70332                                                    
[26]	validation-rmse:5.69986                                                    
[27]	validation-rmse:5.69482                                                    
[28]	validation-rmse:5.69275                                                    
[29]	validation-rmse:5.68985                                                    
[30]	validation-rmse:5.68891                                                    
[31]	validation-rmse:5.68855                                                    
[32]	validation-rmse:5.68802                                                    
[33]	validation-rmse:5.68676

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:5.54856                                                     
[4]	validation-rmse:5.52028                                                     
[5]	validation-rmse:5.50111                                                     
[6]	validation-rmse:5.47770                                                     
[7]	validation-rmse:5.47322                                                     
[8]	validation-rmse:5.46965                                                     
[9]	validation-rmse:5.46685                                                     
[10]	validation-rmse:5.45987                                                    
[11]	validation-rmse:5.45684                                                    
[12]	validation-rmse:5.44977                                                    
[13]	validation-rmse:5.44839                                                    
[14]	validation-rmse:5.44672                                                    
[15]	validation-rmse:5.44498

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:7.90088                                                     
[3]	validation-rmse:7.58671                                                     
[4]	validation-rmse:7.31026                                                     
[5]	validation-rmse:7.06652                                                     
[6]	validation-rmse:6.85415                                                     
[7]	validation-rmse:6.66566                                                     
[8]	validation-rmse:6.50055                                                     
[9]	validation-rmse:6.35719                                                     
[10]	validation-rmse:6.23299                                                    
[11]	validation-rmse:6.12321                                                    
[12]	validation-rmse:6.02830                                                    
[13]	validation-rmse:5.94600                                                    
[14]	validation-rmse:5.87574

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.61034                                                     
[1]	validation-rmse:6.67886                                                     
[2]	validation-rmse:6.12406                                                     
[3]	validation-rmse:5.80304                                                     
[4]	validation-rmse:5.61956                                                     
[5]	validation-rmse:5.51968                                                     
[6]	validation-rmse:5.46358                                                     
[7]	validation-rmse:5.43378                                                     
[8]	validation-rmse:5.41465                                                     
[9]	validation-rmse:5.40617                                                     
[10]	validation-rmse:5.39997                                                    
[11]	validation-rmse:5.39637                                                    
[12]	validation-rmse:5.39263

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:7.35747                                                     
[4]	validation-rmse:7.06114                                                     
[5]	validation-rmse:6.80843                                                     
[6]	validation-rmse:6.59252                                                     
[7]	validation-rmse:6.41002                                                     
[8]	validation-rmse:6.25602                                                     
[9]	validation-rmse:6.12551                                                     
[10]	validation-rmse:6.01505                                                    
[11]	validation-rmse:5.92254                                                    
[12]	validation-rmse:5.84395                                                    
[13]	validation-rmse:5.77699                                                    
[14]	validation-rmse:5.72154                                                    
[15]	validation-rmse:5.67442

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.59086                                                     
[2]	validation-rmse:7.06426                                                     
[3]	validation-rmse:6.65528                                                     
[4]	validation-rmse:6.33980                                                     
[5]	validation-rmse:6.10010                                                     
[6]	validation-rmse:5.91566                                                     
[7]	validation-rmse:5.77763                                                     
[8]	validation-rmse:5.67528                                                     
[9]	validation-rmse:5.59840                                                     
[10]	validation-rmse:5.53947                                                    
[11]	validation-rmse:5.49627                                                    
[12]	validation-rmse:5.46321                                                    
[13]	validation-rmse:5.43864

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:5.97264                                                     
[5]	validation-rmse:5.80438                                                     
[6]	validation-rmse:5.68798                                                     
[7]	validation-rmse:5.61349                                                     
[8]	validation-rmse:5.55798                                                     
[9]	validation-rmse:5.51952                                                     
[10]	validation-rmse:5.49461                                                    
[11]	validation-rmse:5.47247                                                    
[12]	validation-rmse:5.45861                                                    
[13]	validation-rmse:5.44588                                                    
[14]	validation-rmse:5.43812                                                    
[15]	validation-rmse:5.43255                                                    
[16]	validation-rmse:5.42774

  self.starting_round = model.num_boosted_rounds()



[19]	validation-rmse:5.91133                                                    
[20]	validation-rmse:5.87526                                                    
[21]	validation-rmse:5.84341                                                    
[22]	validation-rmse:5.81413                                                    
[23]	validation-rmse:5.78902                                                    
[24]	validation-rmse:5.76705                                                    
[25]	validation-rmse:5.74732                                                    
[26]	validation-rmse:5.72893                                                    
[27]	validation-rmse:5.71232                                                    
[28]	validation-rmse:5.69791                                                    
[29]	validation-rmse:5.68490                                                    
[30]	validation-rmse:5.67314                                                    
[31]	validation-rmse:5.66148

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.37574                                                     
[1]	validation-rmse:7.78502                                                     
[2]	validation-rmse:7.29274                                                     
[3]	validation-rmse:6.91350                                                     
[4]	validation-rmse:6.61477                                                     
[5]	validation-rmse:6.35674                                                     
[6]	validation-rmse:6.17019                                                     
[7]	validation-rmse:6.02058                                                     
[8]	validation-rmse:5.90506                                                     
[9]	validation-rmse:5.80713                                                     
[10]	validation-rmse:5.73976                                                    
[11]	validation-rmse:5.68475                                                    
[12]	validation-rmse:5.63312

  self.starting_round = model.num_boosted_rounds()



[17]	validation-rmse:5.46528                                                    
[18]	validation-rmse:5.46494                                                    
[19]	validation-rmse:5.45658                                                    
[20]	validation-rmse:5.45587                                                    
[21]	validation-rmse:5.45436                                                    
[22]	validation-rmse:5.45165                                                    
[23]	validation-rmse:5.44734                                                    
[24]	validation-rmse:5.44604                                                    
[25]	validation-rmse:5.44506                                                    
[26]	validation-rmse:5.44331                                                    
[27]	validation-rmse:5.44119                                                    
[28]	validation-rmse:5.44041                                                    
[29]	validation-rmse:5.43840

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.36629                                                     
[2]	validation-rmse:6.81865                                                     
[3]	validation-rmse:6.42193                                                     
[4]	validation-rmse:6.13731                                                     
[5]	validation-rmse:5.93441                                                     
[6]	validation-rmse:5.79336                                                     
[7]	validation-rmse:5.69625                                                     
[8]	validation-rmse:5.62531                                                     
[9]	validation-rmse:5.57595                                                     
[10]	validation-rmse:5.54167                                                    
[11]	validation-rmse:5.51563                                                    
[12]	validation-rmse:5.49770                                                    
[13]	validation-rmse:5.48271

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:5.75782                                                     
[4]	validation-rmse:5.63784                                                     
[5]	validation-rmse:5.57783                                                     
[6]	validation-rmse:5.52729                                                     
[7]	validation-rmse:5.50883                                                     
[8]	validation-rmse:5.49260                                                     
[9]	validation-rmse:5.48536                                                     
[10]	validation-rmse:5.48358                                                    
[11]	validation-rmse:5.48439                                                    
[12]	validation-rmse:5.48563                                                    
[13]	validation-rmse:5.48574                                                    
[14]	validation-rmse:5.48086                                                    
[15]	validation-rmse:5.47995

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:5.58324                                                     
[4]	validation-rmse:5.54042                                                     
[5]	validation-rmse:5.51767                                                     
[6]	validation-rmse:5.50187                                                     
[7]	validation-rmse:5.47968                                                     
[8]	validation-rmse:5.47584                                                     
[9]	validation-rmse:5.46958                                                     
[10]	validation-rmse:5.46551                                                    
[11]	validation-rmse:5.46196                                                    
[12]	validation-rmse:5.45890                                                    
[13]	validation-rmse:5.45689                                                    
[14]	validation-rmse:5.45510                                                    
[15]	validation-rmse:5.45343

In [28]:
mlflow.xgboost.autolog(disable=True)

In [30]:
with mlflow.start_run():
    
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        'objective': 'reg:linear',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    mlflow.log_params(best_params)

    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

[0]	validation-rmse:8.56745


  self.starting_round = model.num_boosted_rounds()


[1]	validation-rmse:8.10033
[2]	validation-rmse:7.69739
[3]	validation-rmse:7.35105
[4]	validation-rmse:7.05239
[5]	validation-rmse:6.80020
[6]	validation-rmse:6.58162
[7]	validation-rmse:6.39726
[8]	validation-rmse:6.24197
[9]	validation-rmse:6.11059
[10]	validation-rmse:6.00193
[11]	validation-rmse:5.90700
[12]	validation-rmse:5.82752
[13]	validation-rmse:5.76123
[14]	validation-rmse:5.70306
[15]	validation-rmse:5.65560
[16]	validation-rmse:5.61619
[17]	validation-rmse:5.58065
[18]	validation-rmse:5.55157
[19]	validation-rmse:5.52734
[20]	validation-rmse:5.50858
[21]	validation-rmse:5.48872
[22]	validation-rmse:5.47379
[23]	validation-rmse:5.45957
[24]	validation-rmse:5.44705
[25]	validation-rmse:5.43706
[26]	validation-rmse:5.42713
[27]	validation-rmse:5.41891
[28]	validation-rmse:5.41283
[29]	validation-rmse:5.40819
[30]	validation-rmse:5.40244
[31]	validation-rmse:5.39854
[32]	validation-rmse:5.39446
[33]	validation-rmse:5.39004
[34]	validation-rmse:5.38685
[35]	validation-rmse:5.

  xgb_model.save_model(model_data_path)


In [32]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

mlflow.sklearn.autolog()

for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.csv")
        mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.csv")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)
        

TypeError: got an unexpected keyword argument 'squared'