In [1]:
!python -V

Python 3.12.7


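# Run the MLflow UI with a SQLite backend

Run this command in a separate terminal: `mlflow ui` keeps running in the foreground, so executing it in a notebook cell blocks the kernel until it is interrupted.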

In [None]:
!mlflow ui --backend-store-uri sqlite:///mlflow.db

SyntaxError: invalid syntax (2109785657.py, line 1)

# Import and set up the MLflow library

In [2]:
import mlflow

# Tracking store and experiment used by every run logged from this notebook.
TRACKING_URI = "sqlite:///mlflow.db"
EXPERIMENT_NAME = "mlops-experiment"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the cell's last expression so the Experiment object is displayed.
mlflow.set_experiment(EXPERIMENT_NAME)

2025/05/25 11:52:31 INFO mlflow.tracking.fluent: Experiment with name 'mlops-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/ubuntu/jupyter_notebooks_archive/datatalks_mlops_zoomcamp/module_2_mlflow/mlruns/1', creation_time=1748173951772, experiment_id='1', last_update_time=1748173951772, lifecycle_stage='active', name='mlops-experiment', tags={}>

# Train and evaluate the model

In [3]:
import pandas as pd

In [4]:
import os
import pickle

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import root_mean_squared_error

In [7]:
def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for duration modelling.

    Parameters
    ----------
    filename : str or path-like
        Location of the parquet file; passed unchanged to ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        Only the trips whose duration is between 1 and 60 minutes (inclusive),
        with an added ``duration`` column (minutes, float) and the
        ``PULocationID`` / ``DOLocationID`` columns cast to strings.
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes conversion; no Python call per row.
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # .copy() detaches the filtered rows from the full frame, so the column
    # assignment below writes to an independent frame rather than to a slice
    # (which triggers SettingWithCopyWarning on classic pandas).
    within_range = (df['duration'] >= 1) & (df['duration'] <= 60)
    df = df[within_range].copy()

    # Location IDs are cast to strings so they are not left as numbers.
    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [9]:
# Source parquet files for the training and validation splits.
train_file, val_file = (
    '../data/green_tripdata_2025-01.parquet',
    '../data/green_tripdata_2025-02.parquet',
)

# Both splits go through the same read_dataframe preparation.
df_train, df_val = (read_dataframe(path) for path in (train_file, val_file))

In [10]:
# Row counts of the prepared (train, validation) frames.
tuple(len(frame) for frame in (df_train, df_val))

(46307, 44218)

In [11]:
# Combine pickup and drop-off location IDs into a single "<PU>_<DO>" feature
# on both splits.
for frame in (df_train, df_val):
    frame['PU_DO'] = frame['PULocationID'] + '_' + frame['DOLocationID']

In [12]:
# Model inputs: the combined pickup/drop-off pair plus the trip distance.
categorical = ['PU_DO']
numerical = ['trip_distance']
feature_columns = categorical + numerical

dv = DictVectorizer()

# One dict per row, as expected by DictVectorizer.
train_dicts = df_train[feature_columns].to_dict(orient='records')
val_dicts = df_val[feature_columns].to_dict(orient='records')

# The vectorizer is fitted on the training rows only; the validation rows are
# encoded with that already-fitted vectorizer.
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [13]:
# Regression target: the duration column, as arrays aligned with
# X_train / X_val.
target = 'duration'
y_train, y_val = (frame[target].values for frame in (df_train, df_val))

In [14]:
# Baseline: ordinary least squares on the vectorized features.
lr = LinearRegression().fit(X_train, y_train)

y_pred = lr.predict(X_val)

# Validation RMSE, displayed as the cell's result.
root_mean_squared_error(y_true=y_val, y_pred=y_pred)

6.0988780535504485

In [16]:
# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError when the file is opened for writing.
os.makedirs('models', exist_ok=True)

# The fitted vectorizer is stored alongside the model in the same pickle.
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [17]:
# Track one Lasso training run: data paths, alpha, validation RMSE and the
# pickled (vectorizer, model) pair produced by this run.
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", train_file)
    mlflow.log_param("valid-data-path", val_file)

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    # NOTE: this rebinds `lr`, replacing the LinearRegression fitted earlier.
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # Log the model trained in THIS run. Previously the run attached
    # models/lin_reg.bin, which holds the LinearRegression baseline, so the
    # artifact did not match the logged alpha and rmse.
    os.makedirs("models", exist_ok=True)
    lasso_path = "models/lasso.bin"
    with open(lasso_path, "wb") as f_out:
        pickle.dump((dv, lr), f_out)

    mlflow.log_artifact(local_path=lasso_path, artifact_path="models_pickle")

In [18]:
import xgboost as xgb

In [19]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [20]:
# Wrap each split's feature matrix and labels in an XGBoost DMatrix.
train, valid = (
    xgb.DMatrix(data=features, label=labels)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

In [21]:
def objective(params):
    """Train one XGBoost booster inside an MLflow run and report its loss.

    Parameters
    ----------
    params : dict
        XGBoost training parameters; logged to MLflow and passed unchanged to
        ``xgb.train``.

    Returns
    -------
    dict
        ``{'loss': <validation RMSE>, 'status': STATUS_OK}``.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)

        # The validation DMatrix is the only evaluation set, so early
        # stopping is judged on it.
        watchlist = [(valid, 'validation')]
        model = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=watchlist,
            early_stopping_rounds=50,
        )

        validation_rmse = root_mean_squared_error(y_val, model.predict(valid))
        mlflow.log_metric("rmse", validation_rmse)

    return {'loss': validation_rmse, 'status': STATUS_OK}

In [22]:
# Hyperparameter search space for the XGBoost objective.
# hp.loguniform bounds are exponents: e.g. learning_rate is drawn from
# [exp(-3), exp(0)].
search_space = {
    # quniform yields floats; scope.int casts them to integer tree depths.
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # 'reg:linear' is a deprecated alias of 'reg:squarederror' (same loss);
    # the current name is used so XGBoost does not warn on every trial.
    'objective': 'reg:squarederror',
    'seed': 42
}

# Run 50 TPE trials; each trial calls objective(), which logs its own run.
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|                                                                                                | 0/50 [00:00<?, ?trial/s, best loss=?]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.71909                                                                                                                 
[1]	validation-rmse:8.36982                                                                                                                 
[2]	validation-rmse:8.05425                                                                                                                 
[3]	validation-rmse:7.77024                                                                                                                 
[4]	validation-rmse:7.51461                                                                                                                 
[5]	validation-rmse:7.28642                                                                                                                 
[6]	validation-rmse:7.07874                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.55599                                                                                                                 
[1]	validation-rmse:5.82795                                                                                                                 
[2]	validation-rmse:5.63709                                                                                                                 
[3]	validation-rmse:5.56119                                                                                                                 
[4]	validation-rmse:5.53140                                                                                                                 
[5]	validation-rmse:5.50929                                                                                                                 
[6]	validation-rmse:5.48364                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.21196                                                                                                                 
[1]	validation-rmse:7.53148                                                                                                                 
[2]	validation-rmse:7.01406                                                                                                                 
[3]	validation-rmse:6.61708                                                                                                                 
[4]	validation-rmse:6.33536                                                                                                                 
[5]	validation-rmse:6.12472                                                                                                                 
[6]	validation-rmse:5.96154                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.68787                                                                                                                 
[2]	validation-rmse:6.18511                                                                                                                 
[3]	validation-rmse:5.90993                                                                                                                 
[4]	validation-rmse:5.76081                                                                                                                 
[5]	validation-rmse:5.67100                                                                                                                 
[6]	validation-rmse:5.61329                                                                                                                 
[7]	validation-rmse:5.57876                                                                                                                 
[8]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:5.59918                                                                                                                 
[1]	validation-rmse:5.42649                                                                                                                 
[2]	validation-rmse:5.41361                                                                                                                 
[3]	validation-rmse:5.41078                                                                                                                 
[4]	validation-rmse:5.40193                                                                                                                 
[5]	validation-rmse:5.40277                                                                                                                 
[6]	validation-rmse:5.40282                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.59980                                                                                                                 
[1]	validation-rmse:8.15705                                                                                                                 
[2]	validation-rmse:7.77055                                                                                                                 
[3]	validation-rmse:7.43435                                                                                                                 
[4]	validation-rmse:7.14258                                                                                                                 
[5]	validation-rmse:6.89013                                                                                                                 
[6]	validation-rmse:6.67405                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.88740                                                                                                                 
[1]	validation-rmse:7.04723                                                                                                                 
[2]	validation-rmse:6.48645                                                                                                                 
[3]	validation-rmse:6.11851                                                                                                                 
[4]	validation-rmse:5.87876                                                                                                                 
[5]	validation-rmse:5.72786                                                                                                                 
[6]	validation-rmse:5.63167                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.34461                                                                                                                 
[1]	validation-rmse:7.72558                                                                                                                 
[2]	validation-rmse:7.22779                                                                                                                 
[3]	validation-rmse:6.83023                                                                                                                 
[4]	validation-rmse:6.51527                                                                                                                 
[5]	validation-rmse:6.26804                                                                                                                 
[6]	validation-rmse:6.07408                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.27794                                                                                                                 
[1]	validation-rmse:6.32485                                                                                                                 
[2]	validation-rmse:5.85489                                                                                                                 
[3]	validation-rmse:5.63268                                                                                                                 
[4]	validation-rmse:5.52053                                                                                                                 
[5]	validation-rmse:5.46354                                                                                                                 
[6]	validation-rmse:5.43318                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.43767                                                                                                                 
[1]	validation-rmse:7.88298                                                                                                                 
[2]	validation-rmse:7.42443                                                                                                                 
[3]	validation-rmse:7.05387                                                                                                                 
[4]	validation-rmse:6.74707                                                                                                                 
[5]	validation-rmse:6.49826                                                                                                                 
[6]	validation-rmse:6.30553                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.67537                                                                                                                 
[1]	validation-rmse:8.29117                                                                                                                 
[2]	validation-rmse:7.94954                                                                                                                 
[3]	validation-rmse:7.64712                                                                                                                 
[4]	validation-rmse:7.38002                                                                                                                 
[5]	validation-rmse:7.14445                                                                                                                 
[6]	validation-rmse:6.93735                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.77614                                                                                                                 
[1]	validation-rmse:8.47206                                                                                                                 
[2]	validation-rmse:8.19251                                                                                                                 
[3]	validation-rmse:7.93565                                                                                                                 
[4]	validation-rmse:7.69972                                                                                                                 
[5]	validation-rmse:7.48357                                                                                                                 
[6]	validation-rmse:7.28600                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.56498                                                                                                                 
[2]	validation-rmse:8.32259                                                                                                                 
[3]	validation-rmse:8.09764                                                                                                                 
[4]	validation-rmse:7.88899                                                                                                                 
[5]	validation-rmse:7.69586                                                                                                                 
[6]	validation-rmse:7.51734                                                                                                                 
[7]	validation-rmse:7.35216                                                                                                                 
[8]	validatio

  self.starting_round = model.num_boosted_rounds()



[13]	validation-rmse:5.54066                                                                                                                
[14]	validation-rmse:5.53587                                                                                                                
[15]	validation-rmse:5.53162                                                                                                                
[16]	validation-rmse:5.52776                                                                                                                
[17]	validation-rmse:5.52467                                                                                                                
[18]	validation-rmse:5.52067                                                                                                                
[19]	validation-rmse:5.51797                                                                                                                
[20]	validati

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:5.81353                                                                                                                 
[1]	validation-rmse:5.61523                                                                                                                 
[2]	validation-rmse:5.59351                                                                                                                 
[3]	validation-rmse:5.59209                                                                                                                 
[4]	validation-rmse:5.58091                                                                                                                 
[5]	validation-rmse:5.56465                                                                                                                 
[6]	validation-rmse:5.56725                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.84280                                                                                                                 
[2]	validation-rmse:7.37826                                                                                                                 
[3]	validation-rmse:7.00296                                                                                                                 
[4]	validation-rmse:6.70164                                                                                                                 
[5]	validation-rmse:6.46098                                                                                                                 
[6]	validation-rmse:6.27002                                                                                                                 
[7]	validation-rmse:6.11890                                                                                                                 
[8]	validatio

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:7.32153                                                                                                                 
[4]	validation-rmse:7.03095                                                                                                                 
[5]	validation-rmse:6.78659                                                                                                                 
[6]	validation-rmse:6.58279                                                                                                                 
[7]	validation-rmse:6.41357                                                                                                                 
[8]	validation-rmse:6.27186                                                                                                                 
[9]	validation-rmse:6.15464                                                                                                                 
[10]	validati

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.07845                                                                                                                 
[1]	validation-rmse:7.30934                                                                                                                 
[2]	validation-rmse:6.74625                                                                                                                 
[3]	validation-rmse:6.33791                                                                                                                 
[4]	validation-rmse:6.04932                                                                                                                 
[5]	validation-rmse:5.84801                                                                                                                 
[6]	validation-rmse:5.70484                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.83425                                                                                                                 
[1]	validation-rmse:6.98316                                                                                                                 
[2]	validation-rmse:6.43255                                                                                                                 
[3]	validation-rmse:6.08972                                                                                                                 
[4]	validation-rmse:5.86473                                                                                                                 
[5]	validation-rmse:5.72759                                                                                                                 
[6]	validation-rmse:5.64278                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.35260                                                                                                                 
[1]	validation-rmse:6.38632                                                                                                                 
[2]	validation-rmse:5.88236                                                                                                                 
[3]	validation-rmse:5.62979                                                                                                                 
[4]	validation-rmse:5.50344                                                                                                                 
[5]	validation-rmse:5.44345                                                                                                                 
[6]	validation-rmse:5.41666                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.57900                                                                                                                 
[1]	validation-rmse:8.11862                                                                                                                 
[2]	validation-rmse:7.71903                                                                                                                 
[3]	validation-rmse:7.37209                                                                                                                 
[4]	validation-rmse:7.07441                                                                                                                 
[5]	validation-rmse:6.81735                                                                                                                 
[6]	validation-rmse:6.59891                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.25271                                                                                                                 
[1]	validation-rmse:7.57965                                                                                                                 
[2]	validation-rmse:7.05630                                                                                                                 
[3]	validation-rmse:6.65426                                                                                                                 
[4]	validation-rmse:6.34799                                                                                                                 
[5]	validation-rmse:6.11645                                                                                                                 
[6]	validation-rmse:5.94250                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:7.83184
[4]	validation-rmse:7.58615                                                                                                                 
[5]	validation-rmse:7.36485                                                                                                                 
[6]	validation-rmse:7.16676                                                                                                                 
[7]	validation-rmse:6.98788                                                                                                                 
[8]	validation-rmse:6.82785                                                                                                                 
[9]	validation-rmse:6.68515                                                                                                                 
[10]	validation-rmse:6.55684                                                                                                  

  self.starting_round = model.num_boosted_rounds()



[11]	validation-rmse:6.55229                                                                                                                
[12]	validation-rmse:6.45284                                                                                                                
[13]	validation-rmse:6.36357                                                                                                                
[14]	validation-rmse:6.28394                                                                                                                
[15]	validation-rmse:6.21409                                                                                                                
[16]	validation-rmse:6.15114                                                                                                                
[17]	validation-rmse:6.09460                                                                                                                
[18]	validati

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:8.07888                                                                                                                 
[4]	validation-rmse:7.86852                                                                                                                 
[5]	validation-rmse:7.67461                                                                                                                 
[6]	validation-rmse:7.49460                                                                                                                 
[7]	validation-rmse:7.32892                                                                                                                 
[8]	validation-rmse:7.17583                                                                                                                 
[9]	validation-rmse:7.03565                                                                                                                 
[10]	validati

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.62271                                                                                                                 
[1]	validation-rmse:8.19507                                                                                                                 
[2]	validation-rmse:7.81878                                                                                                                 
[3]	validation-rmse:7.48761                                                                                                                 
[4]	validation-rmse:7.19803                                                                                                                 
[5]	validation-rmse:6.94573                                                                                                                 
[6]	validation-rmse:6.72730                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.50163                                                                                                                 
[1]	validation-rmse:7.98698                                                                                                                 
[2]	validation-rmse:7.54778                                                                                                                 
[3]	validation-rmse:7.17790                                                                                                                 
[4]	validation-rmse:6.86622                                                                                                                 
[5]	validation-rmse:6.60644                                                                                                                 
[6]	validation-rmse:6.39009                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.52419                                                                                                                 
[1]	validation-rmse:8.02413                                                                                                                 
[2]	validation-rmse:7.59631                                                                                                                 
[3]	validation-rmse:7.23134                                                                                                                 
[4]	validation-rmse:6.92344                                                                                                                 
[5]	validation-rmse:6.66268                                                                                                                 
[6]	validation-rmse:6.44424                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.09880                                                                                                                 
[1]	validation-rmse:7.35109                                                                                                                 
[2]	validation-rmse:6.80748                                                                                                                 
[3]	validation-rmse:6.41756                                                                                                                 
[4]	validation-rmse:6.14134                                                                                                                 
[5]	validation-rmse:5.94470                                                                                                                 
[6]	validation-rmse:5.80909                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.51482                                                                                                                 
[1]	validation-rmse:8.01742                                                                                                                 
[2]	validation-rmse:7.58611                                                                                                                 
[3]	validation-rmse:7.23208                                                                                                                 
[4]	validation-rmse:6.92269                                                                                                                 
[5]	validation-rmse:6.67802                                                                                                                 
[6]	validation-rmse:6.46712                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.71688                                                                                                                 
[1]	validation-rmse:8.36425                                                                                                                 
[2]	validation-rmse:8.04583                                                                                                                 
[3]	validation-rmse:7.75931                                                                                                                 
[4]	validation-rmse:7.50136                                                                                                                 
[5]	validation-rmse:7.27016                                                                                                                 
[6]	validation-rmse:7.06259                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.76404                                                                                                                 
[1]	validation-rmse:8.45029                                                                                                                 
[2]	validation-rmse:8.16327                                                                                                                 
[3]	validation-rmse:7.90129                                                                                                                 
[4]	validation-rmse:7.66235                                                                                                                 
[5]	validation-rmse:7.44503                                                                                                                 
[6]	validation-rmse:7.24716                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.66607                                                                                                                 
[1]	validation-rmse:8.27336                                                                                                                 
[2]	validation-rmse:7.92344                                                                                                                 
[3]	validation-rmse:7.61388                                                                                                                 
[4]	validation-rmse:7.34041                                                                                                                 
[5]	validation-rmse:7.09967                                                                                                                 
[6]	validation-rmse:6.88537                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:5.59009                                                                                                                 
[3]	validation-rmse:5.52646                                                                                                                 
[4]	validation-rmse:5.50033                                                                                                                 
[5]	validation-rmse:5.48222                                                                                                                 
[6]	validation-rmse:5.47458                                                                                                                 
[7]	validation-rmse:5.46791                                                                                                                 
[8]	validation-rmse:5.46362                                                                                                                 
[9]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.81640                                                                                                                 
[1]	validation-rmse:8.54658                                                                                                                 
[2]	validation-rmse:8.29528                                                                                                                 
[3]	validation-rmse:8.06182                                                                                                                 
[4]	validation-rmse:7.84493                                                                                                                 
[5]	validation-rmse:7.64409                                                                                                                 
[6]	validation-rmse:7.45791                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[16]	validation-rmse:5.66316                                                                                                                
[17]	validation-rmse:5.65782                                                                                                                
[18]	validation-rmse:5.65636                                                                                                                
[19]	validation-rmse:5.65527                                                                                                                
[20]	validation-rmse:5.65193                                                                                                                
[21]	validation-rmse:5.64844                                                                                                                
[22]	validation-rmse:5.64351                                                                                                                
[23]	validati

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:5.79877                                                                                                                 
[3]	validation-rmse:5.64164                                                                                                                 
[4]	validation-rmse:5.56777                                                                                                                 
[5]	validation-rmse:5.53412                                                                                                                 
[6]	validation-rmse:5.51452                                                                                                                 
[7]	validation-rmse:5.50117                                                                                                                 
[8]	validation-rmse:5.48907                                                                                                                 
[9]	validatio

  self.starting_round = model.num_boosted_rounds()



[16]	validation-rmse:5.66695                                                                                                                
[17]	validation-rmse:5.66056                                                                                                                
[18]	validation-rmse:5.65848                                                                                                                
[19]	validation-rmse:5.65591                                                                                                                
[20]	validation-rmse:5.65393                                                                                                                
[21]	validation-rmse:5.65124                                                                                                                
[22]	validation-rmse:5.64766                                                                                                                
[23]	validati

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:5.47850                                                                                                                 
[9]	validation-rmse:5.47087                                                                                                                 
[10]	validation-rmse:5.46445                                                                                                                
[11]	validation-rmse:5.45886                                                                                                                
[12]	validation-rmse:5.45282                                                                                                                
[13]	validation-rmse:5.44762                                                                                                                
[14]	validation-rmse:5.44162                                                                                                                
[15]	validati

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.75791                                                                                                                 
[2]	validation-rmse:6.22054                                                                                                                 
[3]	validation-rmse:5.90821                                                                                                                 
[4]	validation-rmse:5.72199                                                                                                                 
[5]	validation-rmse:5.61567                                                                                                                 
[6]	validation-rmse:5.54445                                                                                                                 
[7]	validation-rmse:5.50220                                                                                                                 
[8]	validatio

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:6.34150                                                                                                                 
[3]	validation-rmse:6.03979                                                                                                                 
[4]	validation-rmse:5.86426                                                                                                                 
[5]	validation-rmse:5.75898                                                                                                                 
[6]	validation-rmse:5.69381                                                                                                                 
[7]	validation-rmse:5.65296                                                                                                                 
[8]	validation-rmse:5.62707                                                                                                                 
[9]	validatio

  self.starting_round = model.num_boosted_rounds()



[15]	validation-rmse:5.65225
[16]	validation-rmse:5.64575                                                                                                                
[17]	validation-rmse:5.63874                                                                                                                
[18]	validation-rmse:5.63561                                                                                                                
[19]	validation-rmse:5.62950                                                                                                                
[20]	validation-rmse:5.62829                                                                                                                
[21]	validation-rmse:5.62336                                                                                                                
[22]	validation-rmse:5.62176                                                                                                 

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.06434                                                                                                                 
[1]	validation-rmse:6.10153                                                                                                                 
[2]	validation-rmse:5.68291                                                                                                                 
[3]	validation-rmse:5.50524                                                                                                                 
[4]	validation-rmse:5.43160                                                                                                                 
[5]	validation-rmse:5.40373                                                                                                                 
[6]	validation-rmse:5.39351                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.04532                                                                                                                 
[1]	validation-rmse:5.51304                                                                                                                 
[2]	validation-rmse:5.42835                                                                                                                 
[3]	validation-rmse:5.41027                                                                                                                 
[4]	validation-rmse:5.40035                                                                                                                 
[5]	validation-rmse:5.39375                                                                                                                 
[6]	validation-rmse:5.38434                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.73617                                                                                                                 
[1]	validation-rmse:5.92285                                                                                                                 
[2]	validation-rmse:5.66236                                                                                                                 
[3]	validation-rmse:5.56491                                                                                                                 
[4]	validation-rmse:5.52573                                                                                                                 
[5]	validation-rmse:5.50196                                                                                                                 
[6]	validation-rmse:5.48672                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.26277                                                                                                                 
[1]	validation-rmse:7.59940                                                                                                                 
[2]	validation-rmse:7.08511                                                                                                                 
[3]	validation-rmse:6.69102                                                                                                                 
[4]	validation-rmse:6.39133                                                                                                                 
[5]	validation-rmse:6.16582                                                                                                                 
[6]	validation-rmse:5.99378                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:5.47005                                                                                                                 
[2]	validation-rmse:5.45585                                                                                                                 
[3]	validation-rmse:5.44653                                                                                                                 
[4]	validation-rmse:5.43920                                                                                                                 
[5]	validation-rmse:5.43330                                                                                                                 
[6]	validation-rmse:5.42488                                                                                                                 
[7]	validation-rmse:5.41629                                                                                                                 
[8]	validatio

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.90250                                                                                                                 
[1]	validation-rmse:7.06758                                                                                                                 
[2]	validation-rmse:6.52096                                                                                                                 
[3]	validation-rmse:6.14485                                                                                                                 
[4]	validation-rmse:5.90939                                                                                                                 
[5]	validation-rmse:5.76120                                                                                                                 
[6]	validation-rmse:5.66180                                                                                                                 
[7]	validatio

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:5.52888                                                                                                                 
[9]	validation-rmse:5.52410                                                                                                                 
[10]	validation-rmse:5.52026                                                                                                                
[11]	validation-rmse:5.51575                                                                                                                
[12]	validation-rmse:5.51186                                                                                                                
[13]	validation-rmse:5.50816                                                                                                                
[14]	validation-rmse:5.50264                                                                                                                
[15]	validati

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.04894                                                                                                                 
[1]	validation-rmse:5.60835                                                                                                                 
[2]	validation-rmse:5.51883                                                                                                                 
[3]	validation-rmse:5.49896                                                                                                                 
[4]	validation-rmse:5.49686                                                                                                                 
[5]	validation-rmse:5.48705                                                                                                                 
[6]	validation-rmse:5.48400                                                                                                                 
[7]	validatio

In [24]:
# Switch XGBoost autologging off: the next cell logs its parameters, metric,
# preprocessor artifact and model explicitly via the mlflow API.
mlflow.xgboost.autolog(disable=True)

In [25]:
import os

# Local file the preprocessor is pickled to before being uploaded as a run artifact.
preprocessor_path = "models/preprocessor.b"

# Train one XGBoost model with fixed hyperparameters and log everything
# (params, validation RMSE, preprocessor, model) to a single MLflow run.
with mlflow.start_run():

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    # Hard-coded hyperparameters (presumably the best trial of the earlier
    # search -- confirm against the MLflow UI).
    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        # 'reg:linear' is the deprecated alias of the same squared-error loss;
        # using it makes XGBoost emit a deprecation warning on every
        # train/save call.
        'objective': 'reg:squarederror',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    mlflow.log_params(best_params)

    # Up to 1000 boosting rounds; stop once the validation metric has not
    # improved for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    # NOTE(review): whether predict() uses only the best early-stopped
    # iteration depends on the installed XGBoost version -- confirm.
    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # Create the target directory first so a fresh checkout (without
    # "models/") does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(preprocessor_path), exist_ok=True)
    with open(preprocessor_path, "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact(preprocessor_path, artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

  self.starting_round = model.num_boosted_rounds()


[0]	validation-rmse:8.56745
[1]	validation-rmse:8.10033
[2]	validation-rmse:7.69739
[3]	validation-rmse:7.35105
[4]	validation-rmse:7.05239
[5]	validation-rmse:6.80020
[6]	validation-rmse:6.58162
[7]	validation-rmse:6.39726
[8]	validation-rmse:6.24197
[9]	validation-rmse:6.11059
[10]	validation-rmse:6.00193
[11]	validation-rmse:5.90700
[12]	validation-rmse:5.82752
[13]	validation-rmse:5.76123
[14]	validation-rmse:5.70306
[15]	validation-rmse:5.65560
[16]	validation-rmse:5.61619
[17]	validation-rmse:5.58065
[18]	validation-rmse:5.55157
[19]	validation-rmse:5.52734
[20]	validation-rmse:5.50858
[21]	validation-rmse:5.48872
[22]	validation-rmse:5.47379
[23]	validation-rmse:5.45957
[24]	validation-rmse:5.44705
[25]	validation-rmse:5.43706
[26]	validation-rmse:5.42713
[27]	validation-rmse:5.41891
[28]	validation-rmse:5.41283
[29]	validation-rmse:5.40819
[30]	validation-rmse:5.40244
[31]	validation-rmse:5.39854
[32]	validation-rmse:5.39446
[33]	validation-rmse:5.39004
[34]	validation-rmse:5.3

  xgb_model.save_model(model_data_path)


In [27]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

# Let MLflow capture estimator parameters and the fitted model automatically.
mlflow.sklearn.autolog()

# Fit each candidate regressor with default hyperparameters in its own MLflow
# run and record its validation RMSE so the runs can be compared in the UI.
for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        # Log the files that were actually loaded (train_file / val_file are
        # defined in the data-loading cell) instead of stale literal paths.
        mlflow.log_param("train-data-path", train_file)
        mlflow.log_param("valid-data-path", val_file)
        # Relies on the preprocessor pickle written by the XGBoost cell above.
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        # All four estimators accept random_state; fix it so reruns are
        # reproducible (same seed value as the XGBoost run).
        mlmodel = model_class(random_state=42)
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)
        

