# Hyperopt and MLFlow implementation

## 1. Simple MLFlow implementation

In [10]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import root_mean_squared_error

In [2]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

2025/05/08 11:33:53 INFO mlflow.tracking.fluent: Experiment with name 'nyc-taxi-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='/Users/gabi/codes/ml_ops/02_experiment_tracking/mlflow-examples/mlruns/2', creation_time=1746696833594, experiment_id='2', last_update_time=1746696833594, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [3]:
# read the data and treat it
# change compression if needed

def read_dataframe(filename, compression='gzip'):
    df = pd.read_csv(filename, compression=compression)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [4]:
df_train = read_dataframe('./data/green_tripdata_2021-01.csv.gz')
df_val = read_dataframe('./data/green_tripdata_2021-02.csv.gz')

  df = pd.read_csv(filename, compression=compression)


In [5]:
len(df_train), len(df_val)

(73908, 61921)

In [6]:
# We will use feature engineering to create a new feature called PU_DO 
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [7]:
# set the features we will use
categorical = ['PU_DO'] 
numerical = ['trip_distance']

# We will use the DictVectorizer to convert categorical variables into a format that can be used by the model
dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [None]:
# set the target variable

target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [11]:
# We will use the LinearRegression model to train the data
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

root_mean_squared_error(y_val, y_pred)

7.758715210533294

In [13]:
# save the model
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [15]:
# save the model with mlflow
with mlflow.start_run():

    mlflow.set_tag("developer", "gabi")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.csv")
    mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.csv")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")

## 2.0 Using xgb and Hyperopt

In [16]:
import xgboost as xgb

In [17]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [18]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [19]:
# first we need to define the objective function
def objective(params):
    # the parameters are passed both to mlflow and to xgboost
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)

        # configure the parameters of the model
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )

        # once the model is trained, we can save it and make predictions
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)

        mlflow.log_metric("rmse", rmse)
        
    # we return the loss, which is the value we want to minimize
    # and the status of the optimization
    return {'loss': rmse, 'status': STATUS_OK}

In [20]:
# define the search space

# note: when we use hp.loguniform, we are exploring the range exponention(low value, high value)

search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),  # here we need to convert to int, as this value only takes int
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

# we can use the fmin function to minimize the objective function
best_result = fmin(
    fn=objective, # the function we defined above
    space=search_space, # the search space we defined above
    algo=tpe.suggest, # the algorithm we will use to search the space
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.67987                          
[1]	validation-rmse:11.19362                          
[2]	validation-rmse:10.74946                          
[3]	validation-rmse:10.34531                          
[4]	validation-rmse:9.97723                           
[5]	validation-rmse:9.64195                           
[6]	validation-rmse:9.34106                           
[7]	validation-rmse:9.06745                           
[8]	validation-rmse:8.81763                           
[9]	validation-rmse:8.59417                           
[10]	validation-rmse:8.39069                          
[11]	validation-rmse:8.20894                          
[12]	validation-rmse:8.04273                          
[13]	validation-rmse:7.89479                          
[14]	validation-rmse:7.76631                          
[15]	validation-rmse:7.64668                          
[16]	validation-rmse:7.53888                          
[17]	validation-rmse:7.44458                          
[18]	valid

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.08251                                                      
[1]	validation-rmse:7.61887                                                      
[2]	validation-rmse:6.98531                                                      
[3]	validation-rmse:6.71374                                                      
[4]	validation-rmse:6.58413                                                      
[5]	validation-rmse:6.51805                                                      
[6]	validation-rmse:6.48089                                                      
[7]	validation-rmse:6.46163                                                      
[8]	validation-rmse:6.44859                                                      
[9]	validation-rmse:6.43788                                                      
[10]	validation-rmse:6.43118                                                     
[11]	validation-rmse:6.42704                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.12811                                                    
[1]	validation-rmse:7.72188                                                    
[2]	validation-rmse:7.12558                                                    
[3]	validation-rmse:6.87123                                                    
[4]	validation-rmse:6.75176                                                    
[5]	validation-rmse:6.69132                                                    
[6]	validation-rmse:6.65606                                                    
[7]	validation-rmse:6.63473                                                    
[8]	validation-rmse:6.61820                                                    
[9]	validation-rmse:6.60748                                                    
[10]	validation-rmse:6.59622                                                   
[11]	validation-rmse:6.59251                                                   
[12]	validation-rmse:6.58734            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.32202                                                   
[1]	validation-rmse:9.03115                                                    
[2]	validation-rmse:8.17487                                                    
[3]	validation-rmse:7.61653                                                    
[4]	validation-rmse:7.25275                                                    
[5]	validation-rmse:7.01557                                                    
[6]	validation-rmse:6.86913                                                    
[7]	validation-rmse:6.76621                                                    
[8]	validation-rmse:6.70361                                                    
[9]	validation-rmse:6.65650                                                    
[10]	validation-rmse:6.61898                                                   
[11]	validation-rmse:6.59735                                                   
[12]	validation-rmse:6.57886            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.04569                                                   
[1]	validation-rmse:8.66197                                                    
[2]	validation-rmse:7.80675                                                    
[3]	validation-rmse:7.29402                                                    
[4]	validation-rmse:6.98695                                                    
[5]	validation-rmse:6.80524                                                    
[6]	validation-rmse:6.69061                                                    
[7]	validation-rmse:6.61871                                                    
[8]	validation-rmse:6.57079                                                    
[9]	validation-rmse:6.53870                                                    
[10]	validation-rmse:6.51784                                                   
[11]	validation-rmse:6.50152                                                   
[12]	validation-rmse:6.48860            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.56758                                                    
[1]	validation-rmse:9.38020                                                     
[2]	validation-rmse:8.53865                                                     
[3]	validation-rmse:7.93205                                                     
[4]	validation-rmse:7.52099                                                     
[5]	validation-rmse:7.22951                                                     
[6]	validation-rmse:7.02353                                                     
[7]	validation-rmse:6.88822                                                     
[8]	validation-rmse:6.79837                                                     
[9]	validation-rmse:6.72556                                                     
[10]	validation-rmse:6.67690                                                    
[11]	validation-rmse:6.63952                                                    
[12]	validation-rmse:6.60894

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.13369                                                    
[1]	validation-rmse:10.24764                                                    
[2]	validation-rmse:9.52523                                                     
[3]	validation-rmse:8.94146                                                     
[4]	validation-rmse:8.47387                                                     
[5]	validation-rmse:8.10120                                                     
[6]	validation-rmse:7.80562                                                     
[7]	validation-rmse:7.57209                                                     
[8]	validation-rmse:7.38849                                                     
[9]	validation-rmse:7.24339                                                     
[10]	validation-rmse:7.12938                                                    
[11]	validation-rmse:7.03984                                                    
[12]	validation-rmse:6.96614

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:7.29033                                                     
[5]	validation-rmse:7.09517                                                     
[6]	validation-rmse:6.97317                                                     
[7]	validation-rmse:6.89446                                                     
[8]	validation-rmse:6.84542                                                     
[9]	validation-rmse:6.81180                                                     
[10]	validation-rmse:6.78793                                                    
[11]	validation-rmse:6.76984                                                    
[12]	validation-rmse:6.75605                                                    
[13]	validation-rmse:6.74873                                                    
[14]	validation-rmse:6.74007                                                    
[15]	validation-rmse:6.73531                                                    
[16]	validation-rmse:6.73042

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.31649                                                    
[1]	validation-rmse:9.02650                                                     
[2]	validation-rmse:8.18290                                                     
[3]	validation-rmse:7.62687                                                     
[4]	validation-rmse:7.27865                                                     
[5]	validation-rmse:7.05215                                                     
[6]	validation-rmse:6.90819                                                     
[7]	validation-rmse:6.80579                                                     
[8]	validation-rmse:6.74054                                                     
[9]	validation-rmse:6.69416                                                     
[10]	validation-rmse:6.65929                                                    
[11]	validation-rmse:6.63724                                                    
[12]	validation-rmse:6.61918

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.10920                                                     
[1]	validation-rmse:6.60549                                                     
[2]	validation-rmse:6.52727                                                     
[3]	validation-rmse:6.51464                                                     
[4]	validation-rmse:6.50698                                                     
[5]	validation-rmse:6.49821                                                     
[6]	validation-rmse:6.48883                                                     
[7]	validation-rmse:6.48065                                                     
[8]	validation-rmse:6.47343                                                     
[9]	validation-rmse:6.46687                                                     
[10]	validation-rmse:6.46475                                                    
[11]	validation-rmse:6.45626                                                    
[12]	validation-rmse:6.45387

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.89613                                                      
[1]	validation-rmse:6.70503                                                      
[2]	validation-rmse:6.67226                                                      
[3]	validation-rmse:6.66162                                                      
[4]	validation-rmse:6.65109                                                      
[5]	validation-rmse:6.64586                                                      
[6]	validation-rmse:6.63969                                                      
[7]	validation-rmse:6.63876                                                      
[8]	validation-rmse:6.62918                                                      
[9]	validation-rmse:6.62776                                                      
[10]	validation-rmse:6.62276                                                     
[11]	validation-rmse:6.61813                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.05085                                                      
[1]	validation-rmse:6.95272                                                      
[2]	validation-rmse:6.66782                                                      
[3]	validation-rmse:6.57967                                                      
[4]	validation-rmse:6.54138                                                      
[5]	validation-rmse:6.52355                                                      
[6]	validation-rmse:6.51573                                                      
[7]	validation-rmse:6.50795                                                      
[8]	validation-rmse:6.49925                                                      
[9]	validation-rmse:6.49483                                                      
[10]	validation-rmse:6.49168                                                     
[11]	validation-rmse:6.48678                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.26447                                                      
[1]	validation-rmse:7.78678                                                      
[2]	validation-rmse:7.09572                                                      
[3]	validation-rmse:6.77690                                                      
[4]	validation-rmse:6.62535                                                      
[5]	validation-rmse:6.54047                                                      
[6]	validation-rmse:6.49733                                                      
[7]	validation-rmse:6.47123                                                      
[8]	validation-rmse:6.45485                                                      
[9]	validation-rmse:6.44109                                                      
[10]	validation-rmse:6.43201                                                     
[11]	validation-rmse:6.42461                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.42183                                                     
[1]	validation-rmse:9.15635                                                      
[2]	validation-rmse:8.28340                                                      
[3]	validation-rmse:7.69423                                                      
[4]	validation-rmse:7.29912                                                      
[5]	validation-rmse:7.03793                                                      
[6]	validation-rmse:6.86524                                                      
[7]	validation-rmse:6.74833                                                      
[8]	validation-rmse:6.66866                                                      
[9]	validation-rmse:6.60943                                                      
[10]	validation-rmse:6.56839                                                     
[11]	validation-rmse:6.53834                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.76336                                                     
[1]	validation-rmse:9.65271                                                      
[2]	validation-rmse:8.81792                                                      
[3]	validation-rmse:8.19456                                                      
[4]	validation-rmse:7.73628                                                      
[5]	validation-rmse:7.40312                                                      
[6]	validation-rmse:7.15777                                                      
[7]	validation-rmse:6.97875                                                      
[8]	validation-rmse:6.84909                                                      
[9]	validation-rmse:6.75391                                                      
[10]	validation-rmse:6.68503                                                     
[11]	validation-rmse:6.63193                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.78210                                                     
[1]	validation-rmse:6.58809                                                     
[2]	validation-rmse:6.56598                                                     
[3]	validation-rmse:6.55441                                                     
[4]	validation-rmse:6.54518                                                     
[5]	validation-rmse:6.53223                                                     
[6]	validation-rmse:6.52780                                                     
[7]	validation-rmse:6.51814                                                     
[8]	validation-rmse:6.50903                                                     
[9]	validation-rmse:6.50081                                                     
[10]	validation-rmse:6.49388                                                    
[11]	validation-rmse:6.48841                                                    
[12]	validation-rmse:6.48178

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.70305                                                     
[1]	validation-rmse:8.28504                                                     
[2]	validation-rmse:7.51223                                                     
[3]	validation-rmse:7.09756                                                     
[4]	validation-rmse:6.88245                                                     
[5]	validation-rmse:6.75764                                                     
[6]	validation-rmse:6.67894                                                     
[7]	validation-rmse:6.63428                                                     
[8]	validation-rmse:6.60778                                                     
[9]	validation-rmse:6.58917                                                     
[10]	validation-rmse:6.57625                                                    
[11]	validation-rmse:6.56869                                                    
[12]	validation-rmse:6.56104

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.92641                                                     
[1]	validation-rmse:7.53217                                                     
[2]	validation-rmse:6.98220                                                     
[3]	validation-rmse:6.75492                                                     
[4]	validation-rmse:6.65617                                                     
[5]	validation-rmse:6.59422                                                     
[6]	validation-rmse:6.57011                                                     
[7]	validation-rmse:6.55373                                                     
[8]	validation-rmse:6.54421                                                     
[9]	validation-rmse:6.53707                                                     
[10]	validation-rmse:6.53312                                                    
[11]	validation-rmse:6.52823                                                    
[12]	validation-rmse:6.52159

  self.starting_round = model.num_boosted_rounds()



[5]	validation-rmse:6.73104                                                     
[6]	validation-rmse:6.72143                                                     
[7]	validation-rmse:6.71443                                                     
[8]	validation-rmse:6.70956                                                     
[9]	validation-rmse:6.70583                                                     
[10]	validation-rmse:6.70402                                                    
[11]	validation-rmse:6.69945                                                    
[12]	validation-rmse:6.69747                                                    
[13]	validation-rmse:6.69512                                                    
[14]	validation-rmse:6.69246                                                    
[15]	validation-rmse:6.68752                                                    
[16]	validation-rmse:6.68292                                                    
[17]	validation-rmse:6.67947

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.21391                                                     
[1]	validation-rmse:6.65772                                                     
[2]	validation-rmse:6.56193                                                     
[3]	validation-rmse:6.53670                                                     
[4]	validation-rmse:6.53173                                                     
[5]	validation-rmse:6.52081                                                     
[6]	validation-rmse:6.50970                                                     
[7]	validation-rmse:6.50251                                                     
[8]	validation-rmse:6.49766                                                     
[9]	validation-rmse:6.49155                                                     
[10]	validation-rmse:6.48143                                                    
[11]	validation-rmse:6.47348                                                    
[12]	validation-rmse:6.46736

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.20952                                                    
[1]	validation-rmse:10.37092                                                    
[2]	validation-rmse:9.67530                                                     
[3]	validation-rmse:9.10089                                                     
[4]	validation-rmse:8.63035                                                     
[5]	validation-rmse:8.24648                                                     
[6]	validation-rmse:7.93562                                                     
[7]	validation-rmse:7.68433                                                     
[8]	validation-rmse:7.48083                                                     
[9]	validation-rmse:7.31717                                                     
[10]	validation-rmse:7.18482                                                    
[11]	validation-rmse:7.08018                                                    
[12]	validation-rmse:6.99519

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.08769                                                    
[1]	validation-rmse:10.16528                                                    
[2]	validation-rmse:9.41498                                                     
[3]	validation-rmse:8.80904                                                     
[4]	validation-rmse:8.32525                                                     
[5]	validation-rmse:7.93724                                                     
[6]	validation-rmse:7.62957                                                     
[7]	validation-rmse:7.38891                                                     
[8]	validation-rmse:7.19985                                                     
[9]	validation-rmse:7.04958                                                     
[10]	validation-rmse:6.93319                                                    
[11]	validation-rmse:6.83804                                                    
[12]	validation-rmse:6.76317

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.62577                                                    
[1]	validation-rmse:11.09274                                                    
[2]	validation-rmse:10.61008                                                    
[3]	validation-rmse:10.17307                                                    
[4]	validation-rmse:9.77900                                                     
[5]	validation-rmse:9.42421                                                     
[6]	validation-rmse:9.10523                                                     
[7]	validation-rmse:8.81803                                                     
[8]	validation-rmse:8.56197                                                     
[9]	validation-rmse:8.33236                                                     
[10]	validation-rmse:8.12623                                                    
[11]	validation-rmse:7.94409                                                    
[12]	validation-rmse:7.78176

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.78942                                                    
[1]	validation-rmse:11.39460                                                    
[2]	validation-rmse:11.02699                                                    
[3]	validation-rmse:10.68524                                                    
[4]	validation-rmse:10.36770                                                    
[5]	validation-rmse:10.07299                                                    
[6]	validation-rmse:9.80004                                                     
[7]	validation-rmse:9.54714                                                     
[8]	validation-rmse:9.31335                                                     
[9]	validation-rmse:9.09752                                                     
[10]	validation-rmse:8.89795                                                    
[11]	validation-rmse:8.71386                                                    
[12]	validation-rmse:8.54388

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.57626                                                    
[1]	validation-rmse:11.00298                                                    
[2]	validation-rmse:10.48807                                                    
[3]	validation-rmse:10.02761                                                    
[4]	validation-rmse:9.61567                                                     
[5]	validation-rmse:9.24848                                                     
[6]	validation-rmse:8.92038                                                     
[7]	validation-rmse:8.62999                                                     
[8]	validation-rmse:8.37178                                                     
[9]	validation-rmse:8.14539                                                     
[10]	validation-rmse:7.94464                                                    
[11]	validation-rmse:7.76718                                                    
[12]	validation-rmse:7.61152

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:9.41937                                                     
[5]	validation-rmse:9.05129                                                     
[6]	validation-rmse:8.73027                                                     
[7]	validation-rmse:8.45151                                                     
[8]	validation-rmse:8.21063                                                     
[9]	validation-rmse:8.00268                                                     
[10]	validation-rmse:7.82420                                                    
[11]	validation-rmse:7.66901                                                    
[12]	validation-rmse:7.53621                                                    
[13]	validation-rmse:7.42183                                                    
[14]	validation-rmse:7.32379                                                    
[15]	validation-rmse:7.23955                                                    
[16]	validation-rmse:7.16690

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.32389                                                     
[1]	validation-rmse:7.12363                                                     
[2]	validation-rmse:6.77811                                                     
[3]	validation-rmse:6.66017                                                     
[4]	validation-rmse:6.61046                                                     
[5]	validation-rmse:6.59150                                                     
[6]	validation-rmse:6.58185                                                     
[7]	validation-rmse:6.57888                                                     
[8]	validation-rmse:6.57312                                                     
[9]	validation-rmse:6.56562                                                     
[10]	validation-rmse:6.56165                                                    
[11]	validation-rmse:6.55814                                                    
[12]	validation-rmse:6.55203

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.78512                                                    
[1]	validation-rmse:11.38721                                                    
[2]	validation-rmse:11.01625                                                    
[3]	validation-rmse:10.67277                                                    
[4]	validation-rmse:10.35117                                                    
[5]	validation-rmse:10.05425                                                    
[6]	validation-rmse:9.77792                                                     
[7]	validation-rmse:9.52243                                                     
[8]	validation-rmse:9.28587                                                     
[9]	validation-rmse:9.06744                                                     
[10]	validation-rmse:8.86746                                                    
[11]	validation-rmse:8.67933                                                    
[12]	validation-rmse:8.50893

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.31592                                                    
[1]	validation-rmse:10.54805                                                    
[2]	validation-rmse:9.89359                                                     
[3]	validation-rmse:9.33911                                                     
[4]	validation-rmse:8.87146                                                     
[5]	validation-rmse:8.47895                                                     
[6]	validation-rmse:8.15062                                                     
[7]	validation-rmse:7.87651                                                     
[8]	validation-rmse:7.64878                                                     
[9]	validation-rmse:7.46037                                                     
[10]	validation-rmse:7.30368                                                    
[11]	validation-rmse:7.17558                                                    
[12]	validation-rmse:7.06835

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.82950                                                    
[1]	validation-rmse:9.76635                                                     
[2]	validation-rmse:8.95117                                                     
[3]	validation-rmse:8.34099                                                     
[4]	validation-rmse:7.88161                                                     
[5]	validation-rmse:7.53940                                                     
[6]	validation-rmse:7.29322                                                     
[7]	validation-rmse:7.11784                                                     
[8]	validation-rmse:6.98446                                                     
[9]	validation-rmse:6.88222                                                     
[10]	validation-rmse:6.80980                                                    
[11]	validation-rmse:6.74990                                                    
[12]	validation-rmse:6.70665

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.66593                                                    
[1]	validation-rmse:11.16608                                                    
[2]	validation-rmse:10.71068                                                    
[3]	validation-rmse:10.29618                                                    
[4]	validation-rmse:9.91915                                                     
[5]	validation-rmse:9.57621                                                     
[6]	validation-rmse:9.26675                                                     
[7]	validation-rmse:8.98628                                                     
[8]	validation-rmse:8.73222                                                     
[9]	validation-rmse:8.50477                                                     
[10]	validation-rmse:8.29898                                                    
[11]	validation-rmse:8.11451                                                    
[12]	validation-rmse:7.94950

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.06061                                                     
[1]	validation-rmse:7.01011                                                     
[2]	validation-rmse:6.73609                                                     
[3]	validation-rmse:6.64669                                                     
[4]	validation-rmse:6.61546                                                     
[5]	validation-rmse:6.59817                                                     
[6]	validation-rmse:6.58770                                                     
[7]	validation-rmse:6.58056                                                     
[8]	validation-rmse:6.57307                                                     
[9]	validation-rmse:6.57148                                                     
[10]	validation-rmse:6.56466                                                    
[11]	validation-rmse:6.56158                                                    
[12]	validation-rmse:6.55784

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.64631                                                     
[1]	validation-rmse:8.21951                                                     
[2]	validation-rmse:7.46558                                                     
[3]	validation-rmse:7.07804                                                     
[4]	validation-rmse:6.87677                                                     
[5]	validation-rmse:6.76766                                                     
[6]	validation-rmse:6.70427                                                     
[7]	validation-rmse:6.66546                                                     
[8]	validation-rmse:6.64308                                                     
[9]	validation-rmse:6.62621                                                     
[10]	validation-rmse:6.61328                                                    
[11]	validation-rmse:6.60525                                                    
[12]	validation-rmse:6.60033

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.43889                                                    
[1]	validation-rmse:10.76028                                                    
[2]	validation-rmse:10.16661                                                    
[3]	validation-rmse:9.65063                                                     
[4]	validation-rmse:9.20333                                                     
[5]	validation-rmse:8.81664                                                     
[6]	validation-rmse:8.48351                                                     
[7]	validation-rmse:8.19760                                                     
[8]	validation-rmse:7.95192                                                     
[9]	validation-rmse:7.74257                                                     
[10]	validation-rmse:7.56403                                                    
[11]	validation-rmse:7.41184                                                    
[12]	validation-rmse:7.28322

  self.starting_round = model.num_boosted_rounds()



[10]	validation-rmse:6.76529                                                    
[11]	validation-rmse:6.76064                                                    
[12]	validation-rmse:6.75779                                                    
[13]	validation-rmse:6.75697                                                    
[14]	validation-rmse:6.75188                                                    
[15]	validation-rmse:6.75000                                                    
[16]	validation-rmse:6.74634                                                    
[17]	validation-rmse:6.74320                                                    
[18]	validation-rmse:6.74001                                                    
[19]	validation-rmse:6.73580                                                    
[20]	validation-rmse:6.73380                                                    
[21]	validation-rmse:6.72853                                                    
[22]	validation-rmse:6.72738

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.50624                                                     
[1]	validation-rmse:6.78501                                                     
[2]	validation-rmse:6.65910                                                     
[3]	validation-rmse:6.63688                                                     
[4]	validation-rmse:6.61656                                                     
[5]	validation-rmse:6.60692                                                     
[6]	validation-rmse:6.60032                                                     
[7]	validation-rmse:6.59089                                                     
[8]	validation-rmse:6.58337                                                     
[9]	validation-rmse:6.58020                                                     
[10]	validation-rmse:6.57687                                                    
[11]	validation-rmse:6.57173                                                    
[12]	validation-rmse:6.56663

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.81581                                                     
[1]	validation-rmse:8.41045                                                     
[2]	validation-rmse:7.62424                                                     
[3]	validation-rmse:7.18685                                                     
[4]	validation-rmse:6.95077                                                     
[5]	validation-rmse:6.81817                                                     
[6]	validation-rmse:6.73702                                                     
[7]	validation-rmse:6.68777                                                     
[8]	validation-rmse:6.65908                                                     
[9]	validation-rmse:6.63722                                                     
[10]	validation-rmse:6.62431                                                    
[11]	validation-rmse:6.61283                                                    
[12]	validation-rmse:6.60551

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.82651                                                    
[1]	validation-rmse:9.76181                                                     
[2]	validation-rmse:8.95677                                                     
[3]	validation-rmse:8.34028                                                     
[4]	validation-rmse:7.89212                                                     
[5]	validation-rmse:7.55584                                                     
[6]	validation-rmse:7.30447                                                     
[7]	validation-rmse:7.12001                                                     
[8]	validation-rmse:6.98958                                                     
[9]	validation-rmse:6.89242                                                     
[10]	validation-rmse:6.81224                                                    
[11]	validation-rmse:6.75549                                                    
[12]	validation-rmse:6.71045

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.75978                                                    
[1]	validation-rmse:11.33950                                                    
[2]	validation-rmse:10.95003                                                    
[3]	validation-rmse:10.58975                                                    
[4]	validation-rmse:10.25672                                                    
[5]	validation-rmse:9.94897                                                     
[6]	validation-rmse:9.66564                                                     
[7]	validation-rmse:9.40452                                                     
[8]	validation-rmse:9.16395                                                     
[9]	validation-rmse:8.94372                                                     
[10]	validation-rmse:8.74101                                                    
[11]	validation-rmse:8.55516                                                    
[12]	validation-rmse:8.38583

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.67755                                                     
[1]	validation-rmse:6.75783                                                     
[2]	validation-rmse:6.55459                                                     
[3]	validation-rmse:6.49094                                                     
[4]	validation-rmse:6.46504                                                     
[5]	validation-rmse:6.45320                                                     
[6]	validation-rmse:6.44307                                                     
[7]	validation-rmse:6.43802                                                     
[8]	validation-rmse:6.43084                                                     
[9]	validation-rmse:6.42708                                                     
[10]	validation-rmse:6.42149                                                    
[11]	validation-rmse:6.41690                                                    
[12]	validation-rmse:6.41207

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.41133                                                     
[1]	validation-rmse:7.98616                                                     
[2]	validation-rmse:7.29967                                                     
[3]	validation-rmse:6.97902                                                     
[4]	validation-rmse:6.82128                                                     
[5]	validation-rmse:6.74058                                                     
[6]	validation-rmse:6.69643                                                     
[7]	validation-rmse:6.66991                                                     
[8]	validation-rmse:6.65510                                                     
[9]	validation-rmse:6.64058                                                     
[10]	validation-rmse:6.63663                                                    
[11]	validation-rmse:6.63361                                                    
[12]	validation-rmse:6.63077

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.27010                                                     
[1]	validation-rmse:7.86789                                                     
[2]	validation-rmse:7.22830                                                     
[3]	validation-rmse:6.94767                                                     
[4]	validation-rmse:6.81465                                                     
[5]	validation-rmse:6.75117                                                     
[6]	validation-rmse:6.72052                                                     
[7]	validation-rmse:6.70369                                                     
[8]	validation-rmse:6.69198                                                     
[9]	validation-rmse:6.68076                                                     
[10]	validation-rmse:6.67661                                                    
[11]	validation-rmse:6.67300                                                    
[12]	validation-rmse:6.66486

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.72170                                                    
[1]	validation-rmse:11.26961                                                    
[2]	validation-rmse:10.85311                                                    
[3]	validation-rmse:10.47033                                                    
[4]	validation-rmse:10.11907                                                    
[5]	validation-rmse:9.79709                                                     
[6]	validation-rmse:9.50261                                                     
[7]	validation-rmse:9.23311                                                     
[8]	validation-rmse:8.98728                                                     
[9]	validation-rmse:8.76405                                                     
[10]	validation-rmse:8.56054                                                    
[11]	validation-rmse:8.37556                                                    
[12]	validation-rmse:8.20654

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.97749                                                     
[1]	validation-rmse:8.60005                                                     
[2]	validation-rmse:7.78151                                                     
[3]	validation-rmse:7.29976                                                     
[4]	validation-rmse:7.02440                                                     
[5]	validation-rmse:6.86586                                                     
[6]	validation-rmse:6.76142                                                     
[7]	validation-rmse:6.69645                                                     
[8]	validation-rmse:6.65826                                                     
[9]	validation-rmse:6.63192                                                     
[10]	validation-rmse:6.61107                                                    
[11]	validation-rmse:6.59683                                                    
[12]	validation-rmse:6.58547

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.97559                                                    
[1]	validation-rmse:9.99568                                                     
[2]	validation-rmse:9.21880                                                     
[3]	validation-rmse:8.62155                                                     
[4]	validation-rmse:8.14333                                                     
[5]	validation-rmse:7.78157                                                     
[6]	validation-rmse:7.50217                                                     
[7]	validation-rmse:7.29939                                                     
[8]	validation-rmse:7.13981                                                     
[9]	validation-rmse:7.00907                                                     
[10]	validation-rmse:6.91138                                                    
[11]	validation-rmse:6.83887                                                    
[12]	validation-rmse:6.78161

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.55860                                                    
[1]	validation-rmse:9.35282                                                     
[2]	validation-rmse:8.49060                                                     
[3]	validation-rmse:7.88669                                                     
[4]	validation-rmse:7.46906                                                     
[5]	validation-rmse:7.18129                                                     
[6]	validation-rmse:6.98379                                                     
[7]	validation-rmse:6.84664                                                     
[8]	validation-rmse:6.75118                                                     
[9]	validation-rmse:6.68234                                                     
[10]	validation-rmse:6.63469                                                    
[11]	validation-rmse:6.59786                                                    
[12]	validation-rmse:6.57125

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.85476                                                     
[2]	validation-rmse:8.02892                                                     
[3]	validation-rmse:7.52245                                                     
[4]	validation-rmse:7.21531                                                     
[5]	validation-rmse:7.02981                                                     
[6]	validation-rmse:6.91672                                                     
[7]	validation-rmse:6.84201                                                     
[8]	validation-rmse:6.79143                                                     
[9]	validation-rmse:6.75948                                                     
[10]	validation-rmse:6.73730                                                    
[11]	validation-rmse:6.72144                                                    
[12]	validation-rmse:6.70909                                                    
[13]	validation-rmse:6.70151

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.33087                                                    
[1]	validation-rmse:10.57274                                                    
[2]	validation-rmse:9.92313                                                     
[3]	validation-rmse:9.37048                                                     
[4]	validation-rmse:8.90145                                                     
[5]	validation-rmse:8.50477                                                     
[6]	validation-rmse:8.17333                                                     
[7]	validation-rmse:7.89571                                                     
[8]	validation-rmse:7.66183                                                     
[9]	validation-rmse:7.46854                                                     
[10]	validation-rmse:7.30827                                                    
[11]	validation-rmse:7.17497                                                    
[12]	validation-rmse:7.06332

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.41418                                                     
[1]	validation-rmse:7.96261                                                     
[2]	validation-rmse:7.26157                                                     
[3]	validation-rmse:6.92383                                                     
[4]	validation-rmse:6.75727                                                     
[5]	validation-rmse:6.66531                                                     
[6]	validation-rmse:6.61338                                                     
[7]	validation-rmse:6.58387                                                     
[8]	validation-rmse:6.56242                                                     
[9]	validation-rmse:6.55069                                                     
[10]	validation-rmse:6.54336                                                    
[11]	validation-rmse:6.54112                                                    
[12]	validation-rmse:6.53815

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.77292                                                     
[1]	validation-rmse:6.67079                                                     
[2]	validation-rmse:6.64584                                                     
[3]	validation-rmse:6.63224                                                     
[4]	validation-rmse:6.62055                                                     
[5]	validation-rmse:6.61275                                                     
[6]	validation-rmse:6.60807                                                     
[7]	validation-rmse:6.58705                                                     
[8]	validation-rmse:6.58034                                                     
[9]	validation-rmse:6.57478                                                     
[10]	validation-rmse:6.57433                                                    
[11]	validation-rmse:6.57251                                                    
[12]	validation-rmse:6.56991

## 3.0 Autlog

In [None]:
# autlog is a function that automatically logs the parameters, metrics and artifacts of the model
# it is a very useful function that can be used to log the model without having to write the code
# it only works with certain models, like xgboost, lightgbm and catboost ---> check website for full list


# you can disable autologging by using the disable parameter if you don't want to log the model
# mlflow.xgboost.autolog(disable=True) 

## 4.0 Model management

In [21]:
with mlflow.start_run():

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    # we can use the best result from the hyperopt optimization
    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        'objective': 'reg:linear',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    # log the parameters to mlflow
    mlflow.log_params(best_params)

    # we can use the best parameters to train the model
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    # once the model is trained, we can save it and make predictions
    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # we will export the preprocessor as well
    # the preprocessor is the DictVectorizer we used to transform the data
    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)

    # log the preprocessor - the dictvectorizer, to mlflow
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

  self.starting_round = model.num_boosted_rounds()


[0]	validation-rmse:11.44482
[1]	validation-rmse:10.77202
[2]	validation-rmse:10.18363
[3]	validation-rmse:9.67396
[4]	validation-rmse:9.23166
[5]	validation-rmse:8.84808
[6]	validation-rmse:8.51883
[7]	validation-rmse:8.23597
[8]	validation-rmse:7.99320
[9]	validation-rmse:7.78709
[10]	validation-rmse:7.61022
[11]	validation-rmse:7.45952
[12]	validation-rmse:7.33049
[13]	validation-rmse:7.22098
[14]	validation-rmse:7.12713
[15]	validation-rmse:7.04752
[16]	validation-rmse:6.98005
[17]	validation-rmse:6.92232
[18]	validation-rmse:6.87112
[19]	validation-rmse:6.82740
[20]	validation-rmse:6.78995
[21]	validation-rmse:6.75792
[22]	validation-rmse:6.72994
[23]	validation-rmse:6.70547
[24]	validation-rmse:6.68390
[25]	validation-rmse:6.66421
[26]	validation-rmse:6.64806
[27]	validation-rmse:6.63280
[28]	validation-rmse:6.61924
[29]	validation-rmse:6.60773
[30]	validation-rmse:6.59777
[31]	validation-rmse:6.58875
[32]	validation-rmse:6.58107
[33]	validation-rmse:6.57217
[34]	validation-rmse:

  xgb_model.save_model(model_data_path)


# 4.0 Loading a model

In [None]:
# we pass the run id
logged_model = "runs:/0eeed647ce134080b75df6742764b655/models_mlflow"

# load model as a PyFuncModel
loaded_model = mlflow.pyfunc.load_model(logged_model)

# predict on pandas DataFrame
import pandas as pd
#loaded_model.predict(pd.DataFrame({'', 'trip_distance': [1.0]}))  # you need to pass the data in the same format as the model was trained


In [26]:
#check the loaded model
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: 0eeed647ce134080b75df6742764b655

You can also load the model as a xgboost model (in this case)

In [27]:
xgboost_model = mlflow.xgboost.load_model(logged_model)

In [28]:
# check the xgboost object
xgboost_model

<xgboost.core.Booster at 0x1891a2d80>

# 5.0 Search with a lot of models

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

mlflow.sklearn.autolog()

for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.csv")
        mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.csv")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)
