In [6]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import root_mean_squared_error
import pickle
import mlflow

## MLflow
### Initial setup
1. Need to run on terminal: `mlflow ui --backend-store-uri sqlite:///mlflow.db`
2. MLflow can be found on [http://127.0.0.1:5000/](http://127.0.0.1:5000/)
3. We set the tracking uri on the python script
4. We set the experiment name where all runs will be saved. It the exp doesn't exist mlflow will create one.

#### URI: Unique resource identifier

In [7]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location=('/Users/andreanicolas/Library/CloudStorage/GoogleDrive-andreanicolas91@gmail.com/My '
 'Drive/ASU_ComputerScience/MLOps_studies/02-experiment-tracking/mlruns/1'), creation_time=1716340686867, experiment_id='1', last_update_time=1716340686867, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

#### Read in taxi data using pandas.

In [8]:
def read_dataframe(filename):
    df = pd.read_parquet(filename) #read data
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime # calculate duration
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60) # convert to mins
    mask = ((df.duration >=1) & (df.duration<=60)) # select only significant data
    df = df[mask]
    categorical = ['PULocationID', 'DOLocationID'] # pickup and dropoff location
    df[categorical] = df[categorical].astype(str) # convert to string/object to prevent labeling

    return df

In [9]:
df_train = read_dataframe('../data/green_tripdata_2023-01.parquet')
df_val = read_dataframe('../data/green_tripdata_2023-02.parquet')
len(df_train), len(df_val)

(65946, 62574)

#### dictVectorizer to generate train and validation X

In [10]:
print('combining pickup location ID and dropoff location ID...')
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

combining pickup location ID and dropoff location ID...


In [11]:
dv = DictVectorizer()
categorical = ['PU_DO'] # pickup and dropoff location
numerical = ['trip_distance'] # distance of trip

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts) # no need to fit sicne its done already for training data

#### Obtaining Y values (labels)

In [12]:
y_train = df_train['duration'].values
y_val = df_val['duration'].values

#### Fit linear regression and calculate RMSEs

In [13]:
lr = LinearRegression()
lr.fit(X_train,y_train)

y_pred_train = lr.predict(X_train)
y_pred_val   = lr.predict(X_val)

train_rmse = root_mean_squared_error(y_train,y_pred_train)
val_rmse   = root_mean_squared_error(y_val,y_pred_val)

print(f'RMSE from train data is : {train_rmse:.2f}')
print(f'RMSE from validation data is : {val_rmse:.2f}')

RMSE from train data is : 4.80
RMSE from validation data is : 6.04


#### Save model as pickle file

In [14]:
with open('../models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

## MLFlow

### Initial logs
1. We have start a run, where everytime we hit run a different run will be saved on mlflow.
2. We are saving ad-hoc parameters like train data path, validation data path, alpha
3. We are logging a goodness metric, rmse
4. We also have setup a tag with the developer name
#### LASSO model:

In [23]:
with mlflow.start_run():
    mlflow.set_tag("developer","andrea")

    # log info about dataset
    mlflow.log_param("train-data-path","../01-intro/data/green_tripdata_2023-01.parquet")
    mlflow.log_param("valid-data-path","../01-intro/data/green_tripdata_2023-02.parquet")

    #log alpha
    alpha = 0.01
    mlflow.log_param("alpha",alpha)

    lr = Lasso(alpha)
    lr.fit(X_train,y_train)

    y_pred_val   = lr.predict(X_val)

    #log metric
    val_rmse   = root_mean_squared_error(y_val,y_pred_val)
    mlflow.log_metric("rmse",val_rmse)
    print(f'RMSE from validation data is : {val_rmse:.2f}')

    # save model (pretty sure its saving linear regression one and not lasso)
    mlflow.log_artifact(local_path="../models/lin_reg.bin", artifact_path="models_pickle")

RMSE from validation data is : 8.23


### Optimizing hyperparameters

In [16]:
import xgboost as xgb #xtreme gradient boosting
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
# hyperopt is a library that using bayesian methods to find best set of hyperparameters 
#fmin - minimize obj fcn//output
#tpe - algo used to control flushing of input/output
#hp - different methods to define the search space
#status_ok - signal to send at the end of each run to tell hyperopt that method ran succesfully
#trials - will keep track of info for each run
#scope - to define range of type integer

#### Set up train and validation data to feed into obj function

In [17]:
train = xgb.DMatrix(X_train,label=y_train)
valid = xgb.DMatrix(X_val,label=y_val)

#### Define obj function

In [18]:
def objective(params):

    with mlflow.start_run():
        mlflow.set_tag("model","xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params, # modeling parameters alpha
            dtrain=train, # training data
            num_boost_round=1000,
            evals=[(valid,"validation")], # will use validation data to check for improvements
            early_stopping_rounds=50 # if there are 50 iters and no improvement happens optimization will stop
        )
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val,y_pred)
        mlflow.log_metric("rmse",rmse)
    
    return {'loss':rmse,'status':STATUS_OK} #status ok is just signal that optimization was successful

#### Define search space

In [19]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth',4, 100, 1)), #depth of the trees, we convert to integer since the output originally is float
    'learning_rate':hp.loguniform('learning_rate',-3, 0), # exp(-3) to exp(0)... [0.05, 1]
    'reg_alpha':hp.loguniform('reg_alpha',-5, -1),
    'reg_lambda':hp.loguniform('reg_lambda',-6, -1),
    'min_child_weight':hp.loguniform('min_child_weight',-1, 3),
    'objective': 'reg:linear', # regression problem
    'seed': 42,
}

#### Optimize obj fcn by minimizing the output 

In [20]:
best_result = fmin(
    fn = objective,
    space = search_space,
    algo = tpe.suggest, # algorithm to run optimization
    max_evals = 50,
    trials= Trials() #all info will be stored here
)

[0]	validation-rmse:6.29560                           
[1]	validation-rmse:5.56526                           
[2]	validation-rmse:5.39183                           
[3]	validation-rmse:5.34047                           
[4]	validation-rmse:5.31796                           
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[5]	validation-rmse:5.31052                           
[6]	validation-rmse:5.30097                           
[7]	validation-rmse:5.29845                           
[8]	validation-rmse:5.29336                           
[9]	validation-rmse:5.28976                           
[10]	validation-rmse:5.28616                          
[11]	validation-rmse:5.28406                          
[12]	validation-rmse:5.27899                          
[13]	validation-rmse:5.27592                          
[14]	validation-rmse:5.27334                          
[15]	validation-rmse:5.26945                          
[16]	validation-rmse:5.26775                          
[17]	validation-rmse:5.26636                          
[18]	validation-rmse:5.26443                          
[19]	validation-rmse:5.25555                          
[20]	validation-rmse:5.25380                          
[21]	validation-rmse:5.25199                          
[22]	validation-rmse:5.25035                          
[23]	valid




[2]	validation-rmse:7.32225                                                    
[3]	validation-rmse:6.89683                                                    
[4]	validation-rmse:6.55959                                                    
[5]	validation-rmse:6.29942                                                    
[6]	validation-rmse:6.08580                                                    
[7]	validation-rmse:5.92553                                                    
[8]	validation-rmse:5.79867                                                    
[9]	validation-rmse:5.70267                                                    
[10]	validation-rmse:5.62130                                                   
[11]	validation-rmse:5.56309                                                   
[12]	validation-rmse:5.51897                                                   
[13]	validation-rmse:5.48153                                                   
[14]	validation-rmse:5.44729            




[2]	validation-rmse:6.70319
[3]	validation-rmse:6.28026                                                    
[4]	validation-rmse:5.98947                                                    
[5]	validation-rmse:5.79106                                                    
[6]	validation-rmse:5.65350                                                    
[7]	validation-rmse:5.56203                                                    
[8]	validation-rmse:5.49883                                                    
[9]	validation-rmse:5.44990                                                    
[10]	validation-rmse:5.41817                                                   
[11]	validation-rmse:5.39484                                                   
[12]	validation-rmse:5.37796                                                   
[13]	validation-rmse:5.36325                                                   
[14]	validation-rmse:5.35285                                                   
[15]	validat




[0]	validation-rmse:7.09032                                                    
[1]	validation-rmse:6.02125                                                    
[2]	validation-rmse:5.54164                                                    
[3]	validation-rmse:5.34383                                                    
[4]	validation-rmse:5.25640                                                    
[5]	validation-rmse:5.21337                                                    
[6]	validation-rmse:5.19135                                                    
[7]	validation-rmse:5.17983                                                    
[8]	validation-rmse:5.17198                                                    
[9]	validation-rmse:5.16639                                                    
[10]	validation-rmse:5.16436                                                   
[11]	validation-rmse:5.16564                                                   
[12]	validation-rmse:5.16718            




[1]	validation-rmse:6.89881                                                   
[2]	validation-rmse:6.29615                                                   
[3]	validation-rmse:5.92830                                                   
[4]	validation-rmse:5.70590                                                   
[5]	validation-rmse:5.57081                                                   
[6]	validation-rmse:5.48703                                                   
[7]	validation-rmse:5.43679                                                   
[8]	validation-rmse:5.40145                                                   
[9]	validation-rmse:5.37371                                                   
[10]	validation-rmse:5.35321                                                  
[11]	validation-rmse:5.33973                                                  
[12]	validation-rmse:5.33008                                                  
[13]	validation-rmse:5.32040                        




[0]	validation-rmse:6.25740                                                   
[1]	validation-rmse:5.46745                                                   
[2]	validation-rmse:5.28608                                                   
[3]	validation-rmse:5.23745                                                   
[4]	validation-rmse:5.21270                                                   
[5]	validation-rmse:5.20024                                                   
[6]	validation-rmse:5.19332                                                   
[7]	validation-rmse:5.18844                                                   
[8]	validation-rmse:5.17680                                                   
[9]	validation-rmse:5.17564                                                   
[10]	validation-rmse:5.17783                                                  
[11]	validation-rmse:5.17018                                                  
[12]	validation-rmse:5.17015                        




[0]	validation-rmse:8.59501                                                    
[1]	validation-rmse:7.97967                                                    
[2]	validation-rmse:7.46287                                                    
[3]	validation-rmse:7.03224                                                    
[4]	validation-rmse:6.67562                                                    
[5]	validation-rmse:6.38452                                                    
[6]	validation-rmse:6.14514                                                    
[7]	validation-rmse:5.95296                                                    
[8]	validation-rmse:5.79680                                                    
[9]	validation-rmse:5.67137                                                    
[10]	validation-rmse:5.57160                                                   
[11]	validation-rmse:5.49215                                                   
[12]	validation-rmse:5.42917            




[0]	validation-rmse:8.52596                                                    
[1]	validation-rmse:7.87718                                                    
[2]	validation-rmse:7.33580                                                    
[3]	validation-rmse:6.91399                                                    
[4]	validation-rmse:6.56408                                                    
[5]	validation-rmse:6.30241                                                    
[6]	validation-rmse:6.07255                                                    
[7]	validation-rmse:5.90307                                                    
[8]	validation-rmse:5.77534                                                    
[9]	validation-rmse:5.67683                                                    
[10]	validation-rmse:5.59574                                                   
[11]	validation-rmse:5.53986                                                   
[12]	validation-rmse:5.47818            




[1]	validation-rmse:7.29148                                                    
[2]	validation-rmse:6.68354                                                    
[3]	validation-rmse:6.25674                                                    
[4]	validation-rmse:5.97286                                                    
[5]	validation-rmse:5.76521                                                    
[6]	validation-rmse:5.63450                                                    
[7]	validation-rmse:5.54213                                                    
[8]	validation-rmse:5.47915                                                    
[9]	validation-rmse:5.42831                                                    
[10]	validation-rmse:5.39656                                                   
[11]	validation-rmse:5.37324                                                   
[12]	validation-rmse:5.35622                                                   
[13]	validation-rmse:5.34708            




[2]	validation-rmse:8.22881                                                    
[3]	validation-rmse:7.92996                                                    
[4]	validation-rmse:7.65977                                                    
[5]	validation-rmse:7.41615                                                    
[6]	validation-rmse:7.19693                                                    
[7]	validation-rmse:6.99969                                                    
[8]	validation-rmse:6.82257                                                    
[9]	validation-rmse:6.66433                                                    
[10]	validation-rmse:6.52296                                                   
[11]	validation-rmse:6.39649                                                   
[12]	validation-rmse:6.28390                                                   
[13]	validation-rmse:6.18400                                                   
[14]	validation-rmse:6.09538            




[3]	validation-rmse:6.02167                                                     
[4]	validation-rmse:5.76329                                                     
[5]	validation-rmse:5.60095                                                     
[6]	validation-rmse:5.49387                                                     
[7]	validation-rmse:5.42545                                                     
[8]	validation-rmse:5.37673                                                     
[9]	validation-rmse:5.34275                                                     
[10]	validation-rmse:5.31697                                                    
[11]	validation-rmse:5.30069                                                    
[12]	validation-rmse:5.28647                                                    
[13]	validation-rmse:5.27426                                                    
[14]	validation-rmse:5.26707                                                    
[15]	validation-rmse:5.26073




[0]	validation-rmse:8.71515                                                     
[1]	validation-rmse:8.18832                                                     
[2]	validation-rmse:7.72934                                                     
[3]	validation-rmse:7.33427                                                     
[4]	validation-rmse:6.99545                                                     
[5]	validation-rmse:6.70599                                                     
[6]	validation-rmse:6.46201                                                     
[7]	validation-rmse:6.25292                                                     
[8]	validation-rmse:6.07719                                                     
[9]	validation-rmse:5.92814                                                     
[10]	validation-rmse:5.80608                                                    
[11]	validation-rmse:5.69936                                                    
[12]	validation-rmse:5.61373




[0]	validation-rmse:6.09649                                                     
[1]	validation-rmse:5.36843                                                     
[2]	validation-rmse:5.22878                                                     
[3]	validation-rmse:5.19793                                                     
[4]	validation-rmse:5.19276                                                     
[5]	validation-rmse:5.18766                                                     
[6]	validation-rmse:5.18413                                                     
[7]	validation-rmse:5.17650                                                     
[8]	validation-rmse:5.17291                                                     
[9]	validation-rmse:5.17577                                                     
[10]	validation-rmse:5.17429                                                    
[11]	validation-rmse:5.17398                                                    
[12]	validation-rmse:5.17373




[2]	validation-rmse:6.07245                                                     
[3]	validation-rmse:5.75272                                                     
[4]	validation-rmse:5.57436                                                     
[5]	validation-rmse:5.48808                                                     
[6]	validation-rmse:5.42729                                                     
[7]	validation-rmse:5.39705                                                     
[8]	validation-rmse:5.37993                                                     
[9]	validation-rmse:5.36668                                                     
[10]	validation-rmse:5.35646                                                    
[11]	validation-rmse:5.35186                                                    
[12]	validation-rmse:5.34715                                                    
[13]	validation-rmse:5.34685                                                    
[14]	validation-rmse:5.34299




[0]	validation-rmse:8.11601
[1]	validation-rmse:7.23065                                                     
[2]	validation-rmse:6.60406                                                     
[3]	validation-rmse:6.15744                                                     
[4]	validation-rmse:5.85507                                                     
[5]	validation-rmse:5.64585                                                     
[6]	validation-rmse:5.50576                                                     
[7]	validation-rmse:5.40749                                                     
[8]	validation-rmse:5.34083                                                     
[9]	validation-rmse:5.29628                                                     
[10]	validation-rmse:5.26345                                                    
[11]	validation-rmse:5.24107                                                    
[12]	validation-rmse:5.22159                                                    





[0]	validation-rmse:8.76145                                                     
[1]	validation-rmse:8.26917                                                     
[2]	validation-rmse:7.83668                                                     
[3]	validation-rmse:7.45892                                                     
[4]	validation-rmse:7.12927                                                     
[5]	validation-rmse:6.84499                                                     
[6]	validation-rmse:6.59855                                                     
[7]	validation-rmse:6.38752                                                     
[8]	validation-rmse:6.20494                                                     
[9]	validation-rmse:6.05059                                                     
[10]	validation-rmse:5.91918                                                    
[11]	validation-rmse:5.80545                                                    
[12]	validation-rmse:5.71140




[8]	validation-rmse:5.71138                                                     
[9]	validation-rmse:5.64248                                                     
[10]	validation-rmse:5.59042                                                    
[11]	validation-rmse:5.55036                                                    
[12]	validation-rmse:5.52123                                                    
[13]	validation-rmse:5.49841                                                    
[14]	validation-rmse:5.48425                                                    
[15]	validation-rmse:5.46588                                                    
[16]	validation-rmse:5.45613                                                    
[17]	validation-rmse:5.44742                                                    
[18]	validation-rmse:5.43922                                                    
[19]	validation-rmse:5.43507                                                    
[20]	validation-rmse:5.43240




[1]	validation-rmse:7.98900                                                     
[2]	validation-rmse:7.48500                                                     
[3]	validation-rmse:7.07430                                                     
[4]	validation-rmse:6.73111                                                     
[5]	validation-rmse:6.45801                                                     
[6]	validation-rmse:6.23470                                                     
[7]	validation-rmse:6.05644                                                     
[8]	validation-rmse:5.90786                                                     
[9]	validation-rmse:5.79625                                                     
[10]	validation-rmse:5.70777                                                    
[11]	validation-rmse:5.62928                                                    
[12]	validation-rmse:5.57158                                                    
[13]	validation-rmse:5.52413




[7]	validation-rmse:5.33391                                                     
[8]	validation-rmse:5.32601                                                     
[9]	validation-rmse:5.32270                                                     
[10]	validation-rmse:5.31966                                                    
[11]	validation-rmse:5.31597                                                    
[12]	validation-rmse:5.31467                                                    
[13]	validation-rmse:5.31046                                                    
[14]	validation-rmse:5.30446                                                    
[15]	validation-rmse:5.30023                                                    
[16]	validation-rmse:5.29775                                                    
[17]	validation-rmse:5.29301                                                    
[18]	validation-rmse:5.29064                                                    
[19]	validation-rmse:5.28897




[0]	validation-rmse:5.60923                                                     
[1]	validation-rmse:5.34063                                                     
[2]	validation-rmse:5.31623                                                     
[3]	validation-rmse:5.30963                                                     
[4]	validation-rmse:5.30501                                                     
[5]	validation-rmse:5.30642                                                     
[6]	validation-rmse:5.28919                                                     
[7]	validation-rmse:5.28737                                                     
[8]	validation-rmse:5.28287                                                     
[9]	validation-rmse:5.27955                                                     
[10]	validation-rmse:5.27951                                                    
[11]	validation-rmse:5.27560                                                    
[12]	validation-rmse:5.27418




[1]	validation-rmse:8.64043                                                     
[2]	validation-rmse:8.33828                                                     
[3]	validation-rmse:8.05994                                                     
[4]	validation-rmse:7.80366                                                     
[5]	validation-rmse:7.56815                                                     
[6]	validation-rmse:7.35204                                                     
[7]	validation-rmse:7.15403                                                     
[8]	validation-rmse:6.97312                                                     
[9]	validation-rmse:6.80800                                                     
[10]	validation-rmse:6.65708                                                    
[11]	validation-rmse:6.52004                                                    
[12]	validation-rmse:6.39494                                                    
[13]	validation-rmse:6.28187




[0]	validation-rmse:9.00203                                                     
[1]	validation-rmse:8.70363                                                     
[2]	validation-rmse:8.42590                                                     
[3]	validation-rmse:8.16760                                                     
[4]	validation-rmse:7.92803                                                     
[5]	validation-rmse:7.70600                                                     
[6]	validation-rmse:7.50034                                                     
[7]	validation-rmse:7.31035                                                     
[8]	validation-rmse:7.13448                                                     
[9]	validation-rmse:6.97214                                                     
[10]	validation-rmse:6.82276                                                    
[11]	validation-rmse:6.68543                                                    
[12]	validation-rmse:6.55887




[1]	validation-rmse:8.69528                                                     
[2]	validation-rmse:8.41465                                                     
[3]	validation-rmse:8.15407                                                     
[4]	validation-rmse:7.91283                                                     
[5]	validation-rmse:7.68915                                                     
[6]	validation-rmse:7.48298                                                     
[7]	validation-rmse:7.29181                                                     
[8]	validation-rmse:7.11577                                                     
[9]	validation-rmse:6.95383                                                     
[10]	validation-rmse:6.80462                                                    
[11]	validation-rmse:6.66759                                                    
[12]	validation-rmse:6.54195                                                    
[13]	validation-rmse:6.42679




[1]	validation-rmse:8.66152                                                     
[2]	validation-rmse:8.36721                                                     
[3]	validation-rmse:8.09603                                                     
[4]	validation-rmse:7.84566                                                     
[5]	validation-rmse:7.61438                                                     
[6]	validation-rmse:7.40196                                                     
[7]	validation-rmse:7.20687                                                     
[8]	validation-rmse:7.02835                                                     
[9]	validation-rmse:6.86468                                                     
[10]	validation-rmse:6.71500                                                    
[11]	validation-rmse:6.57830                                                    
[12]	validation-rmse:6.45295                                                    
[13]	validation-rmse:6.33886




[0]	validation-rmse:8.91552                                                     
[1]	validation-rmse:8.54425                                                     
[2]	validation-rmse:8.20784                                                     
[3]	validation-rmse:7.90125                                                     
[4]	validation-rmse:7.62537                                                     
[5]	validation-rmse:7.37285                                                     
[6]	validation-rmse:7.14834                                                     
[7]	validation-rmse:6.94337                                                     
[8]	validation-rmse:6.76121                                                     
[9]	validation-rmse:6.59565                                                     
[10]	validation-rmse:6.45033                                                    
[11]	validation-rmse:6.31705                                                    
[12]	validation-rmse:6.19729




[0]	validation-rmse:8.88979                                                     
[1]	validation-rmse:8.49705                                                     
[2]	validation-rmse:8.14094                                                     
[3]	validation-rmse:7.81881                                                     
[4]	validation-rmse:7.52837                                                     
[5]	validation-rmse:7.26706                                                     
[6]	validation-rmse:7.03268                                                     
[7]	validation-rmse:6.82250                                                     
[8]	validation-rmse:6.63502                                                     
[9]	validation-rmse:6.46810                                                     
[10]	validation-rmse:6.31951                                                    
[11]	validation-rmse:6.18669                                                    
[12]	validation-rmse:6.06896




[1]	validation-rmse:8.72612                                                     
[2]	validation-rmse:8.45793                                                     
[3]	validation-rmse:8.20846                                                     
[4]	validation-rmse:7.97616                                                     
[5]	validation-rmse:7.76025                                                     
[6]	validation-rmse:7.55983                                                     
[7]	validation-rmse:7.37339                                                     
[8]	validation-rmse:7.20144                                                     
[9]	validation-rmse:7.04246                                                     
[10]	validation-rmse:6.89504                                                    
[11]	validation-rmse:6.75878                                                    
[12]	validation-rmse:6.63371                                                    
[13]	validation-rmse:6.51730




[0]	validation-rmse:8.81393                                                     
[1]	validation-rmse:8.36226                                                     
[2]	validation-rmse:7.96171                                                     
[3]	validation-rmse:7.60699                                                     
[4]	validation-rmse:7.29457                                                     
[5]	validation-rmse:7.02032                                                     
[6]	validation-rmse:6.78042                                                     
[7]	validation-rmse:6.57159                                                     
[8]	validation-rmse:6.39041                                                     
[9]	validation-rmse:6.23316                                                     
[10]	validation-rmse:6.09728                                                    
[11]	validation-rmse:5.98002                                                    
[12]	validation-rmse:5.87919




[1]	validation-rmse:8.63716                                                     
[2]	validation-rmse:8.33499                                                     
[3]	validation-rmse:8.05737                                                     
[4]	validation-rmse:7.80249                                                     
[5]	validation-rmse:7.56925                                                     
[6]	validation-rmse:7.35597                                                     
[7]	validation-rmse:7.16118                                                     
[8]	validation-rmse:6.98330                                                     
[9]	validation-rmse:6.82147                                                     
[10]	validation-rmse:6.67470                                                    
[11]	validation-rmse:6.54147                                                    
[12]	validation-rmse:6.42029                                                    
[13]	validation-rmse:6.31069




[11]	validation-rmse:6.28492                                                    
[12]	validation-rmse:6.18801                                                    
[13]	validation-rmse:6.10409                                                    
[14]	validation-rmse:6.03094                                                    
[15]	validation-rmse:5.96806                                                    
[16]	validation-rmse:5.91392                                                    
[17]	validation-rmse:5.86700                                                    
[18]	validation-rmse:5.82528                                                    
[19]	validation-rmse:5.79092                                                    
[20]	validation-rmse:5.75975                                                    
[21]	validation-rmse:5.73362                                                    
[22]	validation-rmse:5.71068                                                    
[23]	validation-rmse:5.68975




[0]	validation-rmse:8.34732                                                     
[1]	validation-rmse:7.58322                                                     
[2]	validation-rmse:6.98522                                                     
[3]	validation-rmse:6.53025                                                     
[4]	validation-rmse:6.18460                                                     
[5]	validation-rmse:5.92673                                                     
[6]	validation-rmse:5.72946                                                     
[7]	validation-rmse:5.58619                                                     
[8]	validation-rmse:5.48540                                                     
[9]	validation-rmse:5.40452                                                     
[10]	validation-rmse:5.34446                                                    
[11]	validation-rmse:5.30439                                                    
[12]	validation-rmse:5.27073




[3]	validation-rmse:5.31730                                                     
[4]	validation-rmse:5.30577                                                     
[5]	validation-rmse:5.29343                                                     
[6]	validation-rmse:5.28913                                                     
[7]	validation-rmse:5.27461                                                     
[8]	validation-rmse:5.26424                                                     
[9]	validation-rmse:5.25685                                                     
[10]	validation-rmse:5.25069                                                    
[11]	validation-rmse:5.24398                                                    
[12]	validation-rmse:5.24551                                                    
[13]	validation-rmse:5.23614                                                    
[14]	validation-rmse:5.23245                                                    
[15]	validation-rmse:5.23060




[0]	validation-rmse:8.67068                                                     
[1]	validation-rmse:8.11028                                                     
[2]	validation-rmse:7.63078                                                     
[3]	validation-rmse:7.22381                                                     
[4]	validation-rmse:6.88012                                                     
[5]	validation-rmse:6.59050                                                     
[6]	validation-rmse:6.34870                                                     
[7]	validation-rmse:6.14549                                                     
[8]	validation-rmse:5.97761                                                     
[9]	validation-rmse:5.83803                                                     
[10]	validation-rmse:5.72195                                                    
[11]	validation-rmse:5.62638                                                    
[12]	validation-rmse:5.54724




[1]	validation-rmse:6.16800                                                     
[2]	validation-rmse:5.69394                                                     
[3]	validation-rmse:5.47887                                                     
[4]	validation-rmse:5.38080                                                     
[5]	validation-rmse:5.32777                                                     
[6]	validation-rmse:5.30316                                                     
[7]	validation-rmse:5.28905                                                     
[8]	validation-rmse:5.27692                                                     
[9]	validation-rmse:5.26905                                                     
[10]	validation-rmse:5.26336                                                    
[11]	validation-rmse:5.24951                                                    
[12]	validation-rmse:5.24817                                                    
[13]	validation-rmse:5.23868




[0]	validation-rmse:8.84435                                                     
[1]	validation-rmse:8.41504                                                     
[2]	validation-rmse:8.03048                                                     
[3]	validation-rmse:7.68756                                                     
[4]	validation-rmse:7.38302                                                     
[5]	validation-rmse:7.11150                                                     
[6]	validation-rmse:6.87183                                                     
[7]	validation-rmse:6.65871                                                     
[8]	validation-rmse:6.47398                                                     
[9]	validation-rmse:6.30779                                                     
[10]	validation-rmse:6.16770                                                    
[11]	validation-rmse:6.04099                                                    
[12]	validation-rmse:5.92952




[2]	validation-rmse:7.15156                                                     
[3]	validation-rmse:6.71993                                                     
[4]	validation-rmse:6.38952                                                     
[5]	validation-rmse:6.13910                                                     
[6]	validation-rmse:5.95083                                                     
[7]	validation-rmse:5.81006                                                     
[8]	validation-rmse:5.70496                                                     
[9]	validation-rmse:5.62646                                                     
[10]	validation-rmse:5.56641                                                    
[11]	validation-rmse:5.52178                                                    
[12]	validation-rmse:5.48855                                                    
[13]	validation-rmse:5.45874                                                    
[14]	validation-rmse:5.43341




[1]	validation-rmse:8.63941                                                     
[2]	validation-rmse:8.33647                                                     
[3]	validation-rmse:8.06088                                                     
[4]	validation-rmse:7.80473                                                     
[5]	validation-rmse:7.57383                                                     
[6]	validation-rmse:7.35956                                                     
[7]	validation-rmse:7.16544                                                     
[8]	validation-rmse:6.98573                                                     
[9]	validation-rmse:6.82544                                                     
[10]	validation-rmse:6.67776                                                    
[11]	validation-rmse:6.54377                                                    
[12]	validation-rmse:6.41995                                                    
[13]	validation-rmse:6.30800




[0]	validation-rmse:7.52841                                                     
[1]	validation-rmse:6.48994                                                     
[2]	validation-rmse:5.92112                                                     
[3]	validation-rmse:5.62012                                                     
[4]	validation-rmse:5.46218                                                     
[5]	validation-rmse:5.37679                                                     
[6]	validation-rmse:5.32870                                                     
[7]	validation-rmse:5.29837                                                     
[8]	validation-rmse:5.28021                                                     
[9]	validation-rmse:5.26721                                                     
[10]	validation-rmse:5.25836                                                    
[11]	validation-rmse:5.24930                                                    
[12]	validation-rmse:5.24219




[1]	validation-rmse:8.21446                                                     
[2]	validation-rmse:7.77213                                                     
[3]	validation-rmse:7.39277                                                     
[4]	validation-rmse:7.06921                                                     
[5]	validation-rmse:6.79417                                                     
[6]	validation-rmse:6.56115                                                     
[7]	validation-rmse:6.36550                                                     
[8]	validation-rmse:6.20037                                                     
[9]	validation-rmse:6.06231                                                     
[10]	validation-rmse:5.94714                                                    
[11]	validation-rmse:5.85115                                                    
[12]	validation-rmse:5.77091                                                    
[13]	validation-rmse:5.70392




[7]	validation-rmse:5.41781                                                     
[8]	validation-rmse:5.40728                                                     
[9]	validation-rmse:5.39554                                                     
[10]	validation-rmse:5.39239                                                    
[11]	validation-rmse:5.38744                                                    
[12]	validation-rmse:5.38415                                                    
[13]	validation-rmse:5.38000                                                    
[14]	validation-rmse:5.37601                                                    
[15]	validation-rmse:5.37362                                                    
[16]	validation-rmse:5.37102                                                    
[17]	validation-rmse:5.36839                                                    
[18]	validation-rmse:5.36622                                                    
[19]	validation-rmse:5.36412




[2]	validation-rmse:7.31729                                                     
[3]	validation-rmse:6.89156                                                     
[4]	validation-rmse:6.55418                                                     
[5]	validation-rmse:6.28848                                                     
[6]	validation-rmse:6.08022                                                     
[7]	validation-rmse:5.91919                                                     
[8]	validation-rmse:5.79303                                                     
[9]	validation-rmse:5.69247                                                     
[10]	validation-rmse:5.61492                                                    
[11]	validation-rmse:5.55640                                                    
[12]	validation-rmse:5.51005                                                    
[13]	validation-rmse:5.47276                                                    
[14]	validation-rmse:5.44239




[0]	validation-rmse:8.25855                                                     
[1]	validation-rmse:7.44569                                                     
[2]	validation-rmse:6.83592                                                     
[3]	validation-rmse:6.38716                                                     
[4]	validation-rmse:6.06102                                                     
[5]	validation-rmse:5.82587                                                     
[6]	validation-rmse:5.65756                                                     
[7]	validation-rmse:5.53841                                                     
[8]	validation-rmse:5.45248                                                     
[9]	validation-rmse:5.39000                                                     
[10]	validation-rmse:5.34440                                                    
[11]	validation-rmse:5.31002                                                    
[12]	validation-rmse:5.28684




[0]	validation-rmse:8.94082                                                     
[1]	validation-rmse:8.59041                                                     
[2]	validation-rmse:8.26990                                                     
[3]	validation-rmse:7.97567                                                     
[4]	validation-rmse:7.70722                                                     
[5]	validation-rmse:7.46308                                                     
[6]	validation-rmse:7.23928                                                     
[7]	validation-rmse:7.03662                                                     
[8]	validation-rmse:6.85151                                                     
[9]	validation-rmse:6.68571                                                     
[10]	validation-rmse:6.53430                                                    
[11]	validation-rmse:6.39659                                                    
[12]	validation-rmse:6.27173




[1]	validation-rmse:8.03055                                                     
[2]	validation-rmse:7.53232                                                     
[3]	validation-rmse:7.11643                                                     
[4]	validation-rmse:6.76941                                                     
[5]	validation-rmse:6.48416                                                     
[6]	validation-rmse:6.24883                                                     
[7]	validation-rmse:6.05554                                                     
[8]	validation-rmse:5.89646                                                     
[9]	validation-rmse:5.76851                                                     
[10]	validation-rmse:5.66179                                                    
[11]	validation-rmse:5.57584                                                    
[12]	validation-rmse:5.50703                                                    
[13]	validation-rmse:5.45182




[1]	validation-rmse:8.47144                                                     
[2]	validation-rmse:8.10882                                                     
[3]	validation-rmse:7.78448                                                     
[4]	validation-rmse:7.49390                                                     
[5]	validation-rmse:7.23495                                                     
[6]	validation-rmse:7.00437                                                     
[7]	validation-rmse:6.79876                                                     
[8]	validation-rmse:6.61724                                                     
[9]	validation-rmse:6.45567                                                     
[10]	validation-rmse:6.31443                                                    
[11]	validation-rmse:6.18961                                                    
[12]	validation-rmse:6.07828                                                    
[13]	validation-rmse:5.98091




[0]	validation-rmse:7.70376                                                     
[1]	validation-rmse:6.67921                                                     
[2]	validation-rmse:6.05020                                                     
[3]	validation-rmse:5.68614                                                     
[4]	validation-rmse:5.46754                                                     
[5]	validation-rmse:5.34973                                                     
[6]	validation-rmse:5.27899                                                     
[7]	validation-rmse:5.23693                                                     
[8]	validation-rmse:5.21024                                                     
[9]	validation-rmse:5.19535                                                     
[10]	validation-rmse:5.18301                                                    
[11]	validation-rmse:5.17465                                                    
[12]	validation-rmse:5.16861




[1]	validation-rmse:8.67701                                                     
[2]	validation-rmse:8.39035                                                     
[3]	validation-rmse:8.12583                                                     
[4]	validation-rmse:7.88213                                                     
[5]	validation-rmse:7.65804                                                     
[6]	validation-rmse:7.45218                                                     
[7]	validation-rmse:7.26332                                                     
[8]	validation-rmse:7.09046                                                     
[9]	validation-rmse:6.93214                                                     
[10]	validation-rmse:6.78740                                                    
[11]	validation-rmse:6.65549                                                    
[12]	validation-rmse:6.53487                                                    
[13]	validation-rmse:6.42511




[3]	validation-rmse:6.05393                                                     
[4]	validation-rmse:5.79478                                                     
[5]	validation-rmse:5.62758                                                     
[6]	validation-rmse:5.51869                                                     
[7]	validation-rmse:5.44839                                                     
[8]	validation-rmse:5.40001                                                     
[9]	validation-rmse:5.36832                                                     
[10]	validation-rmse:5.34267                                                    
[11]	validation-rmse:5.32428                                                    
[12]	validation-rmse:5.31136                                                    
[13]	validation-rmse:5.30147                                                    
[14]	validation-rmse:5.29393                                                    
[15]	validation-rmse:5.28946




[1]	validation-rmse:8.30645                                                     
[2]	validation-rmse:7.88738                                                     
[3]	validation-rmse:7.52232                                                     
[4]	validation-rmse:7.20336                                                     
[5]	validation-rmse:6.92568                                                     
[6]	validation-rmse:6.68542                                                     
[7]	validation-rmse:6.47770                                                     
[8]	validation-rmse:6.29906                                                     
[9]	validation-rmse:6.14404                                                     
[10]	validation-rmse:6.01313                                                    
[11]	validation-rmse:5.89997                                                    
[12]	validation-rmse:5.80338                                                    
[13]	validation-rmse:5.72097




[0]	validation-rmse:8.67956                                                     
[1]	validation-rmse:8.12891                                                     
[2]	validation-rmse:7.65972                                                     
[3]	validation-rmse:7.26074                                                     
[4]	validation-rmse:6.92170                                                     
[5]	validation-rmse:6.63793                                                     
[6]	validation-rmse:6.39691                                                     
[7]	validation-rmse:6.19777                                                     
[8]	validation-rmse:6.03140                                                     
[9]	validation-rmse:5.89492                                                     
[10]	validation-rmse:5.77707                                                    
[11]	validation-rmse:5.68443                                                    
[12]	validation-rmse:5.60400

#### Verifying that best parameters do the trick

In [21]:
best_params = {
'learning_rate': 0.11405256196340997,
'max_depth':36,
'min_child_weight':3.711408284036696,
'objective':'reg:linear',
'reg_alpha':0.2047112668129924,
'reg_lambda':0.01260039738825428,
'seed':42
}

### NO Autologging
Can do what was done previously, with mlflow.start_run etc etc...

In [25]:
mlflow.xgboost.autolog(disable=True)

In [27]:
with mlflow.start_run():
    # log tag and parameters
    mlflow.set_tag("model","xgboost")
    mlflow.log_params(best_params)
    # train model
    booster = xgb.train(
        params=best_params, # modeling parameters alpha
        dtrain=train, # training data
        num_boost_round=1000,
        evals=[(valid,"validation")], # will use validation data to check for improvements
        early_stopping_rounds=50 # if there are 50 iters and no improvement happens optimization will stop
    )
    #calculate metric and log
    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val,y_pred)
    mlflow.log_metric("rmse",rmse)

    # save preprocessor as pickle file
    with open("../models/preprocessor.b","wb") as f_out:
        pickle.dump(dv, f_out)

    # log the preprocessors as artifacts
    mlflow.log_artifact("../models/preprocessor.b",artifact_path="preprocessor")

    # log model as artifact
    # NOTE: this first ran with autologging on, because we had run it before
    # we would need to disable autologging to prevent duplicate artifacts
    mlflow.xgboost.log_model(booster,artifact_path="models_mlflow")

[0]	validation-rmse:8.62314




[1]	validation-rmse:8.03055
[2]	validation-rmse:7.53232
[3]	validation-rmse:7.11643
[4]	validation-rmse:6.76941
[5]	validation-rmse:6.48416
[6]	validation-rmse:6.24883
[7]	validation-rmse:6.05554
[8]	validation-rmse:5.89646
[9]	validation-rmse:5.76851
[10]	validation-rmse:5.66179
[11]	validation-rmse:5.57584
[12]	validation-rmse:5.50703
[13]	validation-rmse:5.45182
[14]	validation-rmse:5.40683
[15]	validation-rmse:5.36931
[16]	validation-rmse:5.33743
[17]	validation-rmse:5.31110
[18]	validation-rmse:5.29017
[19]	validation-rmse:5.27248
[20]	validation-rmse:5.25816
[21]	validation-rmse:5.24655
[22]	validation-rmse:5.23587
[23]	validation-rmse:5.22666
[24]	validation-rmse:5.22105
[25]	validation-rmse:5.21497
[26]	validation-rmse:5.20958
[27]	validation-rmse:5.20458
[28]	validation-rmse:5.19995
[29]	validation-rmse:5.19746
[30]	validation-rmse:5.19466
[31]	validation-rmse:5.19181
[32]	validation-rmse:5.18891
[33]	validation-rmse:5.18613
[34]	validation-rmse:5.18475
[35]	validation-rmse:5.



### Loging models in MLflow
Two options:
- log model as an artifact
  `mlflow.log_artifact("mymodel",artifact_path="models"/)`
- log model using log_model
  `mlflow.<framework>.log_model(model,artifact_path="models")`

### With Autologging
Automatic logging allows you to log metrics, parameters, and models without explicit statements. 

all you need is autolog()

Only works is you use the following libraries:
- Scikit-learn
- Tensorflow and keras
- XGBoost
- Spark
- Pytorch
- Gluon
- LightGBM
- Statsmodels
- Fastai

You can find more information on the mlflow documentation [here](https://mlflow.org/docs/latest/tracking/autolog.html#automatic-logging).

In [22]:
mlflow.xgboost.autolog()

# train model only
booster = xgb.train(
    params=best_params, # modeling parameters alpha
    dtrain=train, # training data
    num_boost_round=1000,
    evals=[(valid,"validation")], # will use validation data to check for improvements
    early_stopping_rounds=50 # if there are 50 iters and no improvement happens optimization will stop
)


2024/05/22 00:58:04 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '55e6319f043c4d438ab65fff41edf3fe', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


[0]	validation-rmse:8.62314




[1]	validation-rmse:8.03055
[2]	validation-rmse:7.53232
[3]	validation-rmse:7.11643
[4]	validation-rmse:6.76941
[5]	validation-rmse:6.48416
[6]	validation-rmse:6.24883
[7]	validation-rmse:6.05554
[8]	validation-rmse:5.89646
[9]	validation-rmse:5.76851
[10]	validation-rmse:5.66179
[11]	validation-rmse:5.57584
[12]	validation-rmse:5.50703
[13]	validation-rmse:5.45182
[14]	validation-rmse:5.40683
[15]	validation-rmse:5.36931
[16]	validation-rmse:5.33743
[17]	validation-rmse:5.31110
[18]	validation-rmse:5.29017
[19]	validation-rmse:5.27248
[20]	validation-rmse:5.25816
[21]	validation-rmse:5.24655
[22]	validation-rmse:5.23587
[23]	validation-rmse:5.22666
[24]	validation-rmse:5.22105
[25]	validation-rmse:5.21497
[26]	validation-rmse:5.20958
[27]	validation-rmse:5.20458
[28]	validation-rmse:5.19995
[29]	validation-rmse:5.19746
[30]	validation-rmse:5.19466
[31]	validation-rmse:5.19181
[32]	validation-rmse:5.18891
[33]	validation-rmse:5.18613
[34]	validation-rmse:5.18475
[35]	validation-rmse:5.



### Reading model from MLflow

In [28]:
#import mlflow
logged_model = 'runs:/0b983bbfdc7148a3951d7bce9c997594/models_mlflow'

# Load model as a PyFuncModel.
py_loaded_model = mlflow.pyfunc.load_model(logged_model)

# load model as XGBoost model
xgb_loaded_model = mlflow.xgboost.load_model(logged_model)



In [30]:
y_pred_xgb = xgb_loaded_model.predict(valid)
y_pred_xgb[:10]

array([22.17077  , 21.226362 , 24.710638 , 18.949577 , 13.896604 ,
       13.295256 , 12.657442 ,  7.4791665, 13.631114 , 11.685048 ],
      dtype=float32)