In [1]:
import pandas as pd

In [2]:
import pickle

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.linear_model import Lasso

from sklearn.metrics import mean_squared_error

In [5]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxis-experiment")

<Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-model-experiement/mlruns/1', creation_time=1716118864844, experiment_id='1', last_update_time=1716118864844, lifecycle_stage='active', name='nyc-taxis-experiment', tags={}>

In [6]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)
    
    df["duration"] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)
    
    df = df[(df.duration >= 1) & (df.duration <= 60)]
    
    categorical = ["PULocationID", "DOLocationID"]
    df[categorical] = df[categorical].astype(str)
    
    return df

In [7]:
df_train = read_dataframe("./data/green_tripdata_2021-01.parquet")
df_val = read_dataframe("./data/green_tripdata_2021-02.parquet")

In [8]:
df_train

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,...,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,duration
0,2,2021-01-01 00:15:56,2021-01-01 00:19:52,N,1.0,43,151,1.0,1.01,5.50,...,0.5,0.00,0.00,,0.3,6.80,2.0,1.0,0.00,3.933333
1,2,2021-01-01 00:25:59,2021-01-01 00:34:44,N,1.0,166,239,1.0,2.53,10.00,...,0.5,2.81,0.00,,0.3,16.86,1.0,1.0,2.75,8.750000
2,2,2021-01-01 00:45:57,2021-01-01 00:51:55,N,1.0,41,42,1.0,1.12,6.00,...,0.5,1.00,0.00,,0.3,8.30,1.0,1.0,0.00,5.966667
3,2,2020-12-31 23:57:51,2021-01-01 00:04:56,N,1.0,168,75,1.0,1.99,8.00,...,0.5,0.00,0.00,,0.3,9.30,2.0,1.0,0.00,7.083333
7,2,2021-01-01 00:26:31,2021-01-01 00:28:50,N,1.0,75,75,6.0,0.45,3.50,...,0.5,0.96,0.00,,0.3,5.76,1.0,1.0,0.00,2.316667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76513,2,2021-01-31 21:38:00,2021-01-31 22:16:00,,,81,90,,17.63,56.23,...,0.0,0.00,6.12,,0.3,65.40,,,,38.000000
76514,2,2021-01-31 22:43:00,2021-01-31 23:21:00,,,35,213,,18.36,46.66,...,0.0,12.20,6.12,,0.3,65.28,,,,38.000000
76515,2,2021-01-31 22:16:00,2021-01-31 22:27:00,,,74,69,,2.50,18.95,...,0.0,0.00,0.00,,0.3,22.00,,,,11.000000
76516,2,2021-01-31 23:10:00,2021-01-31 23:37:00,,,168,215,,14.48,48.87,...,0.0,0.00,6.12,,0.3,58.04,,,,27.000000


In [9]:
df_val

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,...,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,duration
0,2,2021-02-01 00:34:03,2021-02-01 00:51:58,N,1.0,130,205,5.0,3.66,14.00,...,0.5,10.00,0.0,,0.3,25.30,1.0,1.0,0.00,17.916667
1,2,2021-02-01 00:04:00,2021-02-01 00:10:30,N,1.0,152,244,1.0,1.10,6.50,...,0.5,0.00,0.0,,0.3,7.80,2.0,1.0,0.00,6.500000
2,2,2021-02-01 00:18:51,2021-02-01 00:34:06,N,1.0,152,48,1.0,4.93,16.50,...,0.5,0.00,0.0,,0.3,20.55,2.0,1.0,2.75,15.250000
3,2,2021-02-01 00:53:27,2021-02-01 01:11:41,N,1.0,152,241,1.0,6.70,21.00,...,0.5,0.00,0.0,,0.3,22.30,2.0,1.0,0.00,18.233333
4,2,2021-02-01 00:57:46,2021-02-01 01:06:44,N,1.0,75,42,1.0,1.89,8.50,...,0.5,2.45,0.0,,0.3,12.25,1.0,1.0,0.00,8.966667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64567,2,2021-02-28 22:19:00,2021-02-28 22:29:00,,,129,7,,2.63,10.04,...,0.0,0.00,0.0,,0.3,10.34,,,,10.000000
64568,2,2021-02-28 23:18:00,2021-02-28 23:27:00,,,116,166,,1.87,8.33,...,0.0,1.89,0.0,,0.3,10.52,,,,9.000000
64569,2,2021-02-28 23:44:00,2021-02-28 23:58:00,,,74,151,,2.40,12.61,...,0.0,0.00,0.0,,0.3,12.91,,,,14.000000
64570,2,2021-02-28 23:07:00,2021-02-28 23:14:00,,,42,42,,1.11,11.95,...,0.0,0.00,0.0,,0.3,15.00,,,,7.000000


In [10]:
#Feature Engineering
df_train["PU_DO"] = df_train["PULocationID"] + "_" + df_train["DOLocationID"]
df_val["PU_DO"] = df_val["PULocationID"] + "_" + df_val["DOLocationID"]

In [11]:
dv = DictVectorizer()

categorical = ["PU_DO"] #["PULocationID", "DOLocationID"]
numerical = ["trip_distance"]

train_dicts = df_train[categorical + numerical].to_dict(orient="records")
X_train = dv.fit_transform(train_dicts)


val_dicts = df_val[categorical + numerical].to_dict(orient="records")
X_val = dv.transform(val_dicts)

In [12]:
target = "duration"
y_train = df_train[target].values
y_val = df_val[target].values

In [13]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)
mean_squared_error(y_val, y_pred, squared=False)

7.758715210452687

- Lasso(alpha=0.001): 6.583
- Ridge(alpha=0.001): 5.996

In [14]:
# with open("./models/lin_reg.bin", "wb") as f_out:
#     pickle.dump((dv, lr), f_out)

In [14]:
# mlflow_experiemnt example
with mlflow.start_run():
    mlflow.set_tag("developer", "aiden")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.parquet")
    mlflow.log_param("val-data-path", "./data/green_tripdata_2021-02.parquet")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    ls = Lasso(alpha)
    ls.fit(X_train, y_train)

    y_pred = ls.predict(X_val)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)

    with open("./models/las_reg.bin", "wb") as f_out:
        pickle.dump((dv, ls), f_out)
    # from video 2.4 - logging model method1
    mlflow.log_artifact(local_path="./models/las_reg.bin", artifact_path="models_pickle")

In [16]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [17]:
train = xgb.DMatrix(X_train, y_train)
valid = xgb.DMatrix(X_val, y_val)

In [20]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, "validation")],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse",rmse)

        return {"loss": rmse, "status":STATUS_OK}

In [21]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0), # exp(-3), exp(0) - [0.o5, 1]
    'reg_aplha': hp.loguniform('reg_aplha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_wight': hp.loguniform('min_child_wight', -1, 3),
    'objective': 'reg:linear',
    'seed':42,
    }

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:7.81011                           
[1]	validation-rmse:6.89649                           
[2]	validation-rmse:6.68186                           
[3]	validation-rmse:6.61918                           
[4]	validation-rmse:6.59348                           
[5]	validation-rmse:6.58129                           
[6]	validation-rmse:6.57603                           
[7]	validation-rmse:6.57002                           
[8]	validation-rmse:6.55769                           
[9]	validation-rmse:6.55226                           
[10]	validation-rmse:6.54684                          
[11]	validation-rmse:6.54329                          
[12]	validation-rmse:6.53961                          
[13]	validation-rmse:6.53427                          
[14]	validation-rmse:6.52967                          
[15]	validation-rmse:6.52466                          
[16]	validation-rmse:6.52447                          
[17]	validation-rmse:6.51990                          
[18]	valid


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[2]	validation-rmse:7.06583                                                    
[3]	validation-rmse:6.87335                                                    
[4]	validation-rmse:6.79884                                                    
[5]	validation-rmse:6.76206                                                    
[6]	validation-rmse:6.74638                                                    
[7]	validation-rmse:6.73491                                                    
[8]	validation-rmse:6.72740                                                    
[9]	validation-rmse:6.72235                                                    
[10]	validation-rmse:6.71973                                                   
[11]	validation-rmse:6.71576                                                   
[12]	validation-rmse:6.71148                                                   
[13]	validation-rmse:6.70886                                                   
[14]	validation-rmse:6.70538            


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:10.07602                                                   
[1]	validation-rmse:8.71392                                                    
[2]	validation-rmse:7.87144                                                    
[3]	validation-rmse:7.36066                                                    
[4]	validation-rmse:7.06302                                                    
[5]	validation-rmse:6.88640                                                    
[6]	validation-rmse:6.77570                                                    
[7]	validation-rmse:6.69815                                                    
[8]	validation-rmse:6.64788                                                    
[9]	validation-rmse:6.61231                                                    
[10]	validation-rmse:6.59070                                                   
[11]	validation-rmse:6.57496                                                   
[12]	validation-rmse:6.56309            


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.48141                                                   
[1]	validation-rmse:10.83814                                                   
[2]	validation-rmse:10.27359                                                   
[3]	validation-rmse:9.77805                                                    
[4]	validation-rmse:9.34712                                                    
[5]	validation-rmse:8.96734                                                    
[6]	validation-rmse:8.64490                                                    
[7]	validation-rmse:8.35763                                                    
[8]	validation-rmse:8.11719                                                    
[9]	validation-rmse:7.90242                                                    
[10]	validation-rmse:7.71719                                                   
[11]	validation-rmse:7.55771                                                   
[12]	validation-rmse:7.42177            


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:8.77443                                                       
[1]	validation-rmse:7.41813                                                       
[2]	validation-rmse:6.92692                                                       
[3]	validation-rmse:6.74692                                                       
[4]	validation-rmse:6.65255                                                       
[5]	validation-rmse:6.61430                                                       
[6]	validation-rmse:6.58824                                                       
[7]	validation-rmse:6.57324                                                       
[8]	validation-rmse:6.56262                                                       
[9]	validation-rmse:6.55268                                                       
[10]	validation-rmse:6.55036                                                      
[11]	validation-rmse:6.54518                                                      
[12]


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.62606                                                     
[1]	validation-rmse:11.09451                                                     
[2]	validation-rmse:10.61596                                                     
[3]	validation-rmse:10.18275                                                     
[4]	validation-rmse:9.79384                                                      
[5]	validation-rmse:9.44378                                                      
[6]	validation-rmse:9.13189                                                      
[7]	validation-rmse:8.85301                                                      
[8]	validation-rmse:8.60621                                                      
[9]	validation-rmse:8.38138                                                      
[10]	validation-rmse:8.18023                                                     
[11]	validation-rmse:8.00058                                                     
[12]	validation-


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:6.82005                                                       
[1]	validation-rmse:6.75586                                                       
[2]	validation-rmse:6.73405                                                       
[3]	validation-rmse:6.72454                                                       
[4]	validation-rmse:6.72034                                                       
[5]	validation-rmse:6.71197                                                       
[6]	validation-rmse:6.69936                                                       
[7]	validation-rmse:6.69817                                                       
[8]	validation-rmse:6.70035                                                       
[9]	validation-rmse:6.69417                                                       
[10]	validation-rmse:6.69416                                                      
[11]	validation-rmse:6.68879                                                      
[12]


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:7.78840                                                       
[1]	validation-rmse:6.89184                                                       
[2]	validation-rmse:6.68942                                                       
[3]	validation-rmse:6.63411                                                       
[4]	validation-rmse:6.60975                                                       
[5]	validation-rmse:6.59603                                                       
[6]	validation-rmse:6.58852                                                       
[7]	validation-rmse:6.58112                                                       
[8]	validation-rmse:6.57792                                                       
[9]	validation-rmse:6.57298                                                       
[10]	validation-rmse:6.56910                                                      
[11]	validation-rmse:6.56539                                                      
[12]


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[5]	validation-rmse:7.38759                                                    
[6]	validation-rmse:7.20852                                                    
[7]	validation-rmse:7.08325                                                    
[8]	validation-rmse:6.99695                                                    
[9]	validation-rmse:6.93841                                                    
[10]	validation-rmse:6.89495                                                   
[11]	validation-rmse:6.86742                                                   
[12]	validation-rmse:6.84497                                                   
[13]	validation-rmse:6.82912                                                   
[14]	validation-rmse:6.81756                                                   
[15]	validation-rmse:6.80675                                                   
[16]	validation-rmse:6.80010                                                   
[17]	validation-rmse:6.79637            


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:10.98078                                                   
[1]	validation-rmse:10.00029                                                   
[2]	validation-rmse:9.22548                                                    
[3]	validation-rmse:8.62449                                                    
[4]	validation-rmse:8.15884                                                    
[5]	validation-rmse:7.79662                                                    
[6]	validation-rmse:7.52695                                                    
[7]	validation-rmse:7.31841                                                    
[8]	validation-rmse:7.15737                                                    
[9]	validation-rmse:7.03225                                                    
[10]	validation-rmse:6.93867                                                   
[11]	validation-rmse:6.86730                                                   
[12]	validation-rmse:6.80759            


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[1]	validation-rmse:10.28210                                                    
[2]	validation-rmse:9.57036                                                     
[3]	validation-rmse:8.99362                                                     
[4]	validation-rmse:8.53054                                                     
[5]	validation-rmse:8.15901                                                     
[6]	validation-rmse:7.86333                                                     
[7]	validation-rmse:7.63019                                                     
[8]	validation-rmse:7.44431                                                     
[9]	validation-rmse:7.29705                                                     
[10]	validation-rmse:7.18309                                                    
[11]	validation-rmse:7.08903                                                    
[12]	validation-rmse:7.01688                                                    
[13]	validation-rmse:6.95841


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:8.61822                                                     
[1]	validation-rmse:7.30774                                                     
[2]	validation-rmse:6.86062                                                     
[3]	validation-rmse:6.70104                                                     
[4]	validation-rmse:6.61938                                                     
[5]	validation-rmse:6.59115                                                     
[6]	validation-rmse:6.56660                                                     
[7]	validation-rmse:6.55744                                                     
[8]	validation-rmse:6.54874                                                     
[9]	validation-rmse:6.54493                                                     
[10]	validation-rmse:6.53848                                                    
[11]	validation-rmse:6.53350                                                    
[12]	validation-rmse:6.53112


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:7.95583                                                     
[1]	validation-rmse:6.95287                                                     
[2]	validation-rmse:6.71411                                                     
[3]	validation-rmse:6.63159                                                     
[4]	validation-rmse:6.59625                                                     
[5]	validation-rmse:6.58424                                                     
[6]	validation-rmse:6.57301                                                     
[7]	validation-rmse:6.56748                                                     
[8]	validation-rmse:6.56068                                                     
[9]	validation-rmse:6.55491                                                     
[10]	validation-rmse:6.55017                                                    
[11]	validation-rmse:6.54813                                                    
[12]	validation-rmse:6.54492


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.31883                                                    
[1]	validation-rmse:10.55763                                                    
[2]	validation-rmse:9.91005                                                     
[3]	validation-rmse:9.36349                                                     
[4]	validation-rmse:8.90584                                                     
[5]	validation-rmse:8.52443                                                     
[6]	validation-rmse:8.20747                                                     
[7]	validation-rmse:7.94165                                                     
[8]	validation-rmse:7.72414                                                     
[9]	validation-rmse:7.54303                                                     
[10]	validation-rmse:7.39198                                                    
[11]	validation-rmse:7.26975                                                    
[12]	validation-rmse:7.16629


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[3]	validation-rmse:8.97321                                                     
[4]	validation-rmse:8.51517                                                     
[5]	validation-rmse:8.15228                                                     
[6]	validation-rmse:7.86436                                                     
[7]	validation-rmse:7.63801                                                     
[8]	validation-rmse:7.46179                                                     
[9]	validation-rmse:7.32295                                                     
[10]	validation-rmse:7.21307                                                    
[11]	validation-rmse:7.12793                                                    
[12]	validation-rmse:7.05894                                                    
[13]	validation-rmse:7.00512                                                    
[14]	validation-rmse:6.96321                                                    
[15]	validation-rmse:6.93185


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.12307                                                    
[1]	validation-rmse:10.23348                                                    
[2]	validation-rmse:9.50629                                                     
[3]	validation-rmse:8.91778                                                     
[4]	validation-rmse:8.43598                                                     
[5]	validation-rmse:8.06405                                                     
[6]	validation-rmse:7.75759                                                     
[7]	validation-rmse:7.50780                                                     
[8]	validation-rmse:7.32944                                                     
[9]	validation-rmse:7.17103                                                     
[10]	validation-rmse:7.04419                                                    
[11]	validation-rmse:6.95175                                                    
[12]	validation-rmse:6.87275


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:8.09431                                                     
[1]	validation-rmse:7.01638                                                     
[2]	validation-rmse:6.74092                                                     
[3]	validation-rmse:6.65069                                                     
[4]	validation-rmse:6.62234                                                     
[5]	validation-rmse:6.60808                                                     
[6]	validation-rmse:6.60081                                                     
[7]	validation-rmse:6.59210                                                     
[8]	validation-rmse:6.57628                                                     
[9]	validation-rmse:6.57331                                                     
[10]	validation-rmse:6.56862                                                    
[11]	validation-rmse:6.56325                                                    
[12]	validation-rmse:6.55833


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:9.78424                                                     
[1]	validation-rmse:8.36402                                                     
[2]	validation-rmse:7.57865                                                     
[3]	validation-rmse:7.14149                                                     
[4]	validation-rmse:6.89386                                                     
[5]	validation-rmse:6.77112                                                     
[6]	validation-rmse:6.69296                                                     
[7]	validation-rmse:6.63954                                                     
[8]	validation-rmse:6.61019                                                     
[9]	validation-rmse:6.58984                                                     
[10]	validation-rmse:6.57397                                                    
[11]	validation-rmse:6.56449                                                    
[12]	validation-rmse:6.55677


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:6.79597                                                     
[1]	validation-rmse:6.76845                                                     
[2]	validation-rmse:6.75591                                                     
[3]	validation-rmse:6.74462                                                     
[4]	validation-rmse:6.73489                                                     
[5]	validation-rmse:6.73061                                                     
[6]	validation-rmse:6.72092                                                     
[7]	validation-rmse:6.72134                                                     
[8]	validation-rmse:6.71865                                                     
[9]	validation-rmse:6.72085                                                     
[10]	validation-rmse:6.71771                                                    
[11]	validation-rmse:6.71545                                                    
[12]	validation-rmse:6.71048


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.30913                                                    
[1]	validation-rmse:10.53969                                                    
[2]	validation-rmse:9.88283                                                     
[3]	validation-rmse:9.32647                                                     
[4]	validation-rmse:8.86270                                                     
[5]	validation-rmse:8.46462                                                     
[6]	validation-rmse:8.13868                                                     
[7]	validation-rmse:7.87353                                                     
[8]	validation-rmse:7.64534                                                     
[9]	validation-rmse:7.46525                                                     
[10]	validation-rmse:7.30371                                                    
[11]	validation-rmse:7.18601                                                    
[12]	validation-rmse:7.08102


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.80175                                                    
[1]	validation-rmse:11.41763                                                    
[2]	validation-rmse:11.06006                                                    
[3]	validation-rmse:10.72706                                                    
[4]	validation-rmse:10.41558                                                    
[5]	validation-rmse:10.12536                                                    
[6]	validation-rmse:9.85670                                                     
[7]	validation-rmse:9.60868                                                     
[8]	validation-rmse:9.37609                                                     
[9]	validation-rmse:9.16117                                                     
[10]	validation-rmse:8.96166                                                    
[11]	validation-rmse:8.77893                                                    
[12]	validation-rmse:8.60749


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.73002                                                     
[1]	validation-rmse:11.28585                                                     
[2]	validation-rmse:10.87504                                                     
[3]	validation-rmse:10.49774                                                     
[4]	validation-rmse:10.15130                                                     
[5]	validation-rmse:9.83486                                                      
[6]	validation-rmse:9.54325                                                      
[7]	validation-rmse:9.27845                                                      
[8]	validation-rmse:9.03457                                                      
[9]	validation-rmse:8.81411                                                      
[10]	validation-rmse:8.61118                                                     
[11]	validation-rmse:8.42729                                                     
[12]	validation-


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:11.67967                                                       
[1]	validation-rmse:11.19283                                                       
[2]	validation-rmse:10.74894                                                       
[3]	validation-rmse:10.34450                                                       
[4]	validation-rmse:9.97701                                                        
[5]	validation-rmse:9.64404                                                        
[6]	validation-rmse:9.34181                                                        
[7]	validation-rmse:9.06860                                                        
[8]	validation-rmse:8.82205                                                        
[9]	validation-rmse:8.59989                                                        
[10]	validation-rmse:8.39988                                                       
[11]	validation-rmse:8.21941                                                


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:10.62424                                                     
[1]	validation-rmse:9.45875                                                      
[2]	validation-rmse:8.62026                                                      
[3]	validation-rmse:8.02564                                                      
[4]	validation-rmse:7.59569                                                      
[5]	validation-rmse:7.30300                                                      
[6]	validation-rmse:7.09259                                                      
[7]	validation-rmse:6.94977                                                      
[8]	validation-rmse:6.84992                                                      
[9]	validation-rmse:6.76919                                                      
[10]	validation-rmse:6.71568                                                     
[11]	validation-rmse:6.67520                                                     
[12]	validation-


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:10.65684                                                     
[1]	validation-rmse:9.50718                                                      
[2]	validation-rmse:8.67261                                                      
[3]	validation-rmse:8.07706                                                      
[4]	validation-rmse:7.64870                                                      
[5]	validation-rmse:7.35218                                                      
[6]	validation-rmse:7.14533                                                      
[7]	validation-rmse:6.98965                                                      
[8]	validation-rmse:6.88667                                                      
[9]	validation-rmse:6.80662                                                      
[10]	validation-rmse:6.75089                                                     
[11]	validation-rmse:6.71116                                                     
[12]	validation-


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:10.10547                                                     
[1]	validation-rmse:8.75650                                                      
[2]	validation-rmse:7.92184                                                      
[3]	validation-rmse:7.40733                                                      
[4]	validation-rmse:7.09424                                                      
[5]	validation-rmse:6.90751                                                      
[6]	validation-rmse:6.78961                                                      
[7]	validation-rmse:6.71678                                                      
[8]	validation-rmse:6.66339                                                      
[9]	validation-rmse:6.63455                                                      
[10]	validation-rmse:6.61380                                                     
[11]	validation-rmse:6.59729                                                     
[12]	validation-


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:9.44229                                                     
[1]	validation-rmse:8.00724                                                     
[2]	validation-rmse:7.30645                                                     
[3]	validation-rmse:6.95323                                                     
[4]	validation-rmse:6.77845                                                     
[5]	validation-rmse:6.68275                                                     
[6]	validation-rmse:6.62939                                                     
[7]	validation-rmse:6.59285                                                     
[8]	validation-rmse:6.57226                                                     
[9]	validation-rmse:6.56091                                                     
[10]	validation-rmse:6.55140                                                    
[11]	validation-rmse:6.54317                                                    
[12]	validation-rmse:6.53901


Parameters: { "min_child_wight", "reg_aplha" } are not used.




[0]	validation-rmse:10.82266                                                    
[1]	validation-rmse:9.75689                                                     
[2]	validation-rmse:8.94870                                                     
[3]	validation-rmse:8.33973                                                     
[4]	validation-rmse:7.88896                                                     
[5]	validation-rmse:7.55861                                                     
[6]	validation-rmse:7.31384                                                     
[7]	validation-rmse:7.13348                                                     
[8]	validation-rmse:7.00007                                                     
[9]	validation-rmse:6.89873                                                     
[10]	validation-rmse:6.82575                                                    
[11]	validation-rmse:6.77010                                                    
[12]	validation-rmse:6.72907

KeyboardInterrupt: 

In [16]:
params = {
    'learning_rate': 0.20072438443168863,
    'max_depth': 53,
    'min_child_wight': 2.1112596906064938,
    'objective': 'reg:linear',
    'reg_aplha': 0.04888540530278026,
    'reg_lambda': 0.18312272392405518,
    'seed': 42,
}
mlflow.xgboost.autolog()

booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=1000,
    evals=[(valid, "validation")],
    early_stopping_rounds=50
)

2024/05/19 17:01:32 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '27230d38a93a4de9b84d19c32d3cb0f9', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow
Parameters: { "min_child_wight", "reg_aplha" } are not used.



[0]	validation-rmse:10.62424
[1]	validation-rmse:9.45875
[2]	validation-rmse:8.62026
[3]	validation-rmse:8.02564
[4]	validation-rmse:7.59569
[5]	validation-rmse:7.30300
[6]	validation-rmse:7.09259
[7]	validation-rmse:6.94977
[8]	validation-rmse:6.84992
[9]	validation-rmse:6.76919
[10]	validation-rmse:6.71568
[11]	validation-rmse:6.67520
[12]	validation-rmse:6.64495
[13]	validation-rmse:6.61953
[14]	validation-rmse:6.59835
[15]	validation-rmse:6.58643
[16]	validation-rmse:6.57501
[17]	validation-rmse:6.56648
[18]	validation-rmse:6.55778
[19]	validation-rmse:6.55230
[20]	validation-rmse:6.54715
[21]	validation-rmse:6.54280
[22]	validation-rmse:6.53910
[23]	validation-rmse:6.53376
[24]	validation-rmse:6.53054
[25]	validation-rmse:6.52862
[26]	validation-rmse:6.52646
[27]	validation-rmse:6.52379
[28]	validation-rmse:6.52212
[29]	validation-rmse:6.52020
[30]	validation-rmse:6.51742
[31]	validation-rmse:6.51619
[32]	validation-rmse:6.51561
[33]	validation-rmse:6.51392
[34]	validation-rmse:6.

2024/05/19 17:03:18 ERROR mlflow.xgboost: Failed to log feature importance plot. XGBoost autologging will ignore the failure and continue. Exception: 
Traceback (most recent call last):
  File "/home/codespace/anaconda3/lib/python3.11/site-packages/mlflow/xgboost/__init__.py", line 684, in train_impl
    log_feature_importance_plot(features, importance, imp_type)
  File "/home/codespace/anaconda3/lib/python3.11/site-packages/mlflow/xgboost/__init__.py", line 577, in log_feature_importance_plot
    fig.tight_layout()
  File "/home/codespace/anaconda3/lib/python3.11/site-packages/matplotlib/figure.py", line 3540, in tight_layout
    engine.execute(self)
  File "/home/codespace/anaconda3/lib/python3.11/site-packages/matplotlib/layout_engine.py", line 181, in execute
    renderer = fig._get_renderer()
               ^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/anaconda3/lib/python3.11/site-packages/matplotlib/figure.py", line 2754, in _get_renderer
    return self.canvas.get_renderer()
    

Error in callback <function _draw_all_if_interactive at 0x784c77363c40> (for post_execute), with arguments args (),kwargs {}:


ValueError: Image size of 640x84170 pixels is too large. It must be less than 2^16 in each direction.

ValueError: Image size of 640x84170 pixels is too large. It must be less than 2^16 in each direction.

<Figure size 640x84170 with 1 Axes>

In [19]:
mlflow.xgboost.autolog(disable=True)

In [20]:
#from video 2.4 - logiing models method2
with mlflow.start_run():
    params = {
        'learning_rate': 0.20072438443168863,
        'max_depth': 53,
        'min_child_wight': 2.1112596906064938,
        'objective': 'reg:linear',
        'reg_aplha': 0.04888540530278026,
        'reg_lambda': 0.18312272392405518,
        'seed': 42,
    }

    mlflow.log_params(params)

    booster = xgb.train(
        params=params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, "validation")],
        early_stopping_rounds=50
    )
    y_pred = booster.predict(valid)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse",rmse)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

Parameters: { "min_child_wight", "reg_aplha" } are not used.



[0]	validation-rmse:10.62424
[1]	validation-rmse:9.45875
[2]	validation-rmse:8.62026
[3]	validation-rmse:8.02564
[4]	validation-rmse:7.59569
[5]	validation-rmse:7.30300
[6]	validation-rmse:7.09259
[7]	validation-rmse:6.94977
[8]	validation-rmse:6.84992
[9]	validation-rmse:6.76919
[10]	validation-rmse:6.71568
[11]	validation-rmse:6.67520
[12]	validation-rmse:6.64495
[13]	validation-rmse:6.61953
[14]	validation-rmse:6.59835
[15]	validation-rmse:6.58643
[16]	validation-rmse:6.57501
[17]	validation-rmse:6.56648
[18]	validation-rmse:6.55778
[19]	validation-rmse:6.55230
[20]	validation-rmse:6.54715
[21]	validation-rmse:6.54280
[22]	validation-rmse:6.53910
[23]	validation-rmse:6.53376
[24]	validation-rmse:6.53054
[25]	validation-rmse:6.52862
[26]	validation-rmse:6.52646
[27]	validation-rmse:6.52379
[28]	validation-rmse:6.52212
[29]	validation-rmse:6.52020
[30]	validation-rmse:6.51742
[31]	validation-rmse:6.51619
[32]	validation-rmse:6.51561
[33]	validation-rmse:6.51392
[34]	validation-rmse:6.



In [21]:
# use saved model path1 as python fun in pandas dfs
logged_model = 'runs:/ef251edfc00d490db12c90fe5a05ed31/models_mlflow'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)



In [22]:
# path2 xgb model
xgb_model = mlflow.xgboost.load_model(logged_model)



In [23]:
y_pred = xgb_model.predict(valid)

In [24]:
y_pred[:10]

array([16.36843 ,  6.822243, 15.503294, 23.838814,  9.429673, 16.862064,
       11.109989,  8.710851,  8.762387, 13.363783], dtype=float32)