In [1]:
!python -V

Python 3.9.16


In [2]:
import pandas as pd

In [3]:
import pickle

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error

In [6]:
import mlflow


mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='/Users/alekseikatridi/my-mlops/2week/mlruns/1', creation_time=1685300379159, experiment_id='1', last_update_time=1685300379159, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [7]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [8]:
df_train = read_dataframe('./data/green_tripdata_2021-01.parquet')
df_val = read_dataframe('./data/green_tripdata_2021-02.parquet')

In [9]:
len(df_train), len(df_val)

(73908, 61921)

In [10]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [11]:
categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
numerical = ['trip_distance']

dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [12]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [13]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

mean_squared_error(y_val, y_pred, squared=False)

7.758715206931833

In [18]:
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [19]:
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.csv")
    mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.csv")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")

In [20]:
import xgboost as xgb

In [21]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [22]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [23]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [None]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

[0]	validation-rmse:20.00166                                                                                                 
[1]	validation-rmse:18.89677                                                                                                 
[2]	validation-rmse:17.87316                                                                                                 
[3]	validation-rmse:16.92649                                                                                                 
[4]	validation-rmse:16.05115                                                                                                 
[5]	validation-rmse:15.24190                                                                                                 
[6]	validation-rmse:14.49534                                                                                                 
[7]	validation-rmse:13.80623                                                                                          

[63]	validation-rmse:6.59910                                                                                                 
[64]	validation-rmse:6.59491                                                                                                 
[65]	validation-rmse:6.59071                                                                                                 
[66]	validation-rmse:6.58690                                                                                                 
[67]	validation-rmse:6.58347                                                                                                 
[68]	validation-rmse:6.58044                                                                                                 
[69]	validation-rmse:6.57697                                                                                                 
[70]	validation-rmse:6.57394                                                                                          

[128]	validation-rmse:6.51112                                                                                                
[129]	validation-rmse:6.51062                                                                                                
[130]	validation-rmse:6.51029                                                                                                
[131]	validation-rmse:6.50997                                                                                                
[132]	validation-rmse:6.50974                                                                                                
[133]	validation-rmse:6.50940                                                                                                
[134]	validation-rmse:6.50916                                                                                                
[135]	validation-rmse:6.50884                                                                                         

[193]	validation-rmse:6.49787                                                                                                
[194]	validation-rmse:6.49786                                                                                                
[195]	validation-rmse:6.49777                                                                                                
[196]	validation-rmse:6.49764                                                                                                
[197]	validation-rmse:6.49748                                                                                                
[198]	validation-rmse:6.49739                                                                                                
[199]	validation-rmse:6.49716                                                                                                
[200]	validation-rmse:6.49712                                                                                         

[258]	validation-rmse:6.49117                                                                                                
[259]	validation-rmse:6.49086                                                                                                
[260]	validation-rmse:6.49081                                                                                                
[261]	validation-rmse:6.49053                                                                                                
[262]	validation-rmse:6.49047                                                                                                
[263]	validation-rmse:6.49040                                                                                                
[264]	validation-rmse:6.49024                                                                                                
[265]	validation-rmse:6.49022                                                                                         

[323]	validation-rmse:6.48594                                                                                                
[324]	validation-rmse:6.48587                                                                                                
[325]	validation-rmse:6.48583                                                                                                
[326]	validation-rmse:6.48590                                                                                                
[327]	validation-rmse:6.48587                                                                                                
[328]	validation-rmse:6.48582                                                                                                
[329]	validation-rmse:6.48566                                                                                                
[330]	validation-rmse:6.48563                                                                                         

[388]	validation-rmse:6.48365                                                                                                
[389]	validation-rmse:6.48364                                                                                                
[390]	validation-rmse:6.48362                                                                                                
[391]	validation-rmse:6.48355                                                                                                
[392]	validation-rmse:6.48351                                                                                                
[393]	validation-rmse:6.48348                                                                                                
[394]	validation-rmse:6.48343                                                                                                
[395]	validation-rmse:6.48340                                                                                         

[453]	validation-rmse:6.48339                                                                                                
[454]	validation-rmse:6.48336                                                                                                
[455]	validation-rmse:6.48335                                                                                                
[0]	validation-rmse:18.07816                                                                                                 
[1]	validation-rmse:15.57656                                                                                                 
[2]	validation-rmse:13.56921                                                                                                 
[3]	validation-rmse:11.97555                                                                                                 
[4]	validation-rmse:10.73576                                                                                          

[60]	validation-rmse:6.49701                                                                                                 
[61]	validation-rmse:6.49672                                                                                                 
[62]	validation-rmse:6.49586                                                                                                 
[63]	validation-rmse:6.49485                                                                                                 
[64]	validation-rmse:6.49381                                                                                                 
[65]	validation-rmse:6.49320                                                                                                 
[66]	validation-rmse:6.49241                                                                                                 
[67]	validation-rmse:6.49202                                                                                          

[125]	validation-rmse:6.46618                                                                                                
[126]	validation-rmse:6.46580                                                                                                
[127]	validation-rmse:6.46534                                                                                                
[128]	validation-rmse:6.46526                                                                                                
[129]	validation-rmse:6.46472                                                                                                
[130]	validation-rmse:6.46491                                                                                                
[131]	validation-rmse:6.46464                                                                                                
[132]	validation-rmse:6.46451                                                                                         

[190]	validation-rmse:6.45234                                                                                                
[191]	validation-rmse:6.45204                                                                                                
[192]	validation-rmse:6.45221                                                                                                
[193]	validation-rmse:6.45215                                                                                                
[194]	validation-rmse:6.45178                                                                                                
[195]	validation-rmse:6.45177                                                                                                
[196]	validation-rmse:6.45172                                                                                                
[197]	validation-rmse:6.45161                                                                                         

[255]	validation-rmse:6.44814                                                                                                
[256]	validation-rmse:6.44815                                                                                                
[257]	validation-rmse:6.44812                                                                                                
[258]	validation-rmse:6.44800                                                                                                
[259]	validation-rmse:6.44778                                                                                                
[260]	validation-rmse:6.44774                                                                                                
[261]	validation-rmse:6.44756                                                                                                
[262]	validation-rmse:6.44727                                                                                         

[320]	validation-rmse:6.44858                                                                                                
[321]	validation-rmse:6.44865                                                                                                
[322]	validation-rmse:6.44851                                                                                                
[323]	validation-rmse:6.44822                                                                                                
[324]	validation-rmse:6.44827                                                                                                
[325]	validation-rmse:6.44813                                                                                                
[326]	validation-rmse:6.44827                                                                                                
[327]	validation-rmse:6.44845                                                                                         

[42]	validation-rmse:6.41945                                                                                                 
[43]	validation-rmse:6.41895                                                                                                 
[44]	validation-rmse:6.41701                                                                                                 
[45]	validation-rmse:6.41581                                                                                                 
[46]	validation-rmse:6.41477                                                                                                 
[47]	validation-rmse:6.41315                                                                                                 
[48]	validation-rmse:6.41196                                                                                                 
[49]	validation-rmse:6.41087                                                                                          

[107]	validation-rmse:6.37523                                                                                                
[108]	validation-rmse:6.37421                                                                                                
[109]	validation-rmse:6.37367                                                                                                
[110]	validation-rmse:6.37245                                                                                                
[111]	validation-rmse:6.37290                                                                                                
[112]	validation-rmse:6.37222                                                                                                
[113]	validation-rmse:6.37105                                                                                                
[114]	validation-rmse:6.37059                                                                                         

[172]	validation-rmse:6.37938                                                                                                
[173]	validation-rmse:6.38248                                                                                                
[174]	validation-rmse:6.38308                                                                                                
[175]	validation-rmse:6.38672                                                                                                
[176]	validation-rmse:6.38766                                                                                                
[177]	validation-rmse:6.38787                                                                                                
[178]	validation-rmse:6.38768                                                                                                
[179]	validation-rmse:6.38796                                                                                         

[53]	validation-rmse:6.54752                                                                                                 
[54]	validation-rmse:6.54562                                                                                                 
[55]	validation-rmse:6.54173                                                                                                 
[56]	validation-rmse:6.54161                                                                                                 
[57]	validation-rmse:6.54036                                                                                                 
[58]	validation-rmse:6.54095                                                                                                 
[59]	validation-rmse:6.53973                                                                                                 
[60]	validation-rmse:6.53677                                                                                          

[118]	validation-rmse:6.50004                                                                                                
[119]	validation-rmse:6.50026                                                                                                
[120]	validation-rmse:6.50170                                                                                                
[121]	validation-rmse:6.50217                                                                                                
[122]	validation-rmse:6.50150                                                                                                
[123]	validation-rmse:6.50105                                                                                                
[124]	validation-rmse:6.50063                                                                                                
[125]	validation-rmse:6.50073                                                                                         

[183]	validation-rmse:6.49601                                                                                                
[184]	validation-rmse:6.49583                                                                                                
[185]	validation-rmse:6.49645                                                                                                
[186]	validation-rmse:6.49686                                                                                                
[187]	validation-rmse:6.49666                                                                                                
[188]	validation-rmse:6.49608                                                                                                
[189]	validation-rmse:6.49594                                                                                                
[190]	validation-rmse:6.49494                                                                                         

[1]	validation-rmse:16.04596                                                                                                 
[2]	validation-rmse:14.13797                                                                                                 
[3]	validation-rmse:12.58302                                                                                                 
[4]	validation-rmse:11.32324                                                                                                 
[5]	validation-rmse:10.31162                                                                                                 
[6]	validation-rmse:9.50855                                                                                                  
[7]	validation-rmse:8.87137                                                                                                  
[8]	validation-rmse:8.37019                                                                                           

[66]	validation-rmse:6.45980                                                                                                 
[67]	validation-rmse:6.45922                                                                                                 
[68]	validation-rmse:6.45818                                                                                                 
[69]	validation-rmse:6.45753                                                                                                 
[70]	validation-rmse:6.45668                                                                                                 
[71]	validation-rmse:6.45620                                                                                                 
[72]	validation-rmse:6.45550                                                                                                 
[73]	validation-rmse:6.45452                                                                                          

[131]	validation-rmse:6.43103                                                                                                
[132]	validation-rmse:6.43112                                                                                                
[133]	validation-rmse:6.43126                                                                                                
[134]	validation-rmse:6.43103                                                                                                
[135]	validation-rmse:6.43059                                                                                                
[136]	validation-rmse:6.43030                                                                                                
[137]	validation-rmse:6.43001                                                                                                
[138]	validation-rmse:6.42984                                                                                         

[196]	validation-rmse:6.41939                                                                                                
[197]	validation-rmse:6.41911                                                                                                
[198]	validation-rmse:6.41916                                                                                                
[199]	validation-rmse:6.41907                                                                                                
[200]	validation-rmse:6.41886                                                                                                
[201]	validation-rmse:6.41874                                                                                                
[202]	validation-rmse:6.41841                                                                                                
[203]	validation-rmse:6.41852                                                                                         

[261]	validation-rmse:6.41858                                                                                                
[262]	validation-rmse:6.41837                                                                                                
[263]	validation-rmse:6.41867                                                                                                
[264]	validation-rmse:6.41865                                                                                                
[265]	validation-rmse:6.41858                                                                                                
[266]	validation-rmse:6.41866                                                                                                
[267]	validation-rmse:6.41950                                                                                                
[268]	validation-rmse:6.41957                                                                                         

[43]	validation-rmse:6.53085                                                                                                 
[44]	validation-rmse:6.52935                                                                                                 
[45]	validation-rmse:6.52715                                                                                                 
[46]	validation-rmse:6.52630                                                                                                 
[47]	validation-rmse:6.52458                                                                                                 
[48]	validation-rmse:6.52350                                                                                                 
[49]	validation-rmse:6.52218                                                                                                 
[50]	validation-rmse:6.52065                                                                                          

[108]	validation-rmse:6.46417                                                                                                
[109]	validation-rmse:6.46361                                                                                                
[110]	validation-rmse:6.46271                                                                                                
[111]	validation-rmse:6.46216                                                                                                
[112]	validation-rmse:6.46137                                                                                                
[113]	validation-rmse:6.46001                                                                                                
[114]	validation-rmse:6.45935                                                                                                
[115]	validation-rmse:6.45862                                                                                         

[173]	validation-rmse:6.42399                                                                                                
[174]	validation-rmse:6.42336                                                                                                
[175]	validation-rmse:6.42291                                                                                                
[176]	validation-rmse:6.42166                                                                                                
[177]	validation-rmse:6.42128                                                                                                
[178]	validation-rmse:6.42088                                                                                                
[179]	validation-rmse:6.42072                                                                                                
[180]	validation-rmse:6.42005                                                                                         

[238]	validation-rmse:6.39135                                                                                                
[239]	validation-rmse:6.39107                                                                                                
[240]	validation-rmse:6.39081                                                                                                
[241]	validation-rmse:6.39029                                                                                                
[242]	validation-rmse:6.38994                                                                                                
[243]	validation-rmse:6.38982                                                                                                
[244]	validation-rmse:6.38938                                                                                                
[245]	validation-rmse:6.38930                                                                                         

[303]	validation-rmse:6.37192                                                                                                
[304]	validation-rmse:6.37224                                                                                                
[305]	validation-rmse:6.37187                                                                                                
[306]	validation-rmse:6.37149                                                                                                
[307]	validation-rmse:6.37082                                                                                                
[308]	validation-rmse:6.37061                                                                                                
[309]	validation-rmse:6.37011                                                                                                
[310]	validation-rmse:6.37002                                                                                         

[368]	validation-rmse:6.35425                                                                                                
[369]	validation-rmse:6.35416                                                                                                
[370]	validation-rmse:6.35406                                                                                                
[371]	validation-rmse:6.35389                                                                                                
[372]	validation-rmse:6.35392                                                                                                
[373]	validation-rmse:6.35376                                                                                                
[374]	validation-rmse:6.35350                                                                                                
[375]	validation-rmse:6.35358                                                                                         

[433]	validation-rmse:6.33955                                                                                                
[434]	validation-rmse:6.33938                                                                                                
[435]	validation-rmse:6.33937                                                                                                
[436]	validation-rmse:6.33905                                                                                                
[437]	validation-rmse:6.33895                                                                                                
[438]	validation-rmse:6.33882                                                                                                
[439]	validation-rmse:6.33872                                                                                                
[440]	validation-rmse:6.33859                                                                                         

[498]	validation-rmse:6.33077                                                                                                
[499]	validation-rmse:6.33069                                                                                                
[500]	validation-rmse:6.33034                                                                                                
[501]	validation-rmse:6.33006                                                                                                
[502]	validation-rmse:6.32977                                                                                                
[503]	validation-rmse:6.32954                                                                                                
[504]	validation-rmse:6.32936                                                                                                
[505]	validation-rmse:6.32941                                                                                         

[563]	validation-rmse:6.31972                                                                                                
[564]	validation-rmse:6.31945                                                                                                
[565]	validation-rmse:6.31942                                                                                                
[566]	validation-rmse:6.31937                                                                                                
[567]	validation-rmse:6.31954                                                                                                
[568]	validation-rmse:6.31930                                                                                                
[569]	validation-rmse:6.31904                                                                                                
[570]	validation-rmse:6.31865                                                                                         

[628]	validation-rmse:6.31285                                                                                                
[629]	validation-rmse:6.31274                                                                                                
[630]	validation-rmse:6.31264                                                                                                
[631]	validation-rmse:6.31274                                                                                                
[632]	validation-rmse:6.31256                                                                                                
[633]	validation-rmse:6.31263                                                                                                
[634]	validation-rmse:6.31247                                                                                                
[635]	validation-rmse:6.31225                                                                                         

[693]	validation-rmse:6.30736                                                                                                
[694]	validation-rmse:6.30735                                                                                                
[695]	validation-rmse:6.30699                                                                                                
[696]	validation-rmse:6.30694                                                                                                
[697]	validation-rmse:6.30678                                                                                                
[698]	validation-rmse:6.30664                                                                                                
[699]	validation-rmse:6.30665                                                                                                
[700]	validation-rmse:6.30659                                                                                         

[758]	validation-rmse:6.30259                                                                                                
[759]	validation-rmse:6.30260                                                                                                
[760]	validation-rmse:6.30280                                                                                                
[761]	validation-rmse:6.30260                                                                                                
[762]	validation-rmse:6.30265                                                                                                
[763]	validation-rmse:6.30254                                                                                                
[764]	validation-rmse:6.30253                                                                                                
[765]	validation-rmse:6.30252                                                                                         

In [None]:
mlflow.xgboost.autolog(disable=True)

In [None]:
with mlflow.start_run():
    
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        'objective': 'reg:linear',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    mlflow.log_params(best_params)

    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

mlflow.sklearn.autolog()

for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.parquet")
        mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.parquet")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)
        