In [1]:
!python -V

Python 3.11.5


In [2]:
import pandas as pd
import numpy as np

In [3]:
import os
import pickle

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

In [40]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR
from sklearn.metrics import mean_squared_error, root_mean_squared_error
from sklearn.utils import resample
from scipy.sparse import csr_matrix

In [21]:
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [7]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location=('file:///C:/Users/Cokroaminoto/Documents/Analisis Data/Data '
 'Scientist/DataTalksClub/MLOps - zoomcamp/02-experiment-tracking/mlruns/1'), creation_time=1727228916584, experiment_id='1', last_update_time=1727228916584, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [8]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [9]:
#df_train = read_dataframe('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')
#df_val = read_dataframe('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet')

df_train = read_dataframe('/datasets/dtc/green_tripdata_2021-01.parquet')
df_val = read_dataframe('/datasets/dtc/green_tripdata_2021-02.parquet')

In [10]:
df_train.shape

(73908, 21)

In [11]:
df_val.shape

(61921, 21)

In [12]:
# Take 10% of the data randomly
df_train = df_train.sample(frac=0.1, random_state=42)
df_val = df_val.sample(frac=0.1, random_state=42)

In [13]:
df_train.shape

(7391, 21)

In [14]:
df_val.shape

(6192, 21)

In [15]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [16]:
categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
numerical = ['trip_distance']

dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [17]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [18]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

mean_squared_error(y_val, y_pred, squared=False)



9.486348156352333

In [19]:
# Create directories if they don't exist
try:
    os.makedirs('models/', exist_ok=True)
except Exception as e:
    print(f"Error creating models folder: {e}")

with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [20]:
with mlflow.start_run():

    mlflow.set_tag("developer", "adi")

    mlflow.log_param("train-data-path", "/datasets/dtc/green_tripdata_2021-01.parquet")
    #mlflow.log_param("train-data-path", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet")
    mlflow.log_param("sample-size-train-data", df_train.shape[0])  # Record the number of samples taken
    mlflow.log_param("valid-data-path", "/datasets/dtc/green_tripdata_2021-02.parquet")
    #mlflow.log_param("valid-data-path", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet")
    mlflow.log_param("sample-size-valid-data", df_valid.shape[0])  # Record the number of samples taken

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")



In [22]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [23]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            #num_boost_round=1000,  # for processor multi-core Intel i5 with 8GB RAM or higher
            num_boost_round=200,  # Reduced from 1000 for processor Intel Celeron N3350 with 4GB RAM 
            evals=[(valid, 'validation')],
            #early_stopping_rounds=50  # for processor multi-core Intel i5 with 8GB RAM or higher 
            early_stopping_rounds=20  # Reduced from 50 for processor Intel Celeron N3350 with 4GB RAM 
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)
    return {'loss': rmse, 'status': STATUS_OK}


In [24]:
search_space = {
    #'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)), # for processor multi-core Intel i5 with 8GB RAM or higher
    'max_depth': scope.int(hp.quniform('max_depth', 4, 20, 1)),  # Reduced range for processor Intel Celeron N3350 with 4GB RAM 
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    #'objective': 'reg:linear', # older version
    'objective': 'reg:squarederror',  # newer version
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|                                                                           | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:7.04451                                                                                            
[1]	validation-rmse:6.91873                                                                                            
[2]	validation-rmse:6.91370                                                                                            
[3]	validation-rmse:6.91251                                                                                            
[4]	validation-rmse:6.90556                                                                                            
[5]	validation-rmse:6.89537                                                                                            
[6]	validation-rmse:6.89635                                                                                            
[7]	validation-rmse:6.89262                                                                                            
[8]	validation-rmse:6.89484             




  2%|█                                                 | 1/50 [00:12<10:03, 12.33s/trial, best loss: 6.876838310257262]




[0]	validation-rmse:10.31778                                                                                           
[1]	validation-rmse:8.97836                                                                                            
[2]	validation-rmse:8.15095                                                                                            
[3]	validation-rmse:7.65286                                                                                            
[4]	validation-rmse:7.35739                                                                                            
[5]	validation-rmse:7.18476                                                                                            
[6]	validation-rmse:7.07997                                                                                            
[7]	validation-rmse:7.01487                                                                                            
[8]	validation-rmse:6.97612             




  4%|██                                                | 2/50 [00:44<19:16, 24.10s/trial, best loss: 6.876838310257262]




[0]	validation-rmse:11.52874                                                                                           
[1]	validation-rmse:10.77844                                                                                           
[2]	validation-rmse:10.13954                                                                                           
[3]	validation-rmse:9.59692                                                                                            
[4]	validation-rmse:9.13984                                                                                            
[5]	validation-rmse:8.75250                                                                                            
[6]	validation-rmse:8.42580                                                                                            
[7]	validation-rmse:8.15505                                                                                            
[8]	validation-rmse:7.92995             




  6%|███                                               | 3/50 [00:59<15:44, 20.09s/trial, best loss: 6.815460969919869]




[0]	validation-rmse:11.49378                                                                                           
[1]	validation-rmse:10.72227                                                                                           
[2]	validation-rmse:10.06966                                                                                           
[3]	validation-rmse:9.52100                                                                                            
[4]	validation-rmse:9.06241                                                                                            
[5]	validation-rmse:8.68080                                                                                            
[6]	validation-rmse:8.36539                                                                                            
[7]	validation-rmse:8.10546                                                                                            
[8]	validation-rmse:7.89243             




  8%|████                                              | 4/50 [01:31<18:54, 24.66s/trial, best loss: 6.815460969919869]




[0]	validation-rmse:11.92353                                                                                           
[1]	validation-rmse:11.48218                                                                                           
[2]	validation-rmse:11.07468                                                                                           
[3]	validation-rmse:10.70009                                                                                           
[4]	validation-rmse:10.35725                                                                                           
[5]	validation-rmse:10.04092                                                                                           
[6]	validation-rmse:9.74880                                                                                            
[7]	validation-rmse:9.48993                                                                                            
[8]	validation-rmse:9.24838             




[0]	validation-rmse:11.47719                                                                                           
 10%|█████                                             | 5/50 [02:13<22:54, 30.54s/trial, best loss: 6.815460969919869]




[1]	validation-rmse:10.69271                                                                                           
[2]	validation-rmse:10.03127                                                                                           
[3]	validation-rmse:9.47571                                                                                            
[4]	validation-rmse:9.01265                                                                                            
[5]	validation-rmse:8.62913                                                                                            
[6]	validation-rmse:8.31268                                                                                            
[7]	validation-rmse:8.05176                                                                                            
[8]	validation-rmse:7.83766                                                                                            
[9]	validation-rmse:7.66320             




 12%|██████                                            | 6/50 [02:23<17:29, 23.86s/trial, best loss: 6.815460969919869]




[0]	validation-rmse:11.72597                                                                                           
[1]	validation-rmse:11.12465                                                                                           
[2]	validation-rmse:10.58994                                                                                           
[3]	validation-rmse:10.11705                                                                                           
[4]	validation-rmse:9.69976                                                                                            
[5]	validation-rmse:9.33242                                                                                            
[6]	validation-rmse:9.00877                                                                                            
[7]	validation-rmse:8.72657                                                                                            
[8]	validation-rmse:8.47976             




 14%|███████                                           | 7/50 [02:39<15:17, 21.33s/trial, best loss: 6.815460969919869]




[0]	validation-rmse:11.42697                                                                                           
[1]	validation-rmse:10.60520                                                                                           
[2]	validation-rmse:9.92428                                                                                            
[3]	validation-rmse:9.35489                                                                                            
[4]	validation-rmse:8.88772                                                                                            
[5]	validation-rmse:8.50075                                                                                            
[6]	validation-rmse:8.18372                                                                                            
[7]	validation-rmse:7.92664                                                                                            
[8]	validation-rmse:7.71781             




 16%|████████                                          | 8/50 [02:54<13:35, 19.42s/trial, best loss: 6.815460969919869]




[0]	validation-rmse:11.98549                                                                                           
[1]	validation-rmse:11.59867                                                                                           
[2]	validation-rmse:11.23971                                                                                           
[3]	validation-rmse:10.90376                                                                                           
[4]	validation-rmse:10.59062                                                                                           
[5]	validation-rmse:10.30334                                                                                           
[6]	validation-rmse:10.03538                                                                                           
[7]	validation-rmse:9.78727                                                                                            
[8]	validation-rmse:9.55841             




 18%|█████████                                         | 9/50 [04:03<23:41, 34.66s/trial, best loss: 6.815460969919869]




[0]	validation-rmse:11.56933                                                                                           
[1]	validation-rmse:10.85048                                                                                           
[2]	validation-rmse:10.23072                                                                                           
[3]	validation-rmse:9.69978                                                                                            
[4]	validation-rmse:9.24623                                                                                            
[5]	validation-rmse:8.85918                                                                                            
[6]	validation-rmse:8.53110                                                                                            
[7]	validation-rmse:8.25572                                                                                            
[8]	validation-rmse:8.02279             




 20%|█████████▊                                       | 10/50 [04:25<20:29, 30.74s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:11.88415                                                                                           
[1]	validation-rmse:11.41289                                                                                           
[2]	validation-rmse:10.98243                                                                                           
[3]	validation-rmse:10.58866                                                                                           
[4]	validation-rmse:10.22986                                                                                           
[5]	validation-rmse:9.90048                                                                                            
[6]	validation-rmse:9.60377                                                                                            
[7]	validation-rmse:9.32940                                                                                            
[8]	validation-rmse:9.08686             




 22%|██████████▊                                      | 11/50 [04:43<17:26, 26.84s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:7.38540                                                                                            
[1]	validation-rmse:6.93795                                                                                            
[2]	validation-rmse:6.88022                                                                                            
[3]	validation-rmse:6.88406                                                                                            
[4]	validation-rmse:6.87960                                                                                            
[5]	validation-rmse:6.87384                                                                                            
[6]	validation-rmse:6.87132                                                                                            
[7]	validation-rmse:6.86712                                                                                            
[8]	validation-rmse:6.86863             




 24%|███████████▊                                     | 12/50 [04:47<12:41, 20.04s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:7.92298                                                                                            
[1]	validation-rmse:7.07144                                                                                            
[2]	validation-rmse:6.92755                                                                                            
[3]	validation-rmse:6.89066                                                                                            
[4]	validation-rmse:6.88354                                                                                            
[5]	validation-rmse:6.87434                                                                                            
[6]	validation-rmse:6.87512                                                                                            
[7]	validation-rmse:6.86634                                                                                            
[8]	validation-rmse:6.85737             




 26%|████████████▋                                    | 13/50 [04:56<10:14, 16.61s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:11.52264                                                                                           
[1]	validation-rmse:10.76981                                                                                           
[2]	validation-rmse:10.12902                                                                                           
[3]	validation-rmse:9.58625                                                                                            
[4]	validation-rmse:9.12875                                                                                            
[5]	validation-rmse:8.74571                                                                                            
[6]	validation-rmse:8.42525                                                                                            
[7]	validation-rmse:8.15890                                                                                            
[8]	validation-rmse:7.93844             




 28%|█████████████▋                                   | 14/50 [05:29<13:00, 21.67s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:9.18996                                                                                            
[1]	validation-rmse:7.78696                                                                                            
[2]	validation-rmse:7.20852                                                                                            
[3]	validation-rmse:6.98129                                                                                            
[4]	validation-rmse:6.89242                                                                                            
[5]	validation-rmse:6.85891                                                                                            
[6]	validation-rmse:6.85304                                                                                            
[7]	validation-rmse:6.84177                                                                                            
[8]	validation-rmse:6.83787             




 30%|██████████████▋                                  | 15/50 [05:44<11:24, 19.56s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:7.87202                                                                                            
[1]	validation-rmse:7.07778                                                                                            
[2]	validation-rmse:6.94566                                                                                            
[3]	validation-rmse:6.91523                                                                                            
[4]	validation-rmse:6.91386                                                                                            
[5]	validation-rmse:6.91178                                                                                            
[6]	validation-rmse:6.91264                                                                                            
[7]	validation-rmse:6.91513                                                                                            
[8]	validation-rmse:6.91549             




 32%|███████████████▋                                 | 16/50 [05:52<09:06, 16.08s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:11.72555                                                                                           
[1]	validation-rmse:11.12813                                                                                           
[2]	validation-rmse:10.59569                                                                                           
[3]	validation-rmse:10.12032                                                                                           
[4]	validation-rmse:9.70508                                                                                            
[5]	validation-rmse:9.33939                                                                                            
[6]	validation-rmse:9.01732                                                                                            
[7]	validation-rmse:8.73217                                                                                            
[8]	validation-rmse:8.48879             




 34%|████████████████▋                                | 17/50 [06:37<13:36, 24.75s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:11.94446                                                                                           
[1]	validation-rmse:11.52142                                                                                           
[2]	validation-rmse:11.12983                                                                                           
[3]	validation-rmse:10.76765                                                                                           
[4]	validation-rmse:10.43336                                                                                           
[5]	validation-rmse:10.12543                                                                                           
[6]	validation-rmse:9.84165                                                                                            
[7]	validation-rmse:9.58068                                                                                            
[8]	validation-rmse:9.34129             




 36%|█████████████████▋                               | 18/50 [07:07<14:05, 26.43s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:7.23441                                                                                            
[1]	validation-rmse:6.91697                                                                                            
[2]	validation-rmse:6.87366                                                                                            
[3]	validation-rmse:6.87559                                                                                            
[4]	validation-rmse:6.86562                                                                                            
[5]	validation-rmse:6.86436                                                                                            
[6]	validation-rmse:6.85887                                                                                            
[7]	validation-rmse:6.85797                                                                                            
[8]	validation-rmse:6.85680             




 38%|██████████████████▌                              | 19/50 [07:12<10:18, 19.96s/trial, best loss: 6.813466976283383]




[0]	validation-rmse:11.01685                                                                                           
[1]	validation-rmse:9.95050                                                                                            
[2]	validation-rmse:9.14117                                                                                            
[3]	validation-rmse:8.53730                                                                                            
[4]	validation-rmse:8.09089                                                                                            
[5]	validation-rmse:7.76437                                                                                            
[6]	validation-rmse:7.52618                                                                                            
[7]	validation-rmse:7.35464                                                                                            
[8]	validation-rmse:7.22989             




[0]	validation-rmse:11.00654                                                                                           
[1]	validation-rmse:9.93676                                                                                            
[2]	validation-rmse:9.12404                                                                                            
 40%|███████████████████▌                             | 20/50 [07:23<08:24, 16.82s/trial, best loss: 6.813466976283383]




[3]	validation-rmse:8.51963                                                                                            
[4]	validation-rmse:8.07398                                                                                            
[5]	validation-rmse:7.74187                                                                                            
[6]	validation-rmse:7.50117                                                                                            
[7]	validation-rmse:7.32533                                                                                            
[8]	validation-rmse:7.19915                                                                                            
[9]	validation-rmse:7.10119                                                                                            
[10]	validation-rmse:7.03438                                                                                           
[11]	validation-rmse:6.98523            




[0]	validation-rmse:10.72731                                                                                           
[1]	validation-rmse:9.52625                                                                                            
[2]	validation-rmse:8.68196                                                                                            
[3]	validation-rmse:8.09494                                                                                            
[4]	validation-rmse:7.70297                                                                                            
[5]	validation-rmse:7.43658                                                                                            
 42%|████████████████████▌                            | 21/50 [07:29<06:36, 13.67s/trial, best loss: 6.807823020734649]




[6]	validation-rmse:7.24718                                                                                            
[7]	validation-rmse:7.12789                                                                                            
[8]	validation-rmse:7.04196                                                                                            
[9]	validation-rmse:6.98690                                                                                            
[10]	validation-rmse:6.94490                                                                                           
[11]	validation-rmse:6.91823                                                                                           
[12]	validation-rmse:6.89582                                                                                           
[13]	validation-rmse:6.87694                                                                                           
[14]	validation-rmse:6.86952            




[0]	validation-rmse:10.52752                                                                                           
[1]	validation-rmse:9.24575                                                                                            
[2]	validation-rmse:8.39918                                                                                            
                                                                                                                       




[3]	validation-rmse:7.84366
[4]	validation-rmse:7.49642                                                                                            
[5]	validation-rmse:7.26770                                                                                            
[6]	validation-rmse:7.11756                                                                                            
[7]	validation-rmse:7.02852                                                                                            
[8]	validation-rmse:6.96815                                                                                            
[9]	validation-rmse:6.92858                                                                                            
[10]	validation-rmse:6.90099                                                                                           
[11]	validation-rmse:6.87781                                                                                           
[12]	validat




[0]	validation-rmse:9.99779                                                                                            
[1]	validation-rmse:8.58895                                                                                            
 46%|██████████████████████▌                          | 23/50 [07:43<04:37, 10.27s/trial, best loss: 6.791712877899082]




[2]	validation-rmse:7.80349                                                                                            
[3]	validation-rmse:7.37479                                                                                            
[4]	validation-rmse:7.14621                                                                                            
[5]	validation-rmse:7.01258                                                                                            
[6]	validation-rmse:6.94435                                                                                            
[7]	validation-rmse:6.90516                                                                                            
[8]	validation-rmse:6.87912                                                                                            
[9]	validation-rmse:6.86029                                                                                            
[10]	validation-rmse:6.85335            




[0]	validation-rmse:9.55531                                                                                            
[1]	validation-rmse:8.12662                                                                                            
[2]	validation-rmse:7.46057                                                                                            
[3]	validation-rmse:7.16037                                                                                            
 48%|████████████████████████                          | 24/50 [07:50<04:00,  9.26s/trial, best loss: 6.78515925152334]




[4]	validation-rmse:7.02099                                                                                            
[5]	validation-rmse:6.95769                                                                                            
[6]	validation-rmse:6.92510                                                                                            
[7]	validation-rmse:6.90896                                                                                            
[8]	validation-rmse:6.89985                                                                                            
[9]	validation-rmse:6.89232                                                                                            
[10]	validation-rmse:6.89143                                                                                           
[11]	validation-rmse:6.88978                                                                                           
[12]	validation-rmse:6.88886            




[0]	validation-rmse:9.83074                                                                                            
 50%|█████████████████████████                         | 25/50 [07:58<03:42,  8.89s/trial, best loss: 6.78515925152334]




[1]	validation-rmse:8.39196                                                                                            
[2]	validation-rmse:7.63115                                                                                            
[3]	validation-rmse:7.24717                                                                                            
[4]	validation-rmse:7.04565                                                                                            
[5]	validation-rmse:6.94670                                                                                            
[6]	validation-rmse:6.89547                                                                                            
[7]	validation-rmse:6.86394                                                                                            
[8]	validation-rmse:6.84653                                                                                            
[9]	validation-rmse:6.83594             




[0]	validation-rmse:8.80486                                                                                            
[1]	validation-rmse:7.53054                                                                                            
[2]	validation-rmse:7.10210                                                                                            
 52%|██████████████████████████                        | 26/50 [08:07<03:32,  8.85s/trial, best loss: 6.78515925152334]




[3]	validation-rmse:6.96434                                                                                            
[4]	validation-rmse:6.90745                                                                                            
[5]	validation-rmse:6.88828                                                                                            
[6]	validation-rmse:6.87676                                                                                            
[7]	validation-rmse:6.87302                                                                                            
[8]	validation-rmse:6.86873                                                                                            
[9]	validation-rmse:6.86063                                                                                            
[10]	validation-rmse:6.86206                                                                                           
[11]	validation-rmse:6.85938            




 54%|███████████████████████████                       | 27/50 [08:16<03:36,  9.40s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:10.97489                                                                                           
[1]	validation-rmse:9.88716                                                                                            
[2]	validation-rmse:9.07194                                                                                            
[3]	validation-rmse:8.46334                                                                                            
[4]	validation-rmse:8.01747                                                                                            
[5]	validation-rmse:7.69269                                                                                            
[6]	validation-rmse:7.45469                                                                                            
[7]	validation-rmse:7.28535                                                                                            
[8]	validation-rmse:7.15970             




 56%|████████████████████████████                      | 28/50 [08:29<03:48, 10.38s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:9.90994                                                                                            
[1]	validation-rmse:8.48531                                                                                            
[2]	validation-rmse:7.71259                                                                                            
[3]	validation-rmse:7.31093                                                                                            
[4]	validation-rmse:7.09973                                                                                            
[5]	validation-rmse:6.99262                                                                                            
[6]	validation-rmse:6.93386                                                                                            
[7]	validation-rmse:6.90256                                                                                            
[8]	validation-rmse:6.88061             




 58%|████████████████████████████▉                     | 29/50 [08:38<03:31, 10.08s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:11.26107                                                                                           
[1]	validation-rmse:10.33617                                                                                           
[2]	validation-rmse:9.59091                                                                                            
[3]	validation-rmse:8.99542                                                                                            
[4]	validation-rmse:8.52564                                                                                            
[5]	validation-rmse:8.15711                                                                                            
[6]	validation-rmse:7.86705                                                                                            
[7]	validation-rmse:7.64477                                                                                            
[8]	validation-rmse:7.47311             




 60%|██████████████████████████████                    | 30/50 [08:49<03:23, 10.19s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:10.22138                                                                                           
[1]	validation-rmse:8.85536                                                                                            
[2]	validation-rmse:8.02156                                                                                            
[3]	validation-rmse:7.53182                                                                                            
[4]	validation-rmse:7.24920                                                                                            
[5]	validation-rmse:7.08720                                                                                            
[6]	validation-rmse:6.98713                                                                                            
[7]	validation-rmse:6.92570                                                                                            
[8]	validation-rmse:6.89307             




 62%|███████████████████████████████                   | 31/50 [09:05<03:47, 11.99s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:8.94425                                                                                            
[1]	validation-rmse:7.58396                                                                                            
[2]	validation-rmse:7.10069                                                                                            
[3]	validation-rmse:6.93309                                                                                            
[4]	validation-rmse:6.88197                                                                                            
[5]	validation-rmse:6.85650                                                                                            
[6]	validation-rmse:6.83894                                                                                            
[7]	validation-rmse:6.83511                                                                                            
[8]	validation-rmse:6.83745             




[0]	validation-rmse:10.73497                                                                                           
[1]	validation-rmse:9.53795                                                                                            
 64%|████████████████████████████████                  | 32/50 [09:38<05:23, 18.00s/trial, best loss: 6.78515925152334]




[2]	validation-rmse:8.68763                                                                                            
[3]	validation-rmse:8.09925                                                                                            
[4]	validation-rmse:7.69720                                                                                            
[5]	validation-rmse:7.42652                                                                                            
[6]	validation-rmse:7.24641                                                                                            
[7]	validation-rmse:7.12187                                                                                            
[8]	validation-rmse:7.03583                                                                                            
[9]	validation-rmse:6.97930                                                                                            
[10]	validation-rmse:6.93966            




 66%|█████████████████████████████████                 | 33/50 [09:56<05:10, 18.29s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:6.90082                                                                                            
[1]	validation-rmse:6.88629                                                                                            
[2]	validation-rmse:6.89095                                                                                            
[3]	validation-rmse:6.88513                                                                                            
[4]	validation-rmse:6.88536                                                                                            
[5]	validation-rmse:6.90033                                                                                            
[6]	validation-rmse:6.90087                                                                                            
[7]	validation-rmse:6.90082                                                                                            
[8]	validation-rmse:6.89670             




 68%|██████████████████████████████████                | 34/50 [10:01<03:50, 14.38s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:10.68101                                                                                           
[1]	validation-rmse:9.45837                                                                                            
[2]	validation-rmse:8.60990                                                                                            
[3]	validation-rmse:8.03547                                                                                            
[4]	validation-rmse:7.65287                                                                                            
[5]	validation-rmse:7.39735                                                                                            
[6]	validation-rmse:7.22993                                                                                            
[7]	validation-rmse:7.12009                                                                                            
[8]	validation-rmse:7.04405             




[0]	validation-rmse:10.17087                                                                                           
 70%|███████████████████████████████████               | 35/50 [10:23<03:48, 15.21s/trial, best loss: 6.78515925152334]




[1]	validation-rmse:8.80322                                                                                            
[2]	validation-rmse:7.98877                                                                                            
[3]	validation-rmse:7.51077                                                                                            
[4]	validation-rmse:7.23337                                                                                            
[5]	validation-rmse:7.08905                                                                                            
[6]	validation-rmse:6.99239                                                                                            
[7]	validation-rmse:6.94505                                                                                            
[8]	validation-rmse:6.91478                                                                                            
[9]	validation-rmse:6.89235             




 72%|████████████████████████████████████              | 36/50 [10:33<03:31, 15.09s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:8.25877                                                                                            
[1]	validation-rmse:7.18714                                                                                            
[2]	validation-rmse:6.93342                                                                                            
[3]	validation-rmse:6.86550                                                                                            
[4]	validation-rmse:6.84141                                                                                            
[5]	validation-rmse:6.83580                                                                                            
[6]	validation-rmse:6.83205                                                                                            
[7]	validation-rmse:6.83152                                                                                            
[8]	validation-rmse:6.82488             




[0]	validation-rmse:11.23352                                                                                           
[1]	validation-rmse:10.29211                                                                                           
 74%|█████████████████████████████████████             | 37/50 [10:47<03:03, 14.15s/trial, best loss: 6.78515925152334]




[2]	validation-rmse:9.53611                                                                                            
[3]	validation-rmse:8.93830                                                                                            
[4]	validation-rmse:8.46945                                                                                            
[5]	validation-rmse:8.10149                                                                                            
[6]	validation-rmse:7.82108                                                                                            
[7]	validation-rmse:7.60352                                                                                            
[8]	validation-rmse:7.43448                                                                                            
[9]	validation-rmse:7.29853                                                                                            
[10]	validation-rmse:7.19673            




 76%|██████████████████████████████████████            | 38/50 [11:01<02:57, 14.76s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:9.36498                                                                                            
[1]	validation-rmse:7.94642                                                                                            
[2]	validation-rmse:7.33152                                                                                            
[3]	validation-rmse:7.07149                                                                                            
[4]	validation-rmse:6.95827                                                                                            
[5]	validation-rmse:6.90491                                                                                            
[6]	validation-rmse:6.87401                                                                                            
[7]	validation-rmse:6.85898                                                                                            
[8]	validation-rmse:6.85004             




 78%|███████████████████████████████████████           | 39/50 [11:15<02:38, 14.37s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:11.75414                                                                                           
[1]	validation-rmse:11.17525                                                                                           
[2]	validation-rmse:10.65898                                                                                           
[3]	validation-rmse:10.19920                                                                                           
[4]	validation-rmse:9.79127                                                                                            
[5]	validation-rmse:9.43091                                                                                            
[6]	validation-rmse:9.11244                                                                                            
[7]	validation-rmse:8.83150                                                                                            
[8]	validation-rmse:8.58582             




 80%|████████████████████████████████████████          | 40/50 [11:45<03:10, 19.03s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:10.37776                                                                                           
[1]	validation-rmse:9.06210                                                                                            
[2]	validation-rmse:8.21662                                                                                            
[3]	validation-rmse:7.69293                                                                                            
[4]	validation-rmse:7.38360                                                                                            
[5]	validation-rmse:7.19364                                                                                            
[6]	validation-rmse:7.08104                                                                                            
[7]	validation-rmse:6.99800                                                                                            
[8]	validation-rmse:6.94668             




 82%|█████████████████████████████████████████         | 41/50 [12:00<02:41, 17.90s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:11.34849                                                                                           
[1]	validation-rmse:10.48015                                                                                           
[2]	validation-rmse:9.76950                                                                                            
[3]	validation-rmse:9.18926                                                                                            
[4]	validation-rmse:8.72010                                                                                            
[5]	validation-rmse:8.34147                                                                                            
[6]	validation-rmse:8.03977                                                                                            
[7]	validation-rmse:7.79969                                                                                            
[8]	validation-rmse:7.60897             




 84%|██████████████████████████████████████████        | 42/50 [12:17<02:20, 17.56s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:9.86293                                                                                            
[1]	validation-rmse:8.42732                                                                                            
[2]	validation-rmse:7.66423                                                                                            
[3]	validation-rmse:7.26375                                                                                            
[4]	validation-rmse:7.05978                                                                                            
[5]	validation-rmse:6.95224                                                                                            
[6]	validation-rmse:6.89647                                                                                            
[7]	validation-rmse:6.86474                                                                                            
[8]	validation-rmse:6.84575             




 86%|███████████████████████████████████████████       | 43/50 [12:32<01:57, 16.81s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:11.65039                                                                                           
[1]	validation-rmse:10.99151                                                                                           
[2]	validation-rmse:10.41502                                                                                           
[3]	validation-rmse:9.91338                                                                                            
[4]	validation-rmse:9.47883                                                                                            
[5]	validation-rmse:9.10150                                                                                            
[6]	validation-rmse:8.77675                                                                                            
[7]	validation-rmse:8.49632                                                                                            
[8]	validation-rmse:8.25308             




                                                                                                                       




[0]	validation-rmse:10.80879
[1]	validation-rmse:9.65279                                                                                            
[2]	validation-rmse:8.81441                                                                                            
[3]	validation-rmse:8.22211                                                                                            
[4]	validation-rmse:7.80997                                                                                            
[5]	validation-rmse:7.52230                                                                                            
[6]	validation-rmse:7.31914                                                                                            
[7]	validation-rmse:7.17879                                                                                            
[8]	validation-rmse:7.08790                                                                                            
[9]	validat




 90%|█████████████████████████████████████████████     | 45/50 [13:10<01:30, 18.03s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:8.54339                                                                                            
[1]	validation-rmse:7.35773                                                                                            
[2]	validation-rmse:7.02854                                                                                            
[3]	validation-rmse:6.92726                                                                                            
[4]	validation-rmse:6.89298                                                                                            
[5]	validation-rmse:6.87241                                                                                            
[6]	validation-rmse:6.86658                                                                                            
[7]	validation-rmse:6.86493                                                                                            
[8]	validation-rmse:6.86188             




 92%|██████████████████████████████████████████████    | 46/50 [13:23<01:06, 16.53s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:11.81758                                                                                           
[1]	validation-rmse:11.28978                                                                                           
[2]	validation-rmse:10.81296                                                                                           
[3]	validation-rmse:10.38332                                                                                           
[4]	validation-rmse:9.99742                                                                                            
[5]	validation-rmse:9.65124                                                                                            
[6]	validation-rmse:9.34141                                                                                            
[7]	validation-rmse:9.06474                                                                                            
[8]	validation-rmse:8.81824             




 94%|███████████████████████████████████████████████   | 47/50 [14:03<01:10, 23.60s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:7.56442                                                                                            
[1]	validation-rmse:6.98718                                                                                            
[2]	validation-rmse:6.91432                                                                                            
[3]	validation-rmse:6.89113                                                                                            
[4]	validation-rmse:6.88372                                                                                            
[5]	validation-rmse:6.87918                                                                                            
[6]	validation-rmse:6.87349                                                                                            
[7]	validation-rmse:6.87654                                                                                            
[8]	validation-rmse:6.87181             




 96%|████████████████████████████████████████████████  | 48/50 [14:16<00:41, 20.56s/trial, best loss: 6.78515925152334]




[0]	validation-rmse:11.14377                                                                                           
[1]	validation-rmse:10.14553                                                                                           
[2]	validation-rmse:9.36750                                                                                            
[3]	validation-rmse:8.76998                                                                                            
[4]	validation-rmse:8.31136                                                                                            
[5]	validation-rmse:7.95217                                                                                            
[6]	validation-rmse:7.69427                                                                                            
[7]	validation-rmse:7.50184                                                                                            
[8]	validation-rmse:7.35154             




[0]	validation-rmse:11.41251                                                                                           
 98%|█████████████████████████████████████████████████ | 49/50 [14:45<00:22, 22.51s/trial, best loss: 6.78515925152334]




[1]	validation-rmse:10.58456                                                                                           
[2]	validation-rmse:9.89506                                                                                            
[3]	validation-rmse:9.32495                                                                                            
[4]	validation-rmse:8.85441                                                                                            
[5]	validation-rmse:8.47122                                                                                            
[6]	validation-rmse:8.15697                                                                                            
[7]	validation-rmse:7.90285                                                                                            
[8]	validation-rmse:7.69935                                                                                            
[9]	validation-rmse:7.53510             




100%|██████████████████████████████████████████████████| 50/50 [14:58<00:00, 17.97s/trial, best loss: 6.78515925152334]


In [26]:
mlflow.xgboost.autolog(disable=True)

In [None]:
def objective(params):
    #mlflow.end_run()  # End the previous run if it exists
    with mlflow.start_run(nested=True): # Use Nested Runs
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=200,
            evals=[(valid, 'validation')],
            early_stopping_rounds=20
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)
    return {'loss': rmse, 'status': STATUS_OK}


In [29]:
with mlflow.start_run():
    
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    #best_params = {
    #    'learning_rate': 0.09585355369315604,
    #    'max_depth': 30,
    #    'min_child_weight': 1.060597050922164,
    #    'objective': 'reg:linear',
    #    'reg_alpha': 0.018060244040060163,
    #    'reg_lambda': 0.011658731377413597,
    #    'seed': 42
    #}
    # Using best_result to set best_params
    best_params = {
        'learning_rate': best_result['learning_rate'],
        'max_depth': int(best_result['max_depth']),  # Make sure to convert to int
        'min_child_weight': best_result['min_child_weight'],
        #'objective': 'reg:linear', #older version
        'objective': 'reg:squarederror',  
        'reg_alpha': best_result['reg_alpha'],
        'reg_lambda': best_result['reg_lambda'],
        'seed': 42
    }

    best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

    mlflow.log_params(best_params)

    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=200, #1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=20 #50
    )

    y_pred = booster.predict(valid)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")


  0%|                                                                           | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:7.13259                                                                                            
[1]	validation-rmse:6.91844                                                                                            
[2]	validation-rmse:6.90432                                                                                            
[3]	validation-rmse:6.89006                                                                                            
[4]	validation-rmse:6.87571                                                                                            
[5]	validation-rmse:6.87444                                                                                            
[6]	validation-rmse:6.87527                                                                                            
[7]	validation-rmse:6.87217                                                                                            
[8]	validation-rmse:6.87226             




  2%|█                                                 | 1/50 [00:09<07:39,  9.39s/trial, best loss: 6.864888798804896]




[0]	validation-rmse:9.38116                                                                                            
[1]	validation-rmse:7.96889                                                                                            
[2]	validation-rmse:7.35823                                                                                            
[3]	validation-rmse:7.10112                                                                                            
[4]	validation-rmse:6.99412                                                                                            
[5]	validation-rmse:6.93926                                                                                            
[6]	validation-rmse:6.91070                                                                                            
[7]	validation-rmse:6.89804                                                                                            
[8]	validation-rmse:6.89273             




  4%|██                                                | 2/50 [00:18<07:20,  9.18s/trial, best loss: 6.864888798804896]




[0]	validation-rmse:7.28497                                                                                            
[1]	validation-rmse:6.96226                                                                                            
[2]	validation-rmse:6.93456                                                                                            
[3]	validation-rmse:6.92739                                                                                            
[4]	validation-rmse:6.91899                                                                                            
[5]	validation-rmse:6.92177                                                                                            
[6]	validation-rmse:6.91899                                                                                            
[7]	validation-rmse:6.91486                                                                                            
[8]	validation-rmse:6.91905             




  6%|███                                               | 3/50 [00:31<08:36, 10.99s/trial, best loss: 6.864888798804896]




[0]	validation-rmse:11.47917                                                                                           
[1]	validation-rmse:10.69723                                                                                           
[2]	validation-rmse:10.03749                                                                                           
[3]	validation-rmse:9.48544                                                                                            
[4]	validation-rmse:9.02575                                                                                            
[5]	validation-rmse:8.63778                                                                                            
[6]	validation-rmse:8.31242                                                                                            
[7]	validation-rmse:8.05349                                                                                            
[8]	validation-rmse:7.84038             




[0]	validation-rmse:10.89201                                                                                           
  8%|████                                               | 4/50 [00:48<09:55, 12.94s/trial, best loss: 6.82707982983743]




[1]	validation-rmse:9.76198                                                                                            
[2]	validation-rmse:8.93225                                                                                            
[3]	validation-rmse:8.33219                                                                                            
[4]	validation-rmse:7.90534                                                                                            
[5]	validation-rmse:7.60285                                                                                            
[6]	validation-rmse:7.39143                                                                                            
[7]	validation-rmse:7.24311                                                                                            
[8]	validation-rmse:7.13827                                                                                            
[9]	validation-rmse:7.06383             




 10%|█████                                              | 5/50 [01:03<10:33, 14.07s/trial, best loss: 6.82707982983743]




[0]	validation-rmse:11.39473                                                                                           
[1]	validation-rmse:10.55609                                                                                           
[2]	validation-rmse:9.86231                                                                                            
[3]	validation-rmse:9.29217                                                                                            
[4]	validation-rmse:8.82651                                                                                            
[5]	validation-rmse:8.44967                                                                                            
[6]	validation-rmse:8.14555                                                                                            
[7]	validation-rmse:7.90201                                                                                            
[8]	validation-rmse:7.70739             




[0]	validation-rmse:11.90837                                                                                           
[1]	validation-rmse:11.45435                                                                                           
[2]	validation-rmse:11.03792                                                                                           
 12%|██████                                             | 6/50 [01:17<10:04, 13.74s/trial, best loss: 6.82707982983743]




[3]	validation-rmse:10.65503                                                                                           
[4]	validation-rmse:10.30477                                                                                           
[5]	validation-rmse:9.98351                                                                                            
[6]	validation-rmse:9.69005                                                                                            
[7]	validation-rmse:9.42236                                                                                            
[8]	validation-rmse:9.17673                                                                                            
[9]	validation-rmse:8.95429                                                                                            
[10]	validation-rmse:8.75134                                                                                           
[11]	validation-rmse:8.56829            




 14%|███████                                           | 7/50 [01:25<08:38, 12.05s/trial, best loss: 6.815540481592726]




[0]	validation-rmse:11.03614                                                                                           
[1]	validation-rmse:9.98126                                                                                            
[2]	validation-rmse:9.17712                                                                                            
[3]	validation-rmse:8.57371                                                                                            
[4]	validation-rmse:8.12531                                                                                            
[5]	validation-rmse:7.79482                                                                                            
[6]	validation-rmse:7.55421                                                                                            
[7]	validation-rmse:7.37931                                                                                            
[8]	validation-rmse:7.25177             




[0]	validation-rmse:11.00477                                                                                           
[1]	validation-rmse:9.93023                                                                                            
[2]	validation-rmse:9.12234                                                                                            
[3]	validation-rmse:8.51945                                                                                            
                                                                                                                       




[4]	validation-rmse:8.07381
[5]	validation-rmse:7.75019                                                                                            
[6]	validation-rmse:7.51117                                                                                            
[7]	validation-rmse:7.33511                                                                                            
[8]	validation-rmse:7.21184                                                                                            
[9]	validation-rmse:7.12406                                                                                            
[10]	validation-rmse:7.06003                                                                                           
[11]	validation-rmse:7.00911                                                                                           
[12]	validation-rmse:6.97327                                                                                           
[13]	validat




 18%|█████████                                         | 9/50 [01:43<06:57, 10.19s/trial, best loss: 6.815540481592726]




[0]	validation-rmse:11.32216                                                                                           
[1]	validation-rmse:10.43926                                                                                           
[2]	validation-rmse:9.71776                                                                                            
[3]	validation-rmse:9.13310                                                                                            
[4]	validation-rmse:8.66204                                                                                            
[5]	validation-rmse:8.28752                                                                                            
[6]	validation-rmse:7.98755                                                                                            
[7]	validation-rmse:7.75484                                                                                            
[8]	validation-rmse:7.56283             




 20%|█████████▊                                       | 10/50 [02:09<10:03, 15.08s/trial, best loss: 6.815540481592726]




[0]	validation-rmse:10.60996                                                                                           
[1]	validation-rmse:9.34685                                                                                            
[2]	validation-rmse:8.50249                                                                                            
[3]	validation-rmse:7.94567                                                                                            
[4]	validation-rmse:7.57811                                                                                            
[5]	validation-rmse:7.33213                                                                                            
[6]	validation-rmse:7.17019                                                                                            
[7]	validation-rmse:7.05840                                                                                            
[8]	validation-rmse:6.98794             




 22%|██████████▊                                      | 11/50 [02:37<12:12, 18.79s/trial, best loss: 6.815540481592726]




[0]	validation-rmse:7.90177                                                                                            
[1]	validation-rmse:7.06810                                                                                            
[2]	validation-rmse:6.91413                                                                                            
[3]	validation-rmse:6.88597                                                                                            
[4]	validation-rmse:6.87992                                                                                            
[5]	validation-rmse:6.87250                                                                                            
[6]	validation-rmse:6.87452                                                                                            
[7]	validation-rmse:6.87508                                                                                            
[8]	validation-rmse:6.87564             




 24%|███████████▊                                     | 12/50 [02:49<10:42, 16.90s/trial, best loss: 6.815540481592726]




[0]	validation-rmse:10.91315                                                                                           
[1]	validation-rmse:9.79711                                                                                            
[2]	validation-rmse:8.96506                                                                                            
[3]	validation-rmse:8.35522                                                                                            
[4]	validation-rmse:7.91699                                                                                            
[5]	validation-rmse:7.60302                                                                                            
[6]	validation-rmse:7.37838                                                                                            
[7]	validation-rmse:7.22303                                                                                            
[8]	validation-rmse:7.10922             




[0]	validation-rmse:11.62580                                                                                           
 26%|████████████▋                                    | 13/50 [03:06<10:07, 16.41s/trial, best loss: 6.809433066099296]




[1]	validation-rmse:10.94904                                                                                           
[2]	validation-rmse:10.35990                                                                                           
[3]	validation-rmse:9.84940                                                                                            
[4]	validation-rmse:9.40878                                                                                            
[5]	validation-rmse:9.02926                                                                                            
[6]	validation-rmse:8.70384                                                                                            
[7]	validation-rmse:8.42644                                                                                            
[8]	validation-rmse:8.19111                                                                                            
[9]	validation-rmse:7.99149             




 28%|█████████████▋                                   | 14/50 [03:22<10:03, 16.76s/trial, best loss: 6.809433066099296]




[0]	validation-rmse:9.41896                                                                                            
[1]	validation-rmse:7.99013                                                                                            
[2]	validation-rmse:7.36029                                                                                            
[3]	validation-rmse:7.08903                                                                                            
[4]	validation-rmse:6.97180                                                                                            
[5]	validation-rmse:6.91525                                                                                            
[6]	validation-rmse:6.89263                                                                                            
[7]	validation-rmse:6.87610                                                                                            
[8]	validation-rmse:6.86836             




 30%|██████████████▋                                  | 15/50 [03:32<08:31, 14.61s/trial, best loss: 6.809433066099296]




[0]	validation-rmse:8.20016                                                                                            
[1]	validation-rmse:7.20556                                                                                            
[2]	validation-rmse:6.99037                                                                                            
[3]	validation-rmse:6.93368                                                                                            
[4]	validation-rmse:6.91094                                                                                            
[5]	validation-rmse:6.90023                                                                                            
[6]	validation-rmse:6.89866                                                                                            
[7]	validation-rmse:6.89600                                                                                            
[8]	validation-rmse:6.89241             




 32%|███████████████▋                                 | 16/50 [03:48<08:34, 15.14s/trial, best loss: 6.809433066099296]




[0]	validation-rmse:9.74078                                                                                            
[1]	validation-rmse:8.31601                                                                                            
[2]	validation-rmse:7.59962                                                                                            
[3]	validation-rmse:7.25059                                                                                            
[4]	validation-rmse:7.08028                                                                                            
[5]	validation-rmse:6.99501                                                                                            
[6]	validation-rmse:6.94871                                                                                            
[7]	validation-rmse:6.92220                                                                                            
[8]	validation-rmse:6.90732             




 34%|████████████████▋                                | 17/50 [04:08<09:02, 16.45s/trial, best loss: 6.809433066099296]




[0]	validation-rmse:11.94481                                                                                           
[1]	validation-rmse:11.52251                                                                                           
[2]	validation-rmse:11.13220                                                                                           
[3]	validation-rmse:10.77003                                                                                           
[4]	validation-rmse:10.43713                                                                                           
[5]	validation-rmse:10.12917                                                                                           
[6]	validation-rmse:9.84689                                                                                            
[7]	validation-rmse:9.58759                                                                                            
[8]	validation-rmse:9.34903             




 36%|█████████████████▋                               | 18/50 [04:30<09:41, 18.17s/trial, best loss: 6.809433066099296]




[0]	validation-rmse:10.75962                                                                                           
[1]	validation-rmse:9.57260                                                                                            
[2]	validation-rmse:8.72241                                                                                            
[3]	validation-rmse:8.13172                                                                                            
[4]	validation-rmse:7.71895                                                                                            
[5]	validation-rmse:7.43844                                                                                            
[6]	validation-rmse:7.24603                                                                                            
[7]	validation-rmse:7.11722                                                                                            
[8]	validation-rmse:7.02954             




 38%|██████████████████▌                              | 19/50 [04:41<08:19, 16.11s/trial, best loss: 6.807378527833568]




[0]	validation-rmse:11.46197                                                                                           
[1]	validation-rmse:10.66838                                                                                           
[2]	validation-rmse:10.00168                                                                                           
[3]	validation-rmse:9.44582                                                                                            
[4]	validation-rmse:8.98410                                                                                            
[5]	validation-rmse:8.60362                                                                                            
[6]	validation-rmse:8.29154                                                                                            
[7]	validation-rmse:8.03691                                                                                            
[8]	validation-rmse:7.82979             




[0]	validation-rmse:10.22900                                                                                           
 40%|███████████████████▌                             | 20/50 [05:08<09:32, 19.09s/trial, best loss: 6.807378527833568]




[1]	validation-rmse:8.86400                                                                                            
[2]	validation-rmse:8.02924                                                                                            
[3]	validation-rmse:7.53038                                                                                            
[4]	validation-rmse:7.24255                                                                                            
[5]	validation-rmse:7.06546                                                                                            
[6]	validation-rmse:6.97032                                                                                            
[7]	validation-rmse:6.91065                                                                                            
[8]	validation-rmse:6.87657                                                                                            
[9]	validation-rmse:6.85575             




 42%|████████████████████▏                           | 21/50 [05:13<07:21, 15.24s/trial, best loss: 6.7991457744518105]




[0]	validation-rmse:10.25918                                                                                           
[1]	validation-rmse:8.90109                                                                                            
[2]	validation-rmse:8.06268                                                                                            
[3]	validation-rmse:7.55678                                                                                            
[4]	validation-rmse:7.26232                                                                                            
[5]	validation-rmse:7.07976                                                                                            
[6]	validation-rmse:6.97896                                                                                            
[7]	validation-rmse:6.91606                                                                                            
[8]	validation-rmse:6.88115             




 44%|█████████████████████                           | 22/50 [05:20<05:51, 12.54s/trial, best loss: 6.7991457744518105]




[0]	validation-rmse:10.23473                                                                                           
[1]	validation-rmse:8.86763                                                                                            
[2]	validation-rmse:8.03487                                                                                            
[3]	validation-rmse:7.54337                                                                                            
[4]	validation-rmse:7.25453                                                                                            
[5]	validation-rmse:7.08135                                                                                            
[6]	validation-rmse:6.97672                                                                                            
[7]	validation-rmse:6.92062                                                                                            
[8]	validation-rmse:6.88235             




 46%|██████████████████████▌                          | 23/50 [05:27<04:54, 10.89s/trial, best loss: 6.797943993113697]




[0]	validation-rmse:8.74772                                                                                            
[1]	validation-rmse:7.45512                                                                                            
[2]	validation-rmse:7.03977                                                                                            
[3]	validation-rmse:6.89529                                                                                            
[4]	validation-rmse:6.84683                                                                                            
[5]	validation-rmse:6.83006                                                                                            
[6]	validation-rmse:6.81518                                                                                            
[7]	validation-rmse:6.81063                                                                                            
[8]	validation-rmse:6.81434             




[0]	validation-rmse:10.13721                                                                                           
 48%|███████████████████████▌                         | 24/50 [05:33<03:59,  9.20s/trial, best loss: 6.797943993113697]




[1]	validation-rmse:8.75338                                                                                            
[2]	validation-rmse:7.92511                                                                                            
[3]	validation-rmse:7.45815                                                                                            
[4]	validation-rmse:7.19248                                                                                            
[5]	validation-rmse:7.03648                                                                                            
[6]	validation-rmse:6.95433                                                                                            
[7]	validation-rmse:6.90003                                                                                            
[8]	validation-rmse:6.87469                                                                                            
[9]	validation-rmse:6.84969             




[0]	validation-rmse:11.77881                                                                                           
 50%|████████████████████████▌                        | 25/50 [05:41<03:43,  8.94s/trial, best loss: 6.796163589951437]




[1]	validation-rmse:11.21913                                                                                           
[2]	validation-rmse:10.71791                                                                                           
[3]	validation-rmse:10.26866                                                                                           
[4]	validation-rmse:9.86820                                                                                            
[5]	validation-rmse:9.51227                                                                                            
[6]	validation-rmse:9.19405                                                                                            
[7]	validation-rmse:8.91211                                                                                            
[8]	validation-rmse:8.66289                                                                                            
[9]	validation-rmse:8.44120             




 52%|█████████████████████████▍                       | 26/50 [05:54<04:10, 10.45s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:10.14966                                                                                           
[1]	validation-rmse:8.75898                                                                                            
[2]	validation-rmse:7.93665                                                                                            
[3]	validation-rmse:7.46666                                                                                            
[4]	validation-rmse:7.19729                                                                                            
[5]	validation-rmse:7.04684                                                                                            
[6]	validation-rmse:6.96147                                                                                            
[7]	validation-rmse:6.91124                                                                                            
[8]	validation-rmse:6.87534             




[0]	validation-rmse:9.05109                                                                                            
 54%|██████████████████████████▍                      | 27/50 [06:05<03:57, 10.31s/trial, best loss: 6.796163589951437]




[1]	validation-rmse:7.70009                                                                                            
[2]	validation-rmse:7.18588                                                                                            
[3]	validation-rmse:7.00148                                                                                            
[4]	validation-rmse:6.93388                                                                                            
[5]	validation-rmse:6.90917                                                                                            
[6]	validation-rmse:6.89590                                                                                            
[7]	validation-rmse:6.89976                                                                                            
[8]	validation-rmse:6.90058                                                                                            
[9]	validation-rmse:6.89860             




 56%|███████████████████████████▍                     | 28/50 [06:09<03:12,  8.74s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:9.90330                                                                                            
[1]	validation-rmse:8.47888                                                                                            
[2]	validation-rmse:7.71093                                                                                            
[3]	validation-rmse:7.30534                                                                                            
[4]	validation-rmse:7.09540                                                                                            
[5]	validation-rmse:6.98613                                                                                            
[6]	validation-rmse:6.92606                                                                                            
[7]	validation-rmse:6.89094                                                                                            
[8]	validation-rmse:6.87293             




[0]	validation-rmse:6.98072                                                                                            
 58%|████████████████████████████▍                    | 29/50 [06:20<03:14,  9.27s/trial, best loss: 6.796163589951437]




[1]	validation-rmse:6.95919                                                                                            
[2]	validation-rmse:6.96297                                                                                            
[3]	validation-rmse:6.96762                                                                                            
[4]	validation-rmse:6.97092                                                                                            
[5]	validation-rmse:6.97497                                                                                            
[6]	validation-rmse:6.96874                                                                                            
[7]	validation-rmse:6.94176                                                                                            
[8]	validation-rmse:6.94472                                                                                            
[9]	validation-rmse:6.94388             




[0]	validation-rmse:10.55854                                                                                           
[1]	validation-rmse:9.29378                                                                                            
 60%|█████████████████████████████▍                   | 30/50 [06:24<02:27,  7.36s/trial, best loss: 6.796163589951437]




[2]	validation-rmse:8.43927                                                                                            
[3]	validation-rmse:7.87843                                                                                            
[4]	validation-rmse:7.51637                                                                                            
[5]	validation-rmse:7.29100                                                                                            
[6]	validation-rmse:7.14109                                                                                            
[7]	validation-rmse:7.04540                                                                                            
[8]	validation-rmse:6.98058                                                                                            
[9]	validation-rmse:6.93784                                                                                            
[10]	validation-rmse:6.90770            




 62%|██████████████████████████████▍                  | 31/50 [06:29<02:12,  6.99s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:7.70405                                                                                            
[1]	validation-rmse:7.02192                                                                                            
[2]	validation-rmse:6.92545                                                                                            
[3]	validation-rmse:6.89613                                                                                            
[4]	validation-rmse:6.88879                                                                                            
[5]	validation-rmse:6.88665                                                                                            
[6]	validation-rmse:6.88158                                                                                            
[7]	validation-rmse:6.87803                                                                                            
[8]	validation-rmse:6.88095             




 64%|███████████████████████████████▎                 | 32/50 [06:32<01:45,  5.84s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:11.17575                                                                                           
[1]	validation-rmse:10.19739                                                                                           
[2]	validation-rmse:9.42674                                                                                            
[3]	validation-rmse:8.82383                                                                                            
[4]	validation-rmse:8.35774                                                                                            
[5]	validation-rmse:8.00142                                                                                            
[6]	validation-rmse:7.73063                                                                                            
[7]	validation-rmse:7.52423                                                                                            
[8]	validation-rmse:7.36900             




 66%|████████████████████████████████▎                | 33/50 [06:45<02:16,  8.04s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:8.78207                                                                                            
[1]	validation-rmse:7.48208                                                                                            
[2]	validation-rmse:7.04722                                                                                            
[3]	validation-rmse:6.90801                                                                                            
[4]	validation-rmse:6.85040                                                                                            
[5]	validation-rmse:6.83517                                                                                            
[6]	validation-rmse:6.82541                                                                                            
[7]	validation-rmse:6.82392                                                                                            
[8]	validation-rmse:6.82049             




[0]	validation-rmse:11.67835                                                                                           
 68%|█████████████████████████████████▎               | 34/50 [06:50<01:49,  6.87s/trial, best loss: 6.796163589951437]




[1]	validation-rmse:11.04005                                                                                           
[2]	validation-rmse:10.47923                                                                                           
[3]	validation-rmse:9.98696                                                                                            
[4]	validation-rmse:9.55696                                                                                            
[5]	validation-rmse:9.18196                                                                                            
[6]	validation-rmse:8.85698                                                                                            
[7]	validation-rmse:8.57541                                                                                            
[8]	validation-rmse:8.33293                                                                                            
[9]	validation-rmse:8.12320             




 70%|██████████████████████████████████▎              | 35/50 [07:01<02:03,  8.20s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:7.24460                                                                                            
[1]	validation-rmse:6.89412                                                                                            
[2]	validation-rmse:6.86023                                                                                            
[3]	validation-rmse:6.86219                                                                                            
[4]	validation-rmse:6.85937                                                                                            
[5]	validation-rmse:6.85011                                                                                            
[6]	validation-rmse:6.84993                                                                                            
[7]	validation-rmse:6.85056                                                                                            
[8]	validation-rmse:6.84487             




[0]	validation-rmse:9.76087                                                                                            
[1]	validation-rmse:8.32877                                                                                            
 72%|███████████████████████████████████▎             | 36/50 [07:05<01:36,  6.92s/trial, best loss: 6.796163589951437]




[2]	validation-rmse:7.61105                                                                                            
[3]	validation-rmse:7.24202                                                                                            
[4]	validation-rmse:7.06706                                                                                            
[5]	validation-rmse:6.98118                                                                                            
[6]	validation-rmse:6.93786                                                                                            
[7]	validation-rmse:6.91069                                                                                            
[8]	validation-rmse:6.89410                                                                                            
[9]	validation-rmse:6.89533                                                                                            
[10]	validation-rmse:6.89235            




 74%|████████████████████████████████████▎            | 37/50 [07:12<01:30,  6.98s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:9.29896                                                                                            
[1]	validation-rmse:7.88642                                                                                            
[2]	validation-rmse:7.29139                                                                                            
[3]	validation-rmse:7.04746                                                                                            
[4]	validation-rmse:6.94421                                                                                            
[5]	validation-rmse:6.89938                                                                                            
[6]	validation-rmse:6.87197                                                                                            
[7]	validation-rmse:6.85487                                                                                            
[8]	validation-rmse:6.84772             




 76%|█████████████████████████████████████▏           | 38/50 [07:19<01:26,  7.19s/trial, best loss: 6.796163589951437]




[0]	validation-rmse:11.18854                                                                                           
[1]	validation-rmse:10.21623                                                                                           
[2]	validation-rmse:9.45143                                                                                            
[3]	validation-rmse:8.84852                                                                                            
[4]	validation-rmse:8.37688                                                                                            
[5]	validation-rmse:8.01716                                                                                            
[6]	validation-rmse:7.74039                                                                                            
[7]	validation-rmse:7.52832                                                                                            
[8]	validation-rmse:7.36865             




[0]	validation-rmse:11.17814                                                                                           
 78%|██████████████████████████████████████▏          | 39/50 [07:31<01:32,  8.40s/trial, best loss: 6.791619314152785]




[1]	validation-rmse:10.20362                                                                                           
[2]	validation-rmse:9.43561                                                                                            
[3]	validation-rmse:8.83439                                                                                            
[4]	validation-rmse:8.36900                                                                                            
[5]	validation-rmse:8.01412                                                                                            
[6]	validation-rmse:7.74203                                                                                            
[7]	validation-rmse:7.53662                                                                                            
[8]	validation-rmse:7.37542                                                                                            
[9]	validation-rmse:7.25634             




                                                                                                                       




[0]	validation-rmse:11.63778
[1]	validation-rmse:10.96910                                                                                           
[2]	validation-rmse:10.38701                                                                                           
[3]	validation-rmse:9.88052                                                                                            
[4]	validation-rmse:9.44144                                                                                            
[5]	validation-rmse:9.06223                                                                                            
[6]	validation-rmse:8.73657                                                                                            
[7]	validation-rmse:8.45662                                                                                            
[8]	validation-rmse:8.21771                                                                                            
[9]	validat




 82%|████████████████████████████████████████▏        | 41/50 [07:49<01:21,  9.02s/trial, best loss: 6.791619314152785]




[0]	validation-rmse:11.30474                                                                                           
[1]	validation-rmse:10.40810                                                                                           
[2]	validation-rmse:9.67608                                                                                            
[3]	validation-rmse:9.08738                                                                                            
[4]	validation-rmse:8.62032                                                                                            
[5]	validation-rmse:8.24814                                                                                            
[6]	validation-rmse:7.95650                                                                                            
[7]	validation-rmse:7.72954                                                                                            
[8]	validation-rmse:7.54321             




 84%|█████████████████████████████████████████▏       | 42/50 [08:03<01:23, 10.49s/trial, best loss: 6.791619314152785]




[0]	validation-rmse:10.65516                                                                                           
[1]	validation-rmse:9.43060                                                                                            
[2]	validation-rmse:8.58404                                                                                            
[3]	validation-rmse:8.00813                                                                                            
[4]	validation-rmse:7.63034                                                                                            
[5]	validation-rmse:7.37222                                                                                            
[6]	validation-rmse:7.21823                                                                                            
[7]	validation-rmse:7.10452                                                                                            
[8]	validation-rmse:7.02524             




 86%|██████████████████████████████████████████▏      | 43/50 [08:16<01:19, 11.34s/trial, best loss: 6.791619314152785]




[0]	validation-rmse:11.84588                                                                                           
[1]	validation-rmse:11.34087                                                                                           
[2]	validation-rmse:10.88423                                                                                           
[3]	validation-rmse:10.46788                                                                                           
[4]	validation-rmse:10.09373                                                                                           
[5]	validation-rmse:9.75484                                                                                            
[6]	validation-rmse:9.44863                                                                                            
[7]	validation-rmse:9.17246                                                                                            
[8]	validation-rmse:8.92289             




[0]	validation-rmse:12.00256                                                                                           
 88%|███████████████████████████████████████████      | 44/50 [08:30<01:10, 11.83s/trial, best loss: 6.791619314152785]




[1]	validation-rmse:11.63033                                                                                           
[2]	validation-rmse:11.28227                                                                                           
[3]	validation-rmse:10.95690                                                                                           
[4]	validation-rmse:10.65473                                                                                           
[5]	validation-rmse:10.37165                                                                                           
[6]	validation-rmse:10.10922                                                                                           
[7]	validation-rmse:9.86280                                                                                            
[8]	validation-rmse:9.63453                                                                                            
[9]	validation-rmse:9.42380             




 90%|████████████████████████████████████████████     | 45/50 [08:40<00:56, 11.39s/trial, best loss: 6.791619314152785]




[0]	validation-rmse:11.52578                                                                                           
[1]	validation-rmse:10.77400                                                                                           
[2]	validation-rmse:10.13801                                                                                           
[3]	validation-rmse:9.60613                                                                                            
[4]	validation-rmse:9.14780                                                                                            
[5]	validation-rmse:8.76640                                                                                            
[6]	validation-rmse:8.44211                                                                                            
[7]	validation-rmse:8.17489                                                                                            
[8]	validation-rmse:7.95316             




[0]	validation-rmse:11.10617                                                                                           
 92%|█████████████████████████████████████████████    | 46/50 [09:00<00:54, 13.66s/trial, best loss: 6.791619314152785]




[1]	validation-rmse:10.08828                                                                                           
[2]	validation-rmse:9.29985                                                                                            
[3]	validation-rmse:8.69145                                                                                            
[4]	validation-rmse:8.22902                                                                                            
[5]	validation-rmse:7.88125                                                                                            
[6]	validation-rmse:7.62207                                                                                            
[7]	validation-rmse:7.42698                                                                                            
[8]	validation-rmse:7.27931                                                                                            
[9]	validation-rmse:7.17189             




[0]	validation-rmse:11.74901                                                                                           
                                                                                                                       




[1]	validation-rmse:11.16567
[2]	validation-rmse:10.64626                                                                                           
[3]	validation-rmse:10.18322                                                                                           
[4]	validation-rmse:9.77123                                                                                            
[5]	validation-rmse:9.40661                                                                                            
[6]	validation-rmse:9.08562                                                                                            
[7]	validation-rmse:8.80309                                                                                            
[8]	validation-rmse:8.55363                                                                                            
[9]	validation-rmse:8.33631                                                                                            
[10]	valida




[0]	validation-rmse:11.53896                                                                                           
 96%|███████████████████████████████████████████████  | 48/50 [09:22<00:24, 12.36s/trial, best loss: 6.790096051333509]




[1]	validation-rmse:10.79797                                                                                           
[2]	validation-rmse:10.16530                                                                                           
[3]	validation-rmse:9.62634                                                                                            
[4]	validation-rmse:9.17031                                                                                            
[5]	validation-rmse:8.78607                                                                                            
[6]	validation-rmse:8.46216                                                                                            
[7]	validation-rmse:8.19199                                                                                            
[8]	validation-rmse:7.96761                                                                                            
[9]	validation-rmse:7.78053             




[0]	validation-rmse:11.10942                                                                                           
 98%|████████████████████████████████████████████████ | 49/50 [09:35<00:12, 12.54s/trial, best loss: 6.790096051333509]




[1]	validation-rmse:10.09677                                                                                           
[2]	validation-rmse:9.30952                                                                                            
[3]	validation-rmse:8.70761                                                                                            
[4]	validation-rmse:8.24547                                                                                            
[5]	validation-rmse:7.90087                                                                                            
[6]	validation-rmse:7.64235                                                                                            
[7]	validation-rmse:7.45129                                                                                            
[8]	validation-rmse:7.29972                                                                                            
[9]	validation-rmse:7.19018             




100%|█████████████████████████████████████████████████| 50/50 [09:45<00:00, 11.71s/trial, best loss: 6.790096051333509]
[0]	validation-rmse:9.99779
[1]	validation-rmse:8.58895
[2]	validation-rmse:7.80349
[3]	validation-rmse:7.37479
[4]	validation-rmse:7.14621
[5]	validation-rmse:7.01258
[6]	validation-rmse:6.94435
[7]	validation-rmse:6.90516
[8]	validation-rmse:6.87912
[9]	validation-rmse:6.86029
[10]	validation-rmse:6.85335
[11]	validation-rmse:6.84362
[12]	validation-rmse:6.84287
[13]	validation-rmse:6.84243
[14]	validation-rmse:6.84169
[15]	validation-rmse:6.83336
[16]	validation-rmse:6.83549
[17]	validation-rmse:6.83595
[18]	validation-rmse:6.83437
[19]	validation-rmse:6.83373
[20]	validation-rmse:6.83162
[21]	validation-rmse:6.82980
[22]	validation-rmse:6.82709
[23]	validation-rmse:6.82711
[24]	validation-rmse:6.82633
[25]	validation-rmse:6.82577
[26]	validation-rmse:6.82432
[27]	validation-rmse:6.82308
[28]	validation-rmse:6.82216
[29]	validation-rmse:6.81941
[30]	validation-rmse



'\nwith mlflow.start_run():\n    best_result[\'max_depth\'] = int(best_result[\'max_depth\'])  # Ensure max_depth is an integer\n    mlflow.log_params(best_result)\n    booster = xgb.train(\n        params=best_result,\n        dtrain=train,\n        #num_boost_round=1000, # for processor multi-core Intel i5 with 8GB RAM or higher\n        num_boost_round=200,  # Reduced from 1000 for processor Intel Celeron N3350 with 4GB RAM \n        evals=[(valid, \'validation\')],\n        #early_stopping_rounds=50 # for processor multi-core Intel i5 with 8GB RAM or higher\n        early_stopping_rounds=20  # Reduced from 50 for processor Intel Celeron N3350 with 4GB RAM \n    )\n    y_pred = booster.predict(valid)\n    rmse = mean_squared_error(y_val, y_pred, squared=False)\n    mlflow.log_metric("rmse", rmse)\n\n    with open("models/preprocessor.b", "wb") as f_out:\n        pickle.dump(dv, f_out)\n    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")\n\n    mlflow.xgboo

In [46]:
#mlflow.sklearn.autolog(log_input_examples=False)  # Disable logging of input examples
mlflow.sklearn.autolog() # Enable automatic logging

# Print shapes for debugging
print("Shape of X_train before any transformation:", X_train.shape)
print("Shape of y_train before any transformation:", y_train.shape)

# If X_train and X_val are sparse matrices, convert them to dense
if isinstance(X_train, csr_matrix):
    X_train_dense = X_train.toarray()
else:
    X_train_dense = X_train

if isinstance(X_val, csr_matrix):
    X_val_dense = X_val.toarray()
else:
    X_val_dense = X_val

# Check shapes after conversion
print("Shape of X_train_dense after conversion:", X_train_dense.shape)
print("Shape of y_train after conversion:", y_train.shape)

# Ensure X_train_dense and y_train have consistent lengths
assert X_train_dense.shape[0] == y_train.shape[0], "X_train and y_train must have the same number of samples."

# Define sample size (10% of original data)
sample_size = int(0.1 * X_train_dense.shape[0])

# Randomly sample 10% of the training set
X_train_sampled, y_train_sampled = resample(X_train_dense, y_train, n_samples=sample_size, random_state=42)

# Loop through different models and apply optimizations
for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):
    with mlflow.start_run():
        
        # Log parameters and artifacts
        mlflow.log_param("train-data-path", "/datasets/dtc/green_tripdata_2021-01.parquet")
        #mlflow.log_param("train-data-path", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet")
        mlflow.log_param("sample-size-train-data", df_train.shape[0])  # Record the number of samples taken
        mlflow.log_param("valid-data-path", "/datasets/dtc/green_tripdata_2021-02.parquet")
        #mlflow.log_param("valid-data-path", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet")
        mlflow.log_param("sample-size-valid-data", df_valid.shape[0])  # Record the number of samples taken
        
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        # Configure the model based on its class
        if model_class == LinearSVR:
            # Set parameters for LinearSVR
            mlmodel = model_class(max_iter=1000, dual=True, tol=1e-2)  # Use dual=True for compatibility
        elif model_class in (RandomForestRegressor, ExtraTreesRegressor):
            # Use parallel processing for tree-based models
            mlmodel = model_class(n_jobs=-1)
        elif model_class == GradientBoostingRegressor:
            # Enable early stopping for Gradient Boosting
            mlmodel = model_class(n_iter_no_change=5, validation_fraction=0.1)
        else:
            mlmodel = model_class()

        # Fit the model using the sampled training data
        mlmodel.fit(X_train_sampled, y_train_sampled)

        # Make predictions using the original validation set
        y_pred = mlmodel.predict(X_val_dense)

        # Log RMSE using the original validation set
        rmse = root_mean_squared_error(y_val, y_pred)
        #rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)

        # Print training time
        print(f"Finished training {model_class.__name__} with RMSE: {rmse}")


Shape of X_train before any transformation: (7391, 3418)
Shape of y_train before any transformation: (7391,)
Shape of X_train_dense after conversion: (7391, 3418)
Shape of y_train after conversion: (7391,)
Finished training RandomForestRegressor with RMSE: 7.259841238023631
Finished training GradientBoostingRegressor with RMSE: 7.020166648537414
Finished training ExtraTreesRegressor with RMSE: 7.403640765063365
Finished training LinearSVR with RMSE: 1506.4530306035401
