In [43]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.svm import SVR

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

import mlflow

In [44]:
# Route every MLflow tracking call to the DagsHub-hosted server for this repo.
dagshub_mlflow_uri = "https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow"
mlflow.set_tracking_uri(dagshub_mlflow_uri)


In [45]:
import dagshub

# Initialise the DagsHub client for this repository with MLflow integration enabled.
dagshub.init(
    repo_owner='akashagalaveaaa1',
    repo_name='Taxi-Demand-Prediction-System',
    mlflow=True,
)

In [46]:
# Read the processed train/test splits and index both by pickup timestamp.
train_data_path = "../data/processed/train.csv"
test_data_path = "../data/processed/test.csv"

timestamp_col = "tpep_pickup_datetime"

# Same read/parse/index recipe for both files; the generator unpacks in order.
train_df, test_df = (
    pd.read_csv(csv_path, parse_dates=[timestamp_col]).set_index(timestamp_col)
    for csv_path in (train_data_path, test_data_path)
)

train_df

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,total_pickups,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,187,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,194,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,180,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,197,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185,185.0,4
...,...,...,...,...,...,...,...,...
2016-02-29 22:45:00,15.0,9.0,11.0,11.0,29,12,12.0,0
2016-02-29 23:00:00,12.0,15.0,9.0,11.0,29,17,14.0,0
2016-02-29 23:15:00,17.0,12.0,15.0,9.0,29,15,14.0,0
2016-02-29 23:30:00,15.0,17.0,12.0,15.0,29,15,15.0,0


In [47]:
# Count missing values per column in the training data.
train_df.isnull().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [48]:
# Count missing values per column in the test data.
test_df.isnull().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [49]:
# Split the training frame into the target series and the feature matrix.
# NOTE(review): in the rows displayed for train_df, each avg_pickups value moves
# from the previous row's avg_pickups toward the SAME row's total_pickups
# (161 -> 175 with target 194, 175 -> 177 with 180, 177 -> 185 with 197), so the
# feature appears to include the current interval's target. Confirm upstream
# that it is lagged; otherwise it leaks the target into X_train.
y_train = train_df["total_pickups"]
X_train = train_df.drop(columns=["total_pickups"])

In [50]:
# preview the first rows of the training feature matrix
X_train.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185.0,4


In [51]:
# Split the test frame into the target series and the feature matrix.
y_test = test_df["total_pickups"]
X_test = test_df.drop(columns=["total_pickups"])

In [52]:
# preview the first rows of the test feature matrix
X_test.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-03-01 00:00:00,36.0,44.0,31.0,29.0,0,39.0,1
2016-03-01 00:15:00,41.0,36.0,44.0,31.0,0,37.0,1
2016-03-01 00:30:00,35.0,41.0,36.0,44.0,0,41.0,1
2016-03-01 00:45:00,47.0,35.0,41.0,36.0,0,38.0,1
2016-03-01 01:00:00,34.0,47.0,35.0,41.0,0,35.0,1


In [53]:
from sklearn import set_config

# ask scikit-learn transformers to emit pandas output (transform_output="pandas")
set_config(transform_output="pandas")

In [54]:
# One-hot encode the categorical columns; every other column passes through.
categorical_cols = ["region", "day_of_week"]
one_hot = OneHotEncoder(drop="first", sparse_output=False)

encoder = ColumnTransformer(
    transformers=[("ohe", one_hot, categorical_cols)],
    remainder="passthrough",
    n_jobs=-1,
    force_int_remainder_cols=False,
)

In [55]:
# display the configured column transformer (it is fitted in a later cell)
encoder

In [56]:
# Learn the encoding from the training features only, then apply it to both splits.
encoder.fit(X_train)

X_train_encoded = encoder.transform(X_train)
X_test_encoded = encoder.transform(X_test)

In [57]:
import optuna
import tqdm 

In [58]:
# Select the MLflow experiment that will hold the model-selection runs.
experiment_name = "Model Selection"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/03e27324340f498f8c4539333a5763c2', creation_time=1746980667373, experiment_id='0', last_update_time=1746980667373, lifecycle_stage='active', name='Model Selection', tags={}>

In [59]:
def _build_model(trial, model_name):
    """Instantiate the regressor for ``model_name`` with hyperparameters sampled from ``trial``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the model-specific hyperparameters.
    model_name : str
        One of ``"LR"``, ``"RF"``, ``"GBR"``, ``"XGBR"``.

    Returns
    -------
    An unfitted regressor.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    if model_name == "LR":
        return LinearRegression()

    if model_name == "RF":
        n_estimators_rf = trial.suggest_int("n_estimators_rf", 10, 100, step=10)
        max_depth_rf = trial.suggest_int("max_depth_rf", 3, 10)
        return RandomForestRegressor(n_estimators=n_estimators_rf,
                                     max_depth=max_depth_rf,
                                     random_state=42, n_jobs=-1)

    if model_name == "GBR":
        n_estimators_gb = trial.suggest_int("n_estimators_gb", 10, 100, step=10)
        learning_rate_gb = trial.suggest_float("learning_rate_gb", 1e-4, 1e-1, log=True)
        return GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                         learning_rate=learning_rate_gb,
                                         random_state=42)

    if model_name == "XGBR":
        n_estimators_xgb = trial.suggest_int("n_estimators_xgb", 10, 100, step=10)
        learning_rate_xgb = trial.suggest_float("learning_rate_xgb", 1e-4, 1e-1, log=True)
        max_depth_xgb = trial.suggest_int("max_depth_xgb", 3, 10)
        # Seeded like the RF and GBR branches so every candidate is built reproducibly.
        return XGBRegressor(n_estimators=n_estimators_xgb,
                            learning_rate=learning_rate_xgb,
                            max_depth=max_depth_xgb,
                            random_state=42)

    # Guard against the search space and this dispatch drifting apart; without it
    # the caller would fail later with an unrelated unbound-variable error.
    raise ValueError(f"Unsupported model_name: {model_name!r}")


def objective(trial):
    """Optuna objective: pick a model family, fit it, and return its MAPE.

    Each call opens its own MLflow run (``nested=True``), logs the sampled model
    name, the model's full parameter dict and the resulting ``MAPE`` metric.

    Reads the notebook-level ``X_train_encoded``, ``y_train``, ``X_test_encoded``
    and ``y_test``.

    NOTE(review): the loss is computed on the test split, so the study selects
    the model against the same data that is later reported as test error.
    Consider scoring on a separate validation split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial supplying the model choice and its hyperparameters.

    Returns
    -------
    The value of ``mean_absolute_percentage_error(y_test, y_pred)``.
    """
    # start the child run
    with mlflow.start_run(nested=True):

        # model name search space
        list_of_models = ["LR", "RF", "GBR", "XGBR"]
        model_name = trial.suggest_categorical("model_name", list_of_models)

        model = _build_model(trial, model_name)

        # log the model name
        mlflow.log_param("model_name", model_name)

        # log the model parameters
        mlflow.log_params(model.get_params())

        # fit on the data
        model.fit(X_train_encoded, y_train)

        # get the predictions
        y_pred = model.predict(X_test_encoded)

        # calculate the loss
        loss = mean_absolute_percentage_error(y_test, y_pred)

        # log the metric
        mlflow.log_metric("MAPE", loss)
        return loss

In [60]:
# Run the model-selection search inside a parent MLflow run named "best_model".
# NOTE(review): the recorded output of this cell shows trials failing with a
# DagsHub 403 ("limit of 100 experiments"); every trial opens its own run, so
# confirm the tracking quota before re-running all 50 trials.
with mlflow.start_run(run_name="best_model", nested=True) as parent:

    # in-memory study that minimises the value returned by `objective`
    study = optuna.create_study(
        direction="minimize",
        study_name="model_selection",
    )

    # 50 trials, executed in parallel (n_jobs=-1)
    study.optimize(objective, n_trials=50, n_jobs=-1)

    # record the winning configuration and its score on the parent run
    mlflow.log_params(study.best_params)
    mlflow.log_metric("Best_MAPE", study.best_value)

[I 2025-05-11 22:32:21,448] A new study created in memory with name: model_selection


🏃 View run capable-fawn-488 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/45cc198f9db04e6687eb268d8465c682
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run melodic-grouse-301 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/f73f4c51ad3948e19086033697fac67e
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run exultant-shrew-756 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/b2a38e1392be4ca7b2add8a53e73ee22
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run rambunctious-jay-992 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/615028fed3fd4ddcbeb85f17756fe927
🧪 View experimen

[I 2025-05-11 22:33:06,428] Trial 4 finished with value: 6.438340187072754 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 90, 'learning_rate_xgb': 0.00024120592078432892, 'max_depth_xgb': 6}. Best is trial 4 with value: 6.438340187072754.


🏃 View run big-slug-906 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/0accc17a030f4b6581e81892ea5bb363
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run upset-grub-870 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/39b0872c510140ad94965d00c9ff480b
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:33:09,441] Trial 8 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:10,448] Trial 1 finished with value: 5.961167812347412 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 80, 'learning_rate_xgb': 0.0012458595457911774, 'max_depth_xgb': 9}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:11,425] Trial 5 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run funny-trout-311 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/7f873f8af27d4048b9dae1ebceffb38f
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:33:13,444] Trial 2 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:14,449] Trial 10 finished with value: 0.3824175000190735 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 80, 'learning_rate_xgb': 0.0502853441898907, 'max_depth_xgb': 3}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:17,422] Trial 7 finished with value: 6.57019567489624 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.00011264289279815445, 'max_depth_xgb': 6}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:18,445] Trial 11 finished with value: 6.4469828605651855 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.0020328058188813494, 'max_depth_xgb': 6}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:22,444] Trial 6 finished with value: 4.229926586151123 and 

🏃 View run learned-midge-557 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/02e05e15b45b4219a776b00036fe4e3e
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:33:35,443] Trial 0 finished with value: 0.17689965791966933 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 7}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:33:46,428] Trial 9 finished with value: 0.5436767136482558 and parameters: {'model_name': 'RF', 'n_estimators_rf': 70, 'max_depth_rf': 3}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run dazzling-frog-327 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/6ed0e5d2ef7d4a439664df4ec5d65220
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run puzzled-squirrel-874 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/f0e907b10990463f8c81c3dbb0379075
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run rogue-koi-536 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/75173b844aba4ff8a467044bb8c0113a
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run flawless-croc-489 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/4a812e8c8cc14a2aba1fd69b0a15c0ac
🧪 View experiment at:

[I 2025-05-11 22:34:09,461] Trial 13 finished with value: 5.91038703918457 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 30, 'learning_rate_xgb': 0.0037886116700582948, 'max_depth_xgb': 3}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:10,478] Trial 14 finished with value: 0.5924916863441467 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 60, 'learning_rate_xgb': 0.04764808583460656, 'max_depth_xgb': 3}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:11,477] Trial 15 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run flawless-bass-96 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/cdc9e058ac474c73b54c1319faada2e2
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:34:13,444] Trial 16 finished with value: 6.058462142944336 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 70, 'learning_rate_xgb': 0.001248258497056549, 'max_depth_xgb': 3}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:14,484] Trial 17 finished with value: 6.062399864196777 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 70, 'learning_rate_xgb': 0.001181491597946713, 'max_depth_xgb': 7}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:16,427] Trial 12 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:18,436] Trial 19 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:22,416] Trial 18 finished with value: 2.719686985015869 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 70, 'learning_rate_xgb': 0.0137

🏃 View run useful-kit-99 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/d7fdb1cc1d504a469083ec6d6d647759
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run classy-vole-920 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/9593de433ada4e168b8979399d72a8c6
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run gregarious-finch-280 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/631a625270a0482ab1df0c34d6007bd2
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:34:37,461] Trial 20 finished with value: 4.478214263916016 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 30, 'learning_rate_xgb': 0.013033433070160305, 'max_depth_xgb': 5}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:43,481] Trial 21 finished with value: 6.48221418074998 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 40, 'learning_rate_gb': 0.00038688857976728534}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:34:45,441] Trial 22 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run loud-seal-832 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/b8e682011d97421f8c1ff90b7e0ccfa1
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run shivering-shark-813 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/bb036287942f4237932492efd2e2f0c8
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run grandiose-colt-434 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/a3e1748a44fe4b52b03c0a9244e604db
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:35:06,442] Trial 24 finished with value: 6.522342222392143 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 10, 'learning_rate_gb': 0.0008930980987753327}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:35:08,428] Trial 25 finished with value: 6.5275204501208925 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 10, 'learning_rate_gb': 0.0008089296926071863}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run unruly-mare-784 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/80bd708ee5044731a9dfa09b3d5755eb
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run masked-vole-60 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/6445c377afd845c581559ee9af82b0b8
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:35:13,445] Trial 23 finished with value: 5.109613689531225 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 40, 'learning_rate_gb': 0.006731624891181981}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:35:18,428] Trial 3 finished with value: 0.23773064042053363 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 5}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:35:20,426] Trial 27 finished with value: 6.457444962911486 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 30, 'learning_rate_gb': 0.000650935072943228}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run rogue-worm-731 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/28f8f21159b14484a07940403d6bd104
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run upbeat-stag-470 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/dcf585e6aab943d0b22462b3b9467456
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run adorable-ray-457 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/141f9896e40f4254bd63406b0c577b66
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run calm-fowl-747 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/3283438ff3354ca9a940c74a21627dac
🧪 View experiment at: https://

[I 2025-05-11 22:35:33,179] Trial 31 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run treasured-skunk-931 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/ae95bafbd032461788e021b6d48ecb06
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:35:35,411] Trial 32 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:35:37,429] Trial 33 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:35:39,419] Trial 28 finished with value: 5.83290553345908 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 60, 'learning_rate_gb': 0.0021294187772420104}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:35:41,456] Trial 26 finished with value: 4.783267953909393 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 60, 'learning_rate_gb': 0.0056774706698556}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run dashing-kite-741 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/12ca8e13ca914ec59781869ef41837a1
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run efficient-panda-148 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/cf4148a651514943972e1cb67d6d1627
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:03,445] Trial 29 finished with value: 0.6690585888430117 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.0267435263611706}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run valuable-bat-96 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/4df02d9f2b4f4ee2a26da8358eec989a
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:06,448] Trial 35 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run brawny-snipe-258 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/193fa4c69309407899463929ec1ba1b2
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run thundering-ox-347 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/f91a44758ff14faa9c6b2d800771b658
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run legendary-flea-45 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/37c491016003464d8ed649fede893401
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:18,437] Trial 37 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:36:20,451] Trial 38 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run sneaky-gnat-396 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/d2ee5f3658e24e2da42b0a54c5684958
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:26,420] Trial 30 finished with value: 2.9628216198297115 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 60, 'learning_rate_gb': 0.014436618355648273}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run bright-ape-291 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/488a8374c26d4b12b3804e03daed2fda
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:28,447] Trial 34 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run resilient-ape-538 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/26912c9b2d5d422b9b65a9cfe0671259
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run fun-perch-696 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/8514e3733ac2429098ea9b966bea64b9
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run gaudy-yak-121 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/27e39d02b9bb44198b89e0e3659c519e
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:35,445] Trial 40 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:36:37,414] Trial 41 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[W 2025-05-11 22:36:38,422] Trial 49 failed with parameters: {} because of the following error: RestException('403: You have reached the limit of 100 experiments for a private repository in the free plan.\nPlease upgrade to the Starter plan to increase the limit at https://dagshub.com/pricing?repo_id=75854').
Traceback (most recent call last):
  File "c:\Final Projects\Taxi Demand Prediction System\TAXI\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\akash\AppData\Local\Temp\ipykernel_10780\2228030350.py", line 3, in objective
    with mlflow.start_run(nest

🏃 View run melodic-hare-406 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/172f5af41b9a42c382f9015def65108d
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run unique-crane-80 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/659658fbfeb446a4a3ad3e00ea8338f6
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0
🏃 View run monumental-hen-753 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/2120b142e3fd4ba5a3fd8aef20dc0782
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:53,419] Trial 39 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:36:54,417] Trial 44 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.
[W 2025-05-11 22:36:55,394] Trial 46 failed with parameters: {} because of the following error: RestException('403: You have reached the limit of 100 experiments for a private repository in the free plan.\nPlease upgrade to the Starter plan to increase the limit at https://dagshub.com/pricing?repo_id=75854').
Traceback (most recent call last):
  File "c:\Final Projects\Taxi Demand Prediction System\TAXI\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\akash\AppData\Local\Temp\ipykernel_10780\2228030350.py", line 3, in objective
    with mlflow.start_run(nest

🏃 View run bittersweet-grub-673 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/9df26b629474436ab26ed48ba4c5c45a
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:36:58,461] Trial 45 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run adaptable-midge-422 at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/27d5fab23bb149128e7a5bf4399ff20f
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


[I 2025-05-11 22:37:00,445] Trial 47 finished with value: 0.13029011350572198 and parameters: {'model_name': 'RF', 'n_estimators_rf': 10, 'max_depth_rf': 10}. Best is trial 8 with value: 0.07934790285463077.
[I 2025-05-11 22:37:01,430] Trial 48 finished with value: 0.07934790285463077 and parameters: {'model_name': 'LR'}. Best is trial 8 with value: 0.07934790285463077.


🏃 View run best_model at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0/runs/4b0ecb279b4d436db637cd5831ada58d
🧪 View experiment at: https://dagshub.com/akashagalaveaaa1/Taxi-Demand-Prediction-System.mlflow/#/experiments/0


In [61]:
# lowest objective value (MAPE) found by the model-selection study

study.best_value

0.07934790285463077

In [62]:
# parameters of the best trial in the model-selection study

study.best_params

{'model_name': 'LR'}

In [63]:
# How often each model family was sampled across the study's trials.
trials_df = study.trials_dataframe()
trials_df["params_model_name"].value_counts()

params_model_name
LR      23
XGBR    12
GBR      9
RF       4
Name: count, dtype: int64

In [68]:
# Fit a plain linear regression and report MAPE on both splits.
lr = LinearRegression().fit(X_train_encoded, y_train)

# training split
y_pred_train = lr.predict(X_train_encoded)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

# test split
y_pred_test = lr.predict(X_test_encoded)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)

print("The training error is ", mape_train)
print("The test error is ", mape_test)

The training error is  0.0877801330456651
The test error is  0.07934790285463077


In [69]:
# coefficients learned by the fitted linear regression
lr.coef_

array([-2.33737604,  0.71512405, -0.55601505, -1.25311068, -3.20463231,
       -0.86685973, -2.79925402, -3.62516859,  0.41386463, -2.9376376 ,
       -1.97624678, -3.75050442,  0.51806283, -2.54033388, -2.43297463,
        0.47632075,  0.61254786, -4.7417372 , -2.03077217, -1.26960984,
       -4.03690273, -2.08863167, -1.0414428 ,  0.73561736, -0.99999442,
       -0.85944985, -2.43098478,  0.67112238,  0.57385071, -0.11719951,
       -0.28045898, -0.37180749, -0.5238324 , -0.4233113 , -0.34045774,
       -0.54170892, -0.36264553, -0.2493965 , -0.31905518,  2.4912456 ])

In [70]:
def tune_ridge(trial, alpha_low=30, alpha_high=100):
    """Optuna objective: fit a Ridge model for a sampled ``alpha`` and return its MAPE.

    Reads the notebook-level ``X_train_encoded``, ``y_train``, ``X_test_encoded``
    and ``y_test``.

    The search bounds are keyword parameters so the range can be widened without
    editing this function, e.g.
    ``study.optimize(functools.partial(tune_ridge, alpha_high=1000), ...)``.
    The defaults reproduce the original fixed range of 30-100.

    NOTE(review): the loss is computed on the test split, so alpha is tuned
    against the same data used to report test error.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``alpha``.
    alpha_low : float, default 30
        Lower bound of the uniform ``alpha`` search range.
    alpha_high : float, default 100
        Upper bound of the uniform ``alpha`` search range.

    Returns
    -------
    The value of ``mean_absolute_percentage_error(y_test, y_pred)``.
    """
    # hyperparameter space
    alpha = trial.suggest_float("alpha", alpha_low, alpha_high)

    # make the model object
    ridge = Ridge(alpha=alpha, random_state=42)

    # train the model
    ridge.fit(X_train_encoded, y_train)

    # get predictions
    y_pred = ridge.predict(X_test_encoded)

    # calculate loss
    loss = mean_absolute_percentage_error(y_test, y_pred)

    return loss
        

In [71]:
# Fresh in-memory study for the Ridge alpha search (rebinds `study`).
study = optuna.create_study(
    direction="minimize",
    study_name="tune_model",
)

[I 2025-05-11 22:42:07,539] A new study created in memory with name: tune_model


In [72]:
# Run 100 Ridge trials in parallel with a progress bar.
study.optimize(
    tune_ridge,
    n_trials=100,
    n_jobs=-1,
    show_progress_bar=True,
)

  0%|          | 0/100 [00:00<?, ?it/s]

[I 2025-05-11 22:42:12,124] Trial 7 finished with value: 0.07918731340757482 and parameters: {'alpha': 82.35288129589719}. Best is trial 1 with value: 0.07918274413809458.
[I 2025-05-11 22:42:12,127] Trial 1 finished with value: 0.07918274413809458 and parameters: {'alpha': 85.64668690776647}. Best is trial 1 with value: 0.07918274413809458.
[I 2025-05-11 22:42:12,345] Trial 9 finished with value: 0.0791850962110863 and parameters: {'alpha': 83.94035572728858}. Best is trial 1 with value: 0.07918274413809458.
[I 2025-05-11 22:42:12,431] Trial 8 finished with value: 0.07922152819872504 and parameters: {'alpha': 59.54826345670036}. Best is trial 1 with value: 0.07918274413809458.
[I 2025-05-11 22:42:12,433] Trial 2 finished with value: 0.0792245564542275 and parameters: {'alpha': 57.684611192666495}. Best is trial 1 with value: 0.07918274413809458.
[I 2025-05-11 22:42:12,434] Trial 10 finished with value: 0.07925183372755243 and parameters: {'alpha': 42.13778557747433}. Best is trial 1 w

In [73]:
# alpha of the best trial in the Ridge tuning study

study.best_params

{'alpha': 99.97541126726539}

In [74]:
# lowest objective value (MAPE) reached by the Ridge tuning study

study.best_value

0.07916409574411024