In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder,MinMaxScaler,PowerTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error,r2_score

In [32]:
df=pd.read_csv('/content/final_df (4).csv')
df.head()

Unnamed: 0,person_age,person_ratings,latitude,longitude,delivery_location_latitude,delivery_location_longitude,weather_condition,traffic_density,vehicle_condition,order_type,...,city_name,day,month,day_of_week,is_weekend,pickup_time_in_minutes,order_time_hr,time_slot,distance,distance_type
0,37.0,4.9,22.745049,75.892471,22.765049,75.912471,sunny,High,2,Snack,...,INDO,19,3,Saturday,1,15.0,11.0,Morning,3.025149,short
1,34.0,4.5,12.913041,77.683237,13.043041,77.813237,stormy,Jam,2,Snack,...,BANG,25,3,Friday,0,5.0,19.0,Evening,20.18353,very long
2,23.0,4.4,12.914264,77.6784,12.924264,77.6884,sandstorms,Low,0,Drinks,...,BANG,19,3,Saturday,1,15.0,8.0,Morning,1.552758,short
3,38.0,4.7,11.003669,76.976494,11.053669,77.026494,sunny,Medium,0,Buffet,...,COIMB,5,4,Tuesday,0,10.0,18.0,Evening,7.790401,medium
4,32.0,4.6,12.972793,80.249982,13.012793,80.289982,cloudy,High,1,Snack,...,CHEN,26,3,Saturday,1,15.0,13.0,Afternoon,6.210138,medium


In [33]:
!pip install dagshub mlflow



In [34]:
import dagshub
dagshub.init(repo_owner='amitkumar981', repo_name='swiggy-delivery-time-prediction', mlflow=True)

In [35]:
import mlflow
mlflow.set_tracking_uri('https://dagshub.com/amitkumar981/swiggy-delivery-time-prediction.mlflow')

In [36]:
mlflow.set_experiment('exp-8 built stacking regressor')

<Experiment: artifact_location='mlflow-artifacts:/863f2f159a0649febeac7ea51531b97c', creation_time=1743568465618, experiment_id='8', last_update_time=1743568465618, lifecycle_stage='active', name='exp-8 built stacking regressor', tags={}>

In [37]:
#drop missning values from dataset
df.dropna(inplace=True)

In [38]:
#drop unessary columns from dataset
columns_to_drop=['latitude', 'longitude','delivery_location_latitude', 'delivery_location_longitude','order_time_hr']

In [39]:
df.drop(columns_to_drop,axis=1,inplace=True)

In [40]:
#saprate dependent and independent variable from data set
x=df.drop(columns='time_taken')
y=df['time_taken']

In [41]:
#numarical columns
num_cols=['person_age','person_ratings','pickup_time_in_minutes','distance']
num_cat_cols=['weather_condition','order_type','vehicle_type','multiple_deliveries','city','festival',
             'city_name','day_of_week','time_slot']
ordinal_cat_cols=['traffic_density','distance_type']

In [42]:
traffic_order=['Low ','Medium ','High ','Jam ']
distance_order=['short','medium','long','very long']

In [43]:
#spliting
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=42)
x_train.shape,x_test.shape
x_train

Unnamed: 0,person_age,person_ratings,weather_condition,traffic_density,vehicle_condition,order_type,vehicle_type,multiple_deliveries,festival,city,city_name,day,month,day_of_week,is_weekend,pickup_time_in_minutes,time_slot,distance,distance_type
24939,21.0,4.5,stormy,Jam,1,Drinks,scooter,0.0,no,urban,CHEN,9,3,Wednesday,0,10.0,Evening,4.656558,short
9439,30.0,4.9,windy,Jam,2,Drinks,scooter,1.0,no,metropolitian,RANCHI,3,3,Thursday,0,15.0,Night,4.527973,short
18409,32.0,5.0,fog,Jam,2,Buffet,scooter,2.0,yes,urban,MUM,13,3,Sunday,1,5.0,Night,4.588899,short
15267,35.0,4.9,fog,Low,2,Snack,scooter,0.0,no,metropolitian,AURG,11,2,Friday,0,10.0,Morning,1.526399,short
43924,23.0,4.7,cloudy,Low,2,Snack,electric_scooter,1.0,no,metropolitian,COIMB,11,3,Friday,0,10.0,Morning,3.116141,short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20042,27.0,4.7,sandstorms,Medium,2,Meal,motorcycle,1.0,no,urban,RANCHI,2,4,Saturday,1,15.0,Evening,13.582511,long
7521,36.0,4.7,stormy,Jam,0,Snack,motorcycle,1.0,no,metropolitian,PUNE,12,3,Saturday,1,5.0,Night,16.852242,very long
13483,34.0,4.8,cloudy,Low,2,Drinks,scooter,1.0,no,metropolitian,JAP,17,3,Thursday,0,10.0,Night,4.469606,short
1038,27.0,4.8,stormy,Low,2,Meal,scooter,1.0,no,metropolitian,AGR,16,2,Wednesday,0,10.0,Night,20.831034,very long


In [44]:
preprocessor=ColumnTransformer(transformers=[
    ('scaling',MinMaxScaler(),num_cols),
    ('OHE',OneHotEncoder(drop='first',handle_unknown='ignore',sparse_output=False,feature_name_combiner='concat'),num_cat_cols),
    ('ordinal_encoding',OrdinalEncoder(categories=[traffic_order,distance_order]),ordinal_cat_cols),
],remainder='passthrough',verbose_feature_names_out=False,force_int_remainder_cols=False)
preprocessor.set_output(transform='pandas')

In [45]:
#apply power transformer on target column
pt=PowerTransformer()
y_train_pt=pt.fit_transform(y_train.values.reshape(-1,1))
y_test_pt=pt.transform(y_test.values.reshape(-1,1))

In [46]:
x_train_trans=preprocessor.fit_transform(x_train)
x_test_trans=preprocessor.transform(x_test)
x_train_trans

Unnamed: 0,person_age,person_ratings,pickup_time_in_minutes,distance,weather_condition_fog,weather_condition_sandstorms,weather_condition_stormy,weather_condition_sunny,weather_condition_windy,order_type_Drinks,...,day_of_week_Wednesday,time_slot_Evening,time_slot_Morning,time_slot_Night,traffic_density,distance_type,vehicle_condition,day,month,is_weekend
24939,0.052632,0.80,0.5,0.163629,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,1.0,0.0,0.0,3.0,0.0,1,9,3,0
9439,0.526316,0.96,1.0,0.157036,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,1.0,3.0,0.0,2,3,3,0
18409,0.631579,1.00,0.0,0.160160,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,3.0,0.0,2,13,3,1
15267,0.789474,0.96,0.5,0.003145,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,2,11,2,0
43924,0.157895,0.88,0.5,0.084651,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,2,11,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20042,0.368421,0.88,1.0,0.621266,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,2.0,2,2,4,1
7521,0.842105,0.88,0.0,0.788907,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,3.0,3.0,0,12,3,1
13483,0.736842,0.92,0.5,0.154044,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,2,17,3,0
1038,0.368421,0.92,0.5,0.992901,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,3.0,2,16,2,0


In [47]:
!pip install optuna



In [48]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
import optuna
from sklearn.metrics import mean_absolute_error

from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import StackingRegressor

In [49]:
lightgbm_params={'num_leaves': 80,
 'max_depth': 7,
 'learning_rate': 0.07521051144940308,
 'n_estimators': 350,
 'lambda_l1': 4.994553000952651,
 'lambda_l2': 1.8523641834042492,
 'min_data_in_leaf': 45,
 'feature_fraction': 0.7095710565305883,
 'bagging_fraction': 0.9976539330187573,
 'bagging_freq': 4}

rf_params={'n_estimators': 400,
 'max_depth': 21,
 'min_samples_split': 18,
 'min_samples_leaf': 10,
 'bootstrap': True,
 'max_samples': 0.7634340439115328,
 'min_impurity_decrease': 3.465395379320018e-05}

lgbm_regressor=LGBMRegressor(**lightgbm_params)
rf_regressor=RandomForestRegressor(**rf_params)

In [50]:
def objective(trial):
    with mlflow.start_run(nested=True):
        # Choose meta-model
        meta_model_name = trial.suggest_categorical("model", ["LR", "KNN", "DT"])

        if meta_model_name == "LR":
            meta_model = LinearRegression()

        elif meta_model_name == "KNN":
            n_neighbors = trial.suggest_int("n_neighbors", 2, 20)
            knn_weights = trial.suggest_categorical("knn_weights", ["uniform", "distance"])
            meta_model = KNeighborsRegressor(n_neighbors=n_neighbors, weights=knn_weights)

        elif meta_model_name == "DT":
            max_depth_dt = trial.suggest_int("max_depth_dt", 1, 10)
            min_samples_split_dt = trial.suggest_int("min_samples_split_dt", 2, 10)
            min_samples_leaf_dt = trial.suggest_int("min_samples_leaf_dt", 1, 10)
            meta_model = DecisionTreeRegressor(
                max_depth=max_depth_dt,
                min_samples_split=min_samples_split_dt,
                min_samples_leaf=min_samples_leaf_dt,
                random_state=42,
            )

        # Log chosen meta-model
        mlflow.log_param("meta_model", meta_model_name)

        # Define Stacking Regressor
        stacking_reg = StackingRegressor(
            estimators=[("lgbm", lgbm_regressor), ("rf", rf_regressor)],
            final_estimator=meta_model,
            n_jobs=-1
        )

        # Wrap with Target Transformer (if applicable)
        model = TransformedTargetRegressor(regressor=stacking_reg, transformer=pt)

        #fit model

        model.fit(x_train_trans,y_train)

      #predict
        y_pred=model.predict(x_train_trans)

       #calculate error
        error=mean_absolute_error(y_train,y_pred)

      #log error
        mlflow.log_metric('train_error',error)

        return error






In [51]:
study=optuna.create_study(direction='minimize')

with mlflow.start_run(run_name='best_model'):

    #optimize
    study.optimize(objective,n_trials=10,show_progress_bar=True)

    #log params
    mlflow.log_params(study.best_params)

    #log best value

    mlflow.log_metric('best_value',study.best_value)



[I 2025-04-02 04:35:08,637] A new study created in memory with name: no-name-3280ee64-dd7a-4098-a1d9-80b8de1a6f83


  0%|          | 0/10 [00:00<?, ?it/s]

🏃 View run blushing-panda-139 at: https://dagshub.com/amitkumar981/swiggy-delivery-time-prediction.mlflow/#/experiments/8/runs/72fdb4c08c4d4fd7b3a7be184bd1d87a
🧪 View experiment at: https://dagshub.com/amitkumar981/swiggy-delivery-time-prediction.mlflow/#/experiments/8
[I 2025-04-02 04:40:33,746] Trial 0 finished with value: 3.043201936948876 and parameters: {'model': 'KNN', 'n_neighbors': 5, 'knn_weights': 'uniform'}. Best is trial 0 with value: 3.043201936948876.
🏃 View run intrigued-panda-576 at: https://dagshub.com/amitkumar981/swiggy-delivery-time-prediction.mlflow/#/experiments/8/runs/61454b70e4ea4e1b80c9b13ef7545028
🧪 View experiment at: https://dagshub.com/amitkumar981/swiggy-delivery-time-prediction.mlflow/#/experiments/8
[I 2025-04-02 04:45:36,844] Trial 1 finished with value: 2.965596877661748 and parameters: {'model': 'KNN', 'n_neighbors': 7, 'knn_weights': 'uniform'}. Best is trial 1 with value: 2.965596877661748.
🏃 View run beautiful-shark-483 at: https://dagshub.com/amit

In [52]:
study.best_params

{'model': 'LR'}

In [54]:
study.best_value

2.765030919222006

In [55]:
# optimization history plot

optuna.visualization.plot_optimization_history(study)

In [56]:
# parallel coord plot

optuna.visualization.plot_parallel_coordinate(study,params=["model"])