In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
import xgboost as xgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

file_path = "amazon_delivery.csv"
df = pd.read_csv(file_path)

selected_columns = [
    'Store_Latitude', 'Store_Longitude', 'Drop_Latitude', 'Drop_Longitude',
    'Order_Date', 'Order_Time', 'Pickup_Time', 'Weather', 'Traffic',
    'Vehicle', 'Area', 'Delivery_Time'
]
df_selected = df[selected_columns]

df_selected = df_selected.dropna()

df_selected['Order_Date'] = pd.to_datetime(df_selected['Order_Date'])
df_selected['Day_of_Week'] = df_selected['Order_Date'].dt.dayofweek
df_selected['Order_Time'] = pd.to_datetime(df_selected['Order_Time'])
df_selected['Pickup_Time'] = pd.to_datetime(df_selected['Pickup_Time'])
df_selected['Time_To_Pickup'] = (df_selected['Pickup_Time'] - df_selected['Order_Time']).dt.total_seconds() / 60.0

df_selected = df_selected.drop(columns=['Order_Date', 'Order_Time', 'Pickup_Time'])

df_selected = pd.get_dummies(df_selected, columns=['Weather', 'Traffic', 'Vehicle', 'Area'])

df_selected['Distance'] = np.sqrt(
    (df_selected['Drop_Latitude'] - df_selected['Store_Latitude'])**2 +
    (df_selected['Drop_Longitude'] - df_selected['Store_Longitude'])**2
)

X = df_selected.drop(columns=['Delivery_Time'])
y = df_selected['Delivery_Time']

poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'reg_alpha': [0, 1],
    'reg_lambda': [0.1, 1]
}

xg_reg = xgb.XGBRegressor(objective='reg:squarederror')
random_search = RandomizedSearchCV(estimator=xg_reg, param_distributions=param_grid,
                                   n_iter=100, scoring='neg_mean_squared_error',
                                   cv=3, verbose=2, random_state=42, n_jobs=-1)
random_search.fit(X_train, y_train)
best_params = random_search.best_params_
print(f"Best parameters: {best_params}")

xg_reg_best = xgb.XGBRegressor(**best_params)
xg_reg_best.fit(X_train, y_train)

def create_dnn_model():
    model = Sequential()
    model.add(Dense(512, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(1, activation='linear'))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

dnn_model = create_dnn_model()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
history = dnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2,
                        callbacks=[early_stopping, reduce_lr])

y_pred_xgboost = xg_reg_best.predict(X_test)
rmse_xgboost = np.sqrt(mean_squared_error(y_test, y_pred_xgboost))
print(f"XGBoost RMSE: {rmse_xgboost}")

y_pred_dnn = dnn_model.predict(X_test)
rmse_dnn = np.sqrt(mean_squared_error(y_test, y_pred_dnn))
print(f"DNN RMSE: {rmse_dnn}")


  df_selected['Order_Time'] = pd.to_datetime(df_selected['Order_Time'])
  df_selected['Pickup_Time'] = pd.to_datetime(df_selected['Pickup_Time'])


Fitting 3 folds for each of 100 candidates, totalling 300 fits
Best parameters: {'subsample': 1.0, 'reg_lambda': 1, 'reg_alpha': 0, 'n_estimators': 200, 'max_depth': 3, 'learning_rate': 0.1, 'colsample_bytree': 0.8}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 13ms/step - loss: 15709.2549 - val_loss: 4255.7441 - learning_rate: 0.0010
Epoch 2/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - loss: 3115.7847 - val_loss: 2030.1581 - learning_rate: 0.0010
Epoch 3/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 2207.1680 - val_loss: 2027.9120 - learning_rate: 0.0010
Epoch 4/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 2162.5334 - val_loss: 2004.8718 - learning_rate: 0.0010
Epoch 5/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - loss: 2175.6304 - val_loss: 2030.3141 - learning_rate: 0.0010
Epoch 6/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - loss: 2116.8066 - val_loss: 2016.5388 - learning_rate: 0.0010
Epoch 7/100
[1m873/873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 14

In [60]:
# Save XGBoost model
xg_reg_best.save_model("xgboost_model.json")

# Save DNN model
dnn_model.save("dnn_model.h5")

from google.colab import files
files.download("xgboost_model.json")
files.download("dnn_model.h5")





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Predict using both trained models
y_pred_xgb = xg_reg_best.predict(X_test)
y_pred_dnn = dnn_model.predict(X_test).flatten()

# Search for best weight to combine them
best_rmse = float("inf")
best_weight = 0.0

weights = np.linspace(0, 1, 101)  # Try weights from 0.00 to 1.00 (step of 0.01)

for w in weights:
    y_pred_combined = w * y_pred_xgb + (1 - w) * y_pred_dnn
    rmse = np.sqrt(mean_squared_error(y_test, y_pred_combined))

    if rmse < best_rmse:
        best_rmse = rmse
        best_weight = w

# Final ensemble prediction using the best weight
y_pred_ensemble = best_weight * y_pred_xgb + (1 - best_weight) * y_pred_dnn

print(f"✅ Best Ensemble RMSE: {best_rmse:.4f}")
print(f"📊 Best Weight → XGBoost: {best_weight:.2f}, DNN: {1 - best_weight:.2f}")


[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
✅ Best Ensemble RMSE: 40.8437
📊 Best Weight → XGBoost: 0.77, DNN: 0.23


In [16]:
import numpy as np
import random

# Ensure X_test and y_test are NumPy arrays
X_test_array = np.array(X_test)
y_test_array = np.array(y_test)

# Choose a random test index
random_index = random.randint(0, len(X_test_array) - 1)

# Select single test sample
single_order_features = X_test_array[random_index:random_index + 1]
actual_delivery_time = y_test_array[random_index]

# Predict using both models
xgb_pred = xg_reg_best.predict(single_order_features)[0]
dnn_pred = dnn_model.predict(single_order_features)[0][0]

# Ensemble prediction
ensemble_pred = best_weight * xgb_pred + (1 - best_weight) * dnn_pred

# Output
print(f"🔮 Predicted Delivery Time: {ensemble_pred:.2f} hours")
print(f"📦 Actual Delivery Time: {actual_delivery_time:.2f} hours")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
🔮 Predicted Delivery Time: 93.22 hours
📦 Actual Delivery Time: 80.00 hours
