In [1]:
# Step 1: Import and setup
import pandas as pd
import sys
import os
import warnings

# Make the parent of the current working directory importable so that the
# `src` package used by the imports below resolves. The membership check keeps
# this cell idempotent: re-running it no longer appends duplicate entries.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation and numerical warnings raised by pandas or the model
# code. Consider narrowing it to specific categories once the noisy ones are known.
warnings.filterwarnings("ignore")

# Do not truncate columns when displaying wide frames.
pd.set_option('display.max_columns', None)

from src.prepare_data import load_and_prepare_data
from src.train_model import train_delivery_model
from src.optimize_conditions import recommend_optimal_config


In [2]:
# Step 2: Load and prepare data
# The CSV location defaults to the original hard-coded path, but it can be
# overridden through the DELIVERY_DATA_PATH environment variable so the notebook
# also runs on machines where the dataset is stored elsewhere.
DATA_PATH = os.environ.get("DELIVERY_DATA_PATH", "/data/amazon_delivery.csv")

# Judging by how the results are used further down: `df` is the prepared frame,
# `feature_cols` / `target_col` select its model inputs and target, and `encoder`
# is passed on to recommend_optimal_config.
df, feature_cols, target_col, encoder = load_and_prepare_data(DATA_PATH)

In [3]:
# According to the dataset description on Kaggle (and from inspecting it there), the
# data contains no missing values, so no further EDA is done here. Instead of relying
# on that description alone, verify it on the prepared frame before training, then
# take a quick look at the first rows.
missing_cells = int(df.isna().sum().sum())
assert missing_cells == 0, f"Expected no missing values, found {missing_cells}"
df.head()

Unnamed: 0,Order_ID,Agent_Age,Agent_Rating,Delivery_Time,Order_Hour,Pickup_Delay_Minutes,Distance_km,Vehicle_motorcycle,Vehicle_scooter,Vehicle_van,Weather_Cloudy,Weather_Fog,Weather_Sandstorms,Weather_Stormy,Weather_Sunny,Weather_Windy,Traffic_High,Traffic_Jam,Traffic_Low,Traffic_Medium,Area_Metropolitian,Area_Other,Area_Semi-Urban,Area_Urban,Category_Apparel,Category_Books,Category_Clothing,Category_Cosmetics,Category_Electronics,Category_Grocery,Category_Home,Category_Jewelry,Category_Kitchen,Category_Outdoors,Category_Pet Supplies,Category_Shoes,Category_Skincare,Category_Snacks,Category_Sports,Category_Toys,DayOfWeek_Friday,DayOfWeek_Monday,DayOfWeek_Saturday,DayOfWeek_Sunday,DayOfWeek_Thursday,DayOfWeek_Tuesday,DayOfWeek_Wednesday
0,ialx566343618,37,4.9,120,11,15.0,3.020737,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,akqg208421122,34,4.5,165,19,5.0,20.143737,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,njpu434582536,23,4.4,130,8,15.0,1.549693,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,rjto796129700,38,4.7,105,18,10.0,7.774497,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,zguw716275638,32,4.6,150,13,15.0,6.197898,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [4]:
# Step 3: Train model
# Select the model inputs and the target from the prepared frame, then hand both
# to the training helper (the recorded output shows it reporting a test-set RMSE).
feature_frame = df[feature_cols]
target_values = df[target_col]
model = train_delivery_model(feature_frame, target_values)


Model RMSE on test set: 22.47 minutes


In [5]:
# Step 4: Define scenario context
# Now that the model is trained, we test how it optimizes delivery for a hypothetical
# scenario. The dictionary below fixes the uncontrollable conditions of that scenario.
# The optimizer function then considers all possible combinations of the controllable
# variables to find the optimal option for this case.

context = dict(
    Weather="Sunny",
    Traffic="Medium",
    Area="Urban",
    Category="Electronics",
    DayOfWeek="Monday",
    Order_Hour=11,
    Pickup_Delay_Minutes=15,
    Distance_km=4.2,
)


In [6]:
# Step 5: Run optimization
# Ask the optimizer for the best configuration for the scenario in `context`, using the
# trained `model` and the `encoder` returned by the data-preparation step.
# NOTE(review): the recorded result recommends Vehicle "Bike", while the one-hot columns
# shown by df.head() are only Vehicle_motorcycle / Vehicle_scooter / Vehicle_van. Confirm
# that the candidate vehicles tried inside recommend_optimal_config match the categories
# the encoder was built on; otherwise the prediction may be for an unseen category.
best_config = recommend_optimal_config(context, model, encoder)


In [7]:
# Step 6: Display result
# Emit the heading, a 50-character rule and the recommended configuration,
# one per line, in a single print call.
print(
    "Best delivery configuration for given scenario:",
    "-" * 50,
    best_config,
    sep="\n",
)


Best delivery configuration for given scenario:
--------------------------------------------------
Vehicle                        Bike
Agent_Age                        25
Agent_Rating                    5.0
Predicted_Delivery_Time    69.25401
Name: 11, dtype: object
