# Food Delivery Time Prediction

The goals of this notebook are:

- Load the dataset
- Create and refine features where needed
- Export the cleaned data for later use in model training and testing

In [18]:
import numpy as np
import pandas as pd
import random
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [19]:
# Notebook-wide constants: the RNG seed and the name of the target column.
SEED = 7
TARGET = "Delivery_Time_min"

# Seed the stdlib and the NumPy global generators with the same value.
for seed_rng in (random.seed, np.random.seed):
    seed_rng(SEED)

In [20]:
# Read the food delivery dataset into the working DataFrame.
input_path = "../data/food_delivery_data.csv"
df = pd.read_csv(input_path)

In [21]:
# Preview the first five rows of the loaded data.
df.head(5)

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


## Feature Engineering

### Handle Missing Values

In [22]:
# Count missing values per column to decide which ones need imputing.
df.isna().sum(axis=0)

Order_ID                   0
Distance_km                0
Weather                   30
Traffic_Level             30
Time_of_Day               30
Vehicle_Type               0
Preparation_Time_min       0
Courier_Experience_yrs    30
Delivery_Time_min          0
dtype: int64

In [23]:
# Fill the gaps in the categorical columns with each column's most frequent value.
# NOTE: despite its name, `constant_imputer` uses the "most_frequent" strategy,
# not a fixed constant.
cols = ["Weather", "Traffic_Level", "Time_of_Day"]

constant_imputer = SimpleImputer(strategy="most_frequent")
constant_imputer.fit(df[cols])
df[cols] = constant_imputer.transform(df[cols])

In [24]:
# Fill the gaps in courier experience with the column median.
cols = ["Courier_Experience_yrs"]

median_imputer = SimpleImputer(strategy="median")
df[cols] = median_imputer.fit(df[cols]).transform(df[cols])

In [25]:
# Re-count missing values per column; every count should now be zero.
df.isna().sum(axis=0)

Order_ID                  0
Distance_km               0
Weather                   0
Traffic_Level             0
Time_of_Day               0
Vehicle_Type              0
Preparation_Time_min      0
Courier_Experience_yrs    0
Delivery_Time_min         0
dtype: int64

### Encode Categorical Columns

In [26]:
# One-hot encode the categorical columns and replace them in `df` with the
# resulting indicator columns (one column per observed category).
cols = ["Weather", "Time_of_Day", "Vehicle_Type", "Traffic_Level"]

onehot_encoder = OneHotEncoder(sparse_output=False)
encoded_array = onehot_encoder.fit_transform(df[cols])
encoded_cols = onehot_encoder.get_feature_names_out(cols)

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# RangeIndex would misalign rows (and introduce NaNs) whenever df's index is
# not the default 0..n-1, e.g. after rows have been filtered out.
encoded_df = pd.DataFrame(encoded_array, columns=encoded_cols, index=df.index)

df = pd.concat([df.drop(columns=cols), encoded_df], axis=1)

In [None]:
# Optional alternative: encode Traffic_Level as a single ordered numeric column
# instead of one-hot indicators. To use it, remove "Traffic_Level" from the
# one-hot `cols` list in the cell above. If the column has already been
# replaced by Traffic_Level_* indicators, this cell leaves `df` untouched
# instead of raising a KeyError.
cols = ["Traffic_Level"]

# State the order explicitly: by default OrdinalEncoder sorts categories
# alphabetically (High=0, Low=1, Medium=2), which does not follow increasing
# traffic. With this list Low=0, Medium=1, High=2. Any other value in the
# column makes fit_transform raise a ValueError.
ordinal_enconder = OrdinalEncoder(categories=[["Low", "Medium", "High"]])

if set(cols).issubset(df.columns):
    df[cols] = ordinal_enconder.fit_transform(df[cols])

In [28]:
# Preview the first five rows after encoding.
df.head(5)

Unnamed: 0,Order_ID,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min,Weather_Clear,Weather_Foggy,Weather_Rainy,Weather_Snowy,Weather_Windy,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Bike,Vehicle_Type_Car,Vehicle_Type_Scooter,Traffic_Level_High,Traffic_Level_Low,Traffic_Level_Medium
0,522,7.93,12,1.0,43,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,738,16.42,20,2.0,84,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,741,9.52,28,1.0,59,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
3,661,7.44,5,1.0,37,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,412,19.03,16,5.0,68,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


## Save data

In [29]:
# Write the processed frame to CSV; index=False leaves the row index out of the file.
output_path = "../data/testing_data.csv"
df.to_csv(output_path, index=False)