## Import libraries

In [89]:
# Import libraries
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Read csv file

In [90]:
data = pd.read_csv("train_null_removed.csv")

In [91]:
data

Unnamed: 0,id,order_hour,day_of_week,restaurant_type,cuisine_type,distance_km,estimated_delivery_min,order_value_inr,num_items,is_peak_hour,weather_condition,traffic_density,delivery_partner_rating,delivery_partner_orders,restaurant_rating,restaurant_avg_prep_min,is_promo_order,area_type,is_late
0,1,6,sunday,cafe,south_indian,10.7,55,1208.0,8,True,humid,moderate,3.6,3393.0,4.3,23.5,False,commercial,on_time
1,2,19,friday,casual_dining,dessert,1.1,78,172.0,12,True,light_rain,gridlock,2.5,2331.0,4.7,23.8,True,college_area,late
2,3,14,sunday,fine_dining,dessert,13.6,14,2314.0,2,True,clear,moderate,4.1,3393.0,3.1,23.0,False,college_area,late
3,4,10,monday,cloud_kitchen,south_indian,7.5,70,2754.0,2,True,clear,gridlock,4.6,2618.0,3.8,26.0,False,residential,late
4,5,7,thursday,fine_dining,chinese,7.2,47,789.0,9,True,light_rain,heavy,3.9,4121.0,4.4,28.9,False,residential,on_time
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2204,2996,5,tuesday,cloud_kitchen,other,3.5,60,741.0,10,True,hot,gridlock,3.8,1676.0,3.2,14.1,False,college_area,late
2205,2997,14,monday,casual_dining,pizza_burger,14.2,35,538.0,6,False,clear,moderate,4.6,1778.0,4.0,41.3,True,residential,on_time
2206,2998,17,wednesday,cafe,other,0.8,71,518.0,5,True,heavy_rain,heavy,2.4,253.0,3.8,13.3,False,market,late
2207,2999,23,friday,casual_dining,italian,14.5,70,1445.0,5,True,hot,heavy,4.2,3167.0,3.7,15.8,True,residential,on_time


## Encoding Categorical Data

In [92]:
cols_category = ['day_of_week', 'restaurant_type', 'cuisine_type', 'is_peak_hour', 'weather_condition', 'traffic_density', 'is_promo_order', 'area_type', 'is_late']

In [93]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), cols_category)
    ],
    remainder = 'passthrough'
)
transformed = preprocessor.fit_transform(data)
encoded_cols = preprocessor.named_transformers_['cat'].get_feature_names_out(cols_category)
all_columns = [col for col in data.columns if col not in cols_category] + list(encoded_cols)
df_encoded = pd.DataFrame(transformed, columns=all_columns)
df_encoded

Unnamed: 0,id,order_hour,distance_km,estimated_delivery_min,order_value_inr,num_items,delivery_partner_rating,delivery_partner_orders,restaurant_rating,restaurant_avg_prep_min,...,traffic_density_moderate,is_promo_order_False,is_promo_order_True,area_type_college_area,area_type_commercial,area_type_highway,area_type_market,area_type_residential,is_late_late,is_late_on_time
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,6.0,10.7,55.0,1208.0,8.0,3.6,3393.0,4.3,23.5
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,2.0,19.0,1.1,78.0,172.0,12.0,2.5,2331.0,4.7,23.8
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,14.0,13.6,14.0,2314.0,2.0,4.1,3393.0,3.1,23.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,4.0,10.0,7.5,70.0,2754.0,2.0,4.6,2618.0,3.8,26.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,5.0,7.0,7.2,47.0,789.0,9.0,3.9,4121.0,4.4,28.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2204,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,2996.0,5.0,3.5,60.0,741.0,10.0,3.8,1676.0,3.2,14.1
2205,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,2997.0,14.0,14.2,35.0,538.0,6.0,4.6,1778.0,4.0,41.3
2206,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,2998.0,17.0,0.8,71.0,518.0,5.0,2.4,253.0,3.8,13.3
2207,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,2999.0,23.0,14.5,70.0,1445.0,5.0,4.2,3167.0,3.7,15.8


In [94]:
cols = data.columns
day_of_week = data['day_of_week'].unique()
restaurant_type = data['restaurant_type'].unique()
cuisine_type = data['cuisine_type'].unique()
weather_condition = data['weather_condition'].unique()
traffic_density = data['traffic_density'].unique()
area_type = data['area_type'].unique()

In [95]:
day_of_week, restaurant_type, cuisine_type, weather_condition, traffic_density, area_type

(array(['sunday', 'friday', 'monday', 'thursday', 'wednesday', 'saturday',
        'tuesday'], dtype=object),
 array(['cafe', 'casual_dining', 'fine_dining', 'cloud_kitchen',
        'fast_food'], dtype=object),
 array(['south_indian', 'dessert', 'chinese', 'biryani', 'italian',
        'pizza_burger', 'indian', 'healthy', 'other'], dtype=object),
 array(['humid', 'light_rain', 'clear', 'fog', 'hot', 'heavy_rain'],
       dtype=object),
 array(['moderate', 'gridlock', 'heavy', 'low'], dtype=object),
 array(['commercial', 'college_area', 'residential', 'market', 'highway'],
       dtype=object))

In [96]:
data

Unnamed: 0,id,order_hour,day_of_week,restaurant_type,cuisine_type,distance_km,estimated_delivery_min,order_value_inr,num_items,is_peak_hour,weather_condition,traffic_density,delivery_partner_rating,delivery_partner_orders,restaurant_rating,restaurant_avg_prep_min,is_promo_order,area_type,is_late
0,1,6,sunday,cafe,south_indian,10.7,55,1208.0,8,True,humid,moderate,3.6,3393.0,4.3,23.5,False,commercial,on_time
1,2,19,friday,casual_dining,dessert,1.1,78,172.0,12,True,light_rain,gridlock,2.5,2331.0,4.7,23.8,True,college_area,late
2,3,14,sunday,fine_dining,dessert,13.6,14,2314.0,2,True,clear,moderate,4.1,3393.0,3.1,23.0,False,college_area,late
3,4,10,monday,cloud_kitchen,south_indian,7.5,70,2754.0,2,True,clear,gridlock,4.6,2618.0,3.8,26.0,False,residential,late
4,5,7,thursday,fine_dining,chinese,7.2,47,789.0,9,True,light_rain,heavy,3.9,4121.0,4.4,28.9,False,residential,on_time
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2204,2996,5,tuesday,cloud_kitchen,other,3.5,60,741.0,10,True,hot,gridlock,3.8,1676.0,3.2,14.1,False,college_area,late
2205,2997,14,monday,casual_dining,pizza_burger,14.2,35,538.0,6,False,clear,moderate,4.6,1778.0,4.0,41.3,True,residential,on_time
2206,2998,17,wednesday,cafe,other,0.8,71,518.0,5,True,heavy_rain,heavy,2.4,253.0,3.8,13.3,False,market,late
2207,2999,23,friday,casual_dining,italian,14.5,70,1445.0,5,True,hot,heavy,4.2,3167.0,3.7,15.8,True,residential,on_time


In [97]:
df_encoded

Unnamed: 0,id,order_hour,distance_km,estimated_delivery_min,order_value_inr,num_items,delivery_partner_rating,delivery_partner_orders,restaurant_rating,restaurant_avg_prep_min,...,traffic_density_moderate,is_promo_order_False,is_promo_order_True,area_type_college_area,area_type_commercial,area_type_highway,area_type_market,area_type_residential,is_late_late,is_late_on_time
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,6.0,10.7,55.0,1208.0,8.0,3.6,3393.0,4.3,23.5
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,2.0,19.0,1.1,78.0,172.0,12.0,2.5,2331.0,4.7,23.8
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,14.0,13.6,14.0,2314.0,2.0,4.1,3393.0,3.1,23.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,4.0,10.0,7.5,70.0,2754.0,2.0,4.6,2618.0,3.8,26.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,5.0,7.0,7.2,47.0,789.0,9.0,3.9,4121.0,4.4,28.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2204,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,2996.0,5.0,3.5,60.0,741.0,10.0,3.8,1676.0,3.2,14.1
2205,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,2997.0,14.0,14.2,35.0,538.0,6.0,4.6,1778.0,4.0,41.3
2206,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,2998.0,17.0,0.8,71.0,518.0,5.0,2.4,253.0,3.8,13.3
2207,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,2999.0,23.0,14.5,70.0,1445.0,5.0,4.2,3167.0,3.7,15.8


In [98]:
late_cols = ['is_late_late', 'is_late_on_time']

# Input
X = df_encoded.drop(late_cols, axis=1)
# output
y = df_encoded[late_cols].idxmax(axis=1).str.replace('Late_', '', regex=False)
y_numeric = y.map({'is_late_late': 0, 'is_late_on_time': 1})

In [99]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [100]:
model = LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)

In [101]:
y_pred = model.predict(X_test)

In [102]:
# Model evaluation
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1score = f1_score(y_test, y_pred, average='macro')
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 score: {f1score}")

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 score: 1.0


In [79]:
# Exporting predictions.csv file
label_map = {'is_late_late': 0, 'is_late_on_time': 1}
y_labels = pd.Series(y_pred).map(label_map)
predictions = pd.DataFrame({
    'id': X.id,
    'prediction': y_labels
})
predictions.to_csv("predictions.csv", index=False)