# 📦 Delivery Time Prediction for RapidLogistics

This project builds a machine learning model that predicts shipment delivery time from package details, transport mode, and delay factors such as traffic and weather.

In [1]:
## Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
## Step 2: Load the Dataset
# The path is relative, so it is resolved against the kernel's working directory.
DATASET_CSV = "Delivery Time Prediction for RapidLogistics.csv"
df = pd.read_csv(filepath_or_buffer=DATASET_CSV)

# Preview the first five rows to check the column names and sample values.
df.head(5)

Unnamed: 0,shipment_id,origin_city,destination_city,package_weight_kg,package_size_cu_m,transport_mode,historical_avg_delivery_time_hrs,traffic_delay_hrs,weather_delay_hrs,estimated_delivery_time_hrs
0,SHIP10000,Delhi,Chennai,11.0,0.55,Air,18,1.0,0.52,19.52
1,SHIP10001,Bangalore,Mumbai,10.83,0.53,Rail,36,0.76,0.3,37.06
2,SHIP10002,Chennai,Chennai,3.09,0.39,Road,91,0.65,0.16,91.81
3,SHIP10003,Chennai,Hyderabad,8.86,0.36,Road,111,0.49,0.45,111.94
4,SHIP10004,Delhi,Delhi,14.43,0.29,Road,76,0.85,0.01,76.86


In [4]:
## Step 3: Data Preprocessing
# Count the missing entries in every column before any transformation is applied.
null_counts = df.isna().sum()
print("Null Values Check:\n", null_counts)

Null Values Check:
 shipment_id                         0
origin_city                         0
destination_city                    0
package_weight_kg                   0
package_size_cu_m                   0
transport_mode                      0
historical_avg_delivery_time_hrs    0
traffic_delay_hrs                   0
weather_delay_hrs                   0
estimated_delivery_time_hrs         0
dtype: int64


In [6]:
# One independent LabelEncoder per categorical column, so each keeps its own
# string -> integer mapping and can be reused later to encode new inputs.
le_origin, le_dest, le_mode = (LabelEncoder() for _ in range(3))

In [21]:
# Encode categorical variables
# Each text column is replaced in `df` by the integer codes produced by its own
# encoder; the fitted encoders are reused later to encode new samples.
#
# The numeric-dtype guard makes this cell safe to re-run: without it, a second
# execution would refit every encoder on the already-encoded integers, and a
# later `le_origin.transform(['Delhi'])` would fail on an unseen label.
#
# NOTE(review): scikit-learn documents LabelEncoder as meant for target values;
# OrdinalEncoder / OneHotEncoder are the feature-side equivalents. Kept as-is
# because later cells call `le_*.transform(...)` directly.
categorical_encoders = {
    'origin_city': le_origin,
    'destination_city': le_dest,
    'transport_mode': le_mode,
}

for column, encoder in categorical_encoders.items():
    if pd.api.types.is_numeric_dtype(df[column]):
        # Already encoded by an earlier run of this cell; leave the fitted
        # encoder and the column untouched.
        continue
    df[column] = encoder.fit_transform(df[column])


In [8]:
# Drop shipment_id
# `shipment_id` holds identifier strings (e.g. SHIP10000) and is not used as a
# model feature. Reassigning instead of `inplace=True`, together with
# `errors='ignore'`, keeps this cell idempotent: re-running it after the column
# is already gone no longer raises a KeyError.
df = df.drop(columns='shipment_id', errors='ignore')

In [9]:
## Step 4: Feature and Target Split
# The target is the estimated delivery time; every remaining column is a feature.
#
# NOTE(review): in the five rows displayed by df.head(), the target equals
# historical_avg_delivery_time_hrs + traffic_delay_hrs + weather_delay_hrs.
# If that holds for the whole dataset, the model only has to learn a sum of
# three of its inputs -- confirm before reading too much into the metrics.
target_column = 'estimated_delivery_time_hrs'
y = df[target_column]
X = df.drop(columns=[target_column])

In [10]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# partition reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [11]:
## Step 5: Model Training
# Random forest of 100 trees; the seed fixes the forest's randomness so that
# repeated runs on the same split give the same model.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestRegressor(**forest_params)
model.fit(X=X_train, y=y_train)

In [12]:
## Step 6: Model Evaluation
# Score the fitted model on the held-out test rows.
y_pred = model.predict(X_test)

# MSE is computed once and reused for RMSE.
mse = mean_squared_error(y_test, y_pred)
scores = [
    ("MAE:", mean_absolute_error(y_test, y_pred)),
    ("MSE:", mse),
    ("RMSE:", np.sqrt(mse)),
    ("R2 Score:", r2_score(y_test, y_pred)),
]
for label, value in scores:
    print(label, value)

MAE: 0.37213450000000364
MSE: 0.2352646669500023
RMSE: 0.48504089203901385
R2 Score: 0.999734097143482


In [23]:
## Step 7: Predict New Delivery Time (Sample)
# Convert the categorical inputs with the encoders fitted during preprocessing,
# so the sample uses the same integer codes the model was trained on.
origin_code = le_origin.transform(['Delhi'])[0]
dest_code = le_dest.transform(['Chennai'])[0]
mode_code = le_mode.transform(['Air'])[0]

# Single-row frame; the keys are listed in the same order as the training columns.
sample_values = {
    'origin_city': [origin_code],
    'destination_city': [dest_code],
    'package_weight_kg': [12],
    'package_size_cu_m': [0.6],
    'transport_mode': [mode_code],
    'historical_avg_delivery_time_hrs': [18],
    'traffic_delay_hrs': [1.2],
    'weather_delay_hrs': [0.3],
}
sample = pd.DataFrame(sample_values)

predicted_time = model.predict(sample)
print("Predicted Delivery Time:", predicted_time[0], "hours")

Predicted Delivery Time: 19.60169999999999 hours
