In [4]:
# Scenario: Predicting Delivery Time for E-commerce Orders
# An e-commerce company wants to predict how long an order will take to deliver based on:
# Distance to customer (km)
# Number of items in the order
# Traffic level (1 = Low, 2 = Medium, 3 = High)
# Warehouse processing time (hours)
# Since multiple factors affect delivery time, they use Multiple Linear Regression.
# DeliveryTime = b0 + b1*(Distance) + b2*(Items) + b3*(Traffic) + b4*(ProcessingTime)

# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.linear_model import LinearRegression
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import mean_absolute_error, r2_score

# df = pd.read_csv("Delievery_dataset.csv")
# print("Dataset")
# print(df)

# X = df[["Distance_km", "Items", "Traffic_Level", "Processing_Time_hr"]]
# y = df["Processing_Time_hr"]
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, radom_state=42
# )
# model = LinearRegression()
# model.fit(X_train, y_train)
# print("\nSlopes (b1, b2, b3, b4):", model.coef_)
# print("Intercept (b0):", model.intercept_)
# y_pred = model.predict(X_test)

# print("\nActual vs Predicted:")
# for actual, pred in zip(y_test, y_pred):
#     print(f"Actual: {actual:.2f}, Predicted: {pred:.2f}")

# mae = mean_absolute_error(y_test, y_pred)
# r2 = r2_score(y_test, y_pred)

# print("\nMean Absolute Error:", mae)
# print("R2 Score:", r2)




import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# --- Configuration ----------------------------------------------------------
# Tunable values live in one place. FEATURE_COLUMNS is the single source of
# truth for both the training matrix and the ad-hoc prediction further down,
# so the two cannot silently drift apart.
DATA_PATH = "Delievery_dataset.csv"  # spelling kept exactly as the notebook's original read_csv call
FEATURE_COLUMNS = ["Distance_km", "Items", "Traffic_Level", "Processing_Time_hr"]
TARGET_COLUMN = "Delivery_Time_hr"
TEST_FRACTION = 0.2  # share of rows held out for evaluation
SEED = 42            # fixed seed so the train/test split is reproducible

# --- Load data --------------------------------------------------------------
df = pd.read_csv(DATA_PATH)

print("Dataset:")
print(df.head())
print("\nColumns:", df.columns)

# Fail fast with a readable message if the CSV does not have the columns the
# model needs (still a KeyError, as plain column selection would raise).
missing_columns = [
    col for col in FEATURE_COLUMNS + [TARGET_COLUMN] if col not in df.columns
]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required column(s): {missing_columns}")

# --- Features / target ------------------------------------------------------
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# --- Train / test split -----------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SEED
)

# --- Fit the multiple linear regression --------------------------------------
model = LinearRegression()
model.fit(X_train, y_train)

# Coefficients are reported in the same order as FEATURE_COLUMNS.
print("\nCoefficients (b1, b2, b3, b4):", model.coef_)
print("Intercept (b0):", model.intercept_)

# --- Evaluate on the held-out rows -------------------------------------------
y_pred = model.predict(X_test)

print("\nActual vs Predicted Delivery Time:")
for actual, pred in zip(y_test, y_pred):
    print(f"Actual: {actual:.2f}, Predicted: {pred:.2f}")

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nMean Absolute Error:", mae)
print("R2 Score:", r2)

# --- Predict a single new order ----------------------------------------------
# One-row example: 15 km, 5 items, traffic level 3 (High), 2 h of processing.
new_order_data = {
    "Distance_km": [15],
    "Items": [5],
    "Traffic_Level": [3],
    "Processing_Time_hr": [2]
}
# Selecting by FEATURE_COLUMNS pins the column set and order to what the model
# was trained on, and raises immediately if a feature is missing from the dict.
new_order_df = pd.DataFrame(new_order_data)[FEATURE_COLUMNS]
predicted_time = model.predict(new_order_df)
print(f"\nPredicted Delivery Time: {predicted_time[0]:.2f} hours")

Dataset:
   Distance_km  Items  Traffic_Level  Processing_Time_hr  Delivery_Time_hr
0            5      2              1                 1.0               3.0
1            8      3              2                 1.5               5.0
2           12      5              3                 2.0               8.0
3            4      1              1                 0.5               2.5
4           15      6              3                 2.5              10.0

Columns: Index(['Distance_km', 'Items', 'Traffic_Level', 'Processing_Time_hr',
       'Delivery_Time_hr'],
      dtype='object')

Coefficients (b1, b2, b3, b4): [ 0.53055556 -0.23611111  0.74444444  0.54444444]
Intercept (b0): -0.5138888888888911

Actual vs Predicted Delivery Time:
Actual: 5.50, Predicted: 5.62
Actual: 4.00, Predicted: 4.23
Actual: 3.00, Predicted: 2.96

Mean Absolute Error: 0.13240740740740767
R2 Score: 0.9778728070175439

Predicted Delivery Time: 9.59 hours
