In [62]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read the input CSV into a DataFrame. The path is relative, so it resolves
# against the kernel's current working directory.
DATA_PATH = '../data/final_data.csv'
df = pd.read_csv(DATA_PATH)

In [75]:
# Predictor columns and the regression target, named once so the selection
# below reads as "features -> X, target -> y".
feature_cols = [
    'Agent_Age',
    'Agent_Rating',
    'Distance_km',
    'Order_Hour',
    'Order_Day',
    'Weather',
    'Traffic',
]
target_col = 'Delivery_Time'

X = df[feature_cols]
y = df[target_col]

In [76]:
# Echo the selected feature names as a quick sanity check.
print("✅ Using features:", X.columns.tolist())

✅ Using features: ['Agent_Age', 'Agent_Rating', 'Distance_km', 'Order_Hour', 'Order_Day', 'Weather', 'Traffic']


In [77]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [78]:
# Object-dtype columns are the ones that must be encoded to numbers before
# they can be passed to the model.
object_frame = X_train.select_dtypes(include='object')
categorical_cols = object_frame.columns
print("Categorical columns:", categorical_cols)

Categorical columns: Index(['Weather', 'Traffic'], dtype='object')


In [79]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Work on explicit copies: the frames returned by train_test_split are derived
# from `X`, and assigning columns into them can trigger pandas'
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Code assigned to test-set categories that were absent from the training split.
UNKNOWN_CODE = -1

label_encoders = {}

# Fit one LabelEncoder per categorical column, using the training split only.
for col in categorical_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    label_encoders[col] = le

# Encode the test split with the fitted encoders. Categories never seen during
# training are mapped to UNKNOWN_CODE. The encoder's `classes_` is deliberately
# left untouched so the objects kept in `label_encoders` still describe the
# training data only (appending to `classes_` would hand unseen values fresh
# positive codes and silently alter the fitted encoder).
for col in categorical_cols:
    le = label_encoders[col]
    is_known = X_test[col].isin(le.classes_).to_numpy()
    codes = np.full(len(X_test), UNKNOWN_CODE, dtype=np.int64)
    if is_known.any():
        # Only rows with a known category are passed to transform(), which
        # raises on labels it was not fitted on.
        codes[is_known] = le.transform(X_test.loc[is_known, col])
    X_test[col] = codes

In [80]:
# Random forest with library-default hyper-parameters; only the seed is pinned
# so repeated fits on the same data give the same model.
model = RandomForestRegressor(
    random_state=42,
)
model.fit(X=X_train, y=y_train)

In [69]:
# Predict on the held-out split with the fitted model.
# NOTE(review): this cell must run after the encoding and fit cells; the
# recorded execution counts in this notebook are out of order, so re-run
# top-to-bottom (Restart & Run All) before trusting the metrics below.
y_pred = model.predict(X=X_test)

In [70]:
import numpy as np
import mlflow
import mlflow.sklearn

In [71]:
# Regression metrics on the held-out split. RMSE is derived from the MSE so it
# is expressed in the same units as the target.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# One print call, one line per message, so the report comes out as a block.
print(
    "✅ Model trained successfully!",
    f"📊 MAE (Mean Absolute Error): {mae:.2f}",
    f"📊 RMSE (Root Mean Squared Error): {rmse:.2f}",
    f"📊 R² Score: {r2:.2f}",
    sep="\n",
)

✅ Model trained successfully!
📊 MAE (Mean Absolute Error): 26.44
📊 RMSE (Root Mean Squared Error): 37.44
📊 R² Score: 0.47


In [72]:
# Save model using joblib
import joblib

joblib.dump(model, '../scripts/delivery_model.pkl')
print("💾 Model saved as 'delivery_model.pkl'")

# Also persist the fitted label encoders. The model was trained on
# integer-encoded categorical columns, so any consumer that loads the model
# needs the same encoders to turn raw category values into the codes the model
# expects. This is written to a separate file so the existing model artifact
# is unchanged.
joblib.dump(label_encoders, '../scripts/label_encoders.pkl')
print("💾 Encoders saved as 'label_encoders.pkl'")

💾 Model saved as 'delivery_model.pkl'
