<a href="https://colab.research.google.com/github/itsalanthomas/shipping-optimization/blob/dev/src/Ecommerice_Optimization_XGBOOST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load libraries
from google.colab import files
from io import StringIO
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Upload and read the file.
# files.upload() returns a dict keyed by file name whose values are the
# uploaded bytes. Parse those bytes directly so the data loaded below is
# what was uploaded in *this* run, not a same-named file left on disk by an
# earlier one.
uploaded = files.upload()
filename = 'ecommerce_shipping.csv'

# Accept a differently named upload as long as it is unambiguous.
if uploaded and filename not in uploaded:
    if len(uploaded) > 1:
        raise ValueError(
            f"Expected a single CSV upload or a file named {filename!r}, "
            f"got: {sorted(uploaded)}"
        )
    filename = next(iter(uploaded))

if filename in uploaded:
    # 'utf-8-sig' also strips a leading BOM, which would otherwise end up
    # glued to the first column name.
    csv_text = uploaded[filename].decode('utf-8-sig')
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a
    # copy already present in the working directory. Raises
    # FileNotFoundError if there is none.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        csv_text = file.read()

df = pd.read_csv(StringIO(csv_text))

# Rename columns: trim whitespace, underscores -> spaces, Title Case.
# Note that str.title() also lower-cases the tail of every word, so a raw
# 'ID' header becomes 'Id' (this matters for the drop further down).
df.columns = [col.strip().replace('_', ' ').title() for col in df.columns]
df.rename(columns={'Reached.On.Time Y.N': 'On-time Delivery'}, inplace=True)
# NOTE(review): the lookup below is named "late rate", which suggests that
# 1 in this column means *late* despite the 'On-time Delivery' label --
# confirm against the dataset documentation.

# Feature Engineering
df['Net Price'] = df['Cost Of The Product'] - df['Discount Offered']
df['Discount Ratio'] = round(df['Discount Offered'] / df['Cost Of The Product'], 2)

# Fix the 70/30 row split *before* any statistic of the target is computed,
# so that no test-set label can influence a feature. Row positions are split
# here and reused below to slice X and y.
train_pos, test_pos = train_test_split(
    np.arange(len(df)), test_size=0.3, random_state=42
)

# Status Of Mode By Shipment: mean of the target per shipment mode, estimated
# from the training rows only and then applied to every row. A mode that
# never occurs in the training rows maps to NaN.
late_rate_lookup = (
    df.iloc[train_pos]
    .groupby('Mode Of Shipment')['On-time Delivery']
    .mean()
    .round(2)
    .to_dict()
)
df['Status Of Mode By Shipment'] = df['Mode Of Shipment'].map(late_rate_lookup)

# Ordinal-encode product importance (values outside the mapping become NaN)
product_order = {'low': 0, 'medium': 1, 'high': 2}
df['Product Importance'] = df['Product Importance'].map(product_order)

# Label encode the remaining categorical variables
label_cols = ['Warehouse Block', 'Mode Of Shipment', 'Gender']
for col in label_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Drop columns not needed in the model.
# After the Title Case rename above the identifier column is called 'Id',
# not 'ID'; list both spellings so the row identifier is really removed and
# cannot be used as a feature.
df.drop(columns=['ID', 'Id'], inplace=True, errors='ignore')

# Define feature matrix X and target y
X = df.drop(columns=['On-time Delivery'])
y = df['On-time Delivery']

# Train-test split (70/30), using the row positions chosen above
X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

# Initialize and train XGBoost Classifier
# NOTE(review): use_label_encoder is reported as deprecated by recent XGBoost
# releases -- confirm the installed version before removing it.
model_xgb = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model_xgb.fit(X_train, y_train)

# Predict and evaluate on the held-out 30%
y_pred = model_xgb.predict(X_test)

print("Accuracy:", round(accuracy_score(y_test, y_pred), 2))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))