In [1]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer

# Load dataset
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until DATA_PATH points at a local copy of the workbook.
DATA_PATH = r"C:\Users\sadaa\OneDrive\Desktop\walmart Retail Data.xlsx"
df = pd.read_excel(DATA_PATH)

# Feature Engineering: Calculate Shipping Delay (days)
# Assumes 'Ship Date' and 'Order Date' were parsed as datetimes by read_excel — TODO confirm.
df['Shipping Delay'] = (df['Ship Date'] - df['Order Date']).dt.days

# Treat the 'Not Specified' placeholder as missing, then fill the gaps with
# the most frequent priority value.
df['Order Priority'] = df['Order Priority'].replace('Not Specified', np.nan)

imputer = SimpleImputer(strategy='most_frequent')
df['Order Priority'] = imputer.fit_transform(df[['Order Priority']]).ravel()

# Define delay threshold based on Ship Mode
delay_thresholds = {
    "Same Day": 0,
    "First Class": 1,
    "Regular Air": 3,
    "Delivery Truck": 5,
    "Express Air": 2
}

# Ship modes that are not keys of delay_thresholds map to NaN here.
df['Expected Delay'] = df['Ship Mode'].map(delay_thresholds)

# Drop rows whose label cannot be determined. This must happen BEFORE the
# comparison below: `NaN > x` evaluates to False, so after .astype(int) a row
# with a missing delay or an unmapped ship mode would silently become class 0
# ("on-time") and a dropna on the target column would never remove it.
df = df.dropna(subset=['Shipping Delay', 'Expected Delay']).copy()

# 1 = shipped later than the threshold for its ship mode, 0 = within threshold.
df['Target_Classification'] = (df['Shipping Delay'] > df['Expected Delay']).astype(int)

df['Target_Classification'].unique()

array([0, 1])

In [3]:
# Define features and target
features = ['Order Priority', 'Ship Mode', 'Region', 'Shipping Cost', 'Order Quantity', 'Discount', 'Profit']
X = df[features]
y = df['Target_Classification']

# Preprocessing Pipeline
categorical_features = ['Order Priority', 'Ship Mode', 'Region']
numerical_features = ['Shipping Cost', 'Order Quantity', 'Discount', 'Profit']

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore' lets categories unseen during fit encode as all zeros
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', StandardScaler(), numerical_features)
    ]
)

# Train-Test Split on the RAW rows, before any preprocessing is fitted, so the
# held-out rows cannot influence the scaler statistics or encoder categories.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessing on the training rows only, then apply it to the test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Full feature matrix under the train-fitted preprocessing; kept so any later
# cell that still references X_transformed continues to work.
X_transformed = preprocessor.transform(X)

# Train Classification Model using XGBoost
clf = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42)
clf.fit(X_train, y_train)

# Evaluate Model
y_pred = clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 0.92
              precision    recall  f1-score   support

           0       0.97      0.93      0.95      1460
           1       0.64      0.80      0.72       220

    accuracy                           0.92      1680
   macro avg       0.81      0.87      0.83      1680
weighted avg       0.93      0.92      0.92      1680



In [5]:
# Function to Predict Delay Status
def predict_delay_status(data):
    """
    Predict whether a shipment will be delayed or on-time.

    Uses the notebook-level `preprocessor` and `clf` objects, and the
    `categorical_features` / `numerical_features` lists, which must already
    be defined (and fitted) before this function is called.

    Args:
        data (dict): Input data as a dictionary containing feature values.
            Must contain every name in `categorical_features` and
            `numerical_features`; any extra keys are ignored.

    Returns:
        str: "Delayed" if the shipment is predicted to be delayed, otherwise "On-Time".

    Raises:
        KeyError: If one or more required feature names are missing from `data`.
    """
    required = [*categorical_features, *numerical_features]

    # Fail early with a readable message instead of an error from deep inside
    # the preprocessing step.
    missing = [name for name in required if name not in data]
    if missing:
        raise KeyError(f"Missing required feature(s): {missing}")

    # Single-row frame restricted to the columns the preprocessor reads.
    df_input = pd.DataFrame([data])[required]

    # Preprocess input data
    X_encoded = preprocessor.transform(df_input)

    # Make prediction
    prediction = clf.predict(X_encoded)
    return "Delayed" if prediction[0] == 1 else "On-Time"

# Example Usage: score a single hypothetical order with the trained model.
sample_input = {
    # categorical inputs
    "Order Priority": "High",
    "Ship Mode": "First Class",
    "Region": "East",
    # numerical inputs
    "Shipping Cost": 20.0,
    "Order Quantity": 3,
    "Discount": 0.05,
    "Profit": 50.0,
}

# The helper returns either "Delayed" or "On-Time".
predicted_status = predict_delay_status(sample_input)
print(f"Predicted Shipping Status: {predicted_status}")


Predicted Shipping Status: On-Time


In [7]:
# Example Usage: a second hypothetical order, this time with low priority.
sample_input = {
    # categorical inputs
    "Order Priority": "Low",
    "Ship Mode": "First Class",
    "Region": "East",
    # numerical inputs
    "Shipping Cost": 10.0,
    "Order Quantity": 3,
    # NOTE(review): the earlier example passes Discount as 0.05; 10 looks like
    # a different scale — confirm the units used in the training data.
    "Discount": 10,
    "Profit": 100.0,
}

# The helper returns either "Delayed" or "On-Time".
predicted_status = predict_delay_status(sample_input)
print(f"Predicted Shipping Status: {predicted_status}")


Predicted Shipping Status: Delayed
