In [None]:
import pandas as pd

# Load the three cleaned CSV exports from the working directory.
# NOTE(review): the "Inventry" spelling presumably matches the file on disk — confirm before renaming.
orders, delivery, inventory = (
    pd.read_csv(csv_name)
    for csv_name in (
        "orders_clean - Sheet1.csv",
        "delivery_clean - Sheet1.csv",
        "Inventry_clean - Sheet1.csv",
    )
)

# Show the column names of each frame; the next cell maps logical fields onto them.
for _label, _frame in (
    ("ORDERS columns:", orders),
    ("DELIVERY columns:", delivery),
    ("INVENTORY columns:", inventory),
):
    print(_label, _frame.columns.tolist())

# Cell output: a tuple with the first three rows of each frame.
tuple(frame.head(3) for frame in (orders, delivery, inventory))


In [None]:

import numpy as np

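# === Parameters you can adjust ===
# NOTE: nothing is defined here yet, although a later cell reads SLA_MINUTES
# (the threshold that transit time is compared against to flag a delivery as late).
# Define it in this cell before running the rest of the notebook.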


def find_col(candidates, columns):
    """Return the first column label that matches one of ``candidates``.

    Matching is case-insensitive on both sides. Candidates are tried in the
    order given, so earlier candidates take priority. For each candidate an
    exact (case-insensitive) name match wins over a substring match; if there
    is no exact match, the first column whose name contains the candidate is
    returned.

    Parameters
    ----------
    candidates : iterable of str
        Name fragments to look for, most preferred first.
    columns : iterable
        Column labels (e.g. ``DataFrame.columns``). Non-string labels are
        compared through ``str()`` instead of raising.

    Returns
    -------
    The matching label exactly as it appears in ``columns``, or ``None`` when
    nothing matches.
    """
    labels = list(columns)
    lowered = [str(label).lower() for label in labels]

    # Exact-name lookup; on duplicate lowered names the first column wins.
    exact = {}
    for low, label in zip(lowered, labels):
        exact.setdefault(low, label)

    for cand in candidates:
        needle = str(cand).lower()
        if needle in exact:
            return exact[needle]
        # Fall back to substring search, preserving column order.
        for low, label in zip(lowered, labels):
            if needle in low:
                return label
    return None

# Resolve every logical field to an actual column name in the loaded frames.
# find_col returns None when no column matches, so any entry may be None.
mapping = {
    # ORDERS mapping
    "orders": {
        "timestamp": find_col(['order timestamp','order_date','order time','timestamp'], orders.columns),
        "customer_id": find_col(['customerid','customer id','cust_id'], orders.columns),
        "revenue": find_col(['revenue','amount','order value','aov','total'], orders.columns),
        "promo_flag": find_col(['promo','promotion','coupon','offer'], orders.columns),
        "month": find_col(['month'], orders.columns),
    },
    # DELIVERY mapping
    "delivery": {
        "transit_time_mins": find_col(['transit time','delivery time','time to deliver','time to delivery','transit'], delivery.columns),
        "dispatch_time_mins": find_col(['time to dispatch','dispatch'], delivery.columns),
        "slot": find_col(['slot','time slot','delivery slot'], delivery.columns),
        "zone": find_col(['zone','area','region'], delivery.columns),
        "distance_km": find_col(['distance'], delivery.columns),
        "late_flag": find_col(['late','delay','over sla'], delivery.columns),
    },
    # INVENTORY mapping
    "inventory": {
        "product_category": find_col(['product category','category'], inventory.columns),
        "week": find_col(['week'], inventory.columns),
        "stockout_incidents": find_col(['stockout incidents','stock out','stockout','oos'], inventory.columns),
    },
}

# Flat aliases for the same results, kept as module-level names.
order_ts_col = mapping["orders"]["timestamp"]
customer_id_col = mapping["orders"]["customer_id"]
revenue_col = mapping["orders"]["revenue"]
promo_col = mapping["orders"]["promo_flag"]
month_col = mapping["orders"]["month"]

transit_col = mapping["delivery"]["transit_time_mins"]
dispatch_col = mapping["delivery"]["dispatch_time_mins"]
slot_col = mapping["delivery"]["slot"]
zone_col = mapping["delivery"]["zone"]
distance_col = mapping["delivery"]["distance_km"]
late_col = mapping["delivery"]["late_flag"]

category_col = mapping["inventory"]["product_category"]
week_col = mapping["inventory"]["week"]
stockout_col = mapping["inventory"]["stockout_incidents"]

# Cell output: the resolved mapping, for a visual check.
mapping


In [None]:

# Convert the detected order-timestamp column to datetime in place.
# errors='coerce' turns unparseable values into NaT instead of raising.
_ts_col = mapping["orders"]["timestamp"]
if _ts_col is not None:
    orders[_ts_col] = pd.to_datetime(orders[_ts_col], errors='coerce')

# Numeric coercions
def to_numeric(df, col):
    """Coerce ``df[col]`` to a numeric dtype, modifying ``df`` in place.

    Does nothing when ``col`` is ``None`` or is not a column of ``df``.
    Values that cannot be parsed become NaN (``errors='coerce'``).
    Always returns ``None``.
    """
    column_present = col is not None and col in df.columns
    if column_present:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# to_numeric() silently ignores columns that were not found (None), so no guards are needed here.
to_numeric(orders, mapping["orders"]["revenue"])

for col in [mapping["delivery"]["transit_time_mins"], mapping["delivery"]["dispatch_time_mins"], mapping["delivery"]["distance_km"]]:
    to_numeric(delivery, col)

# Construct late_flag from SLA if not present but transit time exists
if mapping["delivery"]["late_flag"] is None and mapping["delivery"]["transit_time_mins"] is not None:
    # SLA_MINUTES is expected to come from the parameters cell. Look it up
    # defensively so a fresh-kernel run does not die with a NameError when the
    # parameter has not been defined; no threshold is guessed on the user's behalf.
    _sla_minutes = globals().get("SLA_MINUTES")
    if _sla_minutes is None:
        print("late_flag not derived: SLA_MINUTES is not defined. "
              "Set it in the parameters cell and re-run this cell.")
    else:
        _transit = delivery[mapping["delivery"]["transit_time_mins"]]
        # A missing transit time means lateness is unknown, not on-time:
        # keep those rows as <NA> (nullable Int64) rather than 0.
        delivery['late_flag_auto'] = (_transit > _sla_minutes).astype('Int64').mask(_transit.isna())
        mapping["delivery"]["late_flag"] = 'late_flag_auto'

# Derive month from timestamp if not present
if mapping["orders"]["month"] is None and mapping["orders"]["timestamp"] is not None:
    # strftime yields 'YYYY-MM' and leaves NaT rows as missing, whereas
    # to_period('M').astype(str) would label them with the literal string 'NaT'.
    orders['Month'] = orders[mapping["orders"]["timestamp"]].dt.strftime('%Y-%m')
    mapping["orders"]["month"] = 'Month'

# Cell output: the mapping after any derived columns were registered.
mapping


In [None]:

from scipy import stats
# Welch's t-test (equal_var=False): revenue of promo orders vs non-promo orders.
# ttest_output holds either the scipy result or a string explaining why the test was skipped.
ttest_output = "Skipped (promo/coupon column not found)."

rev = mapping["orders"]["revenue"]
promo = mapping["orders"]["promo_flag"]

if rev and promo and rev in orders.columns and promo in orders.columns:
    _promo_text = orders[promo].astype(str).str.strip().str.lower()
    # A 0/1 flag that pandas read as float stringifies to '1.0', which the text
    # list alone would miss; also accept any value that parses to the number 1.
    promo_series = (
        _promo_text.isin(['1','true','yes','y','promo','applied'])
        | pd.to_numeric(_promo_text, errors='coerce').eq(1)
    )
    a = pd.to_numeric(orders.loc[promo_series, rev], errors='coerce').dropna()
    b = pd.to_numeric(orders.loc[~promo_series, rev], errors='coerce').dropna()
    if len(a) > 2 and len(b) > 2:
        ttest_output = stats.ttest_ind(a, b, equal_var=False)
    else:
        # Report the real reason instead of the default "column not found" text.
        ttest_output = (
            "Skipped (need at least 3 revenue values per group; "
            f"promo={len(a)}, non-promo={len(b)})."
        )
elif promo and promo in orders.columns:
    # The promo column exists, so the missing piece is the revenue column.
    ttest_output = "Skipped (revenue column not found)."
ttest_output


## 3) Chi-Square — Late Deliveries vs Slot

In [None]:

# Chi-square test of independence between delivery slot and the late flag.
# chi_output holds either a dict of results or a string explaining why the test was skipped.
chi_output = "Skipped (slot or late_flag not found)."
slot = mapping["delivery"]["slot"]
late = mapping["delivery"]["late_flag"]

if slot and late and slot in delivery.columns and late in delivery.columns:
    # Rows with a missing slot or late flag are dropped by crosstab's default dropna=True.
    table = pd.crosstab(delivery[slot], delivery[late])
    if table.shape[0] > 1 and table.shape[1] > 1:
        chi2, p, dof, expected = stats.chi2_contingency(table)
        # min_expected lets the reader judge the approximation: the usual
        # guideline is that every expected cell count should be at least 5.
        chi_output = {"chi2": chi2, "p": p, "dof": dof, "min_expected": float(expected.min())}
    else:
        # Both columns exist but one of them has a single level, so there is nothing to test.
        chi_output = (
            f"Skipped (contingency table is {table.shape[0]}x{table.shape[1]}; "
            "need at least 2 slot values and 2 late_flag values)."
        )
chi_output
