# In [None]:
import pandas as pd
import numpy as np


# Per-row predictions from the supervised models; the blend below reads its
# pred_rf_proba, pred_lr_proba, pred_lgbm_proba and pred_cat_proba columns.
sl = pd.read_csv("../data/supervised_learning_predictions.csv")

# XGBoost predictions live in a separate file.
xgb = pd.read_csv("../data/preds_train_test_op_xgb_v2.csv")

# Attach pred_xgb to every supervised-learning row. This is a left join, so
# rows without a counterpart in the XGBoost file are kept with pred_xgb = NaN.
df = pd.merge(
    left=sl,
    right=xgb[
        [
            "user_id",
            "order_id",
            "product_id",
            "substitute_id",
            "GMM_cluster_id",
            "pred_xgb",
        ]
    ],
    on=["user_id", "order_id", "product_id", "substitute_id", "GMM_cluster_id"],
    how="left",
)

# Weighted blend of the five model outputs, accumulated left to right.
# NOTE(review): the weights add up to 1.30 rather than 1.00. The min-max
# rescale that follows cancels any constant factor, so only the ratios
# between the weights affect the result.
# A NaN in any of the five inputs makes that row's final_score NaN.
df["final_score"] = (
    df["pred_rf_proba"].mul(0.15)
    .add(df["pred_lr_proba"].mul(0.15))
    .add(df["pred_lgbm_proba"].mul(0.30))
    .add(df["pred_cat_proba"].mul(0.30))
    .add(df["pred_xgb"].mul(0.40))
)

# Min-max rescale so the lowest blended score maps to 0 and the highest to 1.
# NOTE(review): if every score is identical the denominator below is zero.
df["final_score"] = df["final_score"].sub(df["final_score"].min()).div(
    df["final_score"].max() - df["final_score"].min()
)



def generate_signal(row):
    """Return the inventory-planner message for one row.

    Args:
        row: Any object supporting ``row["final_score"]`` (for example a
            DataFrame row or a dict) whose value can be compared to floats.

    Returns:
        The message of the highest tier whose cutoff the score reaches, or
        ``"NO SIGNAL"`` when the score is below every cutoff.
    """
    score = row["final_score"]

    # Ordered from the strictest cutoff downwards; the first match wins.
    tiers = (
        (0.90, "⚠️ CRITICAL — MUST SUBS RECOMMEND"),
        (0.75, "⚠️ HIGH — STRONG SUBSTITUTE RECOMMENDATION"),
        (0.60, "⚡ MEDIUM — POSSIBLE SUBSTITUTE"),
        (0.40, "LOW — Only suggest if inventory < threshold"),
    )
    for cutoff, message in tiers:
        if score >= cutoff:
            return message

    # Reached for scores under 0.40, and also for NaN, since every
    # comparison against NaN is False.
    return "NO SIGNAL"

# Row-level signal: generate_signal is called once per row
# (axis="columns" is the named spelling of axis=1).
df["inventory_planner_signal"] = df.apply(generate_signal, axis="columns")


# Roll the rows up to one record per (product_id, substitute_id) pair.
agg = (
    df.groupby(by=["product_id", "substitute_id"])
    .agg(
        avg_score=pd.NamedAgg(column="final_score", aggfunc="mean"),
        max_score=pd.NamedAgg(column="final_score", aggfunc="max"),
        # "count" tallies the non-null user_id values in each group.
        count=pd.NamedAgg(column="user_id", aggfunc="count"),
    )
    .reset_index()
)

def map_signal(score):
    """Map a numeric score to a short planner tier name.

    Args:
        score: A number comparable to floats.

    Returns:
        ``"CRITICAL"``, ``"HIGH"``, ``"MEDIUM"`` or ``"LOW"`` for the highest
        tier whose cutoff the score reaches, otherwise ``"NONE"``.
    """
    # Ordered from the strictest cutoff downwards; the first match wins.
    tiers = (
        (0.90, "CRITICAL"),
        (0.75, "HIGH"),
        (0.60, "MEDIUM"),
        (0.40, "LOW"),
    )
    for cutoff, label in tiers:
        if score >= cutoff:
            return label

    # Scores under 0.40 land here; so does NaN, because every comparison
    # against NaN is False.
    return "NONE"

# Label each aggregated (product_id, substitute_id) record from its mean score.
agg["planner_signal"] = [map_signal(mean_score) for mean_score in agg["avg_score"]]

# Write both granularities; index=False keeps the DataFrame index out of the files.
df.to_csv(
    path_or_buf="../data/final_inventory_signals_row_level.csv",
    index=False,
)
agg.to_csv(
    path_or_buf="../data/final_inventory_signals_aggregated.csv",
    index=False,
)

print("DONE — Signals created successfully!")
