In [1]:
# NHSRC PHC SUPPLY CHAIN - POLICY REFINEMENT & THRESHOLD TUNING
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

print("üéØ NHSRC PHC POLICY REFINEMENT & THRESHOLD TUNING")
print("=" * 70)

# 1. LOAD ALL POLICY INPUTS
# Reads the three CSV inputs, isolates the "Base" scenario rows, and
# left-joins their risk columns onto the replenishment table to build `df`,
# the working frame for the rest of the script.
print("üì• 1. Loading All Policy Inputs...")

# Load replenishment recommendations (Day 6 output)
repl = pd.read_csv("reports/replenishment_recommendations.csv")

# Load scenario summary (Day 7 output)
scenarios = pd.read_csv("reports/scenario_summary.csv")

# Load cleaned inventory for historical patterns
# (the two date columns are parsed to datetimes at load time)
cleaned = pd.read_csv("data/cleaned_inventory.csv", parse_dates=['date', 'batch_expiry_date'])

# These are row counts; the "SKUs" label assumes one row per SKU in `repl`.
print(f"   Replenishment data: {len(repl)} SKUs")
print(f"   Scenario data: {len(scenarios)} records")
print(f"   Historical inventory: {len(cleaned):,} records")

# Focus on Base scenario for now
# (.copy() so later edits do not touch a slice of `scenarios`)
base_scen = scenarios[scenarios["scenario"] == "Base"].copy()

# Keep only key risk columns from base scenario, prefixing the renamed ones
# with "base_" so they are distinguishable once merged into `df`.
base_scen_small = base_scen[[
    "sku_id", "scenario_forecast_14d", "days_until_stockout",
    "stockout_risk_flag", "expiry_risk_flag", "severity_score"
]].rename(columns={
    "scenario_forecast_14d": "base_forecast_14d",
    "days_until_stockout": "base_days_until_stockout",
    "severity_score": "base_severity_score"
})

# Merge base scenario info into the replenishment table.
# Left join: every replenishment row is kept; a SKU without a Base-scenario
# row gets NaN in the base_* and *_flag columns.
# NOTE(review): if the Base scenario holds more than one row per sku_id this
# merge duplicates replenishment rows — confirm uniqueness upstream.
df = pd.merge(repl, base_scen_small, on="sku_id", how="left")
print(f"   Merged dataset: {len(df)} SKUs")

# 2. DEFINE NHSRC-INSPIRED SAFETY BANDS (DAYS COVER)
print("\nüõ°Ô∏è 2. Defining NHSRC-Inspired Safety Bands...")

def min_cover_required(row):
    """Return the minimum acceptable days of cover for one SKU.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing
            ``"ved_category"`` and ``"fsn_category"``.

    Returns:
        int: The days-of-cover floor:
            * Vital & Fast        -> 14
            * Vital & anything else (Slow, Non-moving, missing) -> 30
            * Essential           -> 10
            * everything else     -> 7
    """
    ved = row["ved_category"]
    fsn = row["fsn_category"]

    if ved == "Vital":
        # Only fast movers get the shorter floor. Any other Vital item uses
        # the conservative slow-mover floor, so an unexpected or missing FSN
        # value can never drop a Vital drug to the Desirable floor of 7 days.
        return 14 if fsn == "Fast" else 30

    # Essential: important but with alternatives
    if ved == "Essential":
        return 10

    # Desirable (and any unrecognised VED value): low clinical impact
    return 7

# Per-SKU floor, derived row by row from the VED/FSN category pair.
df["min_days_cover"] = df.apply(min_cover_required, axis=1)

# Compute how far you are from that floor
# (negative = below the minimum, positive = buffer above it).
df["cover_gap_vs_min"] = df["days_cover"] - df["min_days_cover"]

print("   NHSRC Minimum Days Cover by Category:")
# One summary row per (VED, FSN) pair that occurs in the data; .first() takes
# the floor of the first SKU in each group.
min_cover_summary = df.groupby(['ved_category', 'fsn_category'])['min_days_cover'].first().reset_index()
print(min_cover_summary.to_string(index=False))

# Bucket the gap: below the floor, 0-5 days above it, more than 5 days above it.
print(f"\n   Cover Gap Analysis:")
print(f"   - Below minimum: {(df['cover_gap_vs_min'] < 0).sum()} SKUs")
print(f"   - Within buffer (0-5 days): {((df['cover_gap_vs_min'] >= 0) & (df['cover_gap_vs_min'] <= 5)).sum()} SKUs")
print(f"   - Plenty of buffer (>5 days): {(df['cover_gap_vs_min'] > 5).sum()} SKUs")

# 3. ADD VENDOR RELIABILITY FACTOR (VRF)
print("\nüöö 3. Adding Vendor Reliability Factor...")

# Compute mean and std dev of lead time per SKU from the historical inventory;
# the columns are renamed to lt_mean / lt_std before joining onto `df`.
lt_stats = (
    cleaned
    .groupby("sku_id")["lead_time_days"]
    .agg(["mean", "std"])
    .reset_index()
    .rename(columns={"mean": "lt_mean", "std": "lt_std"})
)

# Left join: SKUs with no history in `cleaned` get NaN lt_mean / lt_std.
df = pd.merge(df, lt_stats, on="sku_id", how="left")

# Map lt_std to a reliability factor
def vrf_from_variability(std):
    """Translate a lead-time standard deviation into a reliability factor.

    Args:
        std: Standard deviation of a SKU's lead time; may be missing.

    Returns:
        float: 1.0 when std <= 1 (stable), 1.2 when std <= 3 (moderate) or
        when std is missing (mild caution), otherwise 1.5 (unreliable).
    """
    # Unknown variability is treated with mild caution.
    if pd.isna(std):
        return 1.2

    # (inclusive upper bound on std, factor), checked in ascending order.
    tiers = ((1, 1.0), (3, 1.2))
    for upper_bound, factor in tiers:
        if std <= upper_bound:
            return factor

    # Above every tier: unreliable supplier.
    return 1.5

# Convert each SKU's lead-time variability into a multiplier, then inflate
# the lead time by it. `lead_time_days` must already be a column of `df`
# (the lt_stats merge only contributes lt_mean / lt_std).
df["vendor_reliability_factor"] = df["lt_std"].apply(vrf_from_variability)
df["effective_lead_time"] = df["lead_time_days"] * df["vendor_reliability_factor"]

print("   Vendor Reliability Classification:")
vrf_counts = df["vendor_reliability_factor"].value_counts().sort_index()
# Label each factor for display; the values compared here are the exact
# literals returned by vrf_from_variability (1.0 / 1.2 / 1.5).
for vrf, count in vrf_counts.items():
    if vrf == 1.0:
        reliability = "Stable"
    elif vrf == 1.2:
        reliability = "Moderate"
    else:
        reliability = "Unreliable"
    print(f"   - {reliability} (VRF={vrf}): {count} SKUs")

# 4. ADD PRICE-SENSITIVITY MODIFIER
print("\nüí∞ 4. Adding Price-Sensitivity Modifier...")

# Compute average price per SKU over all historical inventory records
price_stats = (
    cleaned
    .groupby("sku_id")["price_per_unit"]
    .mean()
    .reset_index()
    .rename(columns={"price_per_unit": "avg_price_per_unit"})
)

# Left join: SKUs with no history in `cleaned` get NaN avg_price_per_unit.
df = pd.merge(df, price_stats, on="sku_id", how="left")

# Define a price factor
def price_factor(row):
    """Return the price-priority multiplier for one SKU row.

    Args:
        row: Mapping-like object exposing ``"avg_price_per_unit"``.

    Returns:
        float: 1.0 when the price is missing or at most 5 (no penalty),
        0.9 when it is at most 20 (slight dampening), otherwise 0.8
        (expensive: be more conservative).
    """
    unit_price = row["avg_price_per_unit"]

    # Missing and cheap prices both leave the priority untouched.
    if pd.isna(unit_price) or unit_price <= 5:
        return 1.0

    return 0.9 if unit_price <= 20 else 0.8

# Per-SKU price multiplier (1.0 / 0.9 / 0.8) from the average unit price.
df["price_priority_factor"] = df.apply(price_factor, axis=1)

print("   Price Sensitivity Classification:")
price_counts = df["price_priority_factor"].value_counts().sort_index()
# Label each factor for display; the values compared here are the exact
# literals returned by price_factor.
for factor, count in price_counts.items():
    if factor == 1.0:
        sensitivity = "Low (Cheap)"
    elif factor == 0.9:
        sensitivity = "Medium"
    else:
        sensitivity = "High (Expensive)"
    print(f"   - {sensitivity} (Factor={factor}): {count} SKUs")

# 5. BUILD A STABILITY / RESILIENCE SCORE (0-100)
print("\nüìä 5. Building Stability/Resilience Score (0-100)...")

# 5.1 Normalize base risk signals to a 0-1 scale
# Cover risk: how far below minimum
#   gap <= -7 days -> 1.0, -7 < gap < 0 -> 0.5, otherwise 0.0
#   (a NaN gap fails both comparisons and therefore maps to 0.0)
df["cover_risk"] = df["cover_gap_vs_min"].apply(
    lambda x: 1.0 if x <= -7 else (0.5 if x < 0 else 0.0)
)

# Stockout risk from simulation
# NOTE(review): the division assumes severity is on a 0-10 scale — confirm
# against the scenario step. SKUs without a Base-scenario row stay NaN here.
df["stockout_risk"] = df["base_severity_score"] / 10.0  # since you used 0 or 10 in severity

# Expiry risk from FEFO (need to get from cleaned data)
# For now, create a proxy from days_cover:
#   more than 90 days of cover -> 0.7, more than 60 -> 0.3, otherwise 0.0
df["expiry_risk"] = df["days_cover"].apply(
    lambda x: 0.7 if x > 90 else 0.3 if x > 60 else 0.0
)

# 5.2 Combine into a resilience score
def compute_resilience(row):
    """Combine a SKU's risk signals into a 0-100 resilience score.

    The score starts at 100 and loses weighted penalties:
    ``cover_risk`` * 30, ``stockout_risk`` * 40, ``expiry_risk`` * 20, plus
    10 points when ``vendor_reliability_factor`` exceeds 1.4 or 5 points when
    it exceeds 1.1.

    Args:
        row: Mapping-like object exposing ``"cover_risk"``,
            ``"stockout_risk"``, ``"expiry_risk"`` and
            ``"vendor_reliability_factor"``.

    Returns:
        The score clamped to the range [0, 100]. A missing (NaN) risk
        component contributes no penalty.
    """
    weighted_signals = (
        ("cover_risk", 30),
        ("stockout_risk", 40),
        ("expiry_risk", 20),
    )

    penalty = 0
    for column, weight in weighted_signals:
        signal = row[column]
        # Skip missing signals: one NaN would otherwise turn the whole
        # penalty into NaN, and max(0, min(100, nan)) evaluates to 100, so
        # the SKU would get a perfect score and lose its known penalties.
        if pd.isna(signal):
            continue
        penalty += signal * weight

    # Vendor unreliability penalty (higher VRF = worse)
    vrf = row["vendor_reliability_factor"]
    if vrf > 1.4:
        penalty += 10
    elif vrf > 1.1:
        penalty += 5

    # Hard cap and floor
    score = 100 - penalty
    return max(0, min(100, score))

# Score every SKU row; the result is clamped to 0-100 by compute_resilience.
df["resilience_score"] = df.apply(compute_resilience, axis=1)

# Summary statistics of the score across all rows of `df`.
print("   Resilience Score Distribution:")
print(f"   - Min: {df['resilience_score'].min():.1f}")
print(f"   - Max: {df['resilience_score'].max():.1f}")
print(f"   - Mean: {df['resilience_score'].mean():.1f}")
print(f"   - Std: {df['resilience_score'].std():.1f}")

# 6. MAP RESILIENCE TO RISK BANDS
print("\nüéØ 6. Mapping Resilience to Risk Bands...")

def classify_resilience(score):
    """Map a resilience score to its risk band.

    Args:
        score: Numeric resilience score.

    Returns:
        str: "Stable" for score >= 90, "Watchlist" for >= 70, "Risky" for
        >= 40, and "Critical" for anything that meets none of those floors.
    """
    # (inclusive lower bound, band), checked from the highest floor down.
    band_floors = ((90, "Stable"), (70, "Watchlist"), (40, "Risky"))
    return next(
        (label for floor, label in band_floors if score >= floor),
        "Critical",
    )

# Band label (Stable / Watchlist / Risky / Critical) for every SKU row.
df["resilience_band"] = df["resilience_score"].apply(classify_resilience)

print("   Resilience Band Distribution:")
# value_counts() orders bands by frequency, most common first;
# `band_counts` is reused by the trainer summary at the end of the script.
band_counts = df["resilience_band"].value_counts()
for band, count in band_counts.items():
    percentage = (count / len(df)) * 100
    print(f"   - {band}: {count} SKUs ({percentage:.1f}%)")

# 7. FINAL DECISION LOGIC (OVERRIDE LAYER)
print("\n‚öñÔ∏è 7. Applying Final Decision Logic with Overrides...")

def final_action(row):
    """Choose the final procurement action for one SKU row.

    Rules are evaluated in order and the first match wins:

    1. Vital drugs: emergency replenish when cover is more than 3 days below
       the minimum or the stockout flag is set; high-priority reorder when
       the band is Risky/Critical; otherwise monitor.
    2. Expiry override (non-Vital): redistribute when the expiry flag is set
       and days of cover exceed 1.5x the minimum.
    3. Desirable & Slow: hold for Stable/Watchlist, budget-dependent reorder
       for Risky/Critical.
    4. Essential: action graded by resilience band.
    5. Anything else: manual review.

    Args:
        row: Mapping-like object exposing ``"ved_category"``,
            ``"fsn_category"``, ``"resilience_band"``,
            ``"cover_gap_vs_min"``, ``"days_cover"`` and
            ``"min_days_cover"``. ``"expiry_risk_flag"`` and
            ``"stockout_risk_flag"`` are optional and default to 0.

    Returns:
        str: The action label.
    """
    ved = row["ved_category"]
    fsn = row["fsn_category"]
    band = row["resilience_band"]
    expiry_flag = row.get("expiry_risk_flag", 0)
    cover_gap = row["cover_gap_vs_min"]
    at_risk = band in ("Risky", "Critical")

    # 1. Clinical override for Vital drugs
    if ved == "Vital":
        # Very low cover or a flagged stockout risk: must reorder
        if cover_gap < -3 or row.get("stockout_risk_flag", 0) == 1:
            return "EMERGENCY REPLENISH (VITAL)"
        if at_risk:
            return "HIGH PRIORITY REORDER (VITAL)"
        return "MONITOR VITAL STOCK"

    # 2. Expiry-based override: only when stock is well above the floor
    if expiry_flag == 1 and row["days_cover"] > row["min_days_cover"] * 1.5:
        return "REDISTRIBUTE / USE-FIRST (EXPIRY RISK)"

    # 3. Desirable + Slow: avoid overstocking low-impact items
    if ved == "Desirable" and fsn == "Slow":
        if band in ("Stable", "Watchlist"):
            return "HOLD: LOW PRIORITY DRUG"
        if at_risk:
            return "ONLY REORDER IF BUDGET ALLOWS"

    # 4. Essential drugs
    if ved == "Essential":
        if band == "Critical":
            return "URGENT REPLENISH (ESSENTIAL)"
        if band == "Risky":
            return "REORDER SOON (ESSENTIAL)"
        if band == "Watchlist":
            return "MONITOR & REVIEW NEXT CYCLE"
        return "HEALTHY STOCK (ESSENTIAL)"

    # Fallback
    # NOTE(review): Desirable SKUs that are not Slow have no dedicated rule
    # and always land here — confirm that is intended.
    return "MANUAL REVIEW"

# Final action label for every SKU row, decided by the override rules above.
df["final_action"] = df.apply(final_action, axis=1)

print("   Final Action Distribution:")
# value_counts() orders actions by frequency, most common first;
# `action_counts` is reused by the trainer summary at the end of the script.
action_counts = df["final_action"].value_counts()
for action, count in action_counts.items():
    percentage = (count / len(df)) * 100
    print(f"   - {action}: {count} SKUs ({percentage:.1f}%)")

# 8. APPLY PRICE ADJUSTMENT TO PRIORITY
print("\nüìã 8. Applying Price Adjustment to Priority...")

def adjusted_priority(row):
    """Return the final action priority (1 = most urgent, 5 = least).

    Starts from the row's ``"action_priority"`` (3 when absent or NaN),
    pushes expensive non-Vital items one step down, then lets the resilience
    band override: Stable is never more urgent than 4, Critical is always 1.

    Args:
        row: Mapping-like object exposing ``"ved_category"``,
            ``"price_priority_factor"`` and ``"resilience_band"``;
            ``"action_priority"`` is optional.

    Returns:
        The priority clamped to the range [1, 5].
    """
    default_priority = 3  # middle priority

    base = row.get("action_priority", default_priority)
    # A NaN priority would pass through every max()/min() below unchanged
    # and finish as 5 (least urgent), even for a Critical SKU.
    if pd.isna(base):
        base = default_priority

    # If very expensive and not Vital, bump priority down (numerically up)
    if row["ved_category"] != "Vital" and row["price_priority_factor"] < 0.9:
        base += 1

    # Resilience-based refinement
    if row["resilience_band"] == "Stable":
        base = max(base, 4)
    if row["resilience_band"] == "Critical":
        base = min(base, 1)

    return max(1, min(5, base))

# Final priority for every SKU row (1 = most urgent, 5 = least urgent).
df["final_action_priority"] = df.apply(adjusted_priority, axis=1)

print("   Final Priority Distribution:")
# sort_index() lists the priorities in ascending numeric order.
priority_counts = df["final_action_priority"].value_counts().sort_index()
for priority, count in priority_counts.items():
    print(f"   - Priority {priority}: {count} SKUs")

# 9. SAVE FINAL DECISION MATRIX
print("\nüíæ 9. Saving Final Decision Matrix...")

# Get expiry risk bucket from cleaned data if available.
# The column is optional: when the cleaned inventory does not carry it the
# merge is skipped (instead of raising a KeyError inside agg) and the export
# column filter below simply leaves the bucket out.
if 'expiry_risk_bucket' in cleaned.columns:
    # Most frequent bucket per SKU; mode() is empty when every value in the
    # group is missing, in which case the SKU is labelled 'UNKNOWN'.
    expiry_risk_data = cleaned.groupby('sku_id').agg({
        'expiry_risk_bucket': lambda x: next(iter(x.mode()), 'UNKNOWN')
    }).reset_index()

    df = pd.merge(df, expiry_risk_data, on='sku_id', how='left')

cols_export = [
    "sku_id", "sku_name", "ved_category", "fsn_category",
    "current_stock", "ADC", "days_cover", "min_days_cover", "cover_gap_vs_min",
    "base_forecast_14d", "effective_lead_time",
    "resilience_score", "resilience_band",
    "expiry_risk_bucket", "expiry_risk_flag",
    "final_action", "final_action_priority"
]

# Filter to columns that exist, then order the export so the most urgent
# priority comes first and, within a priority, the lowest resilience score.
available_cols = [col for col in cols_export if col in df.columns]
final_df = df[available_cols].sort_values(["final_action_priority", "resilience_score"])

output_path = "reports/final_decision_matrix.csv"
final_df.to_csv(output_path, index=False)
print(f"   ‚úÖ Saved: {output_path}")
print(f"   Records: {len(final_df)}")
print(f"   Columns: {len(final_df.columns)}")

# 10. FINAL OUTPUTS FOR TRAINER
# Prints a preview of the exported matrix, the action and band counts
# computed earlier, the git-tracked file list, and a closing summary.
print("\n" + "="*70)
print("üéØ TRAINER OUTPUTS")
print("="*70)

print("\n1. üîπ FIRST 10 ROWS OF FINAL_DECISION_MATRIX.CSV:")
print("-" * 70)
print(final_df.head(10).to_string())

# Reuses `action_counts` from step 7 rather than recounting.
print("\n2. üîπ COUNT OF SKUS PER FINAL_ACTION:")
print("-" * 70)
for action, count in action_counts.items():
    print(f"   {action}: {count} SKUs")

# Reuses `band_counts` from step 6 rather than recounting.
print("\n3. üîπ COUNT OF SKUS PER RESILIENCE_BAND:")
print("-" * 70)
for band, count in band_counts.items():
    print(f"   {band}: {count} SKUs")

print("\n4. üîπ UPDATED GIT LS-FILES:")
print("-" * 70)
import subprocess
# Runs `git ls-files` in the current working directory and prints its stdout.
# The return code is not checked, so a failing git command prints only
# whatever stdout it produced.
result = subprocess.run(['git', 'ls-files'], capture_output=True, text=True)
print(result.stdout)

print("\n" + "="*70)
print("‚úÖ DAY 8 POLICY REFINEMENT COMPLETE")
print("="*70)
print("\nüìå POLICY REFINEMENT ACHIEVED:")
print("   ‚Ä¢ NHSRC-inspired safety bands established")
print("   ‚Ä¢ Vendor reliability factor incorporated")
print("   ‚Ä¢ Price-sensitivity modifier applied")
print("   ‚Ä¢ Clinical and expiry overrides implemented")
print("   ‚Ä¢ Mathematically paranoid ‚Üí Operationally intelligent")

üéØ NHSRC PHC POLICY REFINEMENT & THRESHOLD TUNING
üì• 1. Loading All Policy Inputs...
   Replenishment data: 12 SKUs
   Scenario data: 48 records
   Historical inventory: 6,480 records
   Merged dataset: 12 SKUs

üõ°Ô∏è 2. Defining NHSRC-Inspired Safety Bands...
   NHSRC Minimum Days Cover by Category:
ved_category fsn_category  min_days_cover
   Desirable         Fast               7
   Desirable         Slow               7
   Essential         Fast              10
   Essential         Slow              10
       Vital         Fast              14
       Vital         Slow              30

   Cover Gap Analysis:
   - Below minimum: 8 SKUs
   - Within buffer (0-5 days): 1 SKUs
   - Plenty of buffer (>5 days): 3 SKUs

üöö 3. Adding Vendor Reliability Factor...
   Vendor Reliability Classification:
   - Moderate (VRF=1.2): 12 SKUs

üí∞ 4. Adding Price-Sensitivity Modifier...
   Price Sensitivity Classification:
   - High (Expensive) (Factor=0.8): 5 SKUs
   - Medium (Factor=0.9): 7