# Enrichment Recovery Validation

This notebook validates the **Interrupted Time Series (ITS)** model by comparing its estimates against known true effects.

## Validation Approach

1. Generate **counterfactual** data (baseline - no enrichment)
2. Generate **factual** data (with enrichment applied)
3. Calculate **true effect** by comparing factual vs counterfactual
4. Run **ITS model** on factual data alone
5. Compare ITS estimate to true effect

## Setup

In [None]:
import json
from pathlib import Path

import pandas as pd
from impact_engine import evaluate_impact, parse_config_file
from impact_engine.metrics import create_metrics_manager
from online_retail_simulator import simulate

In [2]:
def _post_intervention_daily_totals(
    metrics: pd.DataFrame,
    intervention: pd.Timestamp,
    metric: str,
) -> pd.Series:
    """Return per-calendar-day totals of ``metric`` for days on/after ``intervention``."""
    # Parse and normalize dates BEFORE grouping so that rows sharing a calendar
    # day collapse into a single daily total, even when the "date" column
    # carries a time-of-day component. ``assign`` works on a copy, so the
    # caller's frame is left untouched.
    daily_totals = (
        metrics.assign(date=pd.to_datetime(metrics["date"]).dt.normalize())
        .groupby("date")[metric]
        .sum()
    )
    return daily_totals[daily_totals.index >= intervention]


def calculate_true_effect(
    baseline_metrics: pd.DataFrame,
    enriched_metrics: pd.DataFrame,
    intervention_date: str,
    metric: str = "revenue",
) -> dict:
    """
    Calculate the TRUE causal effect by comparing factual vs counterfactual.

    Both frames are summed to daily totals of ``metric``; the effect is the
    difference between the mean daily total of the factual and counterfactual
    series over the post-intervention window (dates >= ``intervention_date``).

    Parameters
    ----------
    baseline_metrics : DataFrame
        Counterfactual data (no enrichment). Must contain a ``date`` column
        and the ``metric`` column.
    enriched_metrics : DataFrame
        Factual data (with enrichment). Same required columns.
    intervention_date : str
        When enrichment started (anything ``pd.Timestamp`` can parse).
    metric : str
        Which metric to analyze.

    Returns
    -------
    dict
        ``counterfactual_mean``, ``factual_mean``, ``absolute_effect`` and
        ``percent_effect`` (all floats). ``percent_effect`` is expressed
        relative to the magnitude of the counterfactual mean and is 0 when
        that mean is exactly 0.

    Raises
    ------
    ValueError
        If either frame has no observations on or after ``intervention_date``.
    """
    intervention = pd.Timestamp(intervention_date)

    baseline_post = _post_intervention_daily_totals(baseline_metrics, intervention, metric)
    enriched_post = _post_intervention_daily_totals(enriched_metrics, intervention, metric)

    # An empty window would yield NaN means and a misleading "0%" effect, so
    # fail loudly instead of letting the validation proceed on garbage.
    if baseline_post.empty or enriched_post.empty:
        raise ValueError(
            f"No observations on or after intervention date {intervention_date!r}; "
            "cannot compute the true effect."
        )

    baseline_mean = baseline_post.mean()
    enriched_mean = enriched_post.mean()
    absolute_effect = enriched_mean - baseline_mean

    # Divide by the magnitude of the baseline so the sign of the percentage
    # always follows the sign of the absolute effect (a negative baseline mean
    # previously short-circuited to 0%). Zero baseline keeps the 0 convention
    # to avoid a division by zero.
    if baseline_mean != 0:
        percent_effect = absolute_effect / abs(baseline_mean) * 100
    else:
        percent_effect = 0

    return {
        "counterfactual_mean": float(baseline_mean),
        "factual_mean": float(enriched_mean),
        "absolute_effect": float(absolute_effect),
        "percent_effect": float(percent_effect),
    }

## Step 1: Create Products Catalog

In production, this would be your actual product catalog.

In [None]:
# Every artifact produced by this demo is written beneath this directory;
# create it up front (idempotent thanks to exist_ok).
output_path = Path("output/demo_enrichment")
output_path.mkdir(parents=True, exist_ok=True)

# Run the simulator with the catalog config under job id "catalog", then load
# the "products" table it produced.
job_info = simulate("configs/demo_enrichment_catalog.yaml", job_id="catalog")
products = job_info.load_df("products")

print(
    f"Generated {len(products)} products",
    f"Products catalog: {job_info.get_store().full_path('products.csv')}",
    sep="\n",
)
# Last expression of the cell: rich preview of the first catalog rows.
products.head()

## Step 2: Define Enrichment Configuration

Configure a 30% quality boost starting on 2024-11-23 (the intervention date).

In [None]:
# Simulation configs: one with enrichment applied (factual) and one without
# (counterfactual baseline).
baseline_config_path = "configs/demo_enrichment_baseline.yaml"
config_path = "configs/demo_enrichment.yaml"

# Scenario parameters used by the cells below. In the visible cells
# `quality_boost` is only echoed in the summary line.
# NOTE(review): presumably these must mirror the values inside the YAML
# configs — confirm they are kept in sync.
metric = "revenue"
quality_boost = 0.3
intervention_date = "2024-11-23"

print(
    f"Enrichment: {quality_boost*100:.0f}% quality boost starting {intervention_date}",
    f"Metric: {metric}",
    sep="\n",
)

## Step 3: Generate Counterfactual (No Enrichment)

In [None]:
# Counterfactual run: parse the baseline config, build a metrics manager from
# it, and retrieve metrics for the `products` catalog.
# NOTE(review): presumably the baseline config differs from the enriched one
# only by leaving enrichment off (same seed, same date range) — confirm,
# otherwise the "true effect" will also contain simulation noise.
parsed_baseline = parse_config_file(baseline_config_path)
baseline_manager = create_metrics_manager(parsed_baseline)
baseline_metrics = baseline_manager.retrieve_metrics(products)

print(f"Generated {len(baseline_metrics)} baseline (counterfactual) records")

## Step 4: Generate Factual (With Enrichment)

In [None]:
# Factual run: same pipeline as the counterfactual cell, but driven by the
# enrichment config, over the same `products` catalog.
# NOTE(review): assumes the simulation is deterministic for a given config so
# that this frame matches the data the ITS model is later fitted on — confirm.
parsed_enriched = parse_config_file(config_path)
enriched_manager = create_metrics_manager(parsed_enriched)
enriched_metrics = enriched_manager.retrieve_metrics(products)

print(f"Generated {len(enriched_metrics)} enriched (factual) records")

## Step 5: Calculate True Effect

Compare factual vs counterfactual to get the **true causal effect**.

In [None]:
# Ground truth: difference between the factual and counterfactual series over
# the post-intervention window.
true_effect = calculate_true_effect(
    baseline_metrics,
    enriched_metrics,
    intervention_date,
    metric=metric,
)

rule = "=" * 60
print(rule, "TRUE CAUSAL EFFECT (Factual vs Counterfactual)", rule, sep="\n")
print(f"Counterfactual mean: ${true_effect['counterfactual_mean']:,.2f}")
print(f"Factual mean:        ${true_effect['factual_mean']:,.2f}")
print(f"True effect:         {true_effect['percent_effect']:.1f}%")

## Step 6: Run ITS Model on Factual Data

In [None]:
# Run the impact evaluation for the enrichment config; the returned path points
# at a JSON results file, which is parsed below.
# NOTE(review): evaluate_impact receives only the config path, so it presumably
# regenerates the factual data itself rather than reusing `enriched_metrics` —
# confirm both runs produce identical data.
results_path = evaluate_impact(config_path, str(output_path), job_id="results")

# JSON is UTF-8 by specification; be explicit so parsing does not depend on the
# platform's locale default encoding.
with open(results_path, encoding="utf-8") as f:
    its_results = json.load(f)

# The estimates live under data -> impact_estimates in the results payload.
data = its_results["data"]
impact_estimates = data["impact_estimates"]

print("=" * 60)
print("ITS MODEL ESTIMATE (from factual data only)")
print("=" * 60)
print(f"Pre-intervention mean:  ${impact_estimates['pre_intervention_mean']:,.2f}")
print(f"Post-intervention mean: ${impact_estimates['post_intervention_mean']:,.2f}")
print(f"Estimated effect:       {impact_estimates['percent_change']:.1f}%")

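## Step 7: Validate Model Accuracy

Recovery accuracy measures how closely the ITS estimate matches the true effect: $\left(1 - \left|1 - \frac{\text{ITS estimate}}{\text{true effect}}\right|\right) \times 100$, floored at 0% when displayed. A value of 100% means the model recovered the true effect exactly.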

In [None]:
its_pct = impact_estimates["percent_change"]
true_pct = true_effect["percent_effect"]

# Recovery accuracy: 100% minus the relative error of the ITS estimate with
# respect to the true effect.
if true_pct == 0:
    # No true effect to recover: perfect only if the model also reports none.
    recovery_accuracy = 100 if its_pct == 0 else 0
else:
    recovery_accuracy = (1 - abs(1 - its_pct / true_pct)) * 100

rule = "=" * 60
print(rule, "MODEL VALIDATION", rule, sep="\n")
print(f"\nTrue effect:       {true_pct:.1f}%")
print(f"ITS estimate:      {its_pct:.1f}%")
# Only the displayed figure is floored at 0; the verdict below uses the raw value.
print(f"Recovery accuracy: {max(0, recovery_accuracy):.1f}%")

model_summary = data["model_summary"]
print(f"\nObservations: {model_summary['n_observations']} days")
print(f"Pre-period:   {model_summary['pre_period_length']} days")
print(f"Post-period:  {model_summary['post_period_length']} days")

# Verdict bands, checked from the strictest threshold down; anything below the
# last band falls through to the warning.
verdict_bands = (
    (90, "EXCELLENT: ITS model accurately recovered the true causal effect!"),
    (70, "GOOD: ITS estimate is close to the true effect."),
    (50, "PARTIAL: ITS estimate differs from true effect."),
)
verdict = next(
    (message for floor, message in verdict_bands if recovery_accuracy >= floor),
    "WARNING: ITS estimate differs significantly from true effect.",
)

print("\n" + rule)
print(verdict)
print("Demo Complete!")
print(rule)