# Enrichment Recovery Validation

This notebook validates the **Interrupted Time Series (ITS)** model by comparing its estimates against known true effects.

## Validation Approach

1. Generate **counterfactual** data (baseline - no enrichment)
2. Generate **factual** data (with enrichment applied)
3. Calculate **true effect** by comparing factual vs counterfactual
4. Run **ITS model** on factual data alone
5. Compare ITS estimate to true effect

## Setup

In [1]:
import copy
import json
import os
import tempfile

import pandas as pd
import yaml
from impact_engine import evaluate_impact, parse_config_file
from impact_engine.metrics import create_metrics_manager
from online_retail_simulator import simulate_characteristics

In [2]:
def calculate_true_effect(
    baseline_metrics: pd.DataFrame,
    enriched_metrics: pd.DataFrame,
    intervention_date: str,
    metric: str = "revenue",
) -> dict:
    """
    Measure the true effect of enrichment from simulated ground truth.

    Both frames are summed per ``date``; the daily totals on or after
    ``intervention_date`` are averaged and the two averages are compared.

    Parameters
    ----------
    baseline_metrics : DataFrame
        Counterfactual data (no enrichment). Needs a ``date`` column and
        the ``metric`` column.
    enriched_metrics : DataFrame
        Factual data (with enrichment). Same columns as the baseline.
    intervention_date : str
        First day of the post-intervention period (inclusive).
    metric : str
        Name of the column to compare.

    Returns
    -------
    dict
        ``counterfactual_mean`` and ``factual_mean`` (mean daily totals in
        the post period), ``absolute_effect`` (factual minus
        counterfactual) and ``percent_effect`` (absolute effect as a
        percentage of the counterfactual mean; 0 when that mean is not
        greater than zero). All values are floats.
    """
    cutoff = pd.Timestamp(intervention_date)

    def _post_period_mean(frame: pd.DataFrame) -> float:
        # One total per date, then keep only dates at or after the cutoff.
        daily_totals = frame.groupby("date")[metric].sum()
        days = pd.to_datetime(daily_totals.index)
        return daily_totals.loc[days >= cutoff].mean()

    counterfactual_mean = _post_period_mean(baseline_metrics)
    factual_mean = _post_period_mean(enriched_metrics)
    lift = factual_mean - counterfactual_mean

    # A non-positive (or NaN) counterfactual mean has no meaningful
    # percentage base, so the relative effect is reported as 0.
    if counterfactual_mean > 0:
        relative_lift = lift / counterfactual_mean * 100
    else:
        relative_lift = 0

    return {
        "counterfactual_mean": float(counterfactual_mean),
        "factual_mean": float(factual_mean),
        "absolute_effect": float(lift),
        "percent_effect": float(relative_lift),
    }

## Step 1: Generate Product Characteristics

In [3]:
# Create the output directory that will hold the generated catalog.
os.makedirs("output/demo_enrichment", exist_ok=True)
products_path = "output/demo_enrichment/products.csv"

# Catalog simulation config: 50 rule-based products with a fixed seed so
# the catalog is the same on every run.
catalog_config = {
    "STORAGE": {"PATH": "output/demo_enrichment"},
    "RULE": {
        "CHARACTERISTICS": {
            "FUNCTION": "simulate_characteristics_rule_based",
            "PARAMS": {"num_products": 50, "seed": 42},
        }
    },
}

# simulate_characteristics is called with a config file path, so the dict
# is written to a temporary YAML file first. delete=False keeps the file on
# disk after the with-block closes it; it is removed explicitly below.
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
    yaml.dump(catalog_config, f)
    catalog_config_path = f.name

# Remove the temporary config even if the simulation raises, so failed runs
# do not leave stray YAML files behind.
try:
    job_info = simulate_characteristics(catalog_config_path)
finally:
    os.unlink(catalog_config_path)

# Persist the catalog as CSV; the simulator config in a later cell points
# at this path.
products = job_info.load_df("products")
products.to_csv(products_path, index=False)

print(f"Generated {len(products)} products")

Generated 50 products


## Step 2: Define Enrichment Configuration

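Configure a 30% quality boost (`quality_boost: 0.3`, `enrichment_fraction: 1.0`) starting on 2024-11-23, within a simulated window of 2024-11-16 to 2024-11-30. The ITS model uses the same intervention date and window.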

In [4]:
# Dates used by both the simulator and the ITS model; each is written once
# here so the DATA and MEASUREMENT sections stay in agreement.
window_start = "2024-11-16"
window_end = "2024-11-30"
enrichment_start = "2024-11-23"

# Data source: rule-based simulator reading the generated product catalog.
simulator_source = {
    "type": "simulator",
    "CONFIG": {
        "mode": "rule",
        "seed": 42,
        "path": products_path,
        "start_date": window_start,
        "end_date": window_end,
    },
}

# Enrichment step: the treatment whose effect the notebook tries to recover.
enrichment_step = {
    "FUNCTION": "product_detail_boost",
    "PARAMS": {
        "enrichment_fraction": 1.0,
        "enrichment_start": enrichment_start,
        "quality_boost": 0.3,
        "seed": 42,
    },
}

# Measurement: interrupted time series on revenue over the same window.
its_measurement = {
    "MODEL": "interrupted_time_series",
    "PARAMS": {
        "intervention_date": enrichment_start,
        "dependent_variable": "revenue",
        "start_date": window_start,
        "end_date": window_end,
    },
}

enrichment_config = {
    "DATA": {
        "SOURCE": simulator_source,
        "ENRICHMENT": enrichment_step,
        "TRANSFORM": {"FUNCTION": "aggregate_by_date", "PARAMS": {"metric": "revenue"}},
    },
    "MEASUREMENT": its_measurement,
    "OUTPUT": {"PATH": "output/demo_enrichment"},
}

# Read the headline parameters back out of the config so later cells use
# exactly what the pipeline was configured with.
enrichment_params = enrichment_config["DATA"]["ENRICHMENT"]["PARAMS"]
intervention_date = enrichment_params["enrichment_start"]
quality_boost = enrichment_params["quality_boost"]
metric = enrichment_config["MEASUREMENT"]["PARAMS"]["dependent_variable"]

print(f"Enrichment: {quality_boost*100:.0f}% quality boost starting {intervention_date}")
print(f"Metric: {metric}")

Enrichment: 30% quality boost starting 2024-11-23
Metric: revenue


## Step 3: Generate Counterfactual (No Enrichment)

In [5]:
# Counterfactual config: a deep copy of the enrichment config with the
# ENRICHMENT step removed, so everything else is unchanged.
baseline_config = copy.deepcopy(enrichment_config)
baseline_config["DATA"].pop("ENRICHMENT")

# The config is written to disk because parse_config_file reads a path.
baseline_config_path = "output/demo_enrichment/baseline_config.yaml"
with open(baseline_config_path, "w") as config_file:
    yaml.dump(baseline_config, stream=config_file)

# Parse the config, build a metrics manager from it, and retrieve metrics
# for the generated catalog.
parsed_baseline = parse_config_file(baseline_config_path)
baseline_manager = create_metrics_manager(parsed_baseline)
baseline_metrics = baseline_manager.retrieve_metrics(products)

n_baseline_records = len(baseline_metrics)
print(f"Generated {n_baseline_records} baseline (counterfactual) records")

Generated 750 baseline (counterfactual) records


## Step 4: Generate Factual (With Enrichment)

In [6]:
# Write the full enrichment config to disk; the same file is passed to
# evaluate_impact in a later cell.
enrichment_config_path = "output/demo_enrichment/enrichment_config.yaml"
with open(enrichment_config_path, "w") as config_file:
    yaml.dump(enrichment_config, stream=config_file)

# Same retrieval steps as the counterfactual, but with the config that
# still contains the ENRICHMENT section.
parsed_enriched = parse_config_file(enrichment_config_path)
enriched_manager = create_metrics_manager(parsed_enriched)
enriched_metrics = enriched_manager.retrieve_metrics(products)

n_enriched_records = len(enriched_metrics)
print(f"Generated {n_enriched_records} enriched (factual) records")

Generated 750 enriched (factual) records


## Step 5: Calculate True Effect

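Compare factual vs counterfactual to get the **true causal effect**: the difference in mean daily totals over the post-intervention period, expressed as a percentage of the counterfactual mean.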

In [7]:
# Ground truth: factual vs counterfactual over the post-intervention period.
true_effect = calculate_true_effect(
    baseline_metrics,
    enriched_metrics,
    intervention_date,
    metric,
)

# NOTE(review): the recorded output of this cell shows identical factual and
# counterfactual means (0.0% true effect) despite the configured boost.
# Confirm that retrieve_metrics actually applies the ENRICHMENT step.
counterfactual_mean = true_effect["counterfactual_mean"]
factual_mean = true_effect["factual_mean"]
true_percent = true_effect["percent_effect"]

banner = "=" * 50
print(banner)
print("TRUE CAUSAL EFFECT (Factual vs Counterfactual)")
print(banner)
print(f"Counterfactual mean: ${counterfactual_mean:,.2f}")
print(f"Factual mean:        ${factual_mean:,.2f}")
print(f"True effect:         {true_percent:.1f}%")

TRUE CAUSAL EFFECT (Factual vs Counterfactual)
Counterfactual mean: $2,972.97
Factual mean:        $2,972.97
True effect:         0.0%


## Step 6: Run ITS Model on Factual Data

In [8]:
# Run the full pipeline on the factual config; evaluate_impact returns the
# path of a JSON results file.
result_path = evaluate_impact(enrichment_config_path)

with open(result_path) as results_file:
    its_results = json.load(results_file)

# Missing keys fall back to 0 so the report below still prints.
impact_estimates = its_results.get("impact_estimates", {})
pre_mean = impact_estimates.get("pre_intervention_mean", 0)
post_mean = impact_estimates.get("post_intervention_mean", 0)
estimated_percent = impact_estimates.get("percent_change", 0)

banner = "=" * 50
print(banner)
print("ITS MODEL ESTIMATE (from factual data only)")
print(banner)
print(f"Pre-intervention mean:  ${pre_mean:,.2f}")
print(f"Post-intervention mean: ${post_mean:,.2f}")
print(f"Estimated effect:       {estimated_percent:.1f}%")

ITS MODEL ESTIMATE (from factual data only)
Pre-intervention mean:  $1,857.88
Post-intervention mean: $2,972.97
Estimated effect:       60.0%


## Step 7: Validate Model Accuracy

In [9]:
its_pct = impact_estimates.get("percent_change", 0)
true_pct = true_effect["percent_effect"]

# Recovery accuracy is 100% when the ITS estimate equals the true effect and
# falls linearly with the relative error. With a zero true effect the ratio
# is undefined, so the score is all-or-nothing.
if true_pct == 0:
    recovery_accuracy = 0 if its_pct != 0 else 100
else:
    relative_error = abs(1 - its_pct / true_pct)
    recovery_accuracy = (1 - relative_error) * 100

wide_banner = "=" * 60
print(wide_banner)
print("MODEL VALIDATION")
print(wide_banner)
print(f"\nTrue effect:       {true_pct:.1f}%")
print(f"ITS estimate:      {its_pct:.1f}%")
# The score can go negative for large errors; it is clamped for display only.
print(f"Recovery accuracy: {max(0, recovery_accuracy):.1f}%")

model_summary = its_results.get("model_summary", {})
n_days = model_summary.get("n_observations", "N/A")
pre_days = model_summary.get("pre_period_length", "N/A")
post_days = model_summary.get("post_period_length", "N/A")
print(f"\nObservations: {n_days} days")
print(f"Pre-period:   {pre_days} days")
print(f"Post-period:  {post_days} days")

# Verdict thresholds, highest first; the first one met wins.
verdict_by_floor = (
    (90, "EXCELLENT: ITS model accurately recovered the true causal effect!"),
    (70, "GOOD: ITS estimate is close to the true effect."),
    (50, "PARTIAL: ITS estimate differs from true effect."),
)
verdict = next(
    (message for floor, message in verdict_by_floor if recovery_accuracy >= floor),
    "WARNING: ITS estimate differs significantly from true effect.",
)

print("\n" + wide_banner)
print(verdict)
print(wide_banner)

MODEL VALIDATION

True effect:       0.0%
ITS estimate:      60.0%
Recovery accuracy: 0.0%

Observations: 15 days
Pre-period:   7 days
Post-period:  8 days

