# 04 - Validation

This notebook validates counterfactual predictions using:
1. Temporal validation with matched campaign pairs
2. Placebo tests
3. Manski bounds analysis

In [None]:
import sys
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from causal_inference import CounterfactualPredictor
from validation import (
    TemporalValidator,
    PlaceboTester,
    ManskiBoundsAnalyzer,
    run_full_validation
)

# Plot styling shared by every figure in this notebook.
sns.set_theme(style='darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Fix NumPy's global seed so that "Restart Kernel -> Run All" reproduces the
# same numbers for the sampling-based checks below (e.g. the placebo test is
# called with n_samples=100).
# NOTE(review): this only helps if the validators draw from NumPy's global RNG
# (directly or via pandas' default random_state) — confirm in src/validation.
SEED = 42
np.random.seed(SEED)

In [None]:
# Load the raw campaign table and the saved causal models used by every
# validation step below.
raw_data_path = '../data/raw/kickstarter_raw_data.csv'
models_path = '../data/processed/causal_models.pkl'

df = pd.read_csv(raw_data_path)

# NOTE(review): the .pkl extension suggests load_models unpickles this file —
# only point it at artifacts produced by this project; confirm.
predictor = CounterfactualPredictor()
predictor.load_models(models_path)

print(f"Loaded {len(df)} campaigns and trained models")

## 1. Temporal Validation

Find pairs of similar campaigns with different strategies and validate predictions.

In [None]:
# Build the validator and let it search the dataset for matched campaign pairs.
temporal = TemporalValidator()
pairs = temporal.identify_strategy_changes(df)

# Report how many pairs were found, then preview the first ten.
print(f"Found {len(pairs)} matched campaign pairs")
pairs.head(n=10)

In [None]:
# Scatter every matched pair: price change (x) against outcome change (y).
fig, ax = plt.subplots(figsize=(10, 6))

ax.scatter(
    x=pairs['price_change_pct'],
    y=pairs['outcome_change'],
    alpha=0.5,
)

# Dashed red reference lines through zero on both axes.
zero_line_style = dict(color='red', linestyle='--')
ax.axhline(y=0, **zero_line_style)
ax.axvline(x=0, **zero_line_style)

ax.set(
    xlabel='Price Change (%)',
    ylabel='Outcome Change (Funding Ratio)',
    title='Price Changes vs Outcome Changes in Matched Pairs',
)

fig.tight_layout()
plt.show()

## 2. Placebo Test

For campaigns where nothing changed, the model should predict no effect.

In [None]:
# Run the placebo check against the loaded models, with n_samples=100.
placebo = PlaceboTester()
placebo_results = placebo.run_placebo_test(
    df,
    predictor,
    n_samples=100,
)

In [None]:
# Print the placebo statistics as one block, one metric per line.
print(
    "Placebo Test Results:",
    f"  Mean Placebo Effect: {placebo_results['mean_placebo_effect']:.4f}",
    f"  Std Placebo Effect: {placebo_results['std_placebo_effect']:.4f}",
    f"  95th Percentile: {placebo_results['percentile_95']:.4f}",
    f"  Test Passed: {placebo_results['passed']}",
    sep="\n",
)

# Follow up with a one-line interpretation of the pass/fail flag.
print(
    "\n✓ Good! Model doesn't find spurious effects where none exist."
    if placebo_results['passed']
    else "\n✗ Warning: Model may be capturing noise rather than true causal effects."
)

## 3. Manski Bounds

Establish plausible ranges for counterfactual estimates.

In [None]:
# Compute bounds for the scenario passed as price_change_pct=-20
# (presumably a 20% price cut — confirm the sign convention in src/validation).
bounds = ManskiBoundsAnalyzer()
bounds_results = bounds.analyze_dataset_bounds(
    df,
    predictor,
    price_change_pct=-20,
)

In [None]:
# Print the aggregate bounds statistics as one block, one metric per line.
# 'estimates_within_bounds' is scaled by 100 here to display it as a percentage.
print(
    "Manski Bounds Results:",
    f"  Samples Analyzed: {bounds_results['n_samples']}",
    f"  Avg Point Estimate: {bounds_results['avg_point_estimate']:.4f}",
    f"  Avg Interval Width: {bounds_results['avg_interval_width']:.4f}",
    f"  Estimates Within Bounds: {bounds_results['estimates_within_bounds']*100:.1f}%",
    sep="\n",
)

In [None]:
# Visualize bounds for sample campaigns.
# The figure is drawn only when the results include a 'bounds_sample' entry.
if 'bounds_sample' in bounds_results:
    sample_bounds = bounds_results['bounds_sample']

    # One list per plotted quantity, in the order the sample entries appear.
    point_estimates, lower, upper = (
        [entry[field] for entry in sample_bounds]
        for field in ('point_estimate', 'manski_lower', 'manski_upper')
    )
    x = range(len(sample_bounds))

    fig, ax = plt.subplots(figsize=(10, 6))

    # Red dots mark the point estimates; each blue bar spans lower..upper.
    # zorder=5 keeps the dots drawn on top of the bars.
    ax.scatter(x, point_estimates, color='red', s=100, zorder=5, label='Point Estimate')
    ax.vlines(x, lower, upper, color='blue', linewidth=2, label='Manski Bounds')

    ax.set(
        xlabel='Campaign',
        ylabel='Counterfactual Funding Ratio',
        title='Point Estimates with Manski Bounds',
    )
    ax.legend()

    fig.tight_layout()
    plt.show()

## 4. Full Validation Summary

In [None]:
# Run the full validation suite in one call; the summary cell below reads the
# 'temporal', 'placebo' and 'bounds' entries of the returned mapping.
all_results = run_full_validation(
    df,
    predictor,
)

In [None]:
# Summary table
# One headline metric per validation stage. A stage or metric that is missing
# from `all_results` is reported as 'N/A' in every row, including the bounds
# row, so absent results are never shown as a real-looking "0.0%".


def _format_percent(fraction, missing='N/A'):
    """Render a fraction such as 0.95 as the percentage string '95.0%'.

    Returns `missing` when `fraction` is None or cannot be converted to a
    float, instead of raising inside the f-string.
    """
    try:
        return f"{float(fraction) * 100:.1f}%"
    except (TypeError, ValueError):
        return missing


temporal_summary = all_results.get('temporal', {})
placebo_summary = all_results.get('placebo', {})
bounds_summary = all_results.get('bounds', {})

summary = {
    'Metric': ['Temporal MAE', 'Placebo Mean Effect', 'Placebo Test Passed', 'Bounds Within Range'],
    'Value': [
        temporal_summary.get('mae', 'N/A'),
        placebo_summary.get('mean_placebo_effect', 'N/A'),
        placebo_summary.get('passed', 'N/A'),
        # No default here: a missing value becomes None and is rendered as 'N/A'.
        _format_percent(bounds_summary.get('estimates_within_bounds')),
    ]
}

summary_df = pd.DataFrame(summary)
print("\nValidation Summary:")
print(summary_df.to_string(index=False))