# Permutation Test Visualization

This notebook demonstrates how to visualize permutation test results and understand the null distribution of bias detection metrics.

## Contents
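1. Generate sample data
2. Run permutation tests
3. Visualize permutation distributions (observed value vs. null distribution)
4. Compare multiple metrics
5. Adaptive permutation test
6. Generate model card
7. Save results
8. Interactive exploration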

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from circular_bias_detector.core.permutation import permutation_test, adaptive_permutation_test
from circular_bias_detector.core.metrics import compute_psi, compute_ccs, compute_rho_pc
from circular_bias_detector.detection import BiasDetector

# Notebook-wide plotting defaults: seaborn grid theme and a wide default canvas
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Generate Sample Data

In [None]:
# Generate synthetic evaluation data.
# The global seed is set first so every draw below (and every number derived
# from it later in the notebook) is reproducible on Restart & Run All.
np.random.seed(42)

# Problem size: T time periods, K algorithms, p constraint types
T, K, p = 20, 5, 3

# Scenario 1: No bias (performance and constraints are drawn independently)
perf_no_bias = np.random.rand(T, K)
const_no_bias = np.random.rand(T, p)

# Scenario 2: With bias (correlated performance and constraints)
perf_with_bias = np.random.rand(T, K)
const_with_bias = np.random.rand(T, p)
# Add correlation: shift every algorithm's score in a period by half of that
# period's mean constraint value. keepdims=True gives a (T, 1) column that
# broadcasts across the K algorithm columns, so no per-row Python loop is needed.
perf_with_bias += 0.5 * const_with_bias.mean(axis=1, keepdims=True)

print("Data generated:")
print(f"  Time periods: {T}")
print(f"  Algorithms: {K}")
print(f"  Constraints: {p}")

## 2. Run Permutation Tests

In [None]:
# Run the PSI permutation test on both scenarios with identical settings
n_perm = 1000
shared_test_kwargs = dict(n_permutations=n_perm, random_seed=42, n_jobs=-1)

print("Running permutation tests...")

# Scenario without injected bias
results_no_bias = permutation_test(
    perf_no_bias, const_no_bias, compute_psi, **shared_test_kwargs
)

# Scenario with injected bias
results_with_bias = permutation_test(
    perf_with_bias, const_with_bias, compute_psi, **shared_test_kwargs
)

# Report the observed statistic and p-value for each scenario
for scenario_label, scenario_results in (
    ("No Bias", results_no_bias),
    ("With Bias", results_with_bias),
):
    print(f"\n{scenario_label} Scenario:")
    print(f"  Observed PSI: {scenario_results['observed']:.4f}")
    print(f"  p-value: {scenario_results['p_value']:.4f}")

## 3. Visualize Permutation Distributions

In [None]:
def plot_permutation_distribution(results, title="Permutation Test Results",
                                  xlabel='PSI Score', bins=50):
    """
    Plot permutation distribution with observed value.

    Draws a density histogram of the permuted statistics, a dashed vertical
    line at the observed statistic, dotted vertical lines at the interval
    bounds, and a text box with the p-value and permutation count.

    Parameters
    ----------
    results : mapping
        Permutation-test output. The keys read here are 'permuted_values',
        'observed', 'ci_lower', 'ci_upper', 'p_value' and 'n_permutations'.
    title : str
        Title shown above the axes.
    xlabel : str
        Label for the x-axis. Defaults to 'PSI Score'; pass another label
        when plotting a different metric.
    bins : int
        Number of histogram bins. Defaults to 50.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure, so the caller can show or save it.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot histogram of permuted values
    ax.hist(results['permuted_values'], bins=bins, alpha=0.7,
            color='skyblue', edgecolor='black', density=True, label='Null distribution')

    # Plot observed value
    ax.axvline(results['observed'], color='red', linestyle='--',
               linewidth=2, label=f"Observed: {results['observed']:.4f}")

    # Plot confidence intervals; only the lower bound carries a label so the
    # legend shows a single "95% CI" entry for the pair of lines
    ax.axvline(results['ci_lower'], color='orange', linestyle=':',
               linewidth=1.5, alpha=0.7, label="95% CI")
    ax.axvline(results['ci_upper'], color='orange', linestyle=':',
               linewidth=1.5, alpha=0.7)

    # Add p-value annotation (axes coordinates: anchored to the top-left corner)
    ax.text(0.02, 0.98, f"p-value = {results['p_value']:.4f}\nn = {results['n_permutations']}",
            transform=ax.transAxes, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
            fontsize=11)

    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig

# Plot both scenarios. Each figure is kept in its own variable (fig1 / fig2)
# because both are written to disk in the "Save Results" section, and each is
# shown right after it is created so the two plots appear in order.
fig1 = plot_permutation_distribution(results_no_bias, "No Bias Scenario")
plt.show()

fig2 = plot_permutation_distribution(results_with_bias, "With Bias Scenario")
plt.show()

## 4. Compare Multiple Metrics

In [None]:
# Metric callables keyed by display name. CCS is wrapped in an adapter that
# forwards only its second argument to compute_ccs.
# NOTE(review): the keys are looked up verbatim by generate_model_card, so
# they must not be changed here without changing them there as well.
metrics = {
    'PSI': compute_psi,
    'CCS': lambda p, c: compute_ccs(c),
    'œÅ_PC': compute_rho_pc
}

# One permutation test per metric on the biased scenario, same settings for all
results_all = {
    name: permutation_test(
        perf_with_bias, const_with_bias, metric_func,
        n_permutations=500, random_seed=42, n_jobs=-1
    )
    for name, metric_func in metrics.items()
}

# One summary line per metric, in the order the metrics were defined
for name, metric_results in results_all.items():
    print(f"{name}: observed={metric_results['observed']:.4f}, p={metric_results['p_value']:.4f}")

In [None]:
# Plot all metrics together: one panel per entry in results_all, so the cell
# keeps working if a metric is added to or removed from the comparison.
n_metrics = len(results_all)
# squeeze=False keeps `axes` two-dimensional even for a single panel, which
# makes the iteration over axes[0] below uniform. With three metrics the
# figure size is (18, 5).
fig, axes = plt.subplots(1, n_metrics, figsize=(6 * n_metrics, 5), squeeze=False)

# Significance stars, checked from the strictest threshold down
star_thresholds = ((0.001, "***"), (0.01, "**"), (0.05, "*"))

for ax, (name, results) in zip(axes[0], results_all.items()):
    # Histogram of the permuted (null) statistics
    ax.hist(results['permuted_values'], bins=40, alpha=0.7,
            color='skyblue', edgecolor='black', density=True)

    # Observed value
    ax.axvline(results['observed'], color='red', linestyle='--',
               linewidth=2, label=f"Observed: {results['observed']:.4f}")

    # CI bounds
    ax.axvline(results['ci_lower'], color='orange', linestyle=':', linewidth=1.5, alpha=0.7)
    ax.axvline(results['ci_upper'], color='orange', linestyle=':', linewidth=1.5, alpha=0.7)

    # Annotation: p-value followed by the first matching star label, or "ns"
    # when the p-value is not below any threshold
    significance = next(
        (stars for cutoff, stars in star_thresholds if results['p_value'] < cutoff),
        "ns",
    )
    ax.text(0.02, 0.98, f"p = {results['p_value']:.4f} {significance}",
            transform=ax.transAxes, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    ax.set_xlabel(f'{name} Score', fontsize=11)
    ax.set_ylabel('Density', fontsize=11)
    ax.set_title(name, fontsize=13, fontweight='bold')
    ax.legend(loc='upper right', fontsize=9)
    ax.grid(True, alpha=0.3)

plt.suptitle('Permutation Test Results for All Metrics', fontsize=15, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

## 5. Adaptive Permutation Test

In [None]:
# Adaptive permutation test on the biased scenario; settings are gathered in
# one dict so they can be read and tuned in a single place.
# NOTE(review): the meaning of `precision` and the stopping rule are defined by
# adaptive_permutation_test itself — confirm against its documentation.
adaptive_settings = {
    'max_permutations': 5000,
    'min_permutations': 100,
    'precision': 0.01,
    'random_seed': 42,
    'n_jobs': -1,
    'verbose': 1,
}
adaptive_results = adaptive_permutation_test(
    perf_with_bias, const_with_bias, compute_psi, **adaptive_settings
)

# Summarize convergence, permutation budget used, and the resulting statistic
n_used = adaptive_results['n_permutations']
print("\nAdaptive Permutation Test:")
print(f"  Converged: {adaptive_results['converged']}")
print(f"  Permutations used: {n_used} / {adaptive_results['max_permutations']}")
print(f"  p-value: {adaptive_results['p_value']:.4f}")
print(f"  Observed: {adaptive_results['observed']:.4f}")

## 6. Generate Model Card

In [None]:
def generate_model_card(perf_matrix, const_matrix, results_dict, algorithm_names=None,
                        alpha=0.05):
    """
    Generate a model card summarizing bias detection results.

    Parameters
    ----------
    perf_matrix : array-like
        Only its two-element ``.shape`` is read; the values are reported as
        the number of time periods and the number of algorithms.
    const_matrix : array-like
        Only its two-element ``.shape`` is read; the second value is reported
        as the number of constraint types.
    results_dict : dict
        Per-metric results. Must contain 'PSI', 'CCS' and the rho entry
        (either 'ρ_PC' or the mis-encoded spelling 'œÅ_PC'). Each entry is a
        mapping with 'observed', 'ci_lower', 'ci_upper' and 'p_value';
        'n_permutations' is read from the 'PSI' entry only.
    algorithm_names : list of str, optional
        Names listed in the card. Defaults to Algorithm_1 ... Algorithm_K.
    alpha : float, optional
        Significance level used for every per-metric verdict and for the
        overall assessment. Defaults to 0.05.

    Returns
    -------
    str
        The model card as Markdown text.

    Raises
    ------
    KeyError
        If a required metric entry or field is missing from ``results_dict``.

    Notes
    -----
    Bias is reported when at least two entries of ``results_dict`` (all
    entries are counted, not only the three printed ones) have a p-value
    below ``alpha``.
    """
    T, K = perf_matrix.shape
    _, p = const_matrix.shape

    if algorithm_names is None:
        algorithm_names = [f"Algorithm_{i+1}" for i in range(K)]
    names_line = ', '.join(algorithm_names)

    # Accept the correctly encoded key as well as the mis-encoded spelling, so
    # the card works with either form of the caller's metrics dict. If neither
    # is present the lookup below raises KeyError.
    rho_key = 'ρ_PC' if 'ρ_PC' in results_dict else 'œÅ_PC'

    psi = results_dict['PSI']
    ccs = results_dict['CCS']
    rho = results_dict[rho_key]

    psi_verdict = '⚠️ UNSTABLE' if psi['p_value'] < alpha else '✅ STABLE'
    ccs_verdict = '⚠️ INCONSISTENT' if ccs['p_value'] < alpha else '✅ CONSISTENT'
    rho_verdict = '⚠️ CORRELATED' if rho['p_value'] < alpha else '✅ INDEPENDENT'

    # The multi-line texts are built here rather than inside the f-string:
    # backslashes in f-string expression parts are a SyntaxError before
    # Python 3.12.
    n_significant = sum(r['p_value'] < alpha for r in results_dict.values())
    if n_significant >= 2:
        assessment = ('🚨 **BIAS DETECTED**: Multiple indicators suggest circular '
                      'reasoning bias in the evaluation process.')
        recommendations = '\n'.join([
            '- Review evaluation methodology for potential circular dependencies',
            '- Consider independent validation dataset',
            '- Audit constraint specification process',
        ])
    else:
        assessment = ('✅ **NO BIAS DETECTED**: Evaluation appears free from '
                      'circular reasoning bias.')
        recommendations = '\n'.join([
            '- Continue current evaluation practices',
            '- Maintain documentation of methodology',
            '- Periodic re-evaluation recommended',
        ])

    card = f"""
# Bias Detection Model Card

## Evaluation Metadata
- **Time Periods**: {T}
- **Algorithms Evaluated**: {K}
- **Constraint Types**: {p}
- **Algorithm Names**: {names_line}

## Statistical Testing
- **Method**: Permutation Test
- **Permutations**: {psi['n_permutations']}
- **Significance Level**: α = {alpha}

## Results Summary

### PSI (Parameter Stability Index)
- **Observed**: {psi['observed']:.4f}
- **95% CI**: [{psi['ci_lower']:.4f}, {psi['ci_upper']:.4f}]
- **p-value**: {psi['p_value']:.4f}
- **Interpretation**: {psi_verdict}

### CCS (Constraint Consistency Score)
- **Observed**: {ccs['observed']:.4f}
- **95% CI**: [{ccs['ci_lower']:.4f}, {ccs['ci_upper']:.4f}]
- **p-value**: {ccs['p_value']:.4f}
- **Interpretation**: {ccs_verdict}

### ρ_PC (Performance-Constraint Correlation)
- **Observed**: {rho['observed']:.4f}
- **95% CI**: [{rho['ci_lower']:.4f}, {rho['ci_upper']:.4f}]
- **p-value**: {rho['p_value']:.4f}
- **Interpretation**: {rho_verdict}

## Overall Assessment

{assessment}

## Recommendations

{recommendations}

---
*Generated by Sleuth - Circular Bias Detector*
"""
    return card

# Label the algorithms Model_A, Model_B, ... (one per performance column),
# then build the card from the multi-metric results and print it
n_algorithms = perf_with_bias.shape[1]
model_letters = [chr(ord('A') + i) for i in range(n_algorithms)]
model_card = generate_model_card(
    perf_with_bias,
    const_with_bias,
    results_all,
    algorithm_names=[f"Model_{letter}" for letter in model_letters],
)

print(model_card)

## 7. Save Results

In [None]:
# Save model card to file.
# The card contains non-ASCII characters, so the encoding is pinned to UTF-8
# rather than left to the platform default, which can fail to encode them.
with open('bias_detection_model_card.md', 'w', encoding='utf-8') as f:
    f.write(model_card)

print("Model card saved to: bias_detection_model_card.md")

# Save the two single-scenario figures at print resolution;
# bbox_inches='tight' trims surrounding whitespace
fig1.savefig('permutation_no_bias.png', dpi=300, bbox_inches='tight')
fig2.savefig('permutation_with_bias.png', dpi=300, bbox_inches='tight')

print("Figures saved:")
print("  - permutation_no_bias.png")
print("  - permutation_with_bias.png")

## 8. Interactive Exploration

In [None]:
# Create interactive plot (requires plotly).
# Only the optional imports are guarded: an error raised while building or
# showing the figure surfaces as itself instead of being reported as
# "Plotly not installed".
try:
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
except ImportError:
    print("Plotly not installed. Install with: pip install plotly")
else:
    # One subplot per metric, titled with the metric's key in results_all
    fig = make_subplots(
        rows=1, cols=len(results_all),
        subplot_titles=tuple(results_all)
    )

    # plotly subplot columns are 1-based, hence enumerate(..., 1)
    for idx, (name, results) in enumerate(results_all.items(), 1):
        # Histogram of the permuted (null) statistics
        fig.add_trace(
            go.Histogram(
                x=results['permuted_values'],
                name=f'{name} Null',
                opacity=0.7,
                nbinsx=40,
                histnorm='probability density'
            ),
            row=1, col=idx
        )

        # Observed line
        fig.add_vline(
            x=results['observed'],
            line_dash="dash",
            line_color="red",
            annotation_text=f"Observed: {results['observed']:.3f}",
            row=1, col=idx
        )

    fig.update_layout(
        title_text="Interactive Permutation Test Results",
        showlegend=False,
        height=400
    )

    fig.show()

## Summary

This notebook demonstrated:
1. ✅ Running permutation tests with parallel processing
2. ✅ Visualizing null distributions and observed values
3. ✅ Comparing multiple bias detection metrics
4. ✅ Using adaptive permutation testing for efficiency
5. ✅ Generating model cards for audit trails

For more information, see the [documentation](https://github.com/hongping-zh/circular-bias-detection).