# Batch Processing & Statistics

Run multiple simulations and analyze aggregate statistics.

## What You'll Learn
- How to run 100+ simulations efficiently
- How to compute mean, standard deviation, min, and max statistics
- How to save and load results

## Setup: Configuration

In [None]:
from engine.simulator.batch import BatchSimulator
from engine.domain.config import HappyGeneConfig, DamageProfile, KineticsConfig
import statistics

# Simulation setup: one damage profile plus one set of kinetic rates,
# bundled into the HappyGeneConfig that later cells pass to BatchSimulator.
kinetic_rates = {
    "recognition_rate": 0.1,
    "repair_rate": 0.05,
    "misrepair_rate": 0.01,
    "recovery_rate": 0.02,
}

damage_profile = DamageProfile(dose_gy=3.0, population_size=1000)
kinetics = KineticsConfig(**kinetic_rates)
config = HappyGeneConfig(damage_profile=damage_profile, kinetics=kinetics)

print("✓ Configuration ready")

## Run Batch Simulations

Execute 100 independent simulations with the same configuration.

In [None]:
# Number of runs, named once so the progress message and the call agree.
NUM_RUNS = 100

# Build the batch runner from the shared configuration.
batch = BatchSimulator(config)

# Execute the batch and keep the per-run results for the cells below.
print(f"Running {NUM_RUNS} simulations...")
results = batch.run_batch(num_runs=NUM_RUNS)

print(f"✓ Complete! Got {len(results)} results")

## Compute Statistics

In [None]:
# Aggregate the per-run results into a single statistics mapping.
stats = BatchSimulator.compute_statistics(results)

# (label, key) pairs for each report section; labels are padded so the
# values line up in a column.
time_rows = (
    ("Mean:   ", "mean_repair_time"),
    ("Std:    ", "std_repair_time"),
    ("Min:    ", "min_repair_time"),
    ("Max:    ", "max_repair_time"),
)
count_rows = (
    ("Mean:   ", "mean_repair_count"),
    ("Std:    ", "std_repair_count"),
)

print("\n=== Repair Time Statistics ===")
for label, key in time_rows:
    print(f"{label}{stats[key]:.4f} seconds")

print("\n=== Repair Count Statistics ===")
for label, key in count_rows:
    print(f"{label}{stats[key]:.1f} lesions")
print(f"Runs:   {int(stats['num_runs'])}")

## Explore Individual Results

In [None]:
def _completion_time(run):
    """Sort key: the run's completion time."""
    return run['completion_time']


def _repair_count(run):
    """Sort key: the run's final repair count."""
    return run['final_repair_count']


# Table of the first five runs.
rule = "-" * 60
print("First 5 runs:")
print(rule)
print(f"{'Run':<4} {'Time(s)':<10} {'Repairs':<10} {'Status':<10}")
print(rule)

for run in results[:5]:
    print(f"{run['run_id']:<4} {run['completion_time']:<10.4f} {run['final_repair_count']:<10} {run['status']:<10}")

# Runs at either end of the time and repair-count ranges.
fastest = min(results, key=_completion_time)
slowest = max(results, key=_completion_time)
most_repaired = max(results, key=_repair_count)
least_repaired = min(results, key=_repair_count)

print("\nExtremes:")
print(f"  Fastest: {fastest['completion_time']:.4f}s (run {fastest['run_id']})")
print(f"  Slowest: {slowest['completion_time']:.4f}s (run {slowest['run_id']})")
print(f"  Most repairs: {most_repaired['final_repair_count']} (run {most_repaired['run_id']})")
print(f"  Least repairs: {least_repaired['final_repair_count']} (run {least_repaired['run_id']})")

## Save Results

In [None]:
import tempfile
from pathlib import Path

# Demo output path inside the system temp directory.
output_file = Path(tempfile.gettempdir(), "batch_results.h5")

# Write the batch results, then report where they went and how big the file is.
batch.save_results(results, output_file)
print(f"✓ Saved to {output_file}")
size_kb = output_file.stat().st_size / 1024
print(f"  File size: {size_kb:.1f} KB")

## Load Results

In [None]:
# Read the results back from the file written by the save cell.
loaded = BatchSimulator.load_results(output_file)
print(f"✓ Loaded {len(loaded)} results")
print(f"  First run ID: {loaded[0]['run_id']}")
print(f"  Last run ID: {loaded[-1]['run_id']}")

# Round-trip check: recompute statistics from the loaded results and compare
# the mean repair count with the in-memory value.
stats_loaded = BatchSimulator.compute_statistics(loaded)
mean_before = stats['mean_repair_count']
mean_after = stats_loaded['mean_repair_count']
print(f"\n  Mean repairs (original): {mean_before:.1f}")
print(f"  Mean repairs (loaded):   {mean_after:.1f}")
print(f"  ✓ Match: {abs(mean_before - mean_after) < 0.01}")

## Statistical Analysis

In [None]:
import math

# Extract repair counts
repair_counts = [r['final_repair_count'] for r in results]

mean_repairs = stats['mean_repair_count']
std_repairs = stats['std_repair_count']

# Calculate additional statistics
median = statistics.median(repair_counts)

# Coefficient of variation (std / mean). It is undefined when the mean is
# zero, so report NaN instead of raising ZeroDivisionError.
cv = std_repairs / mean_repairs if mean_repairs else float("nan")

# A confidence interval for the *mean* is built from the standard error
# (std / sqrt(n)), not the raw standard deviation: mean ± 1.96*std describes
# the spread of individual runs, not the uncertainty of the mean.
# 1.96 is the normal-approximation z-value for a two-sided 95% interval.
std_error = std_repairs / math.sqrt(len(repair_counts))
ci_low = mean_repairs - 1.96 * std_error
ci_high = mean_repairs + 1.96 * std_error

print("\nAdditional Statistics:")
print(f"  Median repairs: {median:.0f}")
print(f"  Coefficient of variation: {cv:.3f}")
print(f"  Range: {min(repair_counts)} - {max(repair_counts)}")
print(f"  95% CI: [{ci_low:.1f}, {ci_high:.1f}]")

## Next Steps

1. **Visualize results**: See notebook 03 for interactive dashboards
2. **Parameter sensitivity**: See notebook 05 to understand how parameters affect outcomes
3. **Export to COPASI**: See notebook 04 for SBML workflow