# Association Parameter Optimizer Demo

This notebook demonstrates how to use the `AssociationOptimizer` to find optimal association parameters for your data.

The optimizer automatically tests different association methods and parameters, comparing results to ground truth data to find the best configuration.

## Setup

In [None]:
import sys
import tempfile
from pathlib import Path
import pandas as pd
import numpy as np
import json

# Import the optimizer
from neutron_event_analyzer.optimizer import (
    AssociationOptimizer,
    AssociationMetrics,
    optimize_for_synthetic_data
)

# Import synthetic data generation functions
sys.path.insert(0, str(Path('../tests')))
from test_association_validation import (
    create_synthetic_photon_data,
    create_synthetic_event_data,
    write_csv_files
)

## Example 1: Basic Grid Search

Let's start with a simple example: find the best association method and parameters for well-separated events.

In [None]:
# Three synthetic events that share identical photon statistics; only the
# centre position and the `t_ns` value differ from one event to the next.
shared_photon_settings = {
    'n_photons': 8,
    'photon_spread_spatial': 15.0,   # Moderate spatial spread
    'photon_spread_temporal': 50.0,  # Moderate temporal spread
}
event_placements = [
    # (center_x, center_y, t_ns)
    (50.0, 50.0, 1000.0),
    (150.0, 150.0, 10000.0),
    (200.0, 100.0, 20000.0),
]
event_configs = [
    {
        'event_id': event_id,
        'center_x': center_x,
        'center_y': center_y,
        't_ns': t_ns,
        **shared_photon_settings,
    }
    for event_id, (center_x, center_y, t_ns) in enumerate(event_placements)
]

# Build the photon table and the event table from the same configs.
# The photon table is generated first, then the event table.
photon_df = create_synthetic_photon_data(event_configs)
event_df = create_synthetic_event_data(event_configs)

n_events, n_photons_total = len(event_configs), len(photon_df)
print(f"Created {n_events} events with {n_photons_total} photons total")
print("\nPhoton data preview:")
print(photon_df.head())
print("\nEvent data preview:")
print(event_df)

In [None]:
# Scratch workspace: the synthetic CSV files go into a "data" folder inside a
# fresh temporary directory. `tmpdir` and `data_path` are reused by later cells.
tmpdir = tempfile.mkdtemp()
data_path = Path(tmpdir, "data")
data_path.mkdir(parents=True, exist_ok=True)

# Persist the synthetic photon and event tables as CSV files.
write_csv_files(None, photon_df, event_df, data_path, file_index=0)

print(f"Synthetic data written to: {data_path}")
print("\nFiles created:")
for written_file in sorted(data_path.glob('*')):
    print(f"  - {written_file.name}")

In [None]:
# Point the optimizer at the CSV files on disk and hand it the in-memory
# ground-truth tables the association results are compared against.
optimizer = AssociationOptimizer(
    synthetic_data_dir=str(data_path),
    ground_truth_photons=photon_df,
    ground_truth_events=event_df,
    verbosity=1,  # Show progress
)

# Candidate methods and thresholds handed to the grid search; the best
# configuration is selected by F1 score.
grid_search_settings = {
    'methods': ['simple', 'kdtree'],
    'spatial_thresholds_px': [10.0, 20.0, 30.0, 50.0],
    'temporal_thresholds_ns': [50.0, 100.0, 200.0, 500.0],
    'metric': 'f1_score',
}
best = optimizer.grid_search(**grid_search_settings)

banner = "=" * 70
print("\n" + banner)
print("Best Parameters Found:")
print(banner)
print(best)

## Example 2: Analyze Results

Let's examine all the results to understand the parameter space.

In [None]:
# One row per result stored on the optimizer, so the whole run can be
# sorted and filtered with pandas.
result_records = [metrics.to_dict() for metrics in optimizer.results]
results_df = pd.DataFrame(result_records)

print(f"Tested {len(results_df)} parameter combinations\n")
print("Top 10 configurations by F1 score:")

# Columns shown in the leaderboard, in display order.
summary_columns = [
    'method', 'spatial_threshold_px', 'temporal_threshold_ns',
    'f1_score', 'accuracy', 'association_rate',
]
ranked_results = results_df.sort_values('f1_score', ascending=False)
top_configurations = ranked_results[summary_columns].head(10)
print(top_configurations.to_string(index=False))

In [None]:
# Report the single best-scoring configuration for each association method.
print("\nPerformance by method:")
print("=" * 70)

# sort=False keeps the methods in the order they first appear in results_df.
for method, method_results in results_df.groupby('method', sort=False):
    # Row with the highest F1 score within this method's results.
    best_for_method = method_results.loc[method_results['f1_score'].idxmax()]
    report_lines = [
        f"\n{method}:",
        f"  Best F1 Score: {best_for_method['f1_score']:.4f}",
        f"  Best Spatial: {best_for_method['spatial_threshold_px']:.1f} px",
        f"  Best Temporal: {best_for_method['temporal_threshold_ns']:.1f} ns",
        f"  Accuracy: {best_for_method['accuracy']:.2%}",
        f"  Association Rate: {best_for_method['association_rate']:.2%}",
    ]
    print("\n".join(report_lines))

## Example 3: Save Best Parameters

Export the best parameters in a format that can be used with your analysis pipeline.

In [None]:
# Ask the optimizer for its best parameters as a JSON-serialisable object
# and show them pretty-printed.
best_params = optimizer.get_best_parameters_json()
formatted_params = json.dumps(best_params, indent=2)

print("Best parameters in empir/NEA format:")
print(formatted_params)

# Write the same parameters to a JSON file inside the scratch directory.
output_path = Path(tmpdir, "optimized_parameters.json")
optimizer.save_best_parameters(str(output_path))

print(f"\nParameters saved to: {output_path}")

## Example 4: Recursive Optimization

If you already have good starting parameters, you can fine-tune them using recursive optimization.

In [None]:
# Start from the grid search result and fine-tune
print("Starting recursive optimization from grid search result...\n")

# Refine with the method that won the grid search. The grid covered both
# 'simple' and 'kdtree', so hard-coding 'simple' here could seed one method
# with thresholds that were selected for the other and make the comparison
# below misleading.
# NOTE(review): assumes the metrics object exposes `method` as an attribute
# (its to_dict() output has a 'method' key) - confirm. Falls back to 'simple',
# the previous hard-coded value, if the attribute is missing.
refine_method = getattr(best, 'method', 'simple')

best_recursive = optimizer.recursive_optimize(
    method=refine_method,
    initial_spatial_px=best.spatial_threshold_px,
    initial_temporal_ns=best.temporal_threshold_ns,
    max_iterations=5,
    convergence_threshold=0.001,
    metric='f1_score'
)

# Side-by-side summary of the grid-search optimum and the refined result.
print("\n" + "="*70)
print("Comparison: Grid Search vs Recursive Optimization")
print("="*70)
print(f"\nGrid Search:")
print(f"  Spatial: {best.spatial_threshold_px:.2f} px")
print(f"  Temporal: {best.temporal_threshold_ns:.2f} ns")
print(f"  F1 Score: {best.f1_score:.4f}")
print(f"\nRecursive Optimization:")
print(f"  Method: {refine_method}")
print(f"  Spatial: {best_recursive.spatial_threshold_px:.2f} px")
print(f"  Temporal: {best_recursive.temporal_threshold_ns:.2f} ns")
print(f"  F1 Score: {best_recursive.f1_score:.4f}")
# Positive means the recursive pass scored a higher F1 than the grid search.
print(f"\nImprovement: {(best_recursive.f1_score - best.f1_score):.6f}")

## Example 5: Optimize for Different Scenarios

Let's test optimization for different types of clustering patterns.

In [None]:
# Every scenario is a single event at the same position and time with the same
# photon count; only the two photon-spread settings vary between scenarios.
base_event = {
    'event_id': 0,
    'center_x': 100.0,
    'center_y': 100.0,
    't_ns': 1000.0,
    'n_photons': 10,
}
scenario_spreads = {
    # name: (photon_spread_spatial, photon_spread_temporal)
    'Tight Clustering': (5.0, 20.0),     # Very tight
    'Loose Clustering': (50.0, 200.0),   # Very loose
    'Moderate Clustering': (20.0, 80.0),
}
scenarios = {
    name: {
        **base_event,
        'photon_spread_spatial': spatial_spread,
        'photon_spread_temporal': temporal_spread,
    }
    for name, (spatial_spread, temporal_spread) in scenario_spreads.items()
}

# Best grid-search result per scenario, keyed by scenario name.
scenario_results = {}

for scenario_name, config in scenarios.items():
    print(f"\nOptimizing for: {scenario_name}")
    print("=" * 70)

    # Generate the photon table first, then the event table, for this scenario.
    photon_df_scenario = create_synthetic_photon_data([config])
    event_df_scenario = create_synthetic_event_data([config])

    # Each scenario gets its own folder (spaces replaced by underscores)
    # inside the scratch directory.
    scenario_path = Path(tmpdir, scenario_name.replace(' ', '_'))
    scenario_path.mkdir(exist_ok=True)
    write_csv_files(None, photon_df_scenario, event_df_scenario, scenario_path, file_index=0)

    # A fresh optimizer per scenario, bound to that scenario's ground truth.
    scenario_optimizer = AssociationOptimizer(
        synthetic_data_dir=str(scenario_path),
        ground_truth_photons=photon_df_scenario,
        ground_truth_events=event_df_scenario,
        verbosity=0,  # Silent for comparison
    )

    best_scenario = scenario_optimizer.grid_search(
        methods=['simple'],
        spatial_thresholds_px=[5.0, 10.0, 20.0, 50.0, 100.0],
        temporal_thresholds_ns=[20.0, 50.0, 100.0, 200.0, 500.0],
        metric='f1_score',
    )
    scenario_results[scenario_name] = best_scenario

    summary_lines = [
        f"  Best Spatial: {best_scenario.spatial_threshold_px:.1f} px",
        f"  Best Temporal: {best_scenario.temporal_threshold_ns:.1f} ns",
        f"  F1 Score: {best_scenario.f1_score:.4f}",
        f"  Association Rate: {best_scenario.association_rate:.2%}",
    ]
    print("\n".join(summary_lines))

In [None]:
# Fixed-width table with one row per scenario's best grid-search result.
table_width = 90
column_headers = f"{'Scenario':<20} {'Spatial (px)':<15} {'Temporal (ns)':<15} {'F1 Score':<12} {'Assoc Rate':<12}"

print("\nScenario Comparison:")
print("=" * table_width)
print(column_headers)
print("-" * table_width)

for scenario_name, result in scenario_results.items():
    # Cell widths mirror the header columns; cells are joined by one space.
    row_cells = (
        f"{scenario_name:<20}",
        f"{result.spatial_threshold_px:<15.1f}",
        f"{result.temporal_threshold_ns:<15.1f}",
        f"{result.f1_score:<12.4f}",
        f"{result.association_rate:<12.2%}",
    )
    print(" ".join(row_cells))

## Example 6: Using Convenience Function

The `optimize_for_synthetic_data` function provides a quick way to run optimization and save results.

In [None]:
# Three events that step through position and time with the event index;
# all of them share the same photon count and spread settings.
quick_photon_settings = {
    'n_photons': 7,
    'photon_spread_spatial': 18.0,
    'photon_spread_temporal': 60.0,
}
quick_configs = [
    {
        'event_id': event_index,
        'center_x': 50.0 + event_index * 80.0,
        'center_y': 50.0 + event_index * 60.0,
        't_ns': 1000.0 + event_index * 5000.0,
        **quick_photon_settings,
    }
    for event_index in range(3)
]

# Generate the photon table first, then the event table.
quick_photon_df = create_synthetic_photon_data(quick_configs)
quick_event_df = create_synthetic_event_data(quick_configs)

# Input data and optimizer output live in sibling folders of the scratch
# directory; only the input folder is created here.
quick_data_path = Path(tmpdir, "quick_test")
quick_output_path = Path(tmpdir, "quick_results")
quick_data_path.mkdir(exist_ok=True)

write_csv_files(None, quick_photon_df, quick_event_df, quick_data_path, file_index=0)

# Single call that runs a grid-mode optimization and is given an output
# directory for its results.
quick_settings = {
    'synthetic_data_dir': str(quick_data_path),
    'ground_truth_photons': quick_photon_df,
    'ground_truth_events': quick_event_df,
    'mode': 'grid',
    'output_dir': str(quick_output_path),
    'verbosity': 1,
    'methods': ['simple'],
    'spatial_thresholds_px': [10.0, 20.0, 30.0],
    'temporal_thresholds_ns': [50.0, 100.0, 200.0],
}
best_quick = optimize_for_synthetic_data(**quick_settings)

print(f"\nResults saved to: {quick_output_path}")
print("Files created:")
for result_file in sorted(quick_output_path.glob('*')):
    print(f"  - {result_file.name}")

## Summary

This notebook demonstrated:

1. **Grid Search**: Systematically test parameter combinations
2. **Result Analysis**: Examine and compare all tested configurations
3. **Parameter Export**: Save optimal parameters for use in analysis
4. **Recursive Optimization**: Fine-tune parameters from a starting point
5. **Scenario Comparison**: Test different clustering patterns
6. **Convenience Function**: Quick one-line optimization with file output

### Next Steps

- Create synthetic data that matches your real data characteristics
- Run optimization to find best parameters
- Export parameters and use them in your analysis pipeline
- Validate on real data

### Additional Resources

- Full documentation: `docs/OPTIMIZER.md`
- Example scripts: `notebooks/example_optimizer_usage.py`
- Tests: `tests/test_association_optimizer.py`

In [None]:
# Cleanup temporary directory
import shutil

# ignore_errors=True keeps this cell idempotent: re-running it after the
# directory has already been removed no longer raises FileNotFoundError.
shutil.rmtree(tmpdir, ignore_errors=True)
print(f"Cleaned up temporary directory: {tmpdir}")