# Detection Runs Analysis

Comprehensive analysis and interpretation of all detection runs from `malca detect`.

This notebook:
1. Discovers all runs in `output/runs/`
2. Reads `run_params.json` to extract mag bin and parameters
3. Combines results across runs
4. Provides detailed statistics and visualizations
5. Identifies top candidates for follow-up

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from datetime import datetime

# Global plotting look for every figure in this notebook: the
# 'seaborn-v0_8-darkgrid' matplotlib style plus seaborn's 'husl' palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
# IPython magic (not plain Python): render figures inline in the cell output.
%matplotlib inline

# Larger default figure size (individual cells may still pass their own figsize)
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 100

## 1. Discover Detection Runs

In [None]:
def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that is the repo root.

    A directory counts as the root when it holds a ``pyproject.toml`` entry
    *and* a ``malca`` sub-directory. ``start`` is examined first, then each of
    its parents in order. When no directory qualifies, ``start`` is returned
    unchanged.
    """
    candidates = [start]
    candidates.extend(start.parents)
    matches = (
        folder
        for folder in candidates
        if folder.joinpath("pyproject.toml").exists()
        and folder.joinpath("malca").is_dir()
    )
    return next(matches, start)

# Locate the repository from the notebook's current working directory.
# find_repo_root falls back to the working directory itself when no root is
# found, so RUNS_DIR may then point at a non-existent location.
REPO_ROOT = find_repo_root(Path.cwd().resolve())
RUNS_DIR = REPO_ROOT / "output" / "runs"

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"Repository root: {REPO_ROOT}")
print(f"Runs directory: {RUNS_DIR}")
print(f"Runs directory exists: {RUNS_DIR.exists()}")

In [None]:
def load_run_metadata(run_dir: Path) -> dict:
    """Collect identifying information and parameters for one run directory.

    Args:
        run_dir: Directory of a single detection run.

    Returns:
        A dict with the keys

        - ``"run_id"``: the directory name.
        - ``"run_path"``: the directory path as a string.
        - ``"timestamp"``: ``datetime`` parsed from the directory name when it
          has the form ``YYYYMMDD_HHMMSS``, otherwise ``None``.
        - ``"mag_bin"``: taken from ``run_params.json`` (``mag_bin``, falling
          back to the first entry of ``mag_bins``); ``"unknown"`` when the
          file exists but names no bin; ``None`` when there is no
          ``run_params.json`` at all.
        - ``"n_sources"``: always ``0`` (not populated by this function).
        - ``"params"``: the parsed ``run_params.json`` content, or ``{}``.
        - ``"result_files"``: names of the ``*.csv`` and ``*.parquet`` files
          found in the ``results`` sub-directory (empty list if none).

    Raises:
        Whatever ``open``/``json.load`` raise for an unreadable or malformed
        ``run_params.json``; the caller decides how to report that.
    """
    metadata = {
        "run_id": run_dir.name,
        "run_path": str(run_dir),
        "timestamp": None,
        "mag_bin": None,
        "n_sources": 0,
        "params": {},
        "result_files": [],
    }

    # Parse timestamp from directory name (format: YYYYMMDD_HHMMSS)
    try:
        metadata["timestamp"] = datetime.strptime(run_dir.name, "%Y%m%d_%H%M%S")
    except ValueError:
        # Directory is not named after a timestamp; keep None.
        pass

    # Load run_params.json
    params_file = run_dir / "run_params.json"
    if params_file.exists():
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(params_file, encoding="utf-8") as f:
            params = json.load(f)
        metadata["params"] = params

        mag_bin = params.get("mag_bin")
        if mag_bin is None:
            # Key absent or JSON null: fall back to the plural form.
            mag_bin = params.get("mag_bins")
        if isinstance(mag_bin, list):
            mag_bin = mag_bin[0] if mag_bin else None
        # Never leave None here: a None/str mix in the mag_bin column cannot
        # be sorted by the per-bin summaries later in the notebook.
        metadata["mag_bin"] = "unknown" if mag_bin is None else mag_bin

    # Check for results
    results_dir = run_dir / "results"
    if results_dir.exists():
        found = sorted(results_dir.glob("*.csv")) + sorted(results_dir.glob("*.parquet"))
        metadata["result_files"] = [f.name for f in found]

    return metadata


def discover_runs(runs_dir: Path) -> pd.DataFrame:
    """Discover all detection runs and return a summary DataFrame.

    Args:
        runs_dir: Directory whose immediate sub-directories are individual
            runs. Plain files inside it are ignored.

    Returns:
        One row per run that could be loaded, in sorted directory order, with
        the keys produced by ``load_run_metadata`` as columns. When the
        directory is missing or holds no loadable run, an empty frame that
        still carries those columns is returned, so column selection on the
        result keeps working.
    """
    columns = [
        "run_id", "run_path", "timestamp", "mag_bin",
        "n_sources", "params", "result_files",
    ]

    # iterdir() raises on a missing path; report it and return an empty summary.
    if not runs_dir.is_dir():
        print(f"Runs directory not found: {runs_dir}")
        return pd.DataFrame(columns=columns)

    runs = []
    for run_dir in sorted(runs_dir.iterdir()):
        if not run_dir.is_dir():
            continue

        try:
            runs.append(load_run_metadata(run_dir))
        except Exception as e:
            # Best effort: one unreadable run must not hide all the others.
            print(f"Warning: Failed to load {run_dir}: {e}")

    if not runs:
        print("No runs found!")
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(runs)


# Scan RUNS_DIR once; the cells below reuse runs_df.
runs_df = discover_runs(RUNS_DIR)
print(f"\nDiscovered {len(runs_df)} detection runs:")
# Last expression of the cell: the notebook renders this column subset as a table.
runs_df[["run_id", "timestamp", "mag_bin", "result_files"]]

## 2. Load and Combine Results

In [None]:
def load_run_results(run_path: str) -> pd.DataFrame:
    """Load the detection results table of one run.

    Exactly one file from ``<run_path>/results`` is read. Candidates are tried
    in this order of preference, and within one pattern the alphabetically
    first match wins so the choice is reproducible:

    1. ``lc_events_results_filtered.csv``
    2. any ``*_filtered.csv``
    3. ``lc_events_results.csv``
    4. any ``*.csv``
    5. any ``*.parquet`` (only when there is no CSV at all)

    Args:
        run_path: Path of the run directory.

    Returns:
        The table with two extra columns, ``run_id`` (the run directory name)
        and ``source_file`` (the name of the file that was read). An empty
        DataFrame when the ``results`` directory or a results file is missing.
    """
    run_dir = Path(run_path)
    results_dir = run_dir / "results"

    if not results_dir.exists():
        return pd.DataFrame()

    patterns = (
        "lc_events_results_filtered.csv",
        "*_filtered.csv",
        "lc_events_results.csv",
        "*.csv",
        "*.parquet",
    )

    result_file = None
    for pattern in patterns:
        # glob yields matches in no guaranteed order; sort before picking.
        matches = sorted(results_dir.glob(pattern))
        if matches:
            result_file = matches[0]
            break

    if result_file is None:
        return pd.DataFrame()

    if result_file.suffix == ".parquet":
        df = pd.read_parquet(result_file)
    else:
        df = pd.read_csv(result_file)

    # Provenance columns so rows stay traceable after runs are concatenated.
    df["run_id"] = run_dir.name
    df["source_file"] = result_file.name

    return df


# Read the results table of every run that advertises result files and tag
# each row with the run's magnitude bin; runs that yield no rows are skipped.
all_results = []
for _, run in runs_df.iterrows():
    if not run["result_files"]:
        continue
    df = load_run_results(run["run_path"])
    if df.empty:
        continue
    df["mag_bin"] = run["mag_bin"]
    all_results.append(df)
    print(f"Loaded {len(df):,} rows from {run['run_id']} (mag_bin={run['mag_bin']})")

# Stack everything into one frame; an empty frame marks "nothing loaded" for
# the cells that follow.
if not all_results:
    combined_df = pd.DataFrame()
    print("No results loaded!")
else:
    combined_df = pd.concat(all_results, ignore_index=True)
    print(f"\n=== Combined Results ===")
    print(f"Total rows: {len(combined_df):,}")
    print(f"Unique runs: {combined_df['run_id'].nunique()}")
    print(f"Unique mag bins: {combined_df['mag_bin'].nunique()}")

In [None]:
# Numbered listing of every column in the combined table
if combined_df.empty:
    print("No data available")
else:
    print("Available columns:")
    for i, col in enumerate(combined_df.columns):
        print(f"  {i:2d}. {col}")

## 3. Detection Summary Statistics

In [None]:
# Overall and per-magnitude-bin counts of significant dip/jump detections.
if not combined_df.empty:
    print("=" * 60)
    print("DETECTION SUMMARY")
    print("=" * 60)
    n_total = len(combined_df)
    print(f"Total light curves analyzed: {n_total:,}")
    print()

    # Every statistic below combines both flags, so require both columns
    # rather than only the dip one.
    has_flags = {'dip_significant', 'jump_significant'}.issubset(combined_df.columns)

    if has_flags:
        dip_flag = combined_df['dip_significant']
        jump_flag = combined_df['jump_significant']
        n_dip = dip_flag.sum()
        n_jump = jump_flag.sum()
        n_either = (dip_flag | jump_flag).sum()
        n_both = (dip_flag & jump_flag).sum()

        print(f"Significant dip detections:   {n_dip:6,} ({n_dip/n_total*100:5.2f}%)")
        print(f"Significant jump detections:  {n_jump:6,} ({n_jump/n_total*100:5.2f}%)")
        print(f"Either dip or jump:           {n_either:6,} ({n_either/n_total*100:5.2f}%)")
        print(f"Both dip and jump:            {n_both:6,} ({n_both/n_total*100:5.2f}%)")
    else:
        print("Significance columns not found - detection counts unavailable.")

    print()
    print("=== Per Magnitude Bin ===")
    if has_flags:
        for mag_bin in sorted(combined_df['mag_bin'].unique()):
            subset = combined_df[combined_df['mag_bin'] == mag_bin]
            n_sig = (subset['dip_significant'] | subset['jump_significant']).sum()
            print(f"  {mag_bin}: {len(subset):,} sources, {n_sig:,} significant ({n_sig/len(subset)*100:.2f}%)")

In [None]:
# Post-filter statistics (if available)
# Post-filter flags are recognised purely by their 'failed_' column prefix.
filter_cols = [c for c in combined_df.columns if c.startswith('failed_')]

if not filter_cols:
    print("No post-filter columns found (run with --run-post-filter to get these)")
else:
    print("=== Post-Filter Failure Rates ===")
    for col in filter_cols:
        filter_name = col.replace('failed_', '')
        n_failed = combined_df[col].sum()
        print(f"  {filter_name:30s}: {n_failed:6,} / {len(combined_df):,} ({n_failed/len(combined_df)*100:.1f}%)")

    # A row passes only when none of its failure flags is set
    all_pass = ~combined_df[filter_cols].any(axis=1)
    print(f"\n  Passed ALL filters: {all_pass.sum():,} ({all_pass.sum()/len(combined_df)*100:.2f}%)")

## 4. Light Curve Quality Metrics

In [None]:
# 2x3 grid of light-curve quality histograms. Panels whose input columns are
# missing are left empty instead of raising.
if not combined_df.empty and 'n_points' in combined_df.columns:
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # Number of points
    n_points = combined_df['n_points']
    axes[0, 0].hist(n_points, bins=50, edgecolor='black', alpha=0.7)
    axes[0, 0].set_xlabel('Number of Points')
    axes[0, 0].set_ylabel('Count')
    axes[0, 0].set_title('Distribution of Light Curve Length')
    axes[0, 0].axvline(n_points.median(), color='red', linestyle='--',
                       label=f'Median: {n_points.median():.0f}')
    axes[0, 0].legend()

    # Time span. Reset first so a value left over from an earlier execution
    # of this cell can never be plotted against the current data.
    time_span = None
    if 'jd_first' in combined_df.columns and 'jd_last' in combined_df.columns:
        time_span = combined_df['jd_last'] - combined_df['jd_first']
        axes[0, 1].hist(time_span, bins=50, edgecolor='black', alpha=0.7)
        axes[0, 1].set_xlabel('Time Span (days)')
        axes[0, 1].set_ylabel('Count')
        axes[0, 1].set_title('Distribution of Observing Baseline')
        axes[0, 1].axvline(time_span.median(), color='red', linestyle='--',
                           label=f'Median: {time_span.median():.0f} days')
        axes[0, 1].legend()

    # Cadence
    if 'cadence_median_days' in combined_df.columns:
        axes[0, 2].hist(combined_df['cadence_median_days'], bins=50, edgecolor='black', alpha=0.7)
        axes[0, 2].set_xlabel('Median Cadence (days)')
        axes[0, 2].set_ylabel('Count')
        axes[0, 2].set_title('Distribution of Cadence')
        axes[0, 2].axvline(combined_df['cadence_median_days'].median(), color='red', linestyle='--')

    # Number of cameras (one integer-wide bin per camera count)
    if 'n_cameras' in combined_df.columns:
        max_cameras = combined_df['n_cameras'].max()
        # max() is NaN when the column holds no values; int(NaN) would raise.
        if pd.notna(max_cameras):
            axes[1, 0].hist(combined_df['n_cameras'], bins=range(0, int(max_cameras) + 2),
                            edgecolor='black', alpha=0.7)
        axes[1, 0].set_xlabel('Number of Cameras')
        axes[1, 0].set_ylabel('Count')
        axes[1, 0].set_title('Camera Coverage')

    # Points vs time span: needs the span computed above, i.e. BOTH jd columns
    if time_span is not None:
        axes[1, 1].scatter(n_points, time_span, alpha=0.1, s=5)
        axes[1, 1].set_xlabel('Number of Points')
        axes[1, 1].set_ylabel('Time Span (days)')
        axes[1, 1].set_title('Points vs Time Span')

    # By mag bin
    mag_bins = sorted(combined_df['mag_bin'].unique())
    colors = plt.cm.viridis(np.linspace(0, 1, len(mag_bins)))
    for i, mag_bin in enumerate(mag_bins):
        subset = combined_df[combined_df['mag_bin'] == mag_bin]
        axes[1, 2].hist(subset['n_points'], bins=50, alpha=0.5, label=mag_bin, color=colors[i])
    axes[1, 2].set_xlabel('Number of Points')
    axes[1, 2].set_ylabel('Count')
    axes[1, 2].set_title('Points by Magnitude Bin')
    axes[1, 2].legend()

    plt.tight_layout()
    plt.show()

## 5. Detection Metrics

In [None]:
# 2x2 grid of detection-strength diagnostics. Dip columns gate each panel;
# the matching jump column is overlaid only when it is present.
if not combined_df.empty and 'dip_bayes_factor' in combined_df.columns:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    def _present(suffix):
        """Return (legend label, column) pairs for the dip/jump columns that exist."""
        pairs = [('Dip', f'dip_{suffix}'), ('Jump', f'jump_{suffix}')]
        return [(label, col) for label, col in pairs if col in combined_df.columns]

    # Bayes Factor distributions
    bf_pairs = _present('bayes_factor')
    for label, col in bf_pairs:
        finite_bf = combined_df[col].replace([np.inf, -np.inf], np.nan).dropna()
        # Only positive BF values can be shown on a log scale
        axes[0, 0].hist(np.log10(finite_bf[finite_bf > 0]), bins=50, alpha=0.6,
                        label=label, edgecolor='black')
    axes[0, 0].set_xlabel('log₁₀(Bayes Factor)')
    axes[0, 0].set_ylabel('Count')
    axes[0, 0].set_title('Bayes Factor Distribution')
    axes[0, 0].axvline(np.log10(10), color='red', linestyle='--', label='BF=10')
    axes[0, 0].axvline(np.log10(100), color='orange', linestyle='--', label='BF=100')
    axes[0, 0].legend()

    # Event probability
    if 'dip_max_event_prob' in combined_df.columns:
        for label, col in _present('max_event_prob'):
            axes[0, 1].hist(combined_df[col].dropna(), bins=50, alpha=0.6, label=label)
        axes[0, 1].set_xlabel('Max Event Probability')
        axes[0, 1].set_ylabel('Count')
        axes[0, 1].set_title('Event Probability Distribution')
        axes[0, 1].axvline(0.5, color='red', linestyle='--', label='p=0.5')
        axes[0, 1].legend()

    if 'dip_count' in combined_df.columns:
        # Event counts: integer-wide bins, capped so a few extreme light
        # curves do not stretch the axis
        count_pairs = _present('count')
        max_count = combined_df[[col for _, col in count_pairs]].max().max()
        # max() is NaN when the count columns hold no values; int(NaN) would raise.
        upper = int(max_count) + 2 if pd.notna(max_count) else 2
        bins = range(0, min(upper, 50))
        for label, col in count_pairs:
            axes[1, 0].hist(combined_df[col], bins=bins, alpha=0.6, label=label, edgecolor='black')
        axes[1, 0].set_xlabel('Event Count')
        axes[1, 0].set_ylabel('Count')
        axes[1, 0].set_title('Number of Events per Light Curve')
        axes[1, 0].legend()

        # Scatter: BF vs event count
        if {'dip_significant', 'jump_significant'}.issubset(combined_df.columns):
            sig_mask = combined_df['dip_significant'] | combined_df['jump_significant']
        else:
            # No significance flags in this table: treat BF > 10 as significant.
            sig_mask = combined_df[[col for _, col in bf_pairs]].max(axis=1) > 10

        # Clip at 0.1 so zero/negative Bayes factors still get a finite log.
        dip_log_bf = np.log10(combined_df['dip_bayes_factor'].clip(lower=0.1))
        axes[1, 1].scatter(
            combined_df.loc[~sig_mask, 'dip_count'], dip_log_bf[~sig_mask],
            alpha=0.1, s=5, label='Not significant', color='gray'
        )
        axes[1, 1].scatter(
            combined_df.loc[sig_mask, 'dip_count'], dip_log_bf[sig_mask],
            alpha=0.5, s=20, label='Significant', color='red'
        )
        axes[1, 1].set_xlabel('Dip Count')
        axes[1, 1].set_ylabel('log₁₀(Dip Bayes Factor)')
        axes[1, 1].set_title('Detection Strength vs Event Count')
        axes[1, 1].legend()

    plt.tight_layout()
    plt.show()

## 6. Morphology Analysis

In [None]:
# Bar charts plus a printed table of the best-fit morphology labels, one
# panel for dips and one for jumps.
if not combined_df.empty and 'dip_best_morph' in combined_df.columns:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    def _morph_counts(column):
        """Tally one morphology column, leaving out rows labelled 'none'."""
        kept = combined_df.loc[combined_df[column] != 'none', column]
        return kept.value_counts()

    def _draw_morph_bars(ax, counts, kind):
        """Draw one labelled bar per morphology; do nothing for an empty tally."""
        if counts.empty:
            return
        positions = range(len(counts))
        palette = plt.cm.Set2(np.linspace(0, 1, len(counts)))
        bars = ax.bar(positions, counts.values, color=palette, edgecolor='black')
        ax.set_xticks(positions)
        ax.set_xticklabels(counts.index, rotation=45, ha='right')
        ax.set_ylabel('Count')
        ax.set_title(f'{kind} Morphologies (n={counts.sum():,})')

        # Write the count on top of each bar
        for bar, count in zip(bars, counts.values):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                    f'{count:,}', ha='center', va='bottom', fontsize=9)

    dip_morphs = _morph_counts('dip_best_morph')
    _draw_morph_bars(axes[0], dip_morphs, 'Dip')

    jump_morphs = _morph_counts('jump_best_morph')
    _draw_morph_bars(axes[1], jump_morphs, 'Jump')

    plt.tight_layout()
    plt.show()

    # Same tallies as text, with each label's share of its own total
    print("\n=== Morphology Summary ===")
    for kind, counts in (('Dip', dip_morphs), ('Jump', jump_morphs)):
        print(f"\n{kind} morphologies:")
        total = counts.sum()
        for morph, count in counts.items():
            print(f"  {morph:15s}: {count:6,} ({count/total*100:5.1f}%)")

## 7. Top Candidates

In [None]:
# Rank significant detections by their larger Bayes factor and list the top 20.
# Side effect: adds a 'max_bayes_factor' column to combined_df.
if not combined_df.empty and 'dip_bayes_factor' in combined_df.columns:
    # Create combined score from whichever Bayes-factor columns exist
    bf_cols = [c for c in ('dip_bayes_factor', 'jump_bayes_factor') if c in combined_df.columns]
    combined_df['max_bayes_factor'] = combined_df[bf_cols].max(axis=1)

    # Filter to significant detections only; without both flags fall back to
    # a Bayes-factor cut
    if {'dip_significant', 'jump_significant'}.issubset(combined_df.columns):
        significant = combined_df[combined_df['dip_significant'] | combined_df['jump_significant']].copy()
    else:
        significant = combined_df[combined_df['max_bayes_factor'] > 10].copy()

    print("=== Top 20 Candidates by Bayes Factor ===")
    print(f"(From {len(significant):,} significant detections)\n")

    # Select display columns
    display_cols = ['path', 'mag_bin', 'dip_bayes_factor', 'jump_bayes_factor',
                    'dip_best_morph', 'jump_best_morph', 'dip_count', 'jump_count']
    display_cols = [c for c in display_cols if c in significant.columns]

    # .copy(): a column is added below and must not touch `significant`
    top_20 = significant.nlargest(20, 'max_bayes_factor')[display_cols].copy()

    # Extract source ID from path
    if 'path' in top_20.columns:
        paths = top_20['path'].astype(str)
        # Take the digits from the file name, not from the whole path:
        # a directory containing digits would otherwise be picked up first.
        file_names = paths.str.rsplit('/', n=1).str[-1]
        source_id = file_names.str.extract(r'(\d+)')[0]
        # File name without digits: keep the previous whole-path behaviour.
        top_20['source_id'] = source_id.fillna(paths.str.extract(r'(\d+)')[0])
        top_20 = top_20.drop(columns=['path'])

    print(top_20.to_string(index=False))

In [None]:
# Candidates by morphology (exclude noise)
# For each non-noise morphology, list the five strongest significant
# detections whose dip or jump was best fit by that shape. The ranking score
# is computed here, so this cell does not rely on a column added elsewhere.
morph_required = {'dip_best_morph', 'jump_best_morph', 'dip_significant', 'jump_significant'}
morph_bf_cols = [c for c in ('dip_bayes_factor', 'jump_bayes_factor') if c in combined_df.columns]

if not combined_df.empty and morph_required.issubset(combined_df.columns) and morph_bf_cols:
    print("\n=== Top Candidates by Morphology (Non-Noise) ===")

    # Larger of the available dip/jump Bayes factors per light curve
    best_bf = combined_df[morph_bf_cols].max(axis=1)
    has_path = 'path' in combined_df.columns

    for morph_type in ['gaussian', 'paczynski', 'exponential', 'linear']:
        # .eq(True) treats missing flags as "not significant"
        dip_hit = combined_df['dip_best_morph'].eq(morph_type) & combined_df['dip_significant'].eq(True)
        jump_hit = combined_df['jump_best_morph'].eq(morph_type) & combined_df['jump_significant'].eq(True)
        # One mask: a light curve matching on both sides is counted once
        hit = dip_hit | jump_hit
        n_matches = int(hit.sum())

        if n_matches:
            print(f"\n{morph_type.upper()} morphology ({n_matches} candidates):")
            for idx, bf in best_bf[hit].nlargest(5).items():
                source = combined_df.at[idx, 'path'] if has_path else 'unknown'
                # Show just the file name without its light-curve extension
                if isinstance(source, str) and '/' in source:
                    source = source.split('/')[-1].replace('.csv', '').replace('.dat2', '')
                print(f"  • {source}: BF={bf:.1f}")

## 8. Comparison Across Magnitude Bins

In [None]:
# Per-magnitude-bin detection counts and rates, as a table and two bar charts.
if combined_df.empty:
    # An empty table has no mag_bin column at all; say so instead of
    # claiming that a single bin was found.
    print("No data available - no comparison possible.")
elif combined_df['mag_bin'].nunique() <= 1:
    print("Only one magnitude bin found - no comparison available.")
else:
    print("=== Detection Rates by Magnitude Bin ===")

    # Counts need both flags; without them every count is reported as 0
    has_flags = {'dip_significant', 'jump_significant'}.issubset(combined_df.columns)
    summary_data = []

    for mag_bin in sorted(combined_df['mag_bin'].unique()):
        subset = combined_df[combined_df['mag_bin'] == mag_bin]
        n_total = len(subset)

        if has_flags:
            n_dip = subset['dip_significant'].sum()
            n_jump = subset['jump_significant'].sum()
            n_either = (subset['dip_significant'] | subset['jump_significant']).sum()
        else:
            n_dip = n_jump = n_either = 0

        summary_data.append({
            'mag_bin': mag_bin,
            'n_total': n_total,
            'n_dip': n_dip,
            'n_jump': n_jump,
            'n_either': n_either,
            'pct_dip': n_dip / n_total * 100 if n_total > 0 else 0,
            'pct_jump': n_jump / n_total * 100 if n_total > 0 else 0,
            'pct_either': n_either / n_total * 100 if n_total > 0 else 0,
        })

    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))

    # Plot comparison: left panel absolute counts, right panel percentages,
    # dips and jumps side by side within each bin
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    x = range(len(summary_df))
    width = 0.35
    left_edges = [i - width/2 for i in x]
    right_edges = [i + width/2 for i in x]

    panels = (
        (axes[0], 'n_dip', 'n_jump', 'Count', 'Detection Counts by Magnitude Bin'),
        (axes[1], 'pct_dip', 'pct_jump', 'Detection Rate (%)', 'Detection Rates by Magnitude Bin'),
    )
    for ax, dip_col, jump_col, ylabel, title in panels:
        ax.bar(left_edges, summary_df[dip_col], width, label='Dips', color='#e74c3c')
        ax.bar(right_edges, summary_df[jump_col], width, label='Jumps', color='#3498db')
        ax.set_xticks(x)
        ax.set_xticklabels(summary_df['mag_bin'], rotation=45, ha='right')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()

    plt.tight_layout()
    plt.show()

## 9. Filter Threshold Analysis

In [None]:
# How many light curves would survive progressively stricter cuts on Bayes
# factor, event probability and event count.
if not combined_df.empty and 'dip_bayes_factor' in combined_df.columns:
    print("=== Detection Counts at Different Thresholds ===")
    print()

    bf_thresholds = [1, 3, 10, 30, 100, 1000]
    print("Bayes Factor Thresholds:")
    dip_bf = combined_df['dip_bayes_factor']
    jump_bf = combined_df['jump_bayes_factor']
    for thresh in bf_thresholds:
        dip_over = dip_bf > thresh
        jump_over = jump_bf > thresh
        n_dip = dip_over.sum()
        n_jump = jump_over.sum()
        n_either = (dip_over | jump_over).sum()
        print(f"  BF > {thresh:4d}: dip={n_dip:5,} jump={n_jump:5,} either={n_either:5,}")

    if 'dip_max_event_prob' in combined_df.columns:
        print("\nEvent Probability Thresholds:")
        dip_prob = combined_df['dip_max_event_prob']
        jump_prob = combined_df['jump_max_event_prob']
        for thresh in [0.5, 0.7, 0.9, 0.95, 0.99]:
            n_dip = (dip_prob > thresh).sum()
            n_jump = (jump_prob > thresh).sum()
            print(f"  P > {thresh:.2f}: dip={n_dip:5,} jump={n_jump:5,}")

    if 'dip_count' in combined_df.columns:
        print("\nEvent Count Requirements:")
        dip_events = combined_df['dip_count']
        jump_events = combined_df['jump_count']
        for min_count in [1, 2, 3, 5, 10]:
            n_dip = (dip_events >= min_count).sum()
            n_jump = (jump_events >= min_count).sum()
            print(f"  >= {min_count:2d} events: dip={n_dip:5,} jump={n_jump:5,}")

## 10. Export Filtered Candidates

In [None]:
# Select strong, non-noise detections and write them to
# <repo>/output/high_quality_candidates.csv.
if not combined_df.empty:
    non_noise_morphs = ['gaussian', 'exponential', 'linear', 'paczynski']

    # The score column is normally added by the top-candidates cell; build it
    # here when that cell was skipped so this cell also works on its own.
    export_bf_cols = [c for c in ('dip_bayes_factor', 'jump_bayes_factor') if c in combined_df.columns]
    if 'max_bayes_factor' not in combined_df.columns and export_bf_cols:
        combined_df['max_bayes_factor'] = combined_df[export_bf_cols].max(axis=1)

    morph_cols = [c for c in ('dip_best_morph', 'jump_best_morph') if c in combined_df.columns]

    if 'max_bayes_factor' not in combined_df.columns or not morph_cols:
        print("Cannot select candidates: Bayes factor or morphology columns are missing.")
    else:
        # Define filtering criteria
        criteria = (
            (combined_df['max_bayes_factor'] > 100) &  # Strong detection
            combined_df[morph_cols].isin(non_noise_morphs).any(axis=1)  # Non-noise morphology (dip or jump)
        )

        high_quality = combined_df[criteria].copy()

        print("=== High-Quality Candidates ===")
        print("Criteria: BF > 100 AND non-noise morphology")
        print(f"Found: {len(high_quality):,} candidates")

        if not high_quality.empty:
            # Save to output
            output_path = REPO_ROOT / "output" / "high_quality_candidates.csv"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            high_quality.to_csv(output_path, index=False)
            print(f"\nSaved to: {output_path}")

            # Show top 10
            print("\nTop 10:")
            display_cols = ['path', 'mag_bin', 'max_bayes_factor', 'dip_best_morph', 'jump_best_morph']
            display_cols = [c for c in display_cols if c in high_quality.columns]
            print(high_quality.nlargest(10, 'max_bayes_factor')[display_cols].to_string(index=False))

## 11. Run Parameters Comparison

In [None]:
# Print a fixed shortlist of parameters for every run that recorded any
if not runs_df.empty:
    print("=== Run Parameters ===")

    # Key parameters to report; entries a run did not record are skipped
    key_params = [
        'logbf_threshold_dip', 'logbf_threshold_jump',
        'run_min_points', 'run_max_gap_days',
        'baseline_func', 'workers'
    ]

    for _, run in runs_df.iterrows():
        params = run.get('params', {})
        if not params:
            continue
        print(f"\n{run['run_id']} (mag_bin={run['mag_bin']}):")
        for param in key_params:
            if param in params:
                print(f"  {param}: {params[param]}")

## 12. Summary and Next Steps

In [None]:
# Closing recap of the headline numbers followed by suggested follow-up commands.
if not combined_df.empty:
    print("=" * 60)
    print("ANALYSIS SUMMARY")
    print("=" * 60)
    print(f"\nAnalyzed {len(runs_df)} detection runs")
    print(f"Total light curves: {len(combined_df):,}")

    # Both flags are combined below, so require both columns
    has_flags = {'dip_significant', 'jump_significant'}.issubset(combined_df.columns)

    if has_flags:
        n_sig = (combined_df['dip_significant'] | combined_df['jump_significant']).sum()
        print(f"Significant detections: {n_sig:,} ({n_sig/len(combined_df)*100:.2f}%)")

    if 'max_bayes_factor' in combined_df.columns:
        print(f"\nTop detection strength: BF = {combined_df['max_bayes_factor'].max():,.1f}")

    # Morphology breakdown for significant detections; needs the flags as
    # well as both morphology columns
    if has_flags and {'dip_best_morph', 'jump_best_morph'}.issubset(combined_df.columns):
        dip_morphs = combined_df.loc[combined_df['dip_significant'], 'dip_best_morph'].value_counts()
        jump_morphs = combined_df.loc[combined_df['jump_significant'], 'jump_best_morph'].value_counts()

        # "other" is whatever remains after the two labels reported by name
        dip_other = dip_morphs.sum() - dip_morphs.get('noise', 0) - dip_morphs.get('gaussian', 0)
        jump_other = jump_morphs.sum() - jump_morphs.get('noise', 0) - jump_morphs.get('paczynski', 0)

        print("\nMorphology of significant detections:")
        print(f"  Dips - noise: {dip_morphs.get('noise', 0):,}, "
              f"gaussian: {dip_morphs.get('gaussian', 0):,}, "
              f"other: {dip_other:,}")
        print(f"  Jumps - noise: {jump_morphs.get('noise', 0):,}, "
              f"paczynski: {jump_morphs.get('paczynski', 0):,}, "
              f"other: {jump_other:,}")

    print("\n" + "=" * 60)
    print("NEXT STEPS")
    print("=" * 60)
    print("""
1. Generate plots for top candidates:
   python -m malca postprocess --detect-run output/runs/<run_id> --max-plots 50 -v

2. Run characterization to get stellar parameters:
   python -m malca detect --mag-bin <bin> --run-characterize --gaia-cache output/gaia_cache.parquet

3. Cross-match with known variable catalogs:
   - VSX (Variable Star Index)
   - GCVS (General Catalog of Variable Stars)
   - Gaia DR3 variables

4. Visual inspection of light curves for promising candidates
    """)