# 1. Set up parameters for ASL

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from obspy import read_inventory
from importlib import reload
from flovopy.asl.wrappers2 import run_single_event, find_event_files, run_all_events
from flovopy.core.mvo import dome_location, REGION_DEFAULT
from flovopy.processing.sam import VSAM, DSAM 
from flovopy.asl.config import ASLConfig
# -------------------------- Config --------------------------
# Project directories: shared inputs/metadata live under PROJECTDIR,
# locally generated results live under LOCALPROJECTDIR.
HOME = Path.home()
PROJECTDIR      = HOME / "Dropbox" / "BRIEFCASE" / "SSADenver"
LOCALPROJECTDIR = HOME / "work" / "PROJECTS" / "SSADenver_local"
OUTPUT_DIR      = LOCALPROJECTDIR / "asl_results"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
INPUT_DIR       = PROJECTDIR / "ASL_inputs" / "biggest_pdc_events"
GLOBAL_CACHE    = PROJECTDIR / "asl_global_cache"
METADATA_DIR    = PROJECTDIR / "metadata" 
STATION_CORRECTIONS_DIR = PROJECTDIR / "station_correction_analysis"

# master files
INVENTORY_XML   = METADATA_DIR / "MV_Seismic_and_GPS_stations.xml"
DEM_DEFAULT     = METADATA_DIR / "MONTSERRAT_DEM_WGS84_MASTER.tif"
GRIDFILE_DEFAULT= METADATA_DIR / "MASTER_GRID_MONTSERRAT.pkl"

# parameters for envelopes and cross-correlation
SMOOTH_SECONDS  = 1.0
MAX_LAG_SECONDS = 8.0
MIN_XCORR       = 0.5

# other parameters
DIST_MODE = "3d" # or "2d"; "2d" essentially squashes Montserrat topography and stations onto a sea-level plane, i.e. elevation data is ignored, e.g. when computing distances

# Inventory of Montserrat stations
from obspy import read_inventory
INV     = read_inventory(INVENTORY_XML)
print(f"[INV] Networks: {len(INV)}  Stations: {sum(len(n) for n in INV)}  Channels: {sum(len(sta) for net in INV for sta in net)}")

# Montserrat station corrections estimated from regionals
# (one table for the whole period, one broken down by year)
station_corrections_csv = STATION_CORRECTIONS_DIR / "station_gains_intervals.csv"
annual_station_corrections_csv = STATION_CORRECTIONS_DIR / "station_gains_intervals_by_year.csv"
station_corrections_df = pd.read_csv(station_corrections_csv)
annual_station_corrections_df = pd.read_csv(annual_station_corrections_csv)

# Montserrat pre-defined Grid (from 02 tutorial)
from flovopy.asl.grid import Grid
gridobj = Grid.load(GRIDFILE_DEFAULT)
print(gridobj)


# Montserrat constants
from flovopy.core.mvo import dome_location, REGION_DEFAULT
print("Dome (assumed source) =", dome_location)

# events and wrappers
from flovopy.asl.wrappers2 import run_single_event, find_event_files, run_all_events
event_files = list(find_event_files(INPUT_DIR))
# Write the number -> file lookup table once; an existing CSV is left untouched,
# so it is NOT refreshed if the contents of INPUT_DIR change.
eventcsvfile = Path(OUTPUT_DIR) / "mseed_files.csv"
if not eventcsvfile.is_file():
    rows = [{"num": num, "f": str(f)} for num, f in enumerate(event_files)]
    df = pd.DataFrame(rows)
    df.to_csv(eventcsvfile, index=False)
# Positions in event_files (the "num" column of mseed_files.csv) of the hand-picked events.
best_file_nums  = [35, 36, 40, 52, 82, 83, 84, 116, 310, 338]
best_event_files = [event_files[i] for i in best_file_nums]
print(f'Best miniseed files are: {best_event_files}')
REFINE_SECTOR = False   # enable triangular dome-to-sea refinement

# Parameters to pass for making pygmt topo maps
topo_kw = {
    "inv": INV,
    "add_labels": True,
    "cmap": "gray",
    "region": REGION_DEFAULT,
    "dem_tif": DEM_DEFAULT,  # basemap shading from your GeoTIFF - but does not actually seem to use this unless topo_color=True and cmap=None
    "frame": True,
    "dome_location": dome_location,
}

# Run the selected events from the previous cell one at a time to check that the pipeline works

In [None]:
# Build the ASL configuration that the downstream functions receive as ``cfg``.
# It bundles the physical parameters, station metadata, grid, misfit engine, etc.
asl_settings = dict(
    inventory=INV,
    output_base=OUTPUT_DIR,  # str?
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
    station_correction_dataframe=station_corrections_df,
    wave_kind="body",        # "surface" or "body"
    speed=3.0,               # km/s
    Q=100,                   # attenuation quality factor
    peakf=2.0,               # Hz
    dist_mode=DIST_MODE,     # or "2d"
    misfit_engine="r2",      # l2, r2, lin?
    window_seconds=5.0,      # length of time window for amplitude measurement
    min_stations=5,          # minimum number of stations required to locate event
    sam_class=VSAM,          # or DSAM
    sam_metric="VT",         # or one of "mean", "median", "max", "rms", "VLP", or "LP"
    debug=True,
)
cfg = ASLConfig(**asl_settings)
cfg.build()
summaries = []

REFINE_SECTOR = False

# Keyword arguments shared by every run_single_event call in this cell.
run_kwargs = dict(
    cfg=cfg,
    refine_sector=REFINE_SECTOR,
    station_gains_df=None,
    switch_event_ctag=True,
    topo_kw=topo_kw,
    mseed_units='m/s',  # default units for miniseed files being used - probably "Counts" or "m/s"
    reduce_time=True,
    debug=True,
)
for i, ev in zip(best_file_nums, best_event_files):
    print(f"[{i}/{len(event_files)}] {ev}")
    result = run_single_event(mseed_file=str(ev), **run_kwargs)
    summaries.append(result)

# Summarize: one row per event result
df = pd.DataFrame(summaries)
display(df)

summary_csv = Path(OUTPUT_DIR) / f"{cfg.tag()}__summary.csv"
df.to_csv(summary_csv, index=False)
print(f"Summary saved to: {summary_csv}")

if not df.empty:
    # A run counts as successful when its "error" entry is missing/NaN.
    if "error" in df.columns:
        n_ok = int(df["error"].isna().sum())
    else:
        n_ok = len(df)
    print(f"Success: {n_ok}/{len(df)}")


In [None]:
# --- Minimal orchestrator: build variants, resolve CSVs, append comparisons ---

from dataclasses import replace
from pathlib import Path
import pandas as pd
import numpy as np

# Import the real compare helpers, but alias safe_compare to avoid name clash
from flovopy.asl.analyze_run_pairs import (
    safe_compare as append_compare,
    load_all_event_comparisons,
    add_composite_score,
    summarize_variants,
    per_event_winner,
)

# ---------- 2) One-change variants ----------
def cfg_variants_from(baseline: ASLConfig) -> dict[str, ASLConfig]:
    """Return one-change variants of ``baseline`` keyed by a short label.

    Every variant is ``dataclasses.replace(baseline, <one field>)`` followed by
    ``.build()``. The "annual_stacorr" and "landgrid" entries read the
    module-level ``annual_station_corrections_df`` and ``landgridobj``, so both
    must be defined before this function is called.
    """
    # label -> the single field override applied to the baseline
    overrides = {
        "Q100":           {"Q": 100},
        "Q10":            {"Q": 10},
        "v1.0":           {"speed": 1.0},
        "v3.0":           {"speed": 3.0},
        "win1s":          {"window_seconds": 1.0},
        "win10s":         {"window_seconds": 10.0},
        "metric_median":  {"sam_metric": "median"},
        "metric_LP":      {"sam_metric": "LP"},
        "metric_VT":      {"sam_metric": "VT"},
        "no_stacorr":     {"station_correction_dataframe": None},
        "annual_stacorr": {"station_correction_dataframe": annual_station_corrections_df},
        "l2_engine":      {"misfit_engine": "l2"},
        "lin_engine":     {"misfit_engine": "lin"},
        "body":           {"wave_kind": "body"},  # change speed too?
        "f5hz":           {"peakf": 5.0},
        "f8hz":           {"peakf": 8.0},
        "2d":             {"dist_mode": "2d"},
        "landgrid":       {"gridobj": landgridobj},
    }
    return {
        label: replace(baseline, **change).build()
        for label, change in overrides.items()
    }

REFINE_SECTOR = False
landgridobj = Grid.load(GLOBAL_CACHE.joinpath("land", "Grid_9c2fd59b.pkl"))

# ---------- 1) Build baseline config ----------
# The reference settings every one-change variant is compared against.
baseline_settings = {
    "inventory": INV,
    "output_base": OUTPUT_DIR,
    "gridobj": gridobj,
    "global_cache": GLOBAL_CACHE,
    "station_correction_dataframe": station_corrections_df,
    "wave_kind": "surface",
    "speed": 1.5,
    "Q": 23,
    "peakf": 2.0,
    "dist_mode": "3d",
    "misfit_engine": "r2",
    "window_seconds": 5.0,
    "min_stations": 5,
    "sam_class": VSAM,
    "sam_metric": "mean",
    "debug": False,
}
baseline_cfg = ASLConfig(**baseline_settings).build()

# landgridobj must already exist here: cfg_variants_from reads it as a global.
variants = cfg_variants_from(baseline_cfg)

# ---------- 3) Locate each run’s CSV by tag ----------
def _products_dir_for(cfg: ASLConfig, mseed_file: str | Path) -> Path:
    """Return ``<cfg.output_base>/<mseed file stem>/<last component of cfg.outdir>``."""
    event_stem = Path(mseed_file).stem
    return Path(cfg.output_base).joinpath(event_stem, Path(cfg.outdir).name)

def csv_for_run(cfg: ASLConfig, mseed_file: str | Path) -> Path | None:
    """Return the first existing result CSV for this config/event, or None.

    Candidates are looked up in the products directory in a fixed preference
    order: ``source_<tag>_refined.csv``, ``source_<tag>.csv``,
    ``<tag>_refined.csv``, ``<tag>.csv``.
    """
    products = _products_dir_for(cfg, mseed_file)
    run_tag = cfg.tag()
    filenames = (
        f"source_{run_tag}_refined.csv",
        f"source_{run_tag}.csv",
        f"{run_tag}_refined.csv",
        f"{run_tag}.csv",
    )
    candidates = (products / filename for filename in filenames)
    return next((path for path in candidates if path.exists()), None)

# (Optional) auto-run switches: run a configuration when its CSV is missing
RUN_IF_MISSING_BASELINE = True      # auto-run baseline if its CSV is missing
RUN_IF_MISSING_VARIANTS = True      # auto-run variants too (set to False to keep variants manual)

def ensure_csv_for(cfg: ASLConfig, mseed_file: str | Path, *, run_if_missing: bool | None = None) -> Path | None:
    """Return the path to a run's CSV, optionally running the event if it is missing.

    Parameters
    ----------
    cfg : ASLConfig
        Configuration whose output CSV is wanted.
    mseed_file : str | Path
        Event waveform file; its stem names the event folder.
    run_if_missing : bool | None
        Whether to call ``run_single_event`` when no CSV is found. ``None``
        falls back to a legacy module-level ``RUN_IF_MISSING`` flag if one is
        defined, otherwise to ``False``.

    Returns
    -------
    Path | None
        The CSV path, or ``None`` if none was found (or the run raised).
    """
    if run_if_missing is None:
        # RUN_IF_MISSING is not defined anywhere in this notebook; looking it up
        # by bare name raised NameError. Keep the backward-compatible lookup but
        # default to "do not run" when the legacy flag is absent.
        run_flag = globals().get("RUN_IF_MISSING", False)
    else:
        run_flag = run_if_missing

    csv = csv_for_run(cfg, mseed_file)
    if (csv is None or not csv.exists()) and run_flag:
        try:
            _ = run_single_event(
                mseed_file=str(mseed_file),
                cfg=cfg,
                refine_sector=REFINE_SECTOR,
                station_gains_df=None,
                switch_event_ctag=True,
                topo_kw=topo_kw,
                mseed_units="m/s",
                reduce_time=True,
                debug=True,
            )
            # Look again: the run is expected to have written the CSV.
            csv = csv_for_run(cfg, mseed_file)
        except Exception as e:
            # Best effort: report the failure and let the caller skip this run.
            print(f"  [run error] {Path(mseed_file).stem} · {cfg.tag()}: {e}")
            return None
    return csv

# ---------- 4) Compare baseline vs each variant ----------
base_tag = baseline_cfg.tag()

for i, ev in zip(best_file_nums, best_event_files):
    ev_key = Path(ev).stem
    print(f"\n[{i}/{len(event_files)}] {ev_key}")
    event_dir = Path(OUTPUT_DIR).joinpath(ev_key)

    # The baseline tag is part of the filename so different baselines don't overwrite each other
    summary_csv = event_dir / f"pairwise_{baseline_cfg.tag()}_vs_variants.csv"

    # Baseline: auto-run (if enabled) only when its CSV is missing
    base_csv = ensure_csv_for(baseline_cfg, ev, run_if_missing=RUN_IF_MISSING_BASELINE)
    if base_csv is not None:
        # Variants follow their own switch, RUN_IF_MISSING_VARIANTS
        for vcfg in variants.values():
            alt_csv = ensure_csv_for(vcfg, ev, run_if_missing=RUN_IF_MISSING_VARIANTS)
            try:
                # The variant's tag doubles as the row label in the comparison table
                append_compare(summary_csv, base_csv, alt_csv, label=vcfg.tag())
            except Exception as e:
                print(f"  [compare error] {vcfg.tag()}: {e}")
    else:
        print("  [skip] no baseline CSV; cannot compare.")

# ---------- 5) Roll-up ----------
ROOT = OUTPUT_DIR
allcmp = load_all_event_comparisons(ROOT)
have_rows = not allcmp.empty
n_events = allcmp['event_id'].nunique() if have_rows else 0
print(f"\nstacked rows: {len(allcmp)}, events: {n_events}")

if have_rows:
    scored  = add_composite_score(allcmp, w_sep=1.0, w_misfit=0.5, w_azgap=0.1)
    summary = summarize_variants(scored)
    winners, win_counts = per_event_winner(scored)
    display(summary.head(10))
    display(win_counts)

In [None]:
# --- Minimal orchestrator: build variants, resolve CSVs, append comparisons ---

from dataclasses import replace
from pathlib import Path
import pandas as pd
import numpy as np

from flovopy.asl.analyze_run_pairs import (
    safe_compare as append_compare,
    load_all_event_comparisons,
    add_composite_score,
    summarize_variants,
    per_event_winner,
)

# ---------- 2) One-change variants ----------
def cfg_variants_from(baseline: ASLConfig) -> dict[str, ASLConfig]:
    """Build the one-change variants of ``baseline``, keyed by a short label.

    Each entry changes exactly one field via ``dataclasses.replace`` and then
    calls ``.build()``. ``annual_station_corrections_df`` and ``landgridobj``
    are read from module scope and must exist when this is called.
    """
    # (label, field name, replacement value), in the order the variants are built
    single_changes = (
        ("Q100",           "Q",                            100),
        ("Q10",            "Q",                            10),
        ("v1.0",           "speed",                        1.0),
        ("v3.0",           "speed",                        3.0),
        ("win1s",          "window_seconds",               1.0),
        ("win10s",         "window_seconds",               10.0),
        ("metric_median",  "sam_metric",                   "median"),
        ("metric_LP",      "sam_metric",                   "LP"),
        ("metric_VT",      "sam_metric",                   "VT"),
        ("no_stacorr",     "station_correction_dataframe", None),
        ("annual_stacorr", "station_correction_dataframe", annual_station_corrections_df),
        ("l2_engine",      "misfit_engine",                "l2"),
        ("lin_engine",     "misfit_engine",                "lin"),
        ("body",           "wave_kind",                    "body"),  # (optionally also change speed)
        ("f5hz",           "peakf",                        5.0),
        ("f8hz",           "peakf",                        8.0),
        ("2d",             "dist_mode",                    "2d"),
        ("landgrid",       "gridobj",                      landgridobj),
    )
    variants_by_label = {}
    for label, field_name, value in single_changes:
        variants_by_label[label] = replace(baseline, **{field_name: value}).build()
    return variants_by_label

# ---------- 3) Locate each run’s CSV by tag ----------
def _products_dir_for(cfg: ASLConfig, mseed_file: str | Path) -> Path:
    """Locate a run's products folder: output base / event stem / name of ``cfg.outdir``."""
    stem = Path(mseed_file).stem
    per_event_root = Path(cfg.output_base, stem)
    run_folder_name = Path(cfg.outdir).name
    return per_event_root / run_folder_name

def csv_for_run(cfg: ASLConfig, mseed_file: str | Path) -> Path | None:
    """Find the result CSV written for ``cfg`` and ``mseed_file``.

    Tries, in order, ``source_<tag>_refined.csv``, ``source_<tag>.csv``,
    ``<tag>_refined.csv`` and ``<tag>.csv`` inside the products directory and
    returns the first path that exists, or ``None`` when none do.
    """
    folder = _products_dir_for(cfg, mseed_file)
    tag = cfg.tag()
    # Prefixed names are tried before bare ones, and "_refined" before plain.
    for prefix in ("source_", ""):
        for suffix in ("_refined", ""):
            candidate = folder / f"{prefix}{tag}{suffix}.csv"
            if candidate.exists():
                return candidate
    return None

# ---------- 3.5) Auto-run switches ----------
def ensure_csv_for(cfg: ASLConfig, mseed_file: str | Path, *, is_baseline: bool | None = None) -> Path | None:
    """
    Return the path to the run CSV, running the event first if that is enabled.

    When ``is_baseline`` is None the role is inferred by identity with the
    module-level ``baseline_cfg``. The baseline obeys RUN_IF_MISSING_BASELINE and
    everything else obeys RUN_IF_MISSING_VARIANTS. Returns None when no CSV
    exists and no run was made, or when the run raised.
    """
    treat_as_baseline = (cfg is baseline_cfg) if is_baseline is None else is_baseline
    if treat_as_baseline:
        may_run = RUN_IF_MISSING_BASELINE
    else:
        may_run = RUN_IF_MISSING_VARIANTS

    found = csv_for_run(cfg, mseed_file)
    already_there = found is not None and found.exists()
    if already_there or not may_run:
        return found

    try:
        run_single_event(
            mseed_file=str(mseed_file),
            cfg=cfg,
            refine_sector=REFINE_SECTOR,
            station_gains_df=None,
            switch_event_ctag=True,
            topo_kw=topo_kw,
            mseed_units="m/s",
            reduce_time=True,
            debug=True,
        )
        # Look the CSV up again now that the run has finished.
        return csv_for_run(cfg, mseed_file)
    except Exception as e:
        print(f"  [run error] {Path(mseed_file).stem} · {cfg.tag()}: {e}")
        return None




# RUN

# ---------- 3.5) Auto-run switches ----------
# Read by ensure_csv_for at call time, so defining them before the loop is enough.
RUN_IF_MISSING_BASELINE = True
RUN_IF_MISSING_VARIANTS = True

REFINE_SECTOR = False
landgridobj = Grid.load(GLOBAL_CACHE.joinpath("land", "Grid_9c2fd59b.pkl"))

# ---------- 1) Build baseline config ----------
baseline_settings = dict(
    inventory=INV,
    output_base=OUTPUT_DIR,
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
    station_correction_dataframe=station_corrections_df,
    wave_kind="surface",
    speed=1.5,
    Q=23,
    peakf=2.0,
    dist_mode="3d",
    misfit_engine="r2",
    window_seconds=5.0,
    min_stations=5,
    sam_class=VSAM,
    sam_metric="mean",
    debug=False,
)
baseline_cfg = ASLConfig(**baseline_settings).build()

# ---------- 2) One-change variants ----------
# Needs landgridobj (defined above): cfg_variants_from reads it as a global.
variants = cfg_variants_from(baseline_cfg)


# ---------- 4) Compare baseline vs each variant ----------
base_tag = baseline_cfg.tag()

for i, ev in zip(best_file_nums, best_event_files):
    ev_key = Path(ev).stem
    print(f"\n[{i}/{len(event_files)}] {ev_key}")
    event_dir = Path(OUTPUT_DIR, ev_key)

    # The baseline tag is part of the filename so runs against different baselines don't clobber each other
    summary_csv = event_dir.joinpath(f"pairwise_{base_tag}_vs_variants.csv")

    # Baseline: auto-run (if enabled) only when its CSV is missing
    base_csv = ensure_csv_for(baseline_cfg, ev, is_baseline=True)
    if base_csv is not None:
        # Variants: one comparison row per variant, labelled with the variant's tag
        for vcfg in variants.values():
            alt_csv = ensure_csv_for(vcfg, ev, is_baseline=False)
            try:
                append_compare(summary_csv, base_csv, alt_csv, label=vcfg.tag())
            except Exception as e:
                print(f"  [compare error] {vcfg.tag()}: {e}")
    else:
        print("  [skip] no baseline CSV; cannot compare.")

# ---------- 5) Roll-up ----------
ROOT = OUTPUT_DIR
allcmp = load_all_event_comparisons(ROOT)
have_rows = not allcmp.empty
n_events = allcmp['event_id'].nunique() if have_rows else 0
print(f"\nstacked rows: {len(allcmp)}, events: {n_events}")

if have_rows:
    scored  = add_composite_score(allcmp, w_sep=1.0, w_misfit=0.5, w_azgap=0.1)
    summary = summarize_variants(scored)
    winners, win_counts = per_event_winner(scored)
    #print(summary)
    display(summary)
    display(win_counts)

# Run this: the same comparison workflow, driven by the flovopy.asl.compare_runs module

In [None]:

# ✅ use the new module:
from flovopy.asl.compare_runs import (
    cfg_variants_from,
    compare_runs,            # orchestrator
    load_all_event_comparisons,  # optional if you want to re-load later
    add_composite_score,         # optional
    summarize_variants,          # optional
    per_event_winner,            # optional
)

# --- Build baseline & variants ---
landgridobj = Grid.load(GLOBAL_CACHE / "land" / "Grid_9c2fd59b.pkl")
baseline_cfg = ASLConfig(
    inventory=INV,
    output_base=OUTPUT_DIR,
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
    station_correction_dataframe=station_corrections_df,
    wave_kind="surface",
    speed=1.5,
    Q=23,
    peakf=2.0,
    dist_mode="3d",
    misfit_engine="r2",
    window_seconds=5.0,
    min_stations=5,
    sam_class=VSAM,
    sam_metric="mean",
    debug=False,
).build()

# The module version takes the land grid and annual corrections explicitly
# instead of reading them from notebook globals.
variants = cfg_variants_from(
    baseline_cfg,
    landgridobj=landgridobj,
    annual_station_corrections_df=annual_station_corrections_df,
)

# --- Run comparisons (auto-run baseline & variants if missing) ---
scored, summary, win_counts = compare_runs(
    baseline_cfg,
    events=best_event_files,            # iterable of mseed paths
    variants=variants,
    run_single_event=run_single_event,  # your existing function
    refine_sector=False,
    topo_kw=topo_kw,
    run_if_missing_baseline=True,
    run_if_missing_variants=True,       # flip to False if you want variants manual
    w_sep=1.0, w_misfit=0.5, w_azgap=0.1,
)

# --- Show and save results ---
# Saving lives inside the guard: the original wrote the CSV unconditionally,
# which fails on `summary.to_csv` when compare_runs produced no results.
if scored is not None:
    display(summary)
    display(win_counts)
    summary.to_csv(LOCALPROJECTDIR / "pairwise_summary_surface.csv", index=False)
else:
    print("No comparison results; pairwise summary not written.")

In [None]:
from flovopy.asl.compare_runs import compare_runs, build_intrinsic_table, add_baseline_free_scores, summarize_absolute_runs, per_event_winner_abs

# 1) (optional) still run pairwise comparisons
# (disabled: kept as a string literal so it does not execute)
'''
scored_pairwise, summary_pairwise, win_counts_pairwise = compare_runs(
    baseline_cfg,
    events=best_event_files,
    variants=variants,
    run_single_event=run_single_event,
    refine_sector=False,
    topo_kw=topo_kw,
    run_if_missing_baseline=True,
    run_if_missing_variants=True,
)
'''

# 2) Build absolute (baseline-free) table and scores
abs_tbl = build_intrinsic_table(
    baseline_cfg,
    events=best_event_files,
    variants=variants,
    run_single_event=run_single_event,
    refine_sector=False,
    topo_kw=topo_kw,
    run_if_missing_baseline=False,   # set True if you want to autorun here too
    run_if_missing_variants=False,
)

abs_scored = add_baseline_free_scores(abs_tbl)  # you can pass custom weights=...
abs_summary = summarize_absolute_runs(abs_scored)
winners_abs, win_counts_abs = per_event_winner_abs(abs_scored)

# DataFrame.to_csv() without a path writes nothing; it only returns the CSV text.
# The original discarded the first result, so print both tables explicitly.
# Pass a file path as the first argument to save them instead.
print(abs_summary.to_csv(index=False))
print(win_counts_abs.to_csv(index=False))

In [None]:
from flovopy.asl.compare_runs import crawl_intrinsic_runs
# Rebuild abs_tbl from crawl_intrinsic_runs(OUTPUT_DIR) and recompute the derived
# tables, overwriting the abs_* variables set by the previous cell.
# NOTE(review): presumably this scans existing run outputs under OUTPUT_DIR rather
# than running anything — confirm against flovopy.asl.compare_runs.
# add_baseline_free_scores, summarize_absolute_runs and per_event_winner_abs are
# not imported in this cell; they come from the import in the previous cell.
abs_tbl = crawl_intrinsic_runs(OUTPUT_DIR)
abs_scored  = add_baseline_free_scores(abs_tbl)
abs_summary = summarize_absolute_runs(abs_scored)
winners_abs, win_counts_abs = per_event_winner_abs(abs_scored)

In [None]:

# Placeholder cell: the original line was not valid Python (a stray leading "i"
# and a literal "(...)" where the imported names belong), so it is kept as a comment.
# from flovopy.asl.analyze_run_pairs import (...)

In [None]:
# --- Run ASL per event (cell 6) --- (disabled: the body below is a triple-quoted string, so none of it executes)
'''
from typing import List, Dict, Any
summaries: List[Dict[str, Any]] = []

for i, ev in zip(best_file_nums, best_event_files):
    print(f"[{i}/{len(event_files)}] {ev}")
    result = run_single_event(
        mseed_file=str(ev),
        cfg=cfg,
        refine_sector=REFINE_SECTOR,
        station_gains_df=None,
        topo_kw=topo_kw,
        debug=True,
    )
    summaries.append(result)
    break

# Summarize
df = pd.DataFrame(summaries)
display(df)

summary_csv = Path(OUTPUT_DIR) / f"{cfg.tag()}__summary.csv"
df.to_csv(summary_csv, index=False)
print(f"Summary saved to: {summary_csv}")

if not df.empty:
    n_ok = int((~df.get("error").notna()).sum()) if "error" in df.columns else len(df)
    print(f"Success: {n_ok}/{len(df)}")
'''

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

def load_all_event_comparisons(root: Path) -> pd.DataFrame:
    """
    Crawl event folders under `root` and stack `pairwise_run_comparisons.csv`.
    Returns a tidy DF with event_id inferred from folder name.
    """
    rows = []
    for csv in root.rglob("pairwise_run_comparisons.csv"):
        try:
            df = pd.read_csv(csv)
            df["event_id"] = csv.parent.name            # the event folder name
            rows.append(df)
        except Exception as e:
            print(f"[skip] {csv}: {e}")
    if not rows:
        return pd.DataFrame()
    out = pd.concat(rows, ignore_index=True)
    # normalize label text to a short key
    out["variant"] = out["label"].astype(str)
    # guard presence of expected columns
    for c in ["mean_sep_km","delta_misfit_B_minus_A","delta_azgap_B_minus_A"]:
        if c not in out.columns: out[c] = np.nan
    return out

def add_composite_score(df: pd.DataFrame,
                        w_sep=1.0, w_misfit=0.5, w_azgap=0.1) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a z-scored column per metric and a weighted ``score``.

    Lower is better. Each metric is standardised over all rows (NaNs ignored
    when estimating mean and spread); a metric with zero or undefined spread is
    only centred. The score is the weighted sum of the three z-columns, so a
    row with a NaN metric gets a NaN score.
    """
    scored = df.copy()
    weights = {
        "mean_sep_km": w_sep,
        "delta_misfit_B_minus_A": w_misfit,
        "delta_azgap_B_minus_A": w_azgap,
    }
    # global z-scores; switch to per-event z if events differ strongly in scale
    for metric in weights:
        values = scored[metric].to_numpy(dtype=float)
        center = np.nanmean(values)
        spread = np.nanstd(values)
        if not spread > 0:
            spread = 1.0  # constant (or all-NaN) column: avoid dividing by zero
        scored[metric + "_z"] = (values - center) / spread
    scored["score"] = (
        weights["mean_sep_km"] * scored["mean_sep_km_z"]
        + weights["delta_misfit_B_minus_A"] * scored["delta_misfit_B_minus_A_z"]
        + weights["delta_azgap_B_minus_A"] * scored["delta_azgap_B_minus_A_z"]
    )
    return scored

def summarize_variants(df: pd.DataFrame) -> pd.DataFrame:
    """
    One row per variant, sorted by ascending mean composite score.

    Columns: event/row counts, mean, median and standard error of
    ``mean_sep_km``, mean and median misfit delta, mean azimuthal-gap delta,
    and mean and median ``score``. Rows with a missing variant form their own group.
    """
    def _standard_error(values):
        # population std (NaNs ignored) over sqrt of the number of valid samples
        n_valid = values.notna().sum()
        return np.nanstd(values) / np.sqrt(max(1, n_valid))

    # output column -> (source column, aggregation)
    named_aggs = {
        "n_events":         ("event_id", "nunique"),
        "n_rows":           ("event_id", "size"),
        "mean_sep_km_mean": ("mean_sep_km", "mean"),
        "mean_sep_km_med":  ("mean_sep_km", "median"),
        "mean_sep_km_se":   ("mean_sep_km", _standard_error),
        "dmisfit_mean":     ("delta_misfit_B_minus_A", "mean"),
        "dmisfit_med":      ("delta_misfit_B_minus_A", "median"),
        "dazgap_mean":      ("delta_azgap_B_minus_A", "mean"),
        "score_mean":       ("score", "mean"),
        "score_med":        ("score", "median"),
    }
    per_variant = df.groupby("variant", dropna=False).agg(**named_aggs)
    return per_variant.reset_index().sort_values("score_mean")

def per_event_winner(df_scored: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    For each event, pick the variant with the lowest composite score.

    Rows whose ``score`` is NaN are ignored, so an event with no valid score
    simply has no winner instead of breaking the lookup.

    Returns
    -------
    winners : pd.DataFrame
        One row per event with columns ``event_id``, ``variant``, ``score``.
    win_counts : pd.DataFrame
        Columns ``variant`` and ``wins``, sorted by descending ``wins``.
    """
    columns = ["event_id", "variant", "score"]
    # idxmin cannot name a winner for an all-NaN group, so drop unscored rows first.
    ranked = df_scored.dropna(subset=["score"])
    if ranked.empty:
        win_counts = pd.DataFrame({
            "variant": pd.Series(dtype=object),
            "wins": pd.Series(dtype="int64"),
        })
        return ranked.loc[:, columns], win_counts

    # keep only the best row per event_id
    idx = ranked.groupby("event_id")["score"].idxmin()
    winners = ranked.loc[idx, columns]
    win_counts = winners.groupby("variant").size().rename("wins").reset_index()
    return winners, win_counts.sort_values("wins", ascending=False)

# --- run it ---
ROOT = OUTPUT_DIR  # your existing OUTDIR base
allcmp = load_all_event_comparisons(ROOT)
# load_all_event_comparisons returns a column-less DataFrame when nothing is found,
# so 'event_id' must not be indexed (and no scoring attempted) in that case.
n_events = allcmp['event_id'].nunique() if not allcmp.empty else 0
print(f"stacked rows: {len(allcmp)}, events: {n_events}")

if not allcmp.empty:
    scored = add_composite_score(allcmp, w_sep=1.0, w_misfit=0.5, w_azgap=0.1)
    summary = summarize_variants(scored)
    winners, win_counts = per_event_winner(scored)

    # quick looks
    display(summary.head(10))
    display(win_counts)

# Run all events efficiently

In [None]:
# Echo the values that the disabled run_all_events call below would receive.
print(INPUT_DIR)
print(cfg)
print(topo_kw)
print(REFINE_SECTOR)
'''
run_all_events(
    input_dir=INPUT_DIR,
    station_gains_df = None,
    cfg=cfg,
    refine_sector=REFINE_SECTOR,
    topo_kw=topo_kw,
    debug=True,
    max_events=999999,
    use_multiprocessing=True,
    workers=4,
)
'''

# Run Monte Carlo sweep of parameters for 1 event


In [None]:

from flovopy.asl.wrappers2 import run_event_monte_carlo
from flovopy.processing.sam import VSAM, DSAM
'''
# Simple 6-draw sweep (replace with your own priors/sequences)
configs = ASLConfig.generate_config_list(
    inventory=None,
    output_base=None,
    gridobj=None,
    global_cache=None,      
    wave_kinds=("surface","body"),
    station_corr_tables=(station_corrections_df), #annual_station_corrections_df),
    speeds=(1.0, 3.0),
    Qs=(23, 1000),
    dist_modes=("3d",), # 2d needs a different grid and different distance and amplitude corrections
    misfit_engines=("l2","r2", "lin"),
    peakfs=(2.0, 8.0),
    window_seconds = 5.0, # change to be a tuple 10.0) not implemented yet
    min_stations = 5,
    sam_class = (VSAM), #, DSAM), # not implemented yet
    sam_metric = ("mean"),# "median", "rms", "VT", "LP"), # this doesn't seem to be implemented yet
    # context can be set later; set here if you like:
    debug=False,
)

configs = ASLConfig.generate_config_list(
    inventory=None,
    output_base=None,
    gridobj=None,
    global_cache=None,      
    wave_kinds=("surface",),
    station_corr_tables=(station_corrections_df), #annual_station_corrections_df),
    speeds=(1.0, 3.0),
    Qs=(23, 1000),
    dist_modes=("3d",), # 2d needs a different grid and different distance and amplitude corrections
    misfit_engines=("l2"),
    peakfs=(8.0),
    window_seconds = 5.0, # change to be a tuple 10.0) not implemented yet
    min_stations = 5,
    sam_class = (VSAM), #, DSAM), # not implemented yet
    sam_metric = ("mean"),# "median", "rms", "VT", "LP"), # this doesn't seem to be implemented yet
    # context can be set later; set here if you like:
    debug=False,
)


configs = ASLConfig.generate_config_list(    
    inventory=INV,
    output_base=str(OUTPUT_DIR),
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
) 

print(len(configs))
'''


In [None]:

# Shared run context: the single event for the Monte Carlo sweep.
# Position 116 in event_files is also one of the entries in best_file_nums.
mseed_file   = event_files[116]
'''
results = run_event_monte_carlo(
    mseed_file=mseed_file,
    configs=configs,
    inventory=INV,
    output_base=str(OUTPUT_DIR),
    gridobj=gridobj,
    topo_kw=topo_kw,
    station_gains_df=None,
    parallel=False,
    max_workers=1,
    global_cache=GLOBAL_CACHE,
    debug=True,
)

# Inspect or summarize results as needed
n_ok = sum(1 for r in results if "error" not in r)
print(f"[MC] Completed {n_ok}/{len(results)} runs OK")
'''