# 1. Set up parameters for ASL

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
from obspy import read_inventory
from importlib import reload
from flovopy.asl.wrappers2 import run_single_event, find_event_files, run_all_events
from flovopy.core.mvo import dome_location, REGION_DEFAULT
from flovopy.processing.sam import VSAM, DSAM 
from flovopy.asl.config import ASLConfig
# -------------------------- Config --------------------------
# Directories: inputs, caches and metadata live under PROJECTDIR; results are
# written under LOCALPROJECTDIR.
HOME = Path.home()
PROJECTDIR      = HOME / "Dropbox" / "BRIEFCASE" / "SSADenver"
LOCALPROJECTDIR = HOME / "work" / "PROJECTS" / "SSADenver_local"
OUTPUT_DIR      = LOCALPROJECTDIR / "asl_results"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
INPUT_DIR       = PROJECTDIR / "ASL_inputs" / "biggest_pdc_events"
GLOBAL_CACHE    = PROJECTDIR / "asl_global_cache"
METADATA_DIR    = PROJECTDIR / "metadata"
STATION_CORRECTIONS_DIR = PROJECTDIR / "station_correction_analysis"

# Master files
INVENTORY_XML    = METADATA_DIR / "MV_Seismic_and_GPS_stations.xml"
DEM_DEFAULT      = METADATA_DIR / "MONTSERRAT_DEM_WGS84_MASTER.tif"
GRIDFILE_DEFAULT = METADATA_DIR / "MASTER_GRID_MONTSERRAT.pkl"

# Parameters for envelopes and cross-correlation
SMOOTH_SECONDS  = 1.0
MAX_LAG_SECONDS = 8.0
MIN_XCORR       = 0.5

# Distance mode: "3d" or "2d". "2d" essentially squashes Montserrat topography
# and stations onto a sea-level plane, i.e. elevation data are ignored when
# computing distances.
DIST_MODE = "3d"

# Inventory of Montserrat stations (read_inventory is imported at the top of this cell)
INV = read_inventory(INVENTORY_XML)
print(f"[INV] Networks: {len(INV)}  Stations: {sum(len(n) for n in INV)}  Channels: {sum(len(sta) for net in INV for sta in net)}")

# Montserrat station corrections estimated from regionals
station_corrections_csv = STATION_CORRECTIONS_DIR / "station_gains_intervals.csv"
annual_station_corrections_csv = STATION_CORRECTIONS_DIR / "station_gains_intervals_by_year.csv"
station_corrections_df = pd.read_csv(station_corrections_csv)
annual_station_corrections_df = pd.read_csv(annual_station_corrections_csv)

# Montserrat pre-defined Grid (from 02 tutorial)
from flovopy.asl.grid import Grid
gridobj = Grid.load(GRIDFILE_DEFAULT)
print(gridobj)

# Montserrat constants (dome_location / REGION_DEFAULT are imported at the top of this cell)
print("Dome (assumed source) =", dome_location)

# Events: enumerate the input files and keep a number -> file index on disk.
event_files = list(find_event_files(INPUT_DIR))
eventcsvfile = Path(OUTPUT_DIR) / "mseed_files.csv"
# The index CSV is written only when it does not exist yet, so it is NOT
# refreshed if the contents of INPUT_DIR change later.
if not eventcsvfile.is_file():
    rows = [{"num": num, "f": str(f)} for num, f in enumerate(event_files)]
    df = pd.DataFrame(rows)
    df.to_csv(eventcsvfile, index=False)

# Hand-picked positions in `event_files`. They are only meaningful while the
# file listing is unchanged, so fail with a clear message instead of a bare
# "list index out of range" when the listing is shorter than expected.
best_file_nums  = [35, 36, 40, 52, 82, 83, 84, 116, 310, 338]
_out_of_range = [i for i in best_file_nums if i >= len(event_files)]
if _out_of_range:
    raise IndexError(
        f"best_file_nums {_out_of_range} out of range: "
        f"only {len(event_files)} event files found in {INPUT_DIR}"
    )
best_event_files = [event_files[i] for i in best_file_nums]
print(f'Best miniseed files are: {best_event_files}')
REFINE_SECTOR = False   # enable triangular dome-to-sea refinement

# Parameters to pass for making pygmt topo maps
topo_kw = {
    "inv": INV,
    "add_labels": True,
    "cmap": "gray",
    "region": REGION_DEFAULT,
    "dem_tif": DEM_DEFAULT,  # basemap shading from your GeoTIFF - but does not actually seem to use this unless topo_color=True and cmap=None
    "frame": True,
    "dome_location": dome_location,
}

[INV] Networks: 1  Stations: 48  Channels: 77
[INFO] Grid loaded from /Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/metadata/MASTER_GRID_MONTSERRAT.pkl
Dome (assumed source) = {'lat': 16.7106, 'lon': -62.17747, 'elev': 1000.0}
Best miniseed files are: ['/Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/ASL_inputs/biggest_pdc_events/2000-08-05-1840-17S.MVO___019.cleaned', '/Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/ASL_inputs/biggest_pdc_events/2000-08-07-0441-43S.MVO___019.cleaned', '/Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/ASL_inputs/biggest_pdc_events/2000-09-14-1900-58S.MVO___019.cleaned', '/Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/ASL_inputs/biggest_pdc_events/2000-11-26-2123-08S.MVO___019.cleaned', '/Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/ASL_inputs/biggest_pdc_events/2006-04-12-0025-40S.MVO___031.cleaned', '/Users/GlennThompson/Dropbox/BRIEFCASE/SSADenver/ASL_inputs/biggest_pdc_events/2006-04-13-1534-00S.MVO___031.cleaned', '/Users/GlennThompson/Dr

# Run events from last cell, one event at a time, to check it works

In [None]:
# Build an ASL Configuration. This is inherited by various downstream functions
# This describes the physical parameters, the station metadata, the grid, the misfit algorithm, etc.
# Keep the object returned by build(), exactly as the later cells do
# (`cfg = ASLConfig(...).build()`), rather than discarding it.
cfg = ASLConfig(
    inventory=INV,
    output_base=OUTPUT_DIR, # str?
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
    station_correction_dataframe=station_corrections_df,
    wave_kind = "body", # "surface" or "body"
    speed = 3.0, # km/s
    Q = 100, # attenuation quality factor
    peakf = 2.0, # Hz
    dist_mode = DIST_MODE, # or "2d"
    misfit_engine = "r2", # l2, r2, lin?
    window_seconds = 5.0, # length of time window for amplitude measurement
    min_stations = 5, # minimum number of stations required to locate event
    sam_class = VSAM, # or DSAM
    sam_metric = "VT", # or one of "mean", "median", "max", "rms", "VLP", or "LP"
    debug=True,
).build()
summaries = []

# Run the baseline configuration on each hand-picked event, one at a time.
REFINE_SECTOR = False
for i, ev in zip(best_file_nums, best_event_files):
    print(f"[{i}/{len(event_files)}] {ev}")
    result = run_single_event(
        mseed_file=str(ev),
        cfg=cfg,
        refine_sector=REFINE_SECTOR,
        station_gains_df=None,
        switch_event_ctag = True,
        topo_kw=topo_kw,
        mseed_units='m/s', # default units for miniseed files being used - probably "Counts" or "m/s"
        reduce_time=True,
        debug=True,
    )
    summaries.append(result)

# Summarize: one row per event result
df = pd.DataFrame(summaries)
display(df)

summary_csv = Path(OUTPUT_DIR) / f"{cfg.tag()}__summary.csv"
df.to_csv(summary_csv, index=False)
print(f"Summary saved to: {summary_csv}")

if not df.empty:
    # A row counts as a success when its "error" value is missing; if there is
    # no "error" column at all, every row counts.
    n_ok = int(df["error"].isna().sum()) if "error" in df.columns else len(df)
    print(f"Success: {n_ok}/{len(df)}")


In [None]:
from dataclasses import replace
from pathlib import Path
import pandas as pd

# --- build the baseline cfg first (as you already do) ---
# cfg = ASLConfig(...).build()

def cfg_variants_from(baseline_cfg: ASLConfig):
    """
    Derive single-parameter variants of an already built baseline config.

    Each variant is a copy of `baseline_cfg` (via dataclasses.replace) with
    exactly one field overridden, and is built before being returned.

    Returns
    -------
    dict
        Maps a short variant label to the built variant config, in the order
        the overrides are listed below.
    """
    # (label, field override) pairs; the baseline values noted on the right
    # are the ones used for the baseline config in this notebook.
    single_changes = (
        ("Q23",             {"Q": 23}),                              # Q 100 -> 23
        ("v1.5",            {"speed": 1.5}),                         # speed 3.0 -> 1.5 km/s
        ("win2s",           {"window_seconds": 2.0}),                # window 5 -> 2 s
        ("metric_mean",     {"sam_metric": "mean"}),                 # metric VT -> mean
        ("no_station_corr", {"station_correction_dataframe": None}), # station corr ON -> OFF
    )
    return {
        label: replace(baseline_cfg, **override).build()
        for label, override in single_changes
    }


# --- make variants ---
# NOTE(review): `cfg` here is whatever an earlier cell left in the namespace.
# This cell rebuilds `cfg` further down and regenerates `variants` from it,
# so this first result is overwritten before it is used.
variants = cfg_variants_from(baseline_cfg=cfg)   # dict[label] -> built ASLConfig

def csv_for_run(mseed_file: str | Path, cfg: ASLConfig) -> Path:
    """
    Locate the source CSV written by wrappers2.run_single_event for a given cfg.

    The products directory is `<cfg.output_base>/<mseed stem>/<name of cfg.outdir>`.
    Candidate file names are tried in this order, and the first one that
    exists is returned:

        source_<tag>_refined.csv, source_<tag>.csv, <tag>_refined.csv

    The `source_` prefixed names are the ones the comparison code in this
    notebook actually reads. If none of the candidates exists, the
    (possibly non-existent) path `<tag>.csv` is returned, so the result is
    always a Path.

    Parameters
    ----------
    mseed_file : event waveform file; only its stem is used.
    cfg        : built config providing `output_base`, `outdir` and `tag()`.
    """
    event_dir = Path(cfg.output_base) / Path(mseed_file).stem
    products_dir = event_dir / Path(cfg.outdir).name
    tag = cfg.tag()
    candidates = (
        products_dir / f"source_{tag}_refined.csv",
        products_dir / f"source_{tag}.csv",
        products_dir / f"{tag}_refined.csv",
    )
    fallback = products_dir / f"{tag}.csv"
    return next((c for c in candidates if c.exists()), fallback)

# Build baseline config
cfg = ASLConfig(
    inventory=INV,
    output_base=OUTPUT_DIR,
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
    station_correction_dataframe=station_corrections_df,
    wave_kind="body",
    speed=3.0,
    Q=100,
    peakf=2.0,
    dist_mode=DIST_MODE,
    misfit_engine="r2",
    window_seconds=5.0,
    min_stations=5,
    sam_class=VSAM,
    sam_metric="VT",
    debug=True,
).build()

# Generate the one-change variants with cfg_variants_from(), defined earlier in this cell
variants = cfg_variants_from(baseline_cfg=cfg)

REFINE_SECTOR = False
summaries = []

# Run every variant on every hand-picked event. The baseline itself is not
# run here; its CSV must already exist for the comparisons further down.
for i, ev in zip(best_file_nums, best_event_files):
    ev_key = Path(ev).stem
    print(f"\n[{i}/{len(event_files)}] Processing event: {ev_key}")

    for k, variant in variants.items():
        try:
            result = run_single_event(
                mseed_file=str(ev),
                cfg=variant,
                refine_sector=REFINE_SECTOR,
                station_gains_df=None,
                switch_event_ctag=True,
                topo_kw=topo_kw,
                mseed_units="m/s",
                reduce_time=True,
                debug=True,
            )
            summaries.append(result)
            print(f"  ✓ Variant {k} finished successfully")
        except Exception as e:
            # Best effort: one failing variant must not stop the remaining runs.
            print(f"  ✗ Variant {k} failed: {e}")


from flovopy.asl.analyze_run_pairs import safe_compare, load_all_event_comparisons, add_composite_score, summarize_variants, per_event_winner

for i, ev in zip(best_file_nums, best_event_files):
    print(f"[{i}/{len(event_files)}] {ev}")
    mseed_file=str(ev)

    # Build paths for baseline + variants for ONE event:
    event_dir    = OUTPUT_DIR / Path(mseed_file).stem
    products_dir = event_dir / Path(cfg.outdir).name

    # NOTE(review): these directory/file names are hard-coded and presumably
    # have to match what cfg.tag() yields for each variant - confirm.
    baseline = products_dir / "source_VSAM_VT_5s_body_v3_Q100_F2_3d_r2_SC.csv"
    alt_Q23  = event_dir / "VSAM_VT_5s_body_v3_Q23_F2_3d_r2_SC" / "source_VSAM_VT_5s_body_v3_Q23_F2_3d_r2_SC.csv"
    alt_v15  = event_dir / "VSAM_VT_5s_body_v1.5_Q100_F2_3d_r2_SC" / "source_VSAM_VT_5s_body_v1.5_Q100_F2_3d_r2_SC.csv"
    alt_win2 = event_dir / "VSAM_VT_2s_body_v3_Q100_F2_3d_r2_SC" / "source_VSAM_VT_2s_body_v3_Q100_F2_3d_r2_SC.csv"
    alt_mean = event_dir / "VSAM_mean_5s_body_v3_Q100_F2_3d_r2_SC" / "source_VSAM_mean_5s_body_v3_Q100_F2_3d_r2_SC.csv"
    alt_nosc = event_dir / "VSAM_VT_5s_body_v3_Q100_F2_3d_r2"  / "source_VSAM_VT_5s_body_v3_Q100_F2_3d_r2.csv"# station corr OFF

    summary_csv = event_dir / "pairwise_run_comparisons.csv"

    # Append comparisons (CSV vs CSV; aligns by time if present, else by index)
    safe_compare(summary_csv, baseline, alt_Q23,  label="Q 100→23")
    safe_compare(summary_csv, baseline, alt_v15,  label="speed 3.0→1.5 km/s")
    #safe_compare(summary_csv, baseline, alt_win2, label="window 5→2 s")
    safe_compare(summary_csv, baseline, alt_mean, label="metric VT→mean")
    safe_compare(summary_csv, baseline, alt_nosc, label="station corr ON→OFF")


# Roll-up across events. ROOT must be defined here: it was previously only
# assigned in a later cell, so this cell failed with NameError when the
# notebook was run top to bottom.
ROOT = OUTPUT_DIR
allcmp = load_all_event_comparisons(ROOT)
# An empty roll-up has no "event_id" column, so guard the lookup.
n_events = allcmp['event_id'].nunique() if not allcmp.empty else 0
print(f"stacked rows: {len(allcmp)}, events: {n_events}")

if not allcmp.empty:
    scored = add_composite_score(allcmp, w_sep=1.0, w_misfit=0.5, w_azgap=0.1)
    summary = summarize_variants(scored)
    winners, win_counts = per_event_winner(scored)

    # quick looks
    display(summary.head(10))
    display(win_counts)

In [None]:
# --- Minimal orchestrator: build variants, resolve CSVs, append comparisons ---

from dataclasses import replace
from pathlib import Path
import pandas as pd
import numpy as np

# Import the real compare helpers, but alias safe_compare to avoid name clash
from flovopy.asl.analyze_run_pairs import (
    safe_compare as append_compare,
    load_all_event_comparisons,
    add_composite_score,
    summarize_variants,
    per_event_winner,
)

# ---------- 1) Build baseline config ----------
# Reference configuration that every one-change variant below is derived from.
# It matches the baseline used in the earlier cells except for sam_metric.
# NOTE(review): label_map further down still describes the metric change as
# "VT→mean" although the baseline metric here is "LP" - confirm the labels.
baseline_cfg = ASLConfig(
    inventory=INV,
    output_base=OUTPUT_DIR,
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
    station_correction_dataframe=station_corrections_df,
    wave_kind="body",           # "surface" or "body"
    speed=3.0,                  # km/s
    Q=100,                      # attenuation quality factor
    peakf=2.0,                  # Hz
    dist_mode=DIST_MODE,        # e.g., "3d"
    misfit_engine="r2",
    window_seconds=5.0,         # length of time window for amplitude measurement
    min_stations=5,             # minimum number of stations required to locate event
    sam_class=VSAM,             # or DSAM
    sam_metric="LP", # changed from VT
    debug=True,
).build()

# ---------- 2) One-change variants ----------
def cfg_variants_from(baseline: ASLConfig) -> dict[str, ASLConfig]:
    """
    Build one-change variants of a built baseline config.

    Each variant is `dataclasses.replace(baseline, <one field>=...)` followed
    by `.build()`. Returns a dict mapping a short variant key to the built
    config, in the order listed below.

    The "rect_grid" variant needs a module-level `rect_gridobj`. If that name
    is not defined, the variant is skipped with a printed note instead of
    raising NameError and losing every other variant.
    """
    overrides: dict[str, dict] = {
        "Q23":             {"Q": 23},
        "v1.5":            {"speed": 1.5},
        "win2s":           {"window_seconds": 2.0},
        "metric_mean":     {"sam_metric": "mean"},
        "no_station_corr": {"station_correction_dataframe": None},
        "VT":              {"sam_metric": "VT"},
        "l2":              {"misfit_engine": "l2"},
        "lin":             {"misfit_engine": "lin"},
        "surface":         {"wave_kind": "surface"},  # change speed too?
        "peakf5hz":        {"peakf": 5.0},
        "2d":              {"dist_mode": "2d"},
    }

    # just an unmasked version of gridobj?
    rect = globals().get("rect_gridobj")
    if rect is None:
        print("[variants] rect_gridobj is not defined; skipping 'rect_grid' variant")
    else:
        overrides["rect_grid"] = {"gridobj": rect}

    overrides["annual_SC"] = {
        "station_correction_dataframe": annual_station_corrections_df,
    }

    return {
        key: replace(baseline, **change).build()
        for key, change in overrides.items()
    }

# Build every variant once, up front; the comparison loop further down iterates this dict.
variants = cfg_variants_from(baseline_cfg)

# ---------- 3) Locate each run’s CSV by tag ----------
def _products_dir_for(cfg: ASLConfig, mseed_file: str | Path) -> Path:
    """Return `<cfg.output_base>/<mseed file stem>/<last component of cfg.outdir>`."""
    event_name = Path(mseed_file).stem
    products_name = Path(cfg.outdir).name
    return Path(cfg.output_base) / event_name / products_name

def csv_for_run(cfg: ASLConfig, mseed_file: str | Path) -> Path | None:
    """
    Return the first existing source CSV for this cfg/event, or None.

    File names are tried inside the run's products directory in this order:
    source_<tag>_refined.csv, source_<tag>.csv, <tag>_refined.csv, <tag>.csv.
    """
    products = _products_dir_for(cfg, mseed_file)
    run_tag = cfg.tag()
    file_names = (
        f"source_{run_tag}_refined.csv",
        f"source_{run_tag}.csv",
        f"{run_tag}_refined.csv",
        f"{run_tag}.csv",
    )
    existing = (products / name for name in file_names if (products / name).exists())
    return next(existing, None)

# (Optional) auto-run a variant if its CSV is missing
RUN_IF_MISSING = False
REFINE_SECTOR  = False

def ensure_csv_for(cfg: ASLConfig, mseed_file: str | Path) -> Path | None:
    """
    Return the source CSV for this cfg/event, optionally running the event first.

    If the CSV is already there, or RUN_IF_MISSING is False, the lookup result
    (which may be None) is returned as is. Otherwise the event is run with
    run_single_event and the CSV is looked up again. A failure during that run
    is printed and None is returned.
    """
    found = csv_for_run(cfg, mseed_file)
    already_there = found is not None and found.exists()
    if already_there or not RUN_IF_MISSING:
        return found

    try:
        run_single_event(
            mseed_file=str(mseed_file),
            cfg=cfg,
            refine_sector=REFINE_SECTOR,
            station_gains_df=None,
            switch_event_ctag=True,
            topo_kw=topo_kw,
            mseed_units="m/s",
            reduce_time=True,
            debug=True,
        )
        return csv_for_run(cfg, mseed_file)
    except Exception as e:
        print(f"  [run error] {Path(mseed_file).stem} · {cfg.tag()}: {e}")
        return None

# ---------- 4) Compare baseline vs each variant ----------
# Human-readable labels written to the per-event comparison CSV. Variants
# without an entry here fall back to their key (see the loop below).
# NOTE(review): these labels describe changes from a "VT" baseline, but
# baseline_cfg in this cell uses sam_metric="LP" - confirm the wording.
label_map = {
    "Q23": "Q 100→23",
    "v1.5": "speed 3.0→1.5 km/s",
    "win2s": "window 5→2 s",
    "metric_mean": "metric VT→mean",
    "no_station_corr": "station corr ON→OFF",
}

for i, ev in zip(best_file_nums, best_event_files):
    ev_key = Path(ev).stem
    print(f"\n[{i}/{len(event_files)}] {ev_key}")
    event_dir = Path(OUTPUT_DIR) / ev_key
    summary_csv = event_dir / "pairwise_run_comparisons.csv"

    base_csv = ensure_csv_for(baseline_cfg, ev)
    if base_csv is None:
        print("  [skip] no baseline CSV; cannot compare.")
        continue

    for key, vcfg in variants.items():
        # Resolve the label once, outside the try block. Indexing label_map
        # directly raised KeyError for variants that have no entry, and did so
        # again inside the except handler, aborting the whole loop.
        label = label_map.get(key, key)

        alt_csv = ensure_csv_for(vcfg, ev)
        if alt_csv is None:
            print(f"  [skip] {label}: no variant CSV; cannot compare.")
            continue

        try:
            append_compare(summary_csv, base_csv, alt_csv, label=label)
        except Exception as e:
            # Best effort: report and carry on with the next variant.
            print(f"  [compare error] {label}: {e}")

# ---------- 5) Roll-up ----------
# Stack every per-event comparison CSV found under the results directory,
# then score and summarise the variants (only when there is something to score).
ROOT = OUTPUT_DIR
allcmp = load_all_event_comparisons(ROOT)
has_rows = not allcmp.empty
n_events = allcmp['event_id'].nunique() if has_rows else 0
print(f"\nstacked rows: {len(allcmp)}, events: {n_events}")

if has_rows:
    scored = add_composite_score(allcmp, w_sep=1.0, w_misfit=0.5, w_azgap=0.1)
    summary = summarize_variants(scored)
    winners, win_counts = per_event_winner(scored)
    for table in (summary.head(10), win_counts):
        display(table)

[ASLConfig.build] Inventory loaded: Inventory created at 2025-10-03T18:50:18.304745Z
	Created by: ObsPy 1.4.2
		    https://www.obspy.org
	Sending institution: Merged and patched by merge_inventories()
	Contains:
		Networks (1):
			MV
		Stations (48):
			MV.CASTLE PEAK (AUTO_NAME_CASTLE PEAK)
			MV.CENTRE HILLS (AUTO_NAME_CENTRE HILLS)
			MV.CHANCES EDM (AUTO_NAME_CHANCES EDM)
			MV.FARRELLS (AUTO_NAME_FARRELLS)
			MV.FERGUS RIDGE (AUTO_NAME_FERGUS RIDGE)
			MV.FT3 (AUTO_NAME_FT3)
			MV.GAGES MTN (AUTO_NAME_GAGES MTN)
			MV.GAL MTN (AUTO_NAME_GAL MTN)
			MV.GALS (AUTO_NAME_GALS)
			MV.GALWAYS (SSH) (AUTO_NAME_GALWAYS (SSH))
			MV.HARR (AUTO_NAME_HARR)
			MV.HERM (AUTO_NAME_HERM)
			MV.JACKBOY REPEATER (AUTO_NAME_JACKBOY REPEATER)
			MV.LEES YARD (AUTO_NAME_LEES YARD)
			MV.LL_DOAS (AUTO_NAME_LL_DOAS)
			MV.LOOKOUT YARD (AUTO_NAME_LOOKOUT YARD)
			MV.M17 (AUTO_NAME_M17)
			MV.M27 (AUTO_NAME_M27)
			MV.MBBE (AUTO_NAME_MBBE)
			MV.MBBY (AUTO_NAME_MBBY)
			MV.MBFL (AUTO_NAME_MBFL)
			MV.MB

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-08-07-0441-43S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-08-07-0441-43S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=296)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-08-07-0441-43S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-08-07-0441-43S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-08-07-0441-43S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)

[40/368] 2000-09-14-1900-58S.MVO___019
both CSV files exist
[compare] appen

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-09-14-1900-58S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-09-14-1900-58S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-09-14-1900-58S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=299)

[52/368] 2000-11-26-2123-08S.MVO___019
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-11-26-2123-08S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=207)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-11-26-2123-08S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=204)
both CSV files exist
[compare] appen

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2000-11-26-2123-08S.MVO___019/pairwise_run_comparisons.csv (time alignment, n=207)

[82/368] 2006-04-12-0025-40S.MVO___031
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-12-0025-40S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=104)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-12-0025-40S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=101)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-12-0025-40S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=104)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-12-0025-40S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=104)
both CSV files exist
[compare] appen

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-13-1534-00S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=121)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-13-1534-00S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=121)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-13-1534-00S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=121)

[84/368] 2006-04-14-1024-25S.MVO___031
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-14-1024-25S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=92)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-04-14-1024-25S.MVO___031/pairwise_run_comparisons.csv (time alignment, n=89)
both CSV files exist
[compare] appende

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime6

[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-12-02-0448-14S.MVO___025/pairwise_run_comparisons.csv (time alignment, n=108)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-12-02-0448-14S.MVO___025/pairwise_run_comparisons.csv (time alignment, n=105)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-12-02-0448-14S.MVO___025/pairwise_run_comparisons.csv (time alignment, n=108)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-12-02-0448-14S.MVO___025/pairwise_run_comparisons.csv (time alignment, n=108)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/2006-12-02-0448-14S.MVO___025/pairwise_run_comparisons.csv (time alignment, n=108)

[310/368] 9901-13-1030-16S.MVO_14_1
both CSV files exist
[compare] appended

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9901-13-1030-16S.MVO_14_1/pairwise_run_comparisons.csv (time alignment, n=117)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9901-13-1030-16S.MVO_14_1/pairwise_run_comparisons.csv (time alignment, n=119)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9901-13-1030-16S.MVO_14_1/pairwise_run_comparisons.csv (time alignment, n=119)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9901-13-1030-16S.MVO_14_1/pairwise_run_comparisons.csv (time alignment, n=119)

[338/368] 9907-11-1916-38S.MVO_19_1
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9907-11-1916-38S.MVO_19_1/pairwise_run_comparisons.csv (time alignment, n=119)
both CSV files exist
[compare] appended to /Users/GlennThom

  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9907-11-1916-38S.MVO_19_1/pairwise_run_comparisons.csv (time alignment, n=119)
both CSV files exist
[compare] appended to /Users/GlennThompson/work/PROJECTS/SSADenver_local/asl_results/9907-11-1916-38S.MVO_19_1/pairwise_run_comparisons.csv (time alignment, n=119)

stacked rows: 94, events: 10


  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")
  t = t_parsed.dt.round("S").to_numpy("datetime64[ns]")


Unnamed: 0,variant,n_events,n_rows,mean_sep_km_mean,mean_sep_km_med,mean_sep_km_se,dmisfit_mean,dmisfit_med,dazgap_mean,score_mean,score_med
0,Q 100→23,10,21,0.163961,0.204426,0.020492,-0.022537,-0.020743,-3.487486,-1.064359,-0.93787
2,speed 3.0→1.5 km/s,10,21,0.171432,0.195011,0.015206,-0.007635,-0.006683,-0.916829,-0.820696,-0.708126
4,window 5→2 s,10,10,0.245833,0.263761,0.017922,0.007266,0.007997,0.161731,-0.291633,-0.210323
1,metric VT→mean,10,21,0.580514,0.496717,0.043728,-0.017605,-0.006721,3.293124,0.948847,0.759205
3,station corr ON→OFF,10,21,0.416463,0.531318,0.042873,0.062641,0.042047,-6.921781,1.07508,1.249425


Unnamed: 0,variant,wins
0,Q 100→23,8
1,speed 3.0→1.5 km/s,2


In [3]:
print(summary)

               variant  n_events  n_rows  mean_sep_km_mean  mean_sep_km_med  \
0             Q 100→23        10      21          0.163961         0.204426   
2   speed 3.0→1.5 km/s        10      21          0.171432         0.195011   
4         window 5→2 s        10      10          0.245833         0.263761   
1       metric VT→mean        10      21          0.580514         0.496717   
3  station corr ON→OFF        10      21          0.416463         0.531318   

   mean_sep_km_se  dmisfit_mean  dmisfit_med  dazgap_mean  score_mean  \
0        0.020492     -0.022537    -0.020743    -3.487486   -1.064359   
2        0.015206     -0.007635    -0.006683    -0.916829   -0.820696   
4        0.017922      0.007266     0.007997     0.161731   -0.291633   
1        0.043728     -0.017605    -0.006721     3.293124    0.948847   
3        0.042873      0.062641     0.042047    -6.921781    1.075080   

   score_med  
0  -0.937870  
2  -0.708126  
4  -0.210323  
1   0.759205  
3   1.24942

In [None]:
# --- Run ASL per event (cell 6) ---
# NOTE: this cell is disabled. Its whole body is wrapped in a triple-quoted
# string literal, so nothing below executes. It runs a single event (the loop
# ends with `break`) and writes the same summary CSV as the baseline-run cell.
'''
from typing import List, Dict, Any
summaries: List[Dict[str, Any]] = []

for i, ev in zip(best_file_nums, best_event_files):
    print(f"[{i}/{len(event_files)}] {ev}")
    result = run_single_event(
        mseed_file=str(ev),
        cfg=cfg,
        refine_sector=REFINE_SECTOR,
        station_gains_df=None,
        topo_kw=topo_kw,
        debug=True,
    )
    summaries.append(result)
    break

# Summarize
df = pd.DataFrame(summaries)
display(df)

summary_csv = Path(OUTPUT_DIR) / f"{cfg.tag()}__summary.csv"
df.to_csv(summary_csv, index=False)
print(f"Summary saved to: {summary_csv}")

if not df.empty:
    n_ok = int((~df.get("error").notna()).sum()) if "error" in df.columns else len(df)
    print(f"Success: {n_ok}/{len(df)}")
'''

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

def load_all_event_comparisons(root: Path) -> pd.DataFrame:
    """
    Crawl event folders under `root` and stack `pairwise_run_comparisons.csv`.

    Parameters
    ----------
    root : Path
        Directory searched recursively for files named
        ``pairwise_run_comparisons.csv``.

    Returns
    -------
    pd.DataFrame
        All readable CSVs concatenated row-wise, with two added columns:
        ``event_id`` (name of the folder containing the CSV) and ``variant``
        (the ``label`` column cast to str). The metric columns
        ``mean_sep_km``, ``delta_misfit_B_minus_A`` and
        ``delta_azgap_B_minus_A`` are always present (NaN-filled if absent
        from the files). When nothing could be loaded, an empty frame that
        still carries these column names is returned, so callers can index
        ``out["event_id"]`` without a KeyError.

    Notes
    -----
    Files that cannot be read are reported with a ``[skip]`` message and
    ignored; this is a deliberate best-effort crawl.
    """
    metric_cols = ["mean_sep_km", "delta_misfit_B_minus_A", "delta_azgap_B_minus_A"]
    frames = []
    # rglob yields paths in an unspecified order; sorting keeps the stacked row
    # order (and any downstream tie-breaking) reproducible between runs.
    for csv_path in sorted(Path(root).rglob("pairwise_run_comparisons.csv")):
        try:
            df = pd.read_csv(csv_path)
        except Exception as e:  # best effort: one unreadable file must not abort the crawl
            print(f"[skip] {csv_path}: {e}")
            continue
        df["event_id"] = csv_path.parent.name            # the event folder name
        frames.append(df)
    if not frames:
        return pd.DataFrame(columns=["event_id", "label", "variant", *metric_cols])
    out = pd.concat(frames, ignore_index=True)
    # normalize label text to a short key; tolerate files written without 'label'
    if "label" in out.columns:
        out["variant"] = out["label"].astype(str)
    elif "variant" not in out.columns:
        out["variant"] = np.nan
    # guard presence of expected columns
    for c in metric_cols:
        if c not in out.columns:
            out[c] = np.nan
    return out

def add_composite_score(df: pd.DataFrame,
                        w_sep=1.0, w_misfit=0.5, w_azgap=0.1) -> pd.DataFrame:
    """
    Lower is better. Negative deltas are good if they reduce misfit/azgap.
    """
    d = df.copy()
    # z-score each metric for comparability (event-wise optional)
    # here: global z-scores; switch to per-event z if events differ strongly in scale
    for col in ["mean_sep_km","delta_misfit_B_minus_A","delta_azgap_B_minus_A"]:
        x = d[col].to_numpy(dtype=float)
        mu, sd = np.nanmean(x), np.nanstd(x) if np.nanstd(x)>0 else 1.0
        d[col+"_z"] = (x - mu)/sd
    d["score"] = (
        w_sep    * d["mean_sep_km_z"] +
        w_misfit * d["delta_misfit_B_minus_A_z"] +
        w_azgap  * d["delta_azgap_B_minus_A_z"]
    )
    return d

def summarize_variants(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse scored comparison rows to one line per variant.

    For every ``variant`` (NaN variants form their own group) this reports the
    number of distinct events and of rows, mean / median / standard error of
    ``mean_sep_km``, mean and median of ``delta_misfit_B_minus_A``, mean of
    ``delta_azgap_B_minus_A``, and mean and median of ``score``. Rows are
    sorted by ascending ``score_mean``. Per-event win counts are not computed
    here.

    Parameters
    ----------
    df : pd.DataFrame
        Needs the columns ``variant``, ``event_id``, ``mean_sep_km``,
        ``delta_misfit_B_minus_A``, ``delta_azgap_B_minus_A`` and ``score``.

    Returns
    -------
    pd.DataFrame
        One row per variant with the aggregate columns listed above.
    """
    def _std_err(col):
        # population std over the non-NaN values, divided by sqrt(count);
        # the count is floored at 1 so an all-NaN group does not divide by zero
        n_valid = max(1, col.notna().sum())
        return np.nanstd(col) / np.sqrt(n_valid)

    spec = {
        "n_events": pd.NamedAgg(column="event_id", aggfunc="nunique"),
        "n_rows": pd.NamedAgg(column="event_id", aggfunc="size"),
        "mean_sep_km_mean": pd.NamedAgg(column="mean_sep_km", aggfunc="mean"),
        "mean_sep_km_med": pd.NamedAgg(column="mean_sep_km", aggfunc="median"),
        "mean_sep_km_se": pd.NamedAgg(column="mean_sep_km", aggfunc=_std_err),
        "dmisfit_mean": pd.NamedAgg(column="delta_misfit_B_minus_A", aggfunc="mean"),
        "dmisfit_med": pd.NamedAgg(column="delta_misfit_B_minus_A", aggfunc="median"),
        "dazgap_mean": pd.NamedAgg(column="delta_azgap_B_minus_A", aggfunc="mean"),
        "score_mean": pd.NamedAgg(column="score", aggfunc="mean"),
        "score_med": pd.NamedAgg(column="score", aggfunc="median"),
    }
    per_variant = df.groupby("variant", dropna=False).agg(**spec)
    per_variant = per_variant.reset_index()
    return per_variant.sort_values("score_mean")

def per_event_winner(df_scored: pd.DataFrame) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """
    For each event, pick the variant with the lowest composite score.

    Parameters
    ----------
    df_scored : pd.DataFrame
        Needs the columns ``event_id``, ``variant`` and ``score``.

    Returns
    -------
    winners : pd.DataFrame
        One row per event (``event_id``, ``variant``, ``score``) holding the
        row with the minimum score for that event.
    win_counts : pd.DataFrame
        Columns ``variant`` and ``wins`` (number of events won), sorted by
        ``wins`` in descending order.

    Notes
    -----
    Rows with a NaN score cannot win and are dropped before ranking; an event
    whose scores are all NaN therefore contributes no winner instead of
    breaking the ``idxmin`` / ``.loc`` lookup.
    """
    ranked = df_scored.dropna(subset=["score"])
    # keep only the best per (event_id)
    idx = ranked.groupby("event_id")["score"].idxmin()
    winners = ranked.loc[idx, ["event_id", "variant", "score"]]
    win_counts = winners.groupby("variant").size().rename("wins").reset_index()
    return winners, win_counts.sort_values("wins", ascending=False)

# --- run it ---
# Stack every per-event comparison table found under the ASL output tree,
# score each row, then summarise per variant and per event.
ROOT = OUTPUT_DIR  # your existing OUTDIR base
allcmp = load_all_event_comparisons(ROOT)
# The loader can return a frame without an 'event_id' column when no CSV was
# found, so count events defensively instead of indexing blindly.
n_events = allcmp["event_id"].nunique() if "event_id" in allcmp.columns else 0
print(f"stacked rows: {len(allcmp)}, events: {n_events}")

if allcmp.empty:
    # Nothing to score: keep the downstream names defined (as empty frames) so
    # later cells fail softly rather than with a NameError/KeyError.
    print(f"[warn] no pairwise_run_comparisons.csv rows found under {ROOT}; nothing to score")
    scored = allcmp.copy()
    summary = pd.DataFrame()
    winners = pd.DataFrame()
    win_counts = pd.DataFrame()
else:
    scored = add_composite_score(allcmp, w_sep=1.0, w_misfit=0.5, w_azgap=0.1)
    summary = summarize_variants(scored)
    winners, win_counts = per_event_winner(scored)

    # quick looks
    display(summary.head(10))
    display(win_counts)

# Run all events efficiently

In [None]:
# Echo the inputs that a batch run would use, so they can be checked by eye.
# NOTE(review): cfg, topo_kw and REFINE_SECTOR are not defined in this chunk;
# presumably set by earlier cells.
print(INPUT_DIR)
print(cfg)
print(topo_kw)
print(REFINE_SECTOR)
# NOTE: the run_all_events(...) call below sits inside a bare triple-quoted
# string, so it does NOT execute — the batch run is currently disabled.
# If re-enabled as written it would pass station_gains_df=None, debug=True,
# max_events=999999, use_multiprocessing=True and workers=4.
'''
run_all_events(
    input_dir=INPUT_DIR,
    station_gains_df = None,
    cfg=cfg,
    refine_sector=REFINE_SECTOR,
    topo_kw=topo_kw,
    debug=True,
    max_events=999999,
    use_multiprocessing=True,
    workers=4,
)
'''

# Run Monte Carlo sweep of parameters for 1 event


In [None]:

from flovopy.asl.wrappers2 import run_event_monte_carlo
from flovopy.processing.sam import VSAM, DSAM
# NOTE: the whole block below is a bare triple-quoted string, so none of it
# executes and `configs` is NOT assigned by this cell as it stands.
#
# If re-enabled, the quoted code assigns `configs` three times in a row via
# ASLConfig.generate_config_list(...); each assignment overwrites the previous
# one, so only the last call (inventory/output_base/gridobj/global_cache only)
# would determine the list whose length is printed.
#
# NOTE(review): several arguments inside the string are parenthesised scalars,
# not 1-tuples — `(station_corrections_df)`, `(VSAM)`, `("mean")`, `("l2")`
# and `(8.0)` evaluate to the bare object/str/float. If generate_config_list
# expects sequences for these, a trailing comma is needed (e.g. `("l2",)`) —
# TODO confirm against the ASLConfig API.
# NOTE(review): `gridobj` is not defined in this chunk; presumably set elsewhere.
'''
# Simple 6-draw sweep (replace with your own priors/sequences)
configs = ASLConfig.generate_config_list(
    inventory=None,
    output_base=None,
    gridobj=None,
    global_cache=None,      
    wave_kinds=("surface","body"),
    station_corr_tables=(station_corrections_df), #annual_station_corrections_df),
    speeds=(1.0, 3.0),
    Qs=(23, 1000),
    dist_modes=("3d",), # 2d needs a different grid and different distance and amplitude corrections
    misfit_engines=("l2","r2", "lin"),
    peakfs=(2.0, 8.0),
    window_seconds = 5.0, # change to be a tuple 10.0) not implemented yet
    min_stations = 5,
    sam_class = (VSAM), #, DSAM), # not implemented yet
    sam_metric = ("mean"),# "median", "rms", "VT", "LP"), # this doesn't seem to be implemented yet
    # context can be set later; set here if you like:
    debug=False,
)

configs = ASLConfig.generate_config_list(
    inventory=None,
    output_base=None,
    gridobj=None,
    global_cache=None,      
    wave_kinds=("surface",),
    station_corr_tables=(station_corrections_df), #annual_station_corrections_df),
    speeds=(1.0, 3.0),
    Qs=(23, 1000),
    dist_modes=("3d",), # 2d needs a different grid and different distance and amplitude corrections
    misfit_engines=("l2"),
    peakfs=(8.0),
    window_seconds = 5.0, # change to be a tuple 10.0) not implemented yet
    min_stations = 5,
    sam_class = (VSAM), #, DSAM), # not implemented yet
    sam_metric = ("mean"),# "median", "rms", "VT", "LP"), # this doesn't seem to be implemented yet
    # context can be set later; set here if you like:
    debug=False,
)


configs = ASLConfig.generate_config_list(    
    inventory=INV,
    output_base=str(OUTPUT_DIR),
    gridobj=gridobj,
    global_cache=GLOBAL_CACHE,
) 

print(len(configs))
'''


In [None]:

# Shared run context
# Selects the single event used for the Monte Carlo sweep by a hard-coded
# position in `event_files`.
# NOTE(review): `event_files` is not defined in this chunk; if it is a list,
# this raises IndexError when it has fewer than 117 entries — confirm.
mseed_file   = event_files[116]
# NOTE: the run_event_monte_carlo(...) call below is inside a bare
# triple-quoted string and therefore does NOT execute.
# If re-enabled as written it would run serially (parallel=False,
# max_workers=1) with station_gains_df=None and debug=True, then count the
# results that have no "error" key.
# NOTE(review): in the visible notebook `configs` is only assigned inside a
# disabled (string-quoted) cell, so that cell must be re-enabled first.
'''
results = run_event_monte_carlo(
    mseed_file=mseed_file,
    configs=configs,
    inventory=INV,
    output_base=str(OUTPUT_DIR),
    gridobj=gridobj,
    topo_kw=topo_kw,
    station_gains_df=None,
    parallel=False,
    max_workers=1,
    global_cache=GLOBAL_CACHE,
    debug=True,
)

# Inspect or summarize results as needed
n_ok = sum(1 for r in results if "error" not in r)
print(f"[MC] Completed {n_ok}/{len(results)} runs OK")
'''