# Amplitude Source Location (ASL) - Recreating Jacob's notebook with flovopy

## 1. Imports

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from obspy import read_inventory, UTCDateTime
from importlib import reload
from flovopy.asl.wrappers import run_single_event, find_event_files, run_all_events
from flovopy.processing.sam import VSAM, DSAM 
from flovopy.asl.config import ASLConfig, tweak_config

# Core ASL + utilities
from flovopy.asl.asl import ASL
from flovopy.asl.wrappers import asl_sausage
from flovopy.asl.grid import Grid, make_grid
from flovopy.asl.distances import compute_or_load_distances, distances_signature
from flovopy.asl.ampcorr import AmpCorr, AmpCorrParams
from flovopy.asl.misfit import StdOverMeanMisfit, R2DistanceMisfit, LinearizedDecayMisfit
from flovopy.asl.map import topo_map

# --- Diagnostics / comparisons ---
from flovopy.asl.compare import extract_asl_diagnostics, compare_asl_sources

# --- Simulation helpers ---
from flovopy.asl.simulate import simulate_SAM, plot_SAM, synthetic_source_from_grid

# -------------------------- Config --------------------------
# Directories: all inputs/outputs live under one data folder in the user's home.
HOME = Path.home()
DATA_DIR = HOME / 'Dropbox' / 'BRIEFCASE'/ 'SSADenver'  /'Jacob'

# Master files
INVENTORY_XML   = DATA_DIR / "6Q.xml"

# Default map region; forwarded as "region" to the topo-map kwargs and to the
# channel-finding run in later cells.
# NOTE(review): the values bracket dome_location's lon/lat, so this is presumably
# [lon_min, lon_max, lat_min, lat_max] -- confirm against flovopy.
REGION_DEFAULT = [-90.98, -90.78, 14.365, 14.49]
# No DEM GeoTIFF is configured; this None is forwarded as "dem_tif" in topo_kw.
DEM_DEFAULT = None

# Other parameters
# NOTE(review): distance-mode label; its meaning is defined by flovopy, not here.
DIST_MODE = "2d"

# Station inventory read from INVENTORY_XML.
# NOTE(review): this used to be labelled "Montserrat stations", but the region
# box and later cells refer to Fuego/Ceniza -- confirm which network 6Q.xml holds.
INV     = read_inventory(INVENTORY_XML)
# Summary: number of networks, stations summed over networks, and channels
# summed over every station of every network.
print(f"[INV] Networks: {len(INV)}  Stations: {sum(len(n) for n in INV)}  Channels: {sum(len(sta) for net in INV for sta in net)}")

# Not referenced by any cell visible here.
MAT_FILE = DATA_DIR / "outputfromReadMapData.mat"
MSEED_DIR = DATA_DIR / "ClipMSEED"                # directory containing MiniSEED


# Disabled legacy parameters, kept for reference inside an inert string literal:
# nothing between the triple quotes is executed, and CRS/Transformer used in it
# are not imported by this cell.
'''
SRTM_ASC_GZ = DATA_DIR / "srtm_18_10.asc.gz"      # optional background
SRTM_CELL = 0.00083333333333333
SRTM_XLL  = -95.0
SRTM_YLL  = 10.0

stations = ['FEJ1', 'FEC1', 'FEC2', 'FEC4']       # order must match sta rows

# Seismo params
sps = 200
lc, hc = 1.0, 99.0                                # bandpass (Hz)
pre_filt = (0.5, 0.8, 90.0, 100.0)                # for remove_response
beta = 1250.0                                     # m/s, assumed wave speed
m_slope = -1.0                                    # -1 body, -0.5 surface

winlength_seconds = 10
plot_limits_sec = (7500, 10000)
t_start_sec = 8000
t_end_sec   = 9200

# Local-grid -> UTM offsets (apply BEFORE transforming to geographic)
UTM_E_OFFSET = 715_901.84
UTM_N_OFFSET = 1_584_182.68

# CRS (example: UTM zone 15N; change if needed)
CRS_UTM = CRS.from_epsg(32615)
CRS_WGS84 = CRS.from_epsg(4326)
TO_WGS84 = Transformer.from_crs(CRS_UTM, CRS_WGS84, always_xy=True)

'''

# Assumed source location: used below as the grid centre and passed to the
# topo-map kwargs as "dome_location".
# NOTE(review): this was labelled "Montserrat constants", but the coordinates
# fall inside REGION_DEFAULT, which later cells associate with Fuego/Ceniza -- confirm.
dome_location = {'lat': 14.475, 'lon':-90.88}
print("Dome (assumed source) =", dome_location)

# Grid extent and node spacing (metres, going by the *_m keyword names of make_grid)
GRID_SIZE_LAT_M = 18_000
GRID_SIZE_LON_M = 18_000
NODE_SPACING_M  = 50


# Build the node grid centred on the dome; no DEM is attached (dem=None).
gridobj = make_grid(
    center_lat=dome_location["lat"],
    center_lon=dome_location["lon"],
    node_spacing_m=NODE_SPACING_M,
    grid_size_lat_m=GRID_SIZE_LAT_M,
    grid_size_lon_m=GRID_SIZE_LON_M,
    dem=None,
)
print(gridobj)

# Keyword arguments shared by every pygmt topo-map call in this notebook
# (passed to gridobj.plot here and to run_single_event in the last cell).
topo_kw = {
    "inv": INV,
    "add_labels": True,
    "cmap": "gray",
    "region": REGION_DEFAULT,
    # Basemap shading from a GeoTIFF. DEM_DEFAULT is None, so none is supplied;
    # per the author's note it is only used when topo_color=True and cmap=None.
    "dem_tif": DEM_DEFAULT,
    "frame": True,
    "dome_location": dome_location,
    "topo_color": False,
}

# Draw the grid nodes over the topo map. The trailing semicolon keeps Jupyter
# from echoing the call's return value under the cell.
gridobj.plot(show=True, min_display_spacing=300, scale=2.0, topo_map_kwargs=topo_kw);



In [None]:
from pathlib import Path
from flovopy.asl.find_channels import run_find_channels, nodegrid_from_channels_dir

# Same data folder as the first cell, re-declared so this cell can find its
# output directory on its own.
DATA_DIR = Path.home() / "Dropbox" / "BRIEFCASE" / "SSADenver" / "Jacob"
outdir = DATA_DIR / "jacob_channels"

# NOTE(review): REGION is defined here but never used -- the call below passes
# REGION_DEFAULT (from the first cell). Confirm which box is intended.
REGION = [-90.96, -90.80, 14.39, 14.51]  # tighter box around Fuego/Ceniza; tweak if needed

# Run the pipeline using only supported args; results are written under `outdir`.
run_find_channels(
    region=REGION_DEFAULT,
    outdir=outdir,
    earth_relief="01s",
    # Extra command-line style flags, given as flag/value string pairs.
    extra_args=[
        "--prep",
        "--breach",
        "--fa-percentile", "90",   # try 85–92 if Ceniza is still thin
        "--min-cells", "60",
        "--top-n", "60",           # keep more channels, prune later
        "--min-len-m", "150",      # don't prune too aggressively here
        # "--no-plots",            # optional
        # "--no-nodegrid",         # optional (we'll build NodeGrid explicitly below)
    ],
)

# Build a NodeGrid directly from the outputs written to `outdir`
ng = nodegrid_from_channels_dir(outdir, approx_spacing_m=20.0)
print(f"NodeGrid nodes: {ng.node_lon.size}  spacing≈{ng.approx_spacing_m} m  dem_tag={ng.dem_tag}")

In [None]:
# Mask the regular ASL grid with the channel NodeGrid built in the previous cell.
# With return_matches=True the call yields three values: the masked grid, a
# mask, and the node matches.
# NOTE(review): exact shapes/semantics of mask2d and matches are defined by
# flovopy's NodeGrid.mask_grid_with_nodes -- confirm there.
channels_grid, mask2d, matches = ng.mask_grid_with_nodes(
    gridobj,
    k=12,            # broaden neighbor search a bit
    max_m=40.0,      # allow 40 m snap distance
    flatten_copy=True,
    return_matches=True,
)

# Plot the masked grid as red circles on the shared topo map.
channels_grid.plot(
    topo_map_kwargs=topo_kw,
    symbol="c", scale=2.0, fill="red", force_all_nodes=True, show=True,
);

In [None]:
from pathlib import Path
import numpy as np
import subprocess, json, math
import rasterio as rio
from rasterio.features import shapes
import geopandas as gpd
from shapely.geometry import shape, LineString
from shapely.ops import linemerge
from pyproj import CRS

# Executable name (or full path) used as the first element of every command
# passed to _run. NOTE(review): change this if your WhiteboxTools binary has a
# different name on PATH (e.g. `whitebox_tools`).
WBT = "whitebox"

def _run(cmd, cwd=None):
    print("[cmd]", " ".join(cmd))
    r = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    if r.returncode != 0:
        print(r.stdout); print(r.stderr)
        raise RuntimeError(f"cmd failed: {r.returncode}")
    return r.stdout

def extract_streams_ceniza_friendly(
    dem_utm_tif: Path, outdir: Path, summit_lon=-90.88, summit_lat=14.475,
    fa_q=0.88, min_cells=40, min_len_m=1200, quadrant=(210, 270)  # SW wedge
):
    """
    Derive a stream network from a DEM with WhiteboxTools, vectorize it, and
    keep only the pieces inside an azimuth wedge as seen from the summit.

    DEM must be UTM meters. Produces, inside ``outdir``:
      - 03_dem_breached.tif  (breached DEM)
      - 04_dinf_pntr.tif     (D-Infinity flow pointer)
      - 04_fa_dinf.tif       (flow accumulation)
      - 05_streams_bin.tif   (binary mask)
      - 05_streams_clean.tif (thinned + de-spurred)
      - 05_streams.gpkg      (vectorized, pruned to quadrant & length)

    Parameters
    ----------
    dem_utm_tif : Path
        Input DEM GeoTIFF. Its CRS is assigned to the output vectors and is
        the target CRS when projecting the summit.
    outdir : Path
        Output directory; created (with parents) if missing.
    summit_lon, summit_lat : float
        Summit position in EPSG:4326 degrees.
    fa_q : float
        Quantile (0-1) of the flow-accumulation values that are at least
        ``min_cells``; the result is the stream-extraction threshold.
    min_cells : float
        Smallest flow accumulation considered when picking the threshold, and
        the threshold itself when no cell reaches it.
    min_len_m : float
        Minimum geometry length (in DEM CRS units) to keep.
    quadrant : (az0, az1)
        Azimuth limits in degrees, measured clockwise from grid north at the
        summit. ``az0 > az1`` means the wedge wraps through 0 degrees.

    Returns
    -------
    dict or None
        Paths under the keys ``fa_tif``, ``streams_bin``, ``streams_clean``
        and ``streams_vec``; ``None`` if vectorization produced no segments.

    Raises
    ------
    RuntimeError
        Propagated from ``_run`` when a WhiteboxTools command fails.
    """
    outdir = Path(outdir); outdir.mkdir(parents=True, exist_ok=True)
    dem_breached = outdir/"03_dem_breached.tif"
    pntr = outdir/"04_dinf_pntr.tif"
    fa   = outdir/"04_fa_dinf.tif"
    streams_bin = outdir/"05_streams_bin.tif"
    streams_thn = outdir/"05_streams_clean.tif"
    streams_vec = outdir/"05_streams.gpkg"

    # 1) Mild breach + D-Infinity pointer/FA
    _run([WBT, f'--run=BreachDepressionsLeastCost', f'--dem={dem_utm_tif}', f'--output={dem_breached}', '-v'])
    _run([WBT, f'--run=DInfPointer', f'--dem={dem_breached}', f'--output={pntr}', '-v'])
    _run([WBT, f'--run=DInfFlowAccumulation', f'--dem={dem_breached}', f'--output={fa}', '--out_type=cells', '-v'])

    # 2) Choose FA threshold from local histogram (cone flanks)
    with rio.open(fa) as ds:
        fa_arr = ds.read(1, masked=True)
        # ignore nodata and very low FA
        vals = np.asarray(fa_arr.compressed(), float)
        vals = vals[vals >= min_cells]
        # Fall back to min_cells when nothing survives the filter.
        thr  = np.quantile(vals, fa_q) if vals.size else float(min_cells)
        print(f"[FA] q={fa_q:.2f}  -> threshold ~ {thr:.1f} cells")

    # 3) Extract, thin, remove spurs (binary lines)
    # NOTE(review): the raster passed as --d8_pntr is the D-Infinity pointer
    # created above; confirm ExtractStreams accepts/uses this argument.
    _run([WBT, f'--run=ExtractStreams', f'--flow_accum={fa}', f'--threshold={thr}',
          f'--d8_pntr={pntr}', f'--output={streams_bin}', '-v'])
    _run([WBT, f'--run=LineThinning', f'--i={streams_bin}', f'--output={streams_thn}', '-v'])
    # RemoveSpurs overwrites the thinned raster in place (same input and output).
    _run([WBT, f'--run=RemoveSpurs', f'--input={streams_thn}', f'--output={streams_thn}', '--iterations=2', '-v'])

    # 4) Raster -> vector (and prune)
    lines = []
    with rio.open(streams_thn) as ds:
        mask = ds.read(1) > 0
        shapes_iter = shapes(mask.astype(np.uint8), mask=None, transform=ds.transform)
        for geom, val in shapes_iter:
            if val != 1:
                continue  # background region
            # The skeleton pixels are polygonized and the polygon boundaries
            # are used as proxy polylines (not true pixel-centre traces).
            boundary = shape(geom).boundary
            rings = boundary.geoms if hasattr(boundary, "geoms") else [boundary]
            for seg in rings:
                if seg.length > 0:
                    lines.append(seg)

    if not lines:
        print("[warn] no stream segments created."); return None

    # Read the DEM's CRS once, closing the dataset straight away; it is reused
    # for the GeoDataFrame and for projecting the summit.
    with rio.open(dem_utm_tif) as dem_ds:
        utm = CRS.from_wkt(dem_ds.crs.wkt)

    gdf = gpd.GeoDataFrame(geometry=lines, crs=utm)
    gdf["len_m"] = gdf.length
    gdf = gdf[gdf["len_m"] >= float(min_len_m)]

    # 5) Keep quadrant rays from summit (helps isolate Ceniza)
    #    Compute azimuth at each segment midpoint relative to summit and keep within wedge.
    from pyproj import Transformer
    to_utm = Transformer.from_crs("EPSG:4326", utm, always_xy=True)
    sx, sy = to_utm.transform(summit_lon, summit_lat)

    def _azi(x0,y0,x1,y1):
        # atan2(dx, dy): angle clockwise from the +y axis, folded into [0, 360).
        return (math.degrees(math.atan2(x1-x0, y1-y0)) + 360) % 360

    az0, az1 = quadrant
    keep = []
    for geom in gdf.geometry:
        m = geom.interpolate(0.5, normalized=True)  # point halfway along the geometry
        ax = _azi(sx, sy, m.x, m.y)
        if az0 <= az1:
            ok = az0 <= ax <= az1
        else:
            # Wedge wraps through north (e.g. 300 -> 30).
            ok = ax >= az0 or ax <= az1
        keep.append(ok)
    gdf = gdf[np.array(keep, bool)]
    if gdf.empty:
        # Best-effort fallback: write every vectorized segment, ignoring both
        # the length and the wedge filters.
        print("[warn] no segments within quadrant; writing unpruned result.")
        gdf = gpd.GeoDataFrame(geometry=lines, crs=utm)

    gdf.to_file(streams_vec, driver="GPKG")
    print("→", streams_vec)
    return {
        "fa_tif": fa, "streams_bin": streams_bin, "streams_clean": streams_thn, "streams_vec": streams_vec
    }

In [None]:
# Input DEM for stream extraction, expected in `outdir` (set in the
# channel-finding cell). If it does not exist yet, reproject your DEM to
# UTM zone 15N first.
dem_utm = outdir / "03_dem_utm.tif"   # adjust if different
# Outputs are written into the same `outdir` as the channel-finding products.
res = extract_streams_ceniza_friendly(
    dem_utm_tif=dem_utm, outdir=outdir,
    summit_lon=-90.88, summit_lat=14.475,
    fa_q=0.88, min_cells=40, min_len_m=1200, quadrant=(210,270)  # SW wedge
)

# Plot the result quickly with GeoPandas, reprojected to WGS84 for overlay.
# `res` is None when no stream segments were produced, hence the guard.
if res:
    gdf = gpd.read_file(res["streams_vec"])
    gdf_ll = gdf.to_crs(4326)
    ax = gdf_ll.plot(figsize=(8,8), linewidth=1, color="red")

## 2. Load seismic data

In [None]:
from obspy import Stream, read
# Collect the vertical-component (HHZ) MiniSEED files into one Stream.
MSEED_DIR = DATA_DIR / "ClipMSEED"
st = Stream()
# Sort the listing: glob yields files in filesystem order, and a later cell
# takes its reference start time from the first trace in the stream.
for f in sorted(MSEED_DIR.glob('*.mseed')):
    # Match the channel code against the file name only, not the whole path.
    if 'HHZ' in f.name:
        print(f)
        # Only the first trace of each file is kept.
        tr = read(f)[0]
        st.append(tr)
print(st)
st.plot();


## 3. Remove instrument response

In [None]:
# The return values are discarded, so this cell relies on ObsPy modifying `st`
# in place. NOTE(review): re-running the cell would then process data that is
# already corrected -- reload `st` first.
st.detrend('linear')
# Four corner frequencies handed to remove_response as its pre-filter.
# NOTE(review): presumably in Hz -- confirm against the ObsPy documentation.
pre_filt = [0.5, 1.0, 80.0, 95.00]
# Deconvolve the instrument response using the inventory, requesting 'VEL' output.
st.remove_response(pre_filt=pre_filt, inventory=INV, output='VEL')
st.plot();


In [None]:
# Disabled snippet (decimate a copy of the stream and plot spectrograms), kept
# for reference inside an inert string literal; none of it is executed.
'''
st_downsampled = st.copy()
st_downsampled.decimate(factor=4)

st_downsampled.spectrogram(dbscale=False)
'''

## 4. Configure

In [None]:
# --- Event window and physical parameters ---
window_seconds = 10
# Passed to ASLConfig as `speed`.
# NOTE(review): the disabled legacy block uses 1250.0 m/s, so 1.25 is
# presumably km/s -- confirm the unit ASLConfig expects.
beta = 1.25
# Event window, in seconds after the start of the first trace.
t_start_sec = 8000
t_end_sec   = 9200
# Work on a copy so the full-length stream `st` stays untouched.
event_st = st.copy()
t0 = event_st[0].stats.starttime
event_st.trim(starttime = t0+t_start_sec, endtime=t0+t_end_sec)
peakf = 5.0
Q = 30.0

cfg = ASLConfig(
    inventory=INV,
    output_base=DATA_DIR / "asl_results",
    gridobj=gridobj,
    wave_kind='surface',
    speed=beta,
    peakf = peakf,
    Q = Q,
    window_seconds=window_seconds,
    global_cache='/tmp',
    station_correction_dataframe=None,
    # Use the notebook-wide constant from the first cell ("2d") instead of
    # repeating the literal here.
    dist_mode=DIST_MODE,
    misfit_engine="lin",
    min_stations=4,
    sam_class=VSAM,
    sam_metric="mean",
    debug=True,
)
cfg.build()



## 5. Locate

In [None]:
# Write the trimmed event stream to a MiniSEED file, because run_single_event
# is given a file path rather than an in-memory Stream.
mseed_file = '/tmp/jacob_event.mseed'
event_st.write(mseed_file, format='MSEED')
# Locate the event with the configuration built in the previous cell, reusing
# the shared topo-map kwargs for its maps.
result = run_single_event(
    mseed_file=mseed_file,
    cfg=cfg,
    station_gains_df=None,
    switch_event_ctag = True,
    topo_kw=topo_kw,
    # Units of the data in the MiniSEED file - probably "Counts" or "m/s".
    # NOTE(review): 'm/s' presumably matches the 'VEL' response removal above -- confirm.
    mseed_units='m/s',
    reduce_time=True,
    refine_sector=False,
    debug=True,
)