In [1]:
from __future__ import annotations

from pathlib import Path
from typing import Optional, List, Tuple, Union, Dict, Any
from datetime import datetime, timezone, timedelta
import re

import numpy as np
import pandas as pd
from scipy.io import loadmat

# Optional: allow ObsPy types without a hard dependency
try:
    from obspy import UTCDateTime  # type: ignore
except Exception:  # pragma: no cover
    UTCDateTime = object  # for typing only


# ---------------------------------------------------------------------
# .mat → DataFrame
# ---------------------------------------------------------------------

# Matches 'YYYY-mm-dd-HHMM-SS' (optionally with a third digit in SS)
_START_RE = re.compile(r"(?P<stamp>\d{4}-\d{2}-\d{2}-\d{4}-\d{2,3})")

# Optional arrays we’ll copy into the DataFrame if present
_OPTIONAL_SERIES_KEYS: Tuple[str, ...] = (
    "rms_fin", "res_ry_fin", "res_ss_fin", "res_by_fin",
    "res_gh_fin", "res_gb_fin", "res_wh_fin", "res_lg_fin", "res_mh_fin",
    "start_point", "end_point", "FFT_v", "cut_off_v", "damp_v",
)


def _parse_start_time_from_filename(path: Union[str, Path]) -> datetime:
    """
    Read the 'YYYY-mm-dd-HHMM-SS' stamp out of a file name and return it as a
    timezone-aware UTC datetime.

    Only the final path component is searched. A three-digit seconds field is
    cut back to its first two digits before parsing (e.g. 425 → 42).

    Raises ValueError when the name contains no such stamp.
    """
    filename = Path(path).name
    match = _START_RE.search(filename)
    if match is None:
        raise ValueError(f"Cannot infer start time from filename: {filename}")

    stamp = match.group("stamp")
    # The seconds field is whatever follows the last dash.
    head, seconds = stamp.rsplit("-", 1)
    if len(seconds) == 3:
        stamp = f"{head}-{seconds[:2]}"

    naive = datetime.strptime(stamp, "%Y-%m-%d-%H%M-%S")
    return naive.replace(tzinfo=timezone.utc)


def _as_scalar(x: Any, default: Optional[float] = None) -> Optional[float]:
    """Return x as a Python float scalar if possible; otherwise default."""
    if x is None:
        return default
    arr = np.asarray(x)
    if arr.size == 0:
        return default
    return float(arr.squeeze())


def mat_to_dataframe(
    mat_path: Union[str, Path],
    *,
    fs_hz: float = 75.0,
    save_csv: bool = True,
    outdir: Optional[Union[str, Path]] = None,
) -> pd.DataFrame:
    """
    Convert one AMPMAP .mat file into a tidy DataFrame with columns:
      t (ISO8601 'Z'), lat, lon, DR, and optional Q + residuals if present.

    Time vector is inferred from filename start time and (time_step_v / fs_hz).

    Parameters
    ----------
    mat_path : file to load with scipy.io.loadmat; its name must contain a
        'YYYY-mm-dd-HHMM-SS' start stamp.
    fs_hz : divisor applied to 'time_step_v' to get the frame spacing in seconds.
    save_csv : when True, also write '<stem>.csv' (no index column).
    outdir : directory for that CSV; defaults to the .mat file's own directory.
        Created if it does not exist.

    Returns
    -------
    DataFrame with one row per frame. The frame count is the smaller of the
    number of 'loc_fin' rows and the number of 'amp_fin' elements.

    Raises
    ------
    ValueError : 'loc_fin' or 'amp_fin' is missing, 'loc_fin' is not (N, 2),
        or the filename carries no start stamp.
    """
    mat_path = Path(mat_path)
    d: Dict[str, Any] = loadmat(mat_path, squeeze_me=True)

    # Test for presence on the raw values: np.asarray(None) is a 0-d object
    # array, never None, so checking after conversion could not detect a
    # missing variable.
    raw_loc = d.get("loc_fin")
    raw_amp = d.get("amp_fin")
    if raw_loc is None or raw_amp is None:
        raise ValueError(f"{mat_path.name}: requires 'loc_fin' and 'amp_fin'")

    loc_fin = np.asarray(raw_loc)
    # squeeze_me=True collapses a single-frame (1, 2) matrix to shape (2,);
    # restore the row axis so one-frame files are accepted.
    if loc_fin.ndim == 1 and loc_fin.size == 2:
        loc_fin = loc_fin.reshape(1, 2)
    if loc_fin.ndim != 2 or loc_fin.shape[1] != 2:
        raise ValueError(f"{mat_path.name}: loc_fin must be (N, 2) of [lat, lon]")

    amp_fin = np.asarray(raw_amp).reshape(-1)

    # Frame count: truncate both inputs to the shorter of the two
    N = min(loc_fin.shape[0], int(amp_fin.size))
    loc_fin = loc_fin[:N, :]
    amp_fin = amp_fin[:N]

    # Scalars and timing
    Q = _as_scalar(d.get("Q_v"))
    # `or 256.0` also replaces a stored step of 0 with the 256-sample default
    time_step_samples = _as_scalar(d.get("time_step_v"), default=256.0) or 256.0
    dt_sec = float(time_step_samples) / float(fs_hz)

    t0 = _parse_start_time_from_filename(mat_path)
    # NOTE(review): datetime.isoformat() drops the fractional part when the
    # microsecond field is zero, so this column can mix 'HH:MM:SSZ' and
    # 'HH:MM:SS.ffffffZ' layouts — confirm downstream parsers accept both.
    times = [
        (t0 + timedelta(seconds=i * dt_sec)).isoformat().replace("+00:00", "Z")
        for i in range(N)
    ]

    # Base DataFrame
    df = pd.DataFrame(
        {
            "t": times,
            "lat": loc_fin[:, 0],
            "lon": loc_fin[:, 1],
            "DR": amp_fin.astype(float),
        }
    )
    if Q is not None:
        df["Q"] = Q  # scalar, repeated on every row

    # Optional arrays (scalar repeated or vector per frame)
    for key in _OPTIONAL_SERIES_KEYS:
        val = d.get(key)
        if val is None:
            continue
        arr = np.asarray(val)
        if arr.ndim == 0 or arr.size == 1:
            df[key] = _as_scalar(arr)
        else:
            flat = arr.squeeze()
            # Vectors shorter than N, and anything still multi-dimensional
            # after squeezing, are skipped without a column.
            if flat.ndim == 1 and flat.size >= N:
                df[key] = flat[:N].astype(float)

    if save_csv:
        odir = Path(outdir) if outdir is not None else mat_path.parent
        odir.mkdir(parents=True, exist_ok=True)
        out_csv = odir / f"{mat_path.stem}.csv"
        df.to_csv(out_csv, index=False)

    return df


def batch_convert_ampmap_dir(
    mat_dir: Union[str, Path],
    *,
    fs_hz: float = 75.0,
    outdir: Optional[Union[str, Path]] = None,
    glob_pattern: str = "*.mat",
    tag: str = "VSAM_mean_13s_surface_v1.5_Q25_F8_2d_l2",
    overwrite: bool = False,
) -> List[Path]:
    """
    Convert all matching .mat files to CSV and store them in a per-event/tag layout:

        <outdir or mat_dir>/<mat_stem>/<tag>/source_<tag>.csv

    Files are processed in sorted path order. When a target CSV already exists
    and `overwrite` is False, that file is skipped before the .mat file is
    loaded, so no conversion work is done for it and it is not included in the
    result.

    Returns a list of written CSV Paths.
    """
    mat_dir = Path(mat_dir)
    base_out = Path(outdir) if outdir is not None else mat_dir

    written: List[Path] = []
    for mat_path in sorted(mat_dir.glob(glob_pattern)):
        csv_path = base_out / mat_path.stem / tag / f"source_{tag}.csv"

        # Decide whether to skip first: loading and converting a .mat file is
        # the expensive step and is pointless if the result will be discarded.
        if csv_path.exists() and not overwrite:
            continue

        # Convert in-memory; write to the desired structured path
        df = mat_to_dataframe(mat_path, fs_hz=fs_hz, save_csv=False)
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(csv_path, index=False)
        written.append(csv_path)

    return written


# ---------------------------------------------------------------------
# Time parsing helper (shared across your code)
# ---------------------------------------------------------------------

def _to_utc_datetime(dt_like: Union[str, datetime, "UTCDateTime", pd.Timestamp]) -> datetime:
    """Normalize input into a timezone-aware UTC datetime."""
    if isinstance(dt_like, pd.Timestamp):
        return (
            dt_like.tz_convert("UTC").to_pydatetime()
            if dt_like.tzinfo
            else dt_like.tz_localize("UTC").to_pydatetime()
        )
    if UTCDateTime and isinstance(dt_like, UTCDateTime):  # type: ignore
        return dt_like.datetime.replace(tzinfo=timezone.utc)
    if isinstance(dt_like, datetime):
        return dt_like.astimezone(timezone.utc) if dt_like.tzinfo else dt_like.replace(tzinfo=timezone.utc)
    if isinstance(dt_like, str):
        try:
            d = datetime.fromisoformat(dt_like)
        except ValueError:
            d = datetime.strptime(dt_like, "%Y-%m-%d-%H%M-%S")
        return d.astimezone(timezone.utc) if d.tzinfo else d.replace(tzinfo=timezone.utc)
    raise TypeError(f"Unsupported datetime-like type: {type(dt_like)}")

# Disabled code: the function below is wrapped in a bare triple-quoted string,
# so it is never defined; the cell merely evaluates (and echoes) the string.
# It calls plot_heatmap_colored, which is not defined or imported in the code
# shown in this notebook — NOTE(review): restore that dependency before
# re-enabling, or delete this block if the flovopy wrappers supersede it.
'''
def make_asl_heatmap_for_ampmap(
    startdate: Union[str, "UTCDateTime", pd.Timestamp, datetime],
    enddate:   Union[str, "UTCDateTime", pd.Timestamp, datetime],
    *,
    localprojectdir: Path,
    lat_col: str = "lat",
    lon_col: str = "lon",
    amp_col: str = "DR",
    # Optional row filters (only applied if column exists)
    misfit_max: Optional[float] = None,
    nsta_min: Optional[int] = None,
    connectedness_min: Optional[float] = None,
    azgap_max: Optional[float] = None,
    dr_min: Optional[float] = None,
    dr_max: Optional[float] = None,
    # Plot options
    inventory=None,
    cmap: str = "turbo",
    log_scale: bool = True,
    node_spacing_m: int = 50,
    region: Optional[List[float]] = None,
    dem_tif: Optional[Union[str, Path]] = None,
    title_fmt: str = "ASL Heatmap (AMPMAP): {start}–{end} UTC",
    outfile: Optional[Union[str, Path]] = None,  # e.g. "heatmaps/ampmap_{start}_{end}.png"
    # File discovery
    glob_pattern: str = "*.csv",
    recursive: bool = False,
    topo_kw: Optional[Dict[str, Any]] = None,
    return_df: bool = True,
) -> Tuple["pygmt.Figure", Optional[pd.DataFrame]]:
    """
    Load AMPMAP CSVs from a directory, filter by time and optional quality fields,
    concatenate rows, and plot a heatmap.
    """
    lp = Path(localprojectdir)
    if not lp.exists():
        raise FileNotFoundError(f"localprojectdir not found: {lp}")

    start_dt = _to_utc_datetime(startdate)
    end_dt   = _to_utc_datetime(enddate)
    if end_dt < start_dt:
        raise ValueError("enddate is earlier than startdate")

    rows: List[pd.DataFrame] = []
    csv_iter = lp.rglob(glob_pattern) if recursive else lp.glob(glob_pattern)

    for csv_path in csv_iter:
        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            print(f"[warn] Could not read {csv_path}: {e}")
            continue

        # Require t, lat, lon, DR (or custom amp_col)
        needed = {"t", lat_col, lon_col, amp_col}
        if not needed.issubset(df.columns):
            continue

        t_parsed = pd.to_datetime(df["t"], utc=True, errors="coerce")
        mask_time = (t_parsed >= pd.Timestamp(start_dt)) & (t_parsed <= pd.Timestamp(end_dt))
        df = df.loc[mask_time].copy()
        if df.empty:
            continue

        # Optional filters (only if columns exist)
        if misfit_max is not None and "misfit" in df.columns:
            df = df[df["misfit"] <= misfit_max]
        if nsta_min is not None and "nsta" in df.columns:
            df = df[df["nsta"] >= nsta_min]
        if connectedness_min is not None and "connectedness" in df.columns:
            df = df[df["connectedness"] >= connectedness_min]
        if azgap_max is not None and "azgap" in df.columns:
            df = df[df["azgap"] <= azgap_max]
        if dr_min is not None:
            df = df[df[amp_col] >= dr_min]
        if dr_max is not None:
            df = df[df[amp_col] <= dr_max]
        if df.empty:
            continue

        df = df[[lat_col, lon_col, amp_col]].astype(float)
        finite = np.isfinite(df[lat_col]) & np.isfinite(df[lon_col]) & np.isfinite(df[amp_col])
        df = df.loc[finite]
        if not df.empty:
            rows.append(df)

    if not rows:
        raise ValueError("No AMPMAP CSV rows matched the date range and filters in the specified directory.")

    df_all = pd.concat(rows, ignore_index=True)

    start_str = pd.Timestamp(start_dt).strftime("%Y-%m-%d %H:%M:%S")
    end_str   = pd.Timestamp(end_dt).strftime("%Y-%m-%d %H:%M:%S")
    title = title_fmt.format(start=start_str, end=end_str)

    # Allow tokens in outfile
    if outfile:
        start_safe = pd.Timestamp(start_dt).strftime("%Y%m%dT%H%M%S")
        end_safe   = pd.Timestamp(end_dt).strftime("%Y%m%dT%H%M%S")
        outfile = str(outfile).format(start=start_safe, end=end_safe)

    fig = plot_heatmap_colored(
        df_all,
        lat_col=lat_col,
        lon_col=lon_col,
        amp_col=amp_col,
        inventory=inventory,
        cmap=cmap,
        log_scale=log_scale,
        node_spacing_m=node_spacing_m,
        outfile=outfile,
        region=region,
        title=title,
        dem_tif=dem_tif,
        topo_kw=topo_kw,
    )

    return (fig, df_all) if return_df else (fig, None)
'''



'\ndef make_asl_heatmap_for_ampmap(\n    startdate: Union[str, "UTCDateTime", pd.Timestamp, datetime],\n    enddate:   Union[str, "UTCDateTime", pd.Timestamp, datetime],\n    *,\n    localprojectdir: Path,\n    lat_col: str = "lat",\n    lon_col: str = "lon",\n    amp_col: str = "DR",\n    # Optional row filters (only applied if column exists)\n    misfit_max: Optional[float] = None,\n    nsta_min: Optional[int] = None,\n    connectedness_min: Optional[float] = None,\n    azgap_max: Optional[float] = None,\n    dr_min: Optional[float] = None,\n    dr_max: Optional[float] = None,\n    # Plot options\n    inventory=None,\n    cmap: str = "turbo",\n    log_scale: bool = True,\n    node_spacing_m: int = 50,\n    region: Optional[List[float]] = None,\n    dem_tif: Optional[Union[str, Path]] = None,\n    title_fmt: str = "ASL Heatmap (AMPMAP): {start}–{end} UTC",\n    outfile: Optional[Union[str, Path]] = None,  # e.g. "heatmaps/ampmap_{start}_{end}.png"\n    # File discovery\n    glob_patte

In [None]:
from pathlib import Path

from obspy import UTCDateTime
# Imported in this cell so it runs on a fresh kernel; without it the call
# below depends on a later cell having been executed first.
from flovopy.asl.wrappers import make_asl_heatmap_from_events

# Project locations, all derived from the user's home directory
HOME        = Path.home()
PROJECTDIR  = HOME / "Dropbox" / "BRIEFCASE" / "SSADenver"
AMP_CSV_DIR = PROJECTDIR / "AMPMAP_CSV"
DEM_DEFAULT = PROJECTDIR / "metadata" / "MONTSERRAT_DEM_WGS84_MASTER.tif"
HEATMAP_DIR = PROJECTDIR / "heatmaps"
HEATMAP_DIR.mkdir(parents=True, exist_ok=True)

# Define time windows: two consecutive spans sharing the `mid` boundary
start = UTCDateTime(2001, 2, 6, 12, 0, 0)
mid   = UTCDateTime(2001, 2, 16, 0, 0, 0)
end   = UTCDateTime(2001, 7, 2, 12, 0, 0)
windows = [(start, mid), (mid, end)]

# Common args
# NOTE(review): another cell in this notebook calls the same function with
# `outfile=` and `tag=`; confirm that `outfile_pattern` and its {tag} token
# are accepted here, since no tag is passed.
common = dict(
    localprojectdir=AMP_CSV_DIR,
    cmap="turbo",
    log_scale=True,
    node_spacing_m=100,
    dem_tif=DEM_DEFAULT,
    return_df=True,
    region=[-62.208, -62.138, 16.681, 16.751],
    outfile_pattern=str(HEATMAP_DIR / "ampmap_{tag}_{start}_{end}.png"),
)

# One heatmap per window; results are keyed by the (start, end) pair
results = {}
for s, e in windows:
    res = make_asl_heatmap_from_events(
        startdate=s,
        enddate=e,
        **common,
    )
    results[(s, e)] = res




In [None]:
# Heatmap from the new ASL locations, but using the AMPMAP settings.
# Relies on Path, UTCDateTime and PROJECTDIR from earlier cells.
from flovopy.asl.wrappers import make_asl_heatmaps_per_tag

# Derived from the home directory instead of a hard-coded /Users/<name> path,
# matching how PROJECTDIR is built elsewhere in this notebook.
AMPMAP_RESULTS_DIR = Path.home() / "Dropbox" / "AMPMAP_RESULTS"

tag = "VSAM_mean_5s_surface_v1.5_Q23_F8_2d_l2"
startt = UTCDateTime("2001-02-06")
endt = UTCDateTime("2001-03-04")
res = make_asl_heatmaps_per_tag(
    startt,
    endt,
    localprojectdir=str(AMPMAP_RESULTS_DIR),
    tag=[tag],
    nsta_min=5,
    misfit_max=0.30,
    # NOTE(review): this is an f-string, so the tag and times are substituted
    # here and no {placeholders} are left for the callee — confirm that is
    # what `outfile_pattern` expects.
    outfile_pattern=str(PROJECTDIR / "heatmaps" / f"{tag}_{startt}_{endt}.png"),
)

In [5]:
from flovopy.asl.wrappers import make_asl_heatmap_from_events

# Relies on Path and UTCDateTime from the first cell.
HOME = Path.home()
PROJECTDIR      = HOME / "Dropbox" / "BRIEFCASE" / "SSADenver"
LOCALPROJECTDIR = HOME / "work" / "PROJECTS" / "SSADenver_local"
METADATA_DIR    = PROJECTDIR / "metadata"
DEM_DEFAULT     = METADATA_DIR / "MONTSERRAT_DEM_WGS84_MASTER.tif"
# Derived from HOME instead of a hard-coded /Users/<name> path
AMPMAP_RESULTS_DIR = HOME / "Dropbox" / "AMPMAP_RESULTS"

tag = "VSAM_mean_5s_surface_v1.5_Q23_F8_2d_l2"

# Time windows to plot, in order; each one produces its own heatmap file.
event_windows = [
    (UTCDateTime("2001-02-06"), UTCDateTime("2001-03-04")),
    (UTCDateTime("2001-03-04"), UTCDateTime("2001-03-09 12:00:00")),
]

# After the loop `startt`, `endt` and `fig` refer to the last window, exactly
# as they did when the two calls were written out one after the other.
for startt, endt in event_windows:
    fig = make_asl_heatmap_from_events(
        startdate=startt,
        enddate=endt,
        localprojectdir=str(AMPMAP_RESULTS_DIR),
        tag=tag,
        # region=[-62.25, -62.10, 16.65, 16.78],
        dem_tif=DEM_DEFAULT,
        # outfile="asl_heatmap_{start}_{end}.png",
        # nsta_min=5,
        # misfit_max=0.30,
        # return_df=True,
        # verbose=True,
        outfile=str(PROJECTDIR / "heatmaps" / f"{tag}_{startt}_{endt}.png"),
    )
