In [None]:
# ============================== modis_download.py ==============================
"""
MODIS Downloader (Terra/Aqua) via Earthdata/CMR (earthaccess)
===============================================================================

What this script does
---------------------
• Authenticates with NASA Earthdata:
    - Prefers token: EARTHDATA_TOKEN or EARTHACCESS_TOKEN
    - Else EARTHDATA_USERNAME / EARTHDATA_PASSWORD (env)
    - Else interactive prompt (safe for local runs)
• Reads an AOI shapefile (any CRS), computes its WGS84 bounding box.
• For each calendar day in START_DATE..END_DATE:
    - Creates BASEDIR/YYYY-MM-DD/raw_modis/
    - (Optional) Downloads Thermal L1B (MOD021KM/MYD021KM) and GEO (MOD03/MYD03)
      and writes a pairing manifest: manifest_pairs_modis.csv
    - (Optional) Downloads Active Fire L2 (MOD14/MYD14)
      and writes AF manifest: manifest_active_fire_modis.csv

Outputs (per-day)
-----------------
• Thermal manifest: BASEDIR/YYYY-MM-DD/manifest_pairs_modis.csv
    columns: l1b_path, geo_path, platform, timestamp_key
• AF manifest:      BASEDIR/YYYY-MM-DD/manifest_active_fire_modis.csv
    columns: product, file_path, timestamp_key, has_internal_geo, paired_geo_path

Notes / Assumptions
-------------------
• Pairing uses the MODIS/VIIRS-like timestamp key found in file names: ".Ayyyyddd.HHMM."
• For AF products, an internal lat/lon check is attempted (HDF4 via optional pyhdf).
  If missing, we provide a GEO path when available (from downloaded Thermal pairs).

Dependencies
------------
pip install earthaccess fiona shapely pyproj pyhdf
"""

from __future__ import annotations

# ============================== Standard libs =================================
import os
import re
import csv
import socket
from pathlib import Path
from datetime import date, timedelta
from typing import Iterable, Optional

# ============================== Third-party ===================================
import earthaccess as ea
import fiona
from shapely.geometry import shape
from shapely.ops import unary_union, transform as shp_transform
from pyproj import CRS as PJCRS, Transformer

# ================================ SWITCHES ====================================
# Toggle downloads (preserved defaults)
# At least one of these must be True; main() exits early when both are False.
DOWNLOAD_THERMAL = True   # L1B+GEO (radiance/BT workflows)
DOWNLOAD_AF      = True    # Active Fire swath L2 (MOD14/MYD14)

# Platforms to fetch (preserved defaults)
GET_TERRA = True           # Terra: MOD*
GET_AQUA  = True           # Aqua : MYD*

# ================================= CONFIG =====================================
# BASEDIR    : root folder; main() creates BASEDIR/YYYY-MM-DD/raw_modis/ per day.
# AOI_SHP    : AOI shapefile; its WGS84 bounding box is used for granule search.
# START_DATE : first calendar day processed (inclusive).
# END_DATE   : last calendar day processed (inclusive).
BASEDIR    = Path(r"path\to\base\directory")
AOI_SHP    = Path(r"path\to\F002_L1__IR__L2L1M0__2025-01-10T215412.018348Z_2025-04-10T154832.806087Z_97706189_MWIR_Boundary.shp")
START_DATE = date(2025, 1, 1)
END_DATE   = date(2025, 1, 12)

# Network safety: avoid long hangs.
# Process-wide default timeout (seconds) for sockets created after this call.
socket.setdefaulttimeout(20)

# =============================== Auth & AOI ====================================
def ensure_login() -> str:
    """
    Authenticate with Earthdata/CMR via earthaccess and verify the result.

    Order of preference
    -------------------
    1) Token in env: EARTHDATA_TOKEN or EARTHACCESS_TOKEN (length > 50)
    2) Username/password in env: EARTHDATA_USERNAME / EARTHDATA_PASSWORD
    3) Interactive prompt (username + password)

    A token that is not accepted falls through to the next option with a
    warning. Rejected username/password credentials raise instead of being
    reported as a successful login.

    Returns
    -------
    str: "token" | "userpass" | "prompt"

    Raises
    ------
    RuntimeError
        If username/password credentials (from env or prompt) are rejected.
    """
    def _env_login() -> bool:
        # earthaccess.login returns an Auth object; its `authenticated` flag
        # tells whether the credentials were accepted. If the attribute is
        # missing, keep the previous optimistic behaviour.
        auth = ea.login(strategy="environment")
        return bool(getattr(auth, "authenticated", True))

    tok = os.getenv("EARTHDATA_TOKEN") or os.getenv("EARTHACCESS_TOKEN")
    if tok and len(tok) > 50:
        # Normalise to the variable name the environment strategy reads.
        os.environ["EARTHDATA_TOKEN"] = tok
        if _env_login():
            return "token"
        print("[WARN] Earthdata token was not accepted; trying other credentials.")

    user = os.getenv("EARTHDATA_USERNAME")
    pwd = os.getenv("EARTHDATA_PASSWORD")
    if user and pwd:
        if _env_login():
            return "userpass"
        raise RuntimeError(
            "Earthdata login failed with EARTHDATA_USERNAME / EARTHDATA_PASSWORD."
        )

    # Fallback prompt (safe for local, not for unattended environments)
    import getpass
    user = input("Earthdata username: ").strip()
    pwd = getpass.getpass("Earthdata password: ")
    os.environ["EARTHDATA_USERNAME"] = user
    os.environ["EARTHDATA_PASSWORD"] = pwd
    if not _env_login():
        raise RuntimeError("Earthdata login failed with the credentials entered.")
    return "prompt"


def read_aoi_bbox_wgs84(shp_path: Path) -> tuple[float, float, float, float]:
    """
    Return the AOI bounding box as (minx, miny, maxx, maxy) in WGS84.

    All feature geometries of the shapefile are merged, reprojected to
    EPSG:4326 when the layer CRS differs, and the resulting bounds are
    clamped to [-180, 180] x [-90, 90].

    Raises
    ------
    ValueError
        If the shapefile contains no features or declares no CRS.
    """
    with fiona.open(shp_path, "r") as layer:
        shapes = []
        for feature in layer:
            shapes.append(shape(feature["geometry"]))
        if len(shapes) == 0:
            raise ValueError("AOI shapefile has no geometries.")
        layer_crs = layer.crs

    # buffer(0) on the union, as a geometry clean-up step.
    merged = unary_union(shapes).buffer(0)
    if not layer_crs:
        raise ValueError("AOI shapefile has no CRS.")

    source = PJCRS.from_user_input(layer_crs)
    target = PJCRS.from_epsg(4326)
    if source != target:
        to_wgs84 = Transformer.from_crs(source, target, always_xy=True)

        def _reproject(x, y, z=None):
            # Any z coordinate is dropped; only x/y are transformed.
            return to_wgs84.transform(x, y)

        merged = shp_transform(_reproject, merged)

    west, south, east, north = merged.bounds
    return (
        max(west, -180.0),
        max(south, -90.0),
        min(east, 180.0),
        min(north, 90.0),
    )


def day_iter(d0: date, d1: date) -> Iterable[date]:
    """Yield every calendar day from d0 through d1, both ends included."""
    span = (d1 - d0).days
    for offset in range(span + 1):
        yield d0 + timedelta(days=offset)

# ============================== Pairing utilities ==============================
def timestamp_key_from_name(name: str) -> Optional[str]:
    """
    Pull the 'Ayyyyddd.HHMM' acquisition key out of a MODIS file name.

    The key must be delimited by dots on both sides ('.Ayyyyddd.HHMM.').

    Returns
    -------
    str | None
        The key without the surrounding dots, or None when the name
        contains no such token.
    """
    hit = re.search(r"\.(A\d{7}\.\d{4})\.", name)
    if hit is None:
        return None
    return hit.group(1)


def pair_by_timestamp(l1b_paths: list[str], geo_paths: list[str]) -> list[tuple[Path, Path]]:
    """
    Pair L1B ↔ GEO by the timestamp key; only exact matches are kept.

    Files whose names carry no '.Ayyyyddd.HHMM.' key are never paired, so an
    unparsable L1B name cannot be matched with an unparsable GEO name.

    Parameters
    ----------
    l1b_paths : paths of downloaded L1B files.
    geo_paths : paths of downloaded GEO files.

    Returns
    -------
    list[tuple[Path, Path]]
        (l1b, geo) pairs in the order of `l1b_paths`. When several GEO files
        share a key, the last one listed is used.
    """
    geo_by_key: dict[str, Path] = {}
    for g in geo_paths:
        g_path = Path(g)
        g_key = timestamp_key_from_name(g_path.name)
        if g_key is not None:
            geo_by_key[g_key] = g_path

    pairs: list[tuple[Path, Path]] = []
    for l1b in l1b_paths:
        l1b_path = Path(l1b)
        key = timestamp_key_from_name(l1b_path.name)
        if key is None:
            continue  # no timestamp in the name, so nothing to match on
        geo = geo_by_key.get(key)
        if geo is not None:
            pairs.append((l1b_path, geo))
    return pairs

# ================================ Thermal =====================================
def search_download_thermal(day: date,
                            bbox: tuple[float, float, float, float],
                            rawdir: Path) -> list[dict]:
    """
    Find, download and pair MODIS L1B and GEO granules for one day.

    For each enabled platform (GET_TERRA / GET_AQUA) both products are
    searched with the same day and bounding box, downloaded into `rawdir`,
    and paired by timestamp key.

    Returns
    -------
    list[dict]
        One manifest row per pair with keys
        l1b_path, geo_path, platform, timestamp_key.
    """
    wanted = [
        combo
        for enabled, combo in (
            (GET_TERRA, ("MOD021KM", "MOD03", "TERRA")),
            (GET_AQUA, ("MYD021KM", "MYD03", "AQUA")),
        )
        if enabled
    ]

    manifest_rows: list[dict] = []
    for l1b_name, geo_name, platform in wanted:
        print(f"[INFO] {day} — searching {platform}: {l1b_name}/{geo_name}")
        found_l1b = ea.search_data(short_name=l1b_name, temporal=(day, day), bounding_box=bbox)
        found_geo = ea.search_data(short_name=geo_name, temporal=(day, day), bounding_box=bbox)
        print(f"       found {len(found_l1b)} L1B, {len(found_geo)} GEO")

        # Only call the downloader when the search returned something.
        got_l1b = []
        if found_l1b:
            got_l1b = ea.download(found_l1b, rawdir.as_posix())
        got_geo = []
        if found_geo:
            got_geo = ea.download(found_geo, rawdir.as_posix())
        print(f"       downloaded {len(got_l1b)} L1B, {len(got_geo)} GEO")

        matched = pair_by_timestamp(got_l1b, got_geo)
        print(f"       paired {len(matched)} {platform} granules")

        manifest_rows.extend(
            {
                "l1b_path": str(l1b_file),
                "geo_path": str(geo_file),
                "platform": platform,
                "timestamp_key": timestamp_key_from_name(Path(l1b_file).name) or "",
            }
            for l1b_file, geo_file in matched
        )
    return manifest_rows

# ============================= Active Fire (AF) ================================
def _af_has_internal_geo(hdf_path: Path) -> bool:
    """
    Best-effort check for internal lat/lon in MOD14/MYD14 HDF4.

    Returns True if datasets named 'Latitude' and 'Longitude' exist
    (case-insensitive). Never raises: if pyhdf is not available, or the file
    cannot be opened or read, the answer is False.
    """
    try:
        from pyhdf.SD import SD, SDC  # optional dependency
    except Exception:
        return False

    try:
        sd = SD(str(hdf_path), SDC.READ)
        try:
            # SD.datasets() is keyed by dataset name, so there is no need to
            # open each SDS just to read its name.
            names_l = {str(n).lower() for n in sd.datasets()}
        finally:
            sd.end()  # always release the file handle, even on error
    except Exception:
        return False
    return ("latitude" in names_l) and ("longitude" in names_l)


def search_download_active_fire(day: date,
                                bbox: tuple[float, float, float, float],
                                rawdir: Path,
                                geo_rows_for_pairing: list[dict]) -> list[dict]:
    """
    Search and download MODIS Active Fire L2 (MOD14/MYD14) for one day.

    Each downloaded AF file is checked for internal lat/lon and, when
    possible, matched to a GEO file taken from the thermal manifest rows.
    Matching uses platform AND timestamp key, so a Terra AF granule is never
    paired with an Aqua GEO file that shares the same 'Ayyyyddd.HHMM' slot.
    Rows without a platform are matched on the timestamp key alone. Files
    without a timestamp key are never paired.

    Parameters
    ----------
    day : calendar day to search.
    bbox : (minx, miny, maxx, maxy) in WGS84.
    rawdir : download folder.
    geo_rows_for_pairing : thermal manifest rows; the keys "timestamp_key",
        "geo_path" and (optionally) "platform" are read.

    Returns
    -------
    list[dict]
        AF manifest rows with keys product, file_path, timestamp_key,
        has_internal_geo (0/1), paired_geo_path ("" when unpaired).
    """
    rows: list[dict] = []
    prods: list[tuple[str, str]] = []
    if GET_TERRA:
        prods.append(("MOD14", "TERRA"))
    if GET_AQUA:
        prods.append(("MYD14", "AQUA"))

    # GEO lookups built from the thermal rows: platform-specific first,
    # timestamp-only as a fallback for rows that carry no platform.
    geo_by_platform_key: dict[tuple[str, str], str] = {}
    geo_by_key: dict[str, str] = {}
    for r in geo_rows_for_pairing:
        ts_key = r.get("timestamp_key")
        geo_path = r.get("geo_path")
        if not ts_key or not geo_path:
            continue  # nothing usable to pair on
        row_plat = r.get("platform")
        if row_plat:
            geo_by_platform_key[(row_plat, ts_key)] = geo_path
        else:
            geo_by_key[ts_key] = geo_path

    for af_product, plat in prods:
        print(f"[INFO] {day} — searching {plat}: {af_product} (Active Fire)")
        items_af = ea.search_data(short_name=af_product, temporal=(day, day), bounding_box=bbox)
        print(f"       found {len(items_af)} AF items")

        af_paths = ea.download(items_af, rawdir.as_posix()) if items_af else []
        print(f"       downloaded {len(af_paths)} AF granules")

        for p in af_paths:
            p = Path(p)
            key = timestamp_key_from_name(p.name) or ""
            has_geo = 1 if _af_has_internal_geo(p) else 0
            paired_geo = ""
            if key:
                paired_geo = geo_by_platform_key.get((plat, key)) or geo_by_key.get(key, "")
            rows.append({
                "product": af_product,
                "file_path": str(p),
                "timestamp_key": key,
                "has_internal_geo": has_geo,
                "paired_geo_path": paired_geo
            })
    return rows

# ================================== Main ======================================
def main() -> None:
    """
    Run the downloader: log in, derive the AOI bbox, then handle each day.

    For every day in START_DATE..END_DATE the raw_modis folder is created,
    the enabled product families are downloaded, and a CSV manifest is
    written for each family that produced rows.
    """
    if not (DOWNLOAD_THERMAL or DOWNLOAD_AF):
        raise SystemExit("Both DOWNLOAD_THERMAL and DOWNLOAD_AF are False. Nothing to do.")

    mode = ensure_login()
    print(f"[INFO] Auth mode: {mode}")

    aoi_bbox = read_aoi_bbox_wgs84(AOI_SHP)
    print(f"[INFO] AOI bbox (WGS84): {aoi_bbox}")

    def dump_manifest(target: Path, columns: list[str], records: list[dict]) -> None:
        # Overwrites any existing manifest with a header plus one line per record.
        with target.open("w", newline="") as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()
            writer.writerows(records)

    for day in day_iter(START_DATE, END_DATE):
        day_folder = BASEDIR / day.strftime("%Y-%m-%d")
        raw_folder = day_folder / "raw_modis"
        raw_folder.mkdir(parents=True, exist_ok=True)

        # Thermal rows are also handed to the AF step for GEO pairing, so
        # they default to empty when the thermal download is disabled.
        thermal_records: list[dict] = []
        fire_records: list[dict] = []

        # ---- Thermal (L1B + GEO) ----
        if DOWNLOAD_THERMAL:
            thermal_records = search_download_thermal(day, aoi_bbox, raw_folder)
            if not thermal_records:
                print(f"[WARN] {day}: no L1B/GEO pairs found.")
            else:
                thermal_csv = day_folder / "manifest_pairs_modis.csv"
                dump_manifest(
                    thermal_csv,
                    ["l1b_path", "geo_path", "platform", "timestamp_key"],
                    thermal_records,
                )
                print(f"[OK] Thermal manifest written: {thermal_csv}")

        # ---- Active Fire (MOD14/MYD14) ----
        if DOWNLOAD_AF:
            fire_records = search_download_active_fire(day, aoi_bbox, raw_folder, thermal_records)
            if not fire_records:
                print(f"[WARN] {day}: no AF granules found.")
            else:
                fire_csv = day_folder / "manifest_active_fire_modis.csv"
                dump_manifest(
                    fire_csv,
                    ["product", "file_path", "timestamp_key", "has_internal_geo", "paired_geo_path"],
                    fire_records,
                )
                print(f"[OK] AF manifest written: {fire_csv}")


# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()


In [None]:
# ============================= modis_process_rad_bt.py =============================
"""
MODIS L1B → Radiance & Brightness Temperature
===============================================================================

Bands: 21 (~3.96 µm) and 31 (~11.0 µm)

Pipeline (per day)
------------------
Inputs (created by modis_download.py):
  BASEDIR/YYYY-MM-DD/raw_modis/*.hdf
  BASEDIR/YYYY-MM-DD/manifest_pairs_modis.csv

Processing:
  1) Read L1B (MOD021KM/MYD021KM) EV_1KM_Emissive for Bands 21 & 31
  2) Scale to radiance (W·m^-2·sr^-1·µm^-1)
  3) Convert to BT (K)
  4) Read geolocation from MOD03/MYD03
  5) Resample swath to a fixed WGS84 grid (EPSG:4326), intersected with AOI
  6) Apply cloud mask: Band 31 BT < CLOUD_BT_K (265 K) → invalid (applied on the swath, before the resampling of step 5)
  7) AOI clip
  8) Write GeoTIFFs: *_B21_Rad.tif, *_B21_BT_K.tif, *_B31_Rad.tif, *_B31_BT_K.tif
  9) Append global CSV log: BASEDIR/processing_log_modis.csv (UTC/local times & stats)

Dependencies
------------
• Install pyhdf gdal rasterio pyresample fiona shapely pyproj tzdata

Notes
-----
• Cloud mask is deliberately simple (cold-cloud threshold on Band 31 BT).
"""

from __future__ import annotations

# ------------------------------ Standard library ------------------------------
import re
import csv
from pathlib import Path
from datetime import date, datetime, timedelta, timezone
from typing import Iterable, Optional

# --------------------------------- Numerics -----------------------------------
import numpy as np

# --------------------------------- Raster IO ----------------------------------
import rasterio
from rasterio.transform import from_bounds
from rasterio.crs import CRS
from rasterio.features import geometry_mask

# ----------------------------- Vector / geometry ------------------------------
import fiona
from shapely.geometry import shape, mapping
from shapely.ops import unary_union, transform as shp_transform
from pyproj import CRS as PJCRS, Transformer

# ----------------------------- Resampling (swath) -----------------------------
from pyresample import geometry, kd_tree

# --------------------------------- Timezone -----------------------------------
from zoneinfo import ZoneInfo

# ---------------------------------- HDF4 --------------------------------------
from pyhdf.SD import SD, SDC  # MODIS L1B/geo reader

# ==============================================================================
#                                 CONFIGURATION
# ==============================================================================

# BASEDIR    : root folder holding the per-day YYYY-MM-DD directories.
# AOI_SHP    : AOI shapefile used for the output grid and the final clip.
# START_DATE : first calendar day processed (inclusive).
# END_DATE   : last calendar day processed (inclusive).
BASEDIR    = Path(r"path\to\base\directory")
AOI_SHP    = Path(r"path\to\F002_L1__IR__L2L1M0__2025-01-10T215412.018348Z_2025-04-10T154832.806087Z_97706189_MWIR_Boundary.shp")
START_DATE = date(2025, 1, 8)
END_DATE   = date(2025, 1, 12)

# Local time zone for logging (only affects the acq_*_local log columns)
LOCAL_TZNAME = "America/Los_Angeles"  # California: "America/Los_Angeles", Texas: "America/Chicago"
LOCAL_TZ     = ZoneInfo(LOCAL_TZNAME)

# Output grid & cloud mask parameters
GRID_RES_DEG = 0.008333  # degrees per pixel (1/120° = 30 arc-seconds, ≈ 1 km)
CLOUD_BT_K   = 265.0     # cold-cloud threshold (K) on Band 31 BT; colder pixels are masked

# ==============================================================================
#                         PHYSICS & BAND CONFIGURATION
# ==============================================================================

# MCST effective central wavenumbers (cm^-1) — as used by Satpy, etc.
# Keys are band numbers as strings; lookups go through str(band_num).
MCST_CWN_CM1: dict[str, float] = {
    "20": 2641.775, "21": 2505.277, "22": 2518.028, "23": 2465.428,
    "24": 2235.815, "25": 2200.346,
    "27": 1477.967, "28": 1362.737, "29": 1173.190, "30": 1027.715,
    "31": 908.0884, "32": 831.5399, "33": 748.3394, "34": 730.8963,
    "35": 718.8681, "36": 704.5367,
}

# Physical constants (SI)
_H = 6.62607015e-34   # Planck constant, J·s
_C = 2.99792458e8     # speed of light, m·s^-1
_K = 1.380649e-23     # Boltzmann constant, J·K^-1

def k1k2_from_cwn_cm1(cwn_cm1: float) -> tuple[float, float]:
    """
    Derive the Planck constants (K1, K2) for one band.

    The effective wavenumber ν_eff (cm^-1) is converted to a wavelength in
    metres. K1 = 2hc²/λ⁵ is scaled by 1e-6 so it applies to radiance in
    W·m^-2·sr^-1·µm^-1; K2 = hc/(kλ) is in kelvin.
    """
    wavelength_m = 1.0 / (cwn_cm1 * 100.0)   # cm^-1 → m^-1 → m
    two_h_c2 = 2.0 * _H * _C**2
    k1 = two_h_c2 / (wavelength_m**5) * 1e-6  # per metre → per µm
    k2 = (_H * _C) / (_K * wavelength_m)
    return k1, k2

def bt_from_radiance_with_bandnum(L_um: np.ndarray | None, band_num: str) -> Optional[np.ndarray]:
    """
    Invert the Planck function: spectral radiance → brightness temperature.

    Parameters
    ----------
    L_um : radiance in W·m^-2·sr^-1·µm^-1, or None.
    band_num : band identifier looked up in MCST_CWN_CM1 (e.g. "21", "31").

    Returns
    -------
    float32 array of BT in kelvin, or None when `L_um` is None.

    Raises
    ------
    ValueError
        If no effective wavenumber is tabulated for `band_num`.
    """
    if L_um is None:
        return None

    band_key = str(band_num)
    if band_key not in MCST_CWN_CM1:
        raise ValueError(f"No MCST ν_eff for band {band_num}.")
    k1, k2 = k1k2_from_cwn_cm1(MCST_CWN_CM1[band_key])

    # Floor the radiance at a tiny positive value so K1/L stays finite.
    radiance = np.clip(np.asarray(L_um, dtype=np.float64), 1e-9, np.inf)
    bt = k2 / np.log1p(k1 / radiance)
    return bt.astype(np.float32)

# ==============================================================================
#                           IO HELPERS & TIME UTILITIES
# ==============================================================================

def day_iter(d0: date, d1: date) -> Iterable[date]:
    """Yield each calendar day in the closed range [d0, d1]."""
    one_day = timedelta(days=1)
    current = d0
    while True:
        if current > d1:
            return
        yield current
        current = current + one_day

def read_aoi_wgs84(shp_path: Path):
    """
    Load the AOI shapefile and return one merged geometry in EPSG:4326.

    All features are unioned; the result is reprojected only when the layer
    CRS differs from EPSG:4326.

    Raises
    ------
    ValueError
        If the shapefile contains no features or declares no CRS.
    """
    with fiona.open(shp_path, "r") as layer:
        parts = [shape(feat["geometry"]) for feat in layer]
        if len(parts) == 0:
            raise ValueError("AOI shapefile has no geometries.")
        declared_crs = layer.crs

    # buffer(0) on the union, as a geometry clean-up step.
    merged = unary_union(parts).buffer(0)
    if not declared_crs:
        raise ValueError("AOI shapefile has no CRS.")

    crs_from = PJCRS.from_user_input(declared_crs)
    crs_to = PJCRS.from_epsg(4326)
    if crs_from == crs_to:
        return merged

    reproj = Transformer.from_crs(crs_from, crs_to, always_xy=True)

    def _to_lonlat(x, y, z=None):
        # Any z coordinate is ignored; only x/y are transformed.
        return reproj.transform(x, y)

    return shp_transform(_to_lonlat, merged)

def timestamp_key_from_name(name: str) -> Optional[str]:
    """Return the 'Ayyyyddd.HHMM' key found between dots in a MODIS filename, else None."""
    found = re.search(r"\.(A\d{7}\.\d{4})\.", name)
    if found is None:
        return None
    return found.group(1)

def dt_from_timestamp_key(key: str) -> Optional[datetime]:
    """
    Convert 'Ayyyyddd.HHMM' to an aware UTC datetime.

    Returns None when the key does not have that exact shape or encodes an
    impossible moment: day-of-year 000 or beyond the end of the year
    (366 is accepted only in leap years), hour > 23, or minute > 59.
    Such keys are rejected rather than rolled into a neighbouring day or year.
    """
    m = re.fullmatch(r"A(\d{4})(\d{3})\.(\d{2})(\d{2})", key)
    if not m:
        return None
    year, doy, hh, mm = map(int, m.groups())
    if doy < 1:
        return None
    try:
        # datetime() validates year/hour/minute; the addition can overflow
        # at the very end of the supported calendar.
        stamp = datetime(year, 1, 1, hh, mm, tzinfo=timezone.utc) + timedelta(days=doy - 1)
    except (ValueError, OverflowError):
        return None
    # A day-of-year past Dec 31 would have spilled into the following year.
    if stamp.year != year:
        return None
    return stamp

# ==============================================================================
#                             MODIS HDF4 READERS
# ==============================================================================

# Fallback band order for EV_1KM_Emissive when metadata is missing
# List position is used as the index into the band axis of the SDS (band 26 is not in the list).
_BAND_ORDER_DEFAULT = [20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]

def _parse_band_names(attr_val) -> list[int] | None:
    """
    Turn a 'band_names' attribute into a list of integer band IDs.

    The value (bytes or anything str() accepts) is split on commas after
    dropping any 'Band_' prefixes; the leading digits of each piece become
    one ID and pieces without leading digits are ignored.

    Returns None when no ID can be extracted or parsing fails.
    """
    try:
        if isinstance(attr_val, bytes):
            text = attr_val.decode("utf-8")
        else:
            text = str(attr_val)
        hits = (
            re.match(r"(\d+)", token.strip())
            for token in text.replace("Band_", "").split(",")
        )
        band_ids = [int(hit.group(1)) for hit in hits if hit]
        return band_ids if band_ids else None
    except Exception:
        return None

def read_modis_emissive(mod021_path: Path, want_bands: tuple[int, int] = (21, 31)) -> dict[int, Optional[np.ndarray]]:
    """
    Read EV_1KM_Emissive and scale to radiance for requested bands.

    Scaled integers outside the SDS `valid_range` attribute (0..32767 when
    the attribute is missing or malformed) are treated as invalid, in
    addition to `_FillValue` and `bad_data_value`, so reserved
    "unusable data" codes are not scaled into bogus radiances.

    Parameters
    ----------
    mod021_path : path to a MOD021KM/MYD021KM HDF4 file.
    want_bands : band numbers to extract.

    Returns
    -------
    dict {band_num: radiance(float32) | None}
        None for a band that is not present in the file.
        Units: W·m^-2·sr^-1·µm^-1

    Raises
    ------
    ValueError
        If the SDS carries no radiance_scales / radiance_offsets attributes.
    """
    sd = SD(str(mod021_path), SDC.READ)
    try:
        sds = sd.select("EV_1KM_Emissive")
        try:
            arr = sds.get()  # indexed below as (band, line, sample)
            attrs = sds.attributes()
        finally:
            sds.endaccess()
    finally:
        sd.end()  # handles are released even if reading fails

    band_names = _parse_band_names(attrs.get("band_names", attrs.get("Band_names", ""))) or _BAND_ORDER_DEFAULT
    band_to_idx = {b: i for i, b in enumerate(band_names)}

    raw_scales = attrs.get("radiance_scales")
    raw_offsets = attrs.get("radiance_offsets")
    if raw_scales is None or raw_offsets is None:
        raise ValueError(
            f"{mod021_path}: EV_1KM_Emissive has no radiance_scales/radiance_offsets attributes."
        )
    scales = np.atleast_1d(np.array(raw_scales, dtype=np.float64))
    offsets = np.atleast_1d(np.array(raw_offsets, dtype=np.float64))
    badval = attrs.get("bad_data_value", None)
    fillval = attrs.get("_FillValue", None)

    # Usable scaled-integer range; anything outside it is a flag, not data.
    try:
        valid_lo, valid_hi = (float(v) for v in attrs.get("valid_range"))
    except (TypeError, ValueError):
        valid_lo, valid_hi = 0.0, 32767.0

    out: dict[int, Optional[np.ndarray]] = {}
    for b in want_bands:
        i = band_to_idx.get(b)
        if i is None or i >= arr.shape[0] or i >= scales.size or i >= offsets.size:
            out[b] = None
            continue
        dn = arr[i, :, :].astype(np.float64)

        # Replace sentinel/fill/out-of-range codes with NaN before scaling
        invalid = (dn < valid_lo) | (dn > valid_hi)
        if fillval is not None:
            invalid |= dn == float(fillval)
        if badval is not None:
            invalid |= dn == float(badval)
        dn[invalid] = np.nan

        rad = (dn - offsets[i]) * scales[i]           # scale to radiance
        rad = rad.astype(np.float32)
        rad[~np.isfinite(rad)] = np.nan
        out[b] = rad

    return out

def read_modis_geo(mod03_path: Path) -> tuple[np.ndarray, np.ndarray]:
    """
    Read MOD03/MYD03 geolocation: Latitude, Longitude (float32).

    Values outside [-90, 90] (latitude) or [-180, 180] (longitude) are
    replaced with NaN. The HDF file is closed even when reading fails.

    Returns
    -------
    (lat, lon) : two float32 arrays.
    """
    sd = SD(str(mod03_path), SDC.READ)
    try:
        lat = sd.select("Latitude").get().astype(np.float32)
        lon = sd.select("Longitude").get().astype(np.float32)
    finally:
        sd.end()
    lat[(lat < -90) | (lat > 90)] = np.nan
    lon[(lon < -180) | (lon > 180)] = np.nan
    return lat, lon

# ==============================================================================
#                        GRIDDING / RESAMPLING (WGS84)
# ==============================================================================

def define_area_wgs84_intersection(lat: np.ndarray,
                                   lon: np.ndarray,
                                   aoi_geom,
                                   res_deg: float = GRID_RES_DEG,
                                   pad: float = 0.0):
    """
    Build the output grid covering swath-bbox ∩ AOI-bbox in WGS84.

    The swath extent comes from the NaN-ignoring min/max of `lat`/`lon`; the
    AOI extent from `aoi_geom.bounds`. `pad` (degrees) widens the
    intersection on every side.

    Returns
    -------
    (area_def, transform, width, height), or None when the two extents do
    not overlap.
    """
    swath_west = float(np.nanmin(lon))
    swath_east = float(np.nanmax(lon))
    swath_south = float(np.nanmin(lat))
    swath_north = float(np.nanmax(lat))
    aoi_west, aoi_south, aoi_east, aoi_north = aoi_geom.bounds

    west = max(swath_west, aoi_west) - pad
    east = min(swath_east, aoi_east) + pad
    south = max(swath_south, aoi_south) - pad
    north = min(swath_north, aoi_north) + pad

    overlaps = west < east and south < north
    if not overlaps:
        return None

    # Round the pixel counts up so the grid spans the whole extent.
    n_cols = int(np.ceil((east - west) / res_deg))
    n_rows = int(np.ceil((north - south) / res_deg))
    affine = from_bounds(west, south, east, north, n_cols, n_rows)

    extent = (west, south, east, north)
    grid = geometry.AreaDefinition(
        "wgs84", "WGS84 latlon", "epsg4326",
        {"proj": "longlat", "datum": "WGS84"},
        n_cols, n_rows, extent,
    )
    return grid, affine, n_cols, n_rows

def resample_swath_to_grid(lat: np.ndarray,
                           lon: np.ndarray,
                           data: np.ndarray,
                           area_def) -> np.ndarray:
    """
    Project swath data onto `area_def` by nearest-neighbour lookup.

    Source pixels are searched within a 6000 m radius of influence; grid
    cells with no neighbour are filled with NaN. The result is float32.
    """
    source_geometry = geometry.SwathDefinition(lons=lon, lats=lat)
    options = {"radius_of_influence": 6000, "fill_value": np.nan}
    gridded = kd_tree.resample_nearest(source_geometry, data, area_def, **options)
    return gridded.astype(np.float32)

def write_geotiff(path: Path,
                  arr: np.ndarray,
                  transform,
                  crs=CRS.from_epsg(4326),
                  nodata=np.float32(np.nan),
                  band_tags: dict | None = None) -> None:
    """
    Save `arr` as a one-band, tiled, LZW-compressed float32 GeoTIFF.

    Parameters
    ----------
    path : output file.
    arr : 2-D array (rows, cols) written to band 1.
    transform : affine transform of the grid.
    crs : raster CRS (EPSG:4326 by default).
    nodata : nodata value recorded in the file (NaN by default).
    band_tags : optional metadata tags attached to band 1.
    """
    n_rows, n_cols = arr.shape[0], arr.shape[1]
    profile = dict(
        driver="GTiff",
        height=n_rows,
        width=n_cols,
        count=1,
        dtype=rasterio.float32,
        crs=crs,
        transform=transform,
        nodata=nodata,
        compress="lzw",
        tiled=True,
    )
    with rasterio.open(path, "w", **profile) as sink:
        sink.write(arr, 1)
        if band_tags:
            sink.update_tags(1, **band_tags)

# ==============================================================================
#                                   LOGGING
# ==============================================================================

# Single log file shared by all processed days; rows are appended to it.
LOG_CSV = BASEDIR / "processing_log_modis.csv"

def append_log(row: dict) -> None:
    """
    Add one record to the global MODIS processing log.

    The header line is written only when the log file does not exist yet;
    otherwise the row is appended below the existing content.
    """
    columns = [
        "l1b_file", "geo_file", "platform", "timestamp_key",
        "acq_date_utc", "acq_time_utc", "acq_date_local", "acq_time_local", "local_tz",
        "cloud_thresh_K",
    ]
    # bt_B21_min, bt_B21_max, bt_B21_mean, then the same three for B31.
    columns += [f"bt_B{band}_{stat}" for band in (21, 31) for stat in ("min", "max", "mean")]

    needs_header = not LOG_CSV.exists()
    with LOG_CSV.open("a", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)

# ==============================================================================
#                                     MAIN
# ==============================================================================

def main() -> None:
    """
    Iterate days, process all MODIS L1B/GEO pairs in the manifest, and write products + log.

    For each day in START_DATE..END_DATE the per-day manifest
    (manifest_pairs_modis.csv) is read; days without a manifest are skipped.
    For each manifest row the L1B radiances for bands 21/31 are converted to
    brightness temperature, cloud-masked on the swath, resampled to the
    swath∩AOI grid, clipped to the AOI polygon and written as GeoTIFFs under
    <day>/bt/. One log row per processed pair is appended to LOG_CSV.

    Pairs are skipped (no output, no log row) when a file is missing, when
    neither band is available, or when the swath does not overlap the AOI.
    """
    aoi = read_aoi_wgs84(AOI_SHP)

    for day in day_iter(START_DATE, END_DATE):
        daydir   = BASEDIR / day.strftime("%Y-%m-%d")
        manifest = daydir / "manifest_pairs_modis.csv"
        outdir   = daydir / "bt"
        # NOTE: created before the manifest check, so days without a manifest
        # still get an (empty) bt/ folder.
        outdir.mkdir(parents=True, exist_ok=True)

        if not manifest.exists():
            print(f"[INFO] {day}: no manifest found; skipping.")
            continue

        with manifest.open("r", newline="") as f:
            rows = list(csv.DictReader(f))

        print(f"[INFO] {day}: processing {len(rows)} MODIS pairs …")

        for row in rows:
            l1b_p = Path(row["l1b_path"])
            geo_p = Path(row["geo_path"])
            plat  = row.get("platform", "")
            key   = row.get("timestamp_key", "")

            if not l1b_p.exists() or not geo_p.exists():
                print(f"[WARN] Missing files for {key}; skip.")
                continue

            # 1) Radiance for B21/B31 (W·m^-2·sr^-1·µm^-1)
            rad_map = read_modis_emissive(l1b_p, want_bands=(21, 31))
            rad21, rad31 = rad_map.get(21), rad_map.get(31)
            if rad21 is None and rad31 is None:
                print(f"[WARN] {l1b_p.name}: neither B21 nor B31 present; skip.")
                continue

            # 2) Brightness temperature via MCST ν_eff-derived (K1, K2)
            bt21 = bt_from_radiance_with_bandnum(rad21, "21") if rad21 is not None else None
            bt31 = bt_from_radiance_with_bandnum(rad31, "31") if rad31 is not None else None

            # 3) Cloud mask from B31 BT (optional/simple)
            #    True where B31 BT is below the threshold; None (no masking)
            #    when band 31 is unavailable.
            cloud_mask = (bt31 < CLOUD_BT_K) if bt31 is not None else None

            # 4) Geolocation (MOD03/MYD03)
            lat, lon = read_modis_geo(geo_p)

            # 5) Output grid = swath ∩ AOI
            area = define_area_wgs84_intersection(lat, lon, aoi, res_deg=GRID_RES_DEG)
            if area is None:
                print("[INFO] Swath does not intersect AOI; skipping.")
                continue
            area_def, transform, width, height = area

            # Helper: apply cloud mask (if present)
            # Works on a copy, so the unmasked swath arrays stay untouched.
            def apply_masks(a: Optional[np.ndarray]) -> Optional[np.ndarray]:
                if a is None:
                    return None
                out = a.copy()
                if cloud_mask is not None:
                    out[cloud_mask] = np.nan
                return out

            # The same B31-derived mask is applied to both bands' radiance and BT.
            rad21_c = apply_masks(rad21); rad31_c = apply_masks(rad31)
            bt21_c  = apply_masks(bt21);  bt31_c  = apply_masks(bt31)

            # 6) Resample to grid
            #    The *_g arrays exist only for bands that are present; every
            #    later use is guarded by the same `radNN_c is not None` test.
            if rad21_c is not None:
                rad21_g = resample_swath_to_grid(lat, lon, rad21_c, area_def)
                bt21_g  = resample_swath_to_grid(lat, lon, bt21_c,  area_def)
            if rad31_c is not None:
                rad31_g = resample_swath_to_grid(lat, lon, rad31_c, area_def)
                bt31_g  = resample_swath_to_grid(lat, lon, bt31_c,  area_def)

            # 7) AOI clip mask
            #    invert=True → True for pixels inside the AOI polygon.
            mask_aoi = geometry_mask([mapping(aoi)],
                                     out_shape=(height, width),
                                     transform=transform,
                                     invert=True).astype(bool)

            # Output file prefix: L1B file name without its last suffix.
            stem = l1b_p.with_suffix("").name

            # 8) Write products
            # -- Band 21
            if rad21_c is not None:
                rad21_g = np.where(mask_aoi, rad21_g, np.nan)
                write_geotiff(outdir / f"{stem}_B21_Rad.tif", rad21_g, transform,
                              band_tags={"units": "W/m^2/sr/μm", "long_name": "MODIS Band 21 Radiance"})
                bt21_g = np.where(mask_aoi, bt21_g, np.nan)
                write_geotiff(outdir / f"{stem}_B21_BT_K.tif", bt21_g, transform,
                              band_tags={"units": "K", "long_name": "MODIS Band 21 Brightness Temperature"})
                print(f"[OK] wrote B21 radiance & BT for {stem}")

            # -- Band 31
            if rad31_c is not None:
                rad31_g = np.where(mask_aoi, rad31_g, np.nan)
                write_geotiff(outdir / f"{stem}_B31_Rad.tif", rad31_g, transform,
                              band_tags={"units": "W/m^2/sr/μm", "long_name": "MODIS Band 31 Radiance"})
                bt31_g = np.where(mask_aoi, bt31_g, np.nan)
                write_geotiff(outdir / f"{stem}_B31_BT_K.tif", bt31_g, transform,
                              band_tags={"units": "K", "long_name": "MODIS Band 31 Brightness Temperature"})
                print(f"[OK] wrote B31 radiance & BT for {stem}")

            # 9) Logging (UTC/local time + BT stats)
            #    Time columns are left empty when the key cannot be parsed.
            dt_utc = dt_from_timestamp_key(key)
            if dt_utc is not None:
                dt_loc = dt_utc.astimezone(LOCAL_TZ)
                d_utc, t_utc = dt_utc.strftime("%Y-%m-%d"), dt_utc.strftime("%H:%M")
                d_loc, t_loc = dt_loc.strftime("%Y-%m-%d"), dt_loc.strftime("%H:%M")
            else:
                d_utc = t_utc = d_loc = t_loc = ""

            # (min, max, mean) over finite pixels; all-NaN when nothing is valid.
            def stats(a: Optional[np.ndarray]) -> tuple[float, float, float]:
                if a is None:
                    return (np.nan, np.nan, np.nan)
                vv = a[np.isfinite(a)]
                if vv.size == 0:
                    return (np.nan, np.nan, np.nan)
                return (float(np.nanmin(vv)), float(np.nanmax(vv)), float(np.nanmean(vv)))

            # Stats use the gridded, cloud-masked, AOI-clipped BT arrays.
            s21 = stats(bt21_g if rad21_c is not None else None)
            s31 = stats(bt31_g if rad31_c is not None else None)

            append_log({
                "l1b_file": str(l1b_p),
                "geo_file": str(geo_p),
                "platform":  plat,
                "timestamp_key": key,
                "acq_date_utc":  d_utc,
                "acq_time_utc":  t_utc,
                "acq_date_local": d_loc,
                "acq_time_local": t_loc,
                "local_tz": LOCAL_TZNAME,
                "cloud_thresh_K": CLOUD_BT_K,
                "bt_B21_min": s21[0], "bt_B21_max": s21[1], "bt_B21_mean": s21[2],
                "bt_B31_min": s31[0], "bt_B31_max": s31[1], "bt_B31_mean": s31[2],
            })

    print(f"[DONE] Log at: {LOG_CSV}")

# ------------------------------------------------------------------------------
# Run the radiance/BT processing only when this file is executed as a script.
if __name__ == "__main__":
    main()


In [None]:
# ============================ modis_process_af.py =============================
"""
MODIS Active Fire (MOD14/MYD14) → GeoTIFFs (FireMask + Detection)
==================================================================

Per-day outputs in: BASEDIR/YYYY-MM-DD/af/
  - <HDF_basename>_AF_FireMask.tif
      • float32; FireMask classes (1..9) inside AOI; NaN outside AOI
  - <HDF_basename>_AF_Detect.tif
      • float32; 1.0 where FireMask ∈ {7,8,9}, 0.0 elsewhere; NaN outside AOI

Geolocation
-----------
• Prefer Latitude/Longitude SDS embedded in AF file.
• Otherwise, auto-pair a MOD03/MYD03 HDF by timestamp (.Ayyyyddd.HHMM.) found
  within the same day’s raw_modis/ folder.

Processing
----------
1) Read FireMask and lat/lon (internal or paired GEO).
2) Build EPSG:4326 output grid as swath∩AOI; resample with nearest-neighbor.
3) AOI clip (NaN outside).
4) Write FireMask + binary Detect GeoTIFFs.

Notes
-----
• Detection mask uses FireMask classes {7,8,9}.
• Dependencies: pyhdf rasterio pyresample fiona shapely pyproj tzdata
"""

from __future__ import annotations

# ------------------------------ Std lib --------------------------------
import csv
import re
from pathlib import Path
from datetime import date, timedelta

# -------------------------------- NumPy --------------------------------
import numpy as np

# -------------------------------- HDF4 ---------------------------------
from pyhdf.SD import SD, SDC  # HDF4 reader

# -------------------------------- Raster --------------------------------
import rasterio
from rasterio.transform import from_bounds
from rasterio.crs import CRS
from rasterio.features import geometry_mask

# ------------------------------ Vectors --------------------------------
import fiona
from shapely.geometry import shape, mapping
from shapely.ops import unary_union, transform as shp_transform
from pyproj import CRS as PJCRS, Transformer

# ----------------------------- Resampling ------------------------------
from pyresample import geometry, kd_tree

# ------------------------------- CONFIG --------------------------------
# Root folder; main() works inside one BASEDIR/YYYY-MM-DD sub-directory per day.
# NOTE(review): looks like a placeholder path — set before running.
BASEDIR    = Path(r"path\to\base\directory")
# AOI shapefile; read_aoi_wgs84() reprojects it to EPSG:4326 if needed.
AOI_SHP    = Path(r"path\to\F002_L1__IR__L2L1M0__2025-01-10T215412.018348Z_2025-04-10T154832.806087Z_97706189_MWIR_Boundary.shp")
# Processing window; day_iter() treats both endpoints as inclusive.
START_DATE = date(2025, 1, 8)
END_DATE   = date(2025, 1, 12)

# Output grid resolution in degrees (≈1 km at equator: 0.008333)
GRID_RES_DEG = 0.008333

# ----------------------------- Utilities -------------------------------
def day_iter(d0: date, d1: date):
    """Yield each calendar day from d0 through d1, both endpoints included.

    Nothing is yielded when d1 is earlier than d0.
    """
    one_day = timedelta(days=1)
    current = d0
    while True:
        if current > d1:
            return
        yield current
        current = current + one_day

def read_aoi_wgs84(shp_path: Path):
    """
    Read an AOI shapefile (any CRS) and return one merged geometry in EPSG:4326.

    All feature geometries are unioned and cleaned with a zero-width buffer.
    Features whose geometry is null are skipped instead of crashing the read.

    Raises:
        FileNotFoundError: shp_path does not exist.
        ValueError: the file holds no usable geometry, or declares no CRS.
    """
    if not shp_path.exists():
        raise FileNotFoundError(f"AOI not found: {shp_path}")

    geoms = []
    with fiona.open(shp_path, "r") as src:
        for feat in src:
            geom = feat["geometry"]
            # Shapefiles may carry records with a null geometry; shape(None) would raise.
            if geom is not None:
                geoms.append(shape(geom))
        if not geoms:
            raise ValueError("AOI shapefile has no geometries.")
        src_crs = src.crs
    if not src_crs:
        raise ValueError("AOI shapefile has no CRS.")

    aoi = unary_union(geoms).buffer(0)
    src_crs_obj = PJCRS.from_user_input(src_crs)
    dst_crs_obj = PJCRS.from_epsg(4326)
    if src_crs_obj != dst_crs_obj:
        # always_xy keeps (lon, lat) axis order regardless of the CRS definition.
        tf = Transformer.from_crs(src_crs_obj, dst_crs_obj, always_xy=True)
        # Any z coordinate is dropped: only x/y are transformed and returned.
        aoi = shp_transform(lambda x, y, z=None: tf.transform(x, y), aoi)
    return aoi

def define_area_wgs84_intersection(lat, lon, aoi_geom, res_deg=GRID_RES_DEG, pad=0.0):
    """
    Build the lat/lon target grid covering the overlap of swath and AOI extents.

    The swath extent is the NaN-ignoring min/max of `lon` and `lat`; the AOI
    extent is `aoi_geom.bounds`. The overlap box is widened by `pad` degrees on
    every side, and the cell counts are the box size divided by `res_deg`,
    rounded up.

    Returns (area_def, transform, width, height), or None when the resulting
    box does not have strictly positive width and height.
    """
    swath_w, swath_e = float(np.nanmin(lon)), float(np.nanmax(lon))
    swath_s, swath_n = float(np.nanmin(lat)), float(np.nanmax(lat))
    aoi_w, aoi_s, aoi_e, aoi_n = aoi_geom.bounds

    west = max(swath_w, aoi_w) - pad
    east = min(swath_e, aoi_e) + pad
    south = max(swath_s, aoi_s) - pad
    north = min(swath_n, aoi_n) + pad

    # Written as negated "<" so a NaN edge also lands in the no-overlap branch.
    if not (west < east) or not (south < north):
        return None

    n_cols = int(np.ceil((east - west) / res_deg))
    n_rows = int(np.ceil((north - south) / res_deg))
    extent = (west, south, east, north)
    affine = from_bounds(*extent, n_cols, n_rows)
    area_def = geometry.AreaDefinition(
        "wgs84", "WGS84 latlon", "epsg4326",
        {"proj": "longlat", "datum": "WGS84"},
        n_cols, n_rows, extent,
    )
    return area_def, affine, n_cols, n_rows

def resample_swath_to_grid(lat, lon, data, area_def):
    """
    Resample swath `data` onto `area_def` with nearest-neighbor lookup.

    Uses a 6000 (radius_of_influence) search radius; target cells with no
    swath pixel inside it are filled with NaN. Returns a float32 array.
    """
    source = geometry.SwathDefinition(lons=lon, lats=lat)
    gridded = kd_tree.resample_nearest(
        source,
        data,
        area_def,
        radius_of_influence=6000,
        fill_value=np.nan,
    )
    return gridded.astype(np.float32)

def write_geotiff(path: Path, arr: np.ndarray, transform, crs=CRS.from_epsg(4326),
                  nodata=np.float32(np.nan), band_tags: dict | None = None, dtype=rasterio.float32):
    """
    Write `arr` as a one-band, tiled, LZW-compressed GeoTIFF at `path`.

    The array is cast to `dtype` before writing; `nodata` (NaN by default) is
    recorded in the file. When `band_tags` is non-empty its items are stored
    as tags on band 1.
    """
    n_rows, n_cols = arr.shape[0], arr.shape[1]
    with rasterio.open(
        path, "w",
        driver="GTiff", height=n_rows, width=n_cols, count=1,
        dtype=dtype, crs=crs, transform=transform,
        nodata=nodata, compress="lzw", tiled=True,
    ) as dst:
        dst.write(arr.astype(dtype), 1)
        if band_tags:
            dst.update_tags(1, **band_tags)

# ------------------------- HDF4 helpers (robust) ------------------------
def _list_sds_names(sd: SD) -> list[str]:
    """List every SDS name reported by `sd.datasets()`, keeping original case and order."""
    # datasets() maps name -> (idx, rank, dims, type, nattrs); only the keys are needed.
    return list(sd.datasets())

def _normalize(s: str) -> str:
    """Lowercase `s` and drop every non-alphanumeric character (for lenient name matching)."""
    return "".join(filter(str.isalnum, s.lower()))

def _find_sds(sd: SD, candidates: list[str]) -> str | None:
    """
    Look up an SDS by name, ignoring case and punctuation.

    Candidates are tried in order; the first one whose normalized form equals
    the normalized form of an SDS in the file wins. Returns that SDS's
    original name, or None when no candidate matches.
    """
    by_normalized = {}
    for original in _list_sds_names(sd):
        # Later names overwrite earlier ones that normalize to the same key.
        by_normalized[_normalize(original)] = original
    wanted = map(_normalize, candidates)
    return next((by_normalized[k] for k in wanted if k in by_normalized), None)

def _read_sds_scaled(sd: SD, sds_name: str):
    """
    Load one SDS as float32, masking fill values and applying linear scaling.

    Values equal to the `_FillValue` attribute become NaN. If either
    `scale_factor` or `add_offset` is present the data are mapped to
    `value * scale_factor + add_offset`, with a missing factor taken as 1.0
    and a missing offset as 0.0.

    Returns (float32 array, attributes dict).
    """
    dataset = sd.select(sds_name)
    raw = dataset.get()  # may raise if inaccessible
    attrs = dataset.attributes()

    # Work in float64 so NaN can be stored and scaling keeps precision.
    values = raw.astype(np.float64)

    fill = attrs.get("_FillValue")
    if fill is not None:
        values = np.where(values == float(fill), np.nan, values)

    scale = attrs.get("scale_factor")
    offset = attrs.get("add_offset")
    if not (scale is None and offset is None):
        gain = 1.0 if scale is None else float(scale)
        bias = 0.0 if offset is None else float(offset)
        values = values * gain + bias

    return values.astype(np.float32), attrs

# --------------------------- AF readers --------------------------------
def timestamp_key_from_name(name: str) -> str | None:
    """
    Pull the 'Ayyyyddd.HHMM' timestamp key out of a granule filename.

    The key must be delimited by dots on both sides in `name`; the returned
    string excludes those dots. Returns None when no key is found.
    """
    hit = re.search(r"\.(A\d{7}\.\d{4})\.", name)
    if hit is None:
        return None
    return hit.group(1)

def _read_latlon_sds(hdf_path: Path, lat_name: str = "Latitude", lon_name: str = "Longitude"):
    """Open an HDF4 file, read two SDS as float32 arrays, and always close the file."""
    handle = SD(str(hdf_path), SDC.READ)
    try:
        lat = handle.select(lat_name).get().astype(np.float32)
        lon = handle.select(lon_name).get().astype(np.float32)
    finally:
        handle.end()
    return lat, lon

def read_af_latlon_or_pair(af_path: Path, fallback_dirs: list[Path]):
    """
    Get per-pixel latitude/longitude for an Active Fire granule.

    Order of attempts:
      1. Latitude/Longitude SDS inside the AF file itself.
      2. If those are absent or cannot be read, a MOD03/MYD03 HDF in
         `fallback_dirs` whose filename carries the same timestamp key
         ('Ayyyyddd.HHMM'). MYD03 is used when the AF filename starts with
         "MYD", otherwise MOD03. Candidates are tried in sorted order and the
         first one that reads cleanly wins.

    Every HDF4 handle opened here is closed, including on read errors.

    Returns:
        (lat, lon) as float32 arrays with out-of-range values set to NaN, or
        (None, None) when no geolocation could be obtained.
    """
    lat = lon = None

    sd = SD(str(af_path), SDC.READ)
    try:
        lat_name = _find_sds(sd, ["Latitude", "latitude"])
        lon_name = _find_sds(sd, ["Longitude", "longitude"])
        if lat_name and lon_name:
            try:
                # Assign both at once so a failed longitude read cannot leave a stale latitude.
                lat, lon = (
                    sd.select(lat_name).get().astype(np.float32),
                    sd.select(lon_name).get().astype(np.float32),
                )
            except Exception as exc:
                # Best effort: fall through to the GEO pairing below.
                print(f"[WARN] Internal lat/lon read failed in {af_path.name}: {exc}")
    finally:
        sd.end()

    if lat is None or lon is None:
        # Fallback: pair MOD03/MYD03 by timestamp
        key = timestamp_key_from_name(af_path.name)
        if not key:
            return None, None
        geo_prefix = "MYD03" if af_path.name.startswith("MYD") else "MOD03"
        pattern = f"{geo_prefix}.{key}*.hdf"
        for folder in fallback_dirs:
            # sorted() makes the choice deterministic when several files match.
            for geo_path in sorted(folder.glob(pattern)):
                try:
                    lat, lon = _read_latlon_sds(geo_path)
                except Exception as exc:
                    print(f"[WARN] GEO read failed ({geo_path.name}): {exc}")
                    continue
                break
            if lat is not None:
                break

    if lat is None or lon is None:
        return None, None

    # Basic range hygiene
    lat[(lat < -90) | (lat > 90)] = np.nan
    lon[(lon < -180) | (lon > 180)] = np.nan
    return lat, lon

def read_firemask_only(af_path: Path):
    """
    Read the FireMask SDS from an Active Fire HDF4 file.

    The SDS is located by a case/punctuation-insensitive match against a list
    of known name variants and read through `_read_sds_scaled`, so fill values
    come back as NaN.

    Returns the float32 array, or None when no matching SDS exists or the read
    fails (a warning is printed in the latter case). The HDF4 handle is closed
    on every path, including when the SDS lookup itself raises.
    """
    sd = SD(str(af_path), SDC.READ)
    try:
        nm_firemask = _find_sds(sd, ["FireMask", "fire_mask", "Fire Mask", "PixelFireMask", "FP_Mask", "Mask"])
        if not nm_firemask:
            return None
        try:
            fm, _ = _read_sds_scaled(sd, nm_firemask)
        except Exception as e:
            print(f"[WARN] FireMask read failed ({nm_firemask}) in {af_path.name}: {e}")
            return None
        return fm  # float32; classes ~1..9; NaN for fill
    finally:
        sd.end()

# -------------------------------- Main -----------------------------------
def main():
    """
    Convert each day's MODIS Active Fire granules into AOI-clipped GeoTIFFs.

    For every day in START_DATE..END_DATE (inclusive):
      * AF files come from manifest_active_fire_modis.csv (column `file_path`)
        when that manifest exists, otherwise from MOD14/MYD14 HDFs found in
        raw_modis/. Manifest rows with a blank/missing path, or a path that is
        not an existing file, are ignored.
      * Each granule is geolocated, resampled onto the swath∩AOI grid, clipped
        to the AOI polygon, and written to BASEDIR/YYYY-MM-DD/af/ as
        <stem>_AF_FireMask.tif and <stem>_AF_Detect.tif.

    Granules with no geolocation, no FireMask, mismatched array shapes, or no
    overlap with the AOI are skipped with a console message.
    """
    aoi = read_aoi_wgs84(AOI_SHP)
    # GeoJSON-like AOI used for clipping; identical for every granule, so build it once.
    aoi_shapes = [mapping(aoi)]

    for day in day_iter(START_DATE, END_DATE):
        daydir = BASEDIR / day.strftime("%Y-%m-%d")
        outdir = daydir / "af"
        outdir.mkdir(parents=True, exist_ok=True)

        manifest_af = daydir / "manifest_active_fire_modis.csv"
        rawdir = daydir / "raw_modis"

        # Build list of AF files to process
        af_files: list[Path] = []
        if manifest_af.exists():
            with manifest_af.open("r", newline="") as f:
                for row in csv.DictReader(f):
                    # Short rows yield None and blank cells yield ""; Path("") is "."
                    # (which exists), so both must be rejected before building a Path.
                    raw_path = (row.get("file_path") or "").strip()
                    if not raw_path:
                        continue
                    p = Path(raw_path)
                    if p.is_file():
                        af_files.append(p)
        else:
            # Fallback: scan raw_modis for MOD14/MYD14 HDFs
            if rawdir.exists():
                af_files += list(rawdir.glob("MOD14.A*.hdf"))
                af_files += list(rawdir.glob("MYD14.A*.hdf"))

        if not af_files:
            print(f"[INFO] {day}: no AF files found; skipping.")
            continue

        print(f"[INFO] {day}: processing {len(af_files)} MODIS AF granules …")

        # Search paths for GEO fallback when AF lacks internal lat/lon
        fallback_geo_dirs = [rawdir]

        for af_path in af_files:
            # 1) Geolocation
            lat, lon = read_af_latlon_or_pair(af_path, fallback_geo_dirs)
            if lat is None or lon is None:
                print(f"[WARN] No geolocation for {af_path.name}; skipping.")
                continue

            # 2) FireMask
            firemask = read_firemask_only(af_path)
            if firemask is None:
                print(f"[WARN] No FireMask in {af_path.name}; skipping.")
                continue

            # Sanity check: shapes must match for resampling input
            if firemask.shape != lat.shape:
                print(f"[WARN] FireMask shape {firemask.shape} != geo {lat.shape} in {af_path.name}; skipping.")
                continue

            # 3) Output grid = swath ∩ AOI
            area = define_area_wgs84_intersection(lat, lon, aoi, res_deg=GRID_RES_DEG)
            if area is None:
                print(f"[INFO] AF swath ({af_path.name}) does not intersect AOI; skipping.")
                continue
            area_def, transform, width, height = area

            # 4) Detection mask: FireMask ∈ {7,8,9} → 1.0 else 0.0
            # Compare in float: firemask may hold NaN (fill), and casting NaN to an
            # integer dtype is undefined. NaN simply fails the membership test.
            det = np.isin(np.rint(firemask), (7, 8, 9)).astype(np.float32)

            # 5) Resample to target grid
            fm_grid  = resample_swath_to_grid(lat, lon, firemask.astype(np.float32), area_def)
            det_grid = resample_swath_to_grid(lat, lon, det,                         area_def)

            # 6) AOI clip (NaN outside)
            # invert=True → True inside the AOI polygon.
            mask_aoi = geometry_mask(aoi_shapes, out_shape=(height, width),
                                     transform=transform, invert=True).astype(bool)
            fm_grid  = np.where(mask_aoi, fm_grid,  np.nan)
            det_grid = np.where(mask_aoi, det_grid, np.nan)

            # 7) Write GeoTIFFs
            stem = af_path.with_suffix("").name

            out_fm = outdir / f"{stem}_AF_FireMask.tif"
            write_geotiff(
                out_fm, fm_grid, transform,
                band_tags={
                    "units": "class",
                    "long_name": "MODIS AF FireMask",
                    "classes": "1:obsolete,2:not_processed,3:water,4:cloud,5:land_nonfire,6:unknown,7:fire_low,8:fire_nominal,9:fire_high"
                }
            )
            print(f"[OK] wrote {out_fm}")

            out_det = outdir / f"{stem}_AF_Detect.tif"
            write_geotiff(
                out_det, det_grid, transform,
                band_tags={
                    "units": "binary",
                    "definition": "1=Fire (FireMask in {7,8,9}); 0=No fire; NaN outside AOI"
                }
            )
            print(f"[OK] wrote {out_det}")

    print("[DONE]")

# ------------------------------------------------------------------------------
# Run the AF processing only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
