## Natural Disaster Case Studies for Model Testing and Validation

This notebook examines three recent U.S. disasters, chosen to represent different hazard types and regions. The goal is to test and validate model performance across varied physical processes and socio-demographic contexts.

---

#### 1. 2025 Southern California Wildfires
- Among the most destructive in U.S. history  
- Severe wildland–urban interface losses and unprecedented damages  

---

#### 2. 2024 Hurricane Helene (Northwestern North Carolina)
- Catastrophic inland flooding in the Appalachians  
- Most fatalities occurred far from the coast  

---

#### 3. 2023 Midwest Tornado Outbreak (Greenfield, Iowa)
- Multi-day outbreak of high-intensity tornadoes  
- Greenfield tornado recorded near-historic wind speeds  


Decide and download polygons

In [1]:
!jupyter trust /media/gisense/xihan/250812_CyberTraining_Team4/validation_case.ipynb

Signing notebook: /media/gisense/xihan/250812_CyberTraining_Team4/validation_case.ipynb


In [1]:
import requests
import geopandas as gpd
from pathlib import Path

# Set up paths
BASE = Path("/media/gisense/xihan/250812_CyberTraining_Team4")
RAW = BASE / "data" / "raw"
RAW.mkdir(parents=True, exist_ok=True)

# Direct GeoJSON query
URL = ("https://services3.arcgis.com/T4QMspbfLg3qTGWY/arcgis/rest/services/"
       "WFIGS_Interagency_Perimeters_YearToDate/FeatureServer/0/query?"
       "outFields=*&where=1%3D1&f=geojson")

# Download to file
out_file = RAW / "wfigs_perimeters_ytd.geojson"
r = requests.get(URL, timeout=120)
r.raise_for_status()
out_file.write_bytes(r.content)
print("Saved:", out_file)

# Load into GeoDataFrame
gdf = gpd.read_file(out_file)
print("Features:", len(gdf), "| CRS:", gdf.crs)
gdf.head()


Saved: /media/gisense/xihan/250812_CyberTraining_Team4/data/raw/wfigs_perimeters_ytd.geojson


ERROR 1: PROJ: proj_create_from_database: Open of /media/gisense/xihan/geoai-cu121/share/proj failed


Features: 2000 | CRS: EPSG:4326


Unnamed: 0,OBJECTID,poly_SourceOID,poly_IncidentName,poly_FeatureCategory,poly_MapMethod,poly_GISAcres,poly_DeleteThis,poly_FeatureAccess,poly_FeatureStatus,poly_IsVisible,...,attr_Source,attr_IsCpxChild,attr_CpxName,attr_CpxID,attr_SourceGlobalID,GlobalID,Shape__Area,Shape__Length,attr_IncidentComplexityLevel,geometry
0,36334,138,Daggett Brook,Wildfire Daily Fire Perimeter,Auto-generated for InFORM,0.25,No,Public,Approved,Yes,...,IRWIN,0,,,{C7FFB004-D6C1-4B48-BEB7-E701581CE6E4},b505cfc7-e5cc-4bb7-963e-f7f3686c0d80,5.202401e-08,0.001123,,"POLYGON ((-94.10368 46.17836, -94.10358 46.178..."
1,36389,170,Cattle,Wildfire Daily Fire Perimeter,Auto-generated for InFORM,48.0,No,Public,Approved,Yes,...,IRWIN,0,,,{A7F41DAF-A6E0-47D5-B6BE-14FCEC2BFFC8},63c34d34-c1ad-46ba-8470-ead7afb4757b,1.751453e-05,0.043307,Type 4 Incident,"POLYGON ((-114.52647 32.75359, -114.52533 32.7..."
2,36392,171,Ranch,Wildfire Daily Fire Perimeter,Auto-generated for InFORM,65.0,No,Public,Approved,Yes,...,IRWIN,0,,,{871AEB04-A5ED-4B30-988F-65BEDA931C45},80d6f02f-6b45-47f6-a661-76eff90b4529,1.041941e-05,0.028968,Type 5 Incident,"POLYGON ((-114.53292 32.73687, -114.53267 32.7..."
3,36398,172,PALISADES,Wildfire Daily Fire Perimeter,IR Image Interpretation,23448.0,No,Public,Approved,Yes,...,IRWIN,0,,,{35C19D7C-FEDA-42F8-AD76-9B094E783801},2a11c9e3-3295-4623-8c53-4e9a192ae0f3,0.009267988,1.157614,Type 3 Incident,"MULTIPOLYGON (((-118.56093 34.04408, -118.5609..."
4,36400,211,Beaver Creek,Wildfire Daily Fire Perimeter,Hand Sketch,0.14,No,Public,Approved,Yes,...,IRWIN,0,,,{DDEAC0E4-5140-4276-9996-4013D16FB1A5},2330124e-6788-4ff2-a8f1-cf8f965b3a40,1.295034e-07,0.001818,,"POLYGON ((-93.87519 46.48887, -93.87496 46.489..."


In [2]:
import geopandas as gpd
from pathlib import Path

BASE = Path("/media/gisense/xihan/250812_CyberTraining_Team4")
RAW = BASE / "data" / "raw"

gdf = gpd.read_file(RAW / "wfigs_perimeters_ytd.geojson")
print("Total features:", len(gdf))
gdf.head()

Total features: 2000


Unnamed: 0,OBJECTID,poly_SourceOID,poly_IncidentName,poly_FeatureCategory,poly_MapMethod,poly_GISAcres,poly_DeleteThis,poly_FeatureAccess,poly_FeatureStatus,poly_IsVisible,...,attr_Source,attr_IsCpxChild,attr_CpxName,attr_CpxID,attr_SourceGlobalID,GlobalID,Shape__Area,Shape__Length,attr_IncidentComplexityLevel,geometry
0,36334,138,Daggett Brook,Wildfire Daily Fire Perimeter,Auto-generated for InFORM,0.25,No,Public,Approved,Yes,...,IRWIN,0,,,{C7FFB004-D6C1-4B48-BEB7-E701581CE6E4},b505cfc7-e5cc-4bb7-963e-f7f3686c0d80,5.202401e-08,0.001123,,"POLYGON ((-94.10368 46.17836, -94.10358 46.178..."
1,36389,170,Cattle,Wildfire Daily Fire Perimeter,Auto-generated for InFORM,48.0,No,Public,Approved,Yes,...,IRWIN,0,,,{A7F41DAF-A6E0-47D5-B6BE-14FCEC2BFFC8},63c34d34-c1ad-46ba-8470-ead7afb4757b,1.751453e-05,0.043307,Type 4 Incident,"POLYGON ((-114.52647 32.75359, -114.52533 32.7..."
2,36392,171,Ranch,Wildfire Daily Fire Perimeter,Auto-generated for InFORM,65.0,No,Public,Approved,Yes,...,IRWIN,0,,,{871AEB04-A5ED-4B30-988F-65BEDA931C45},80d6f02f-6b45-47f6-a661-76eff90b4529,1.041941e-05,0.028968,Type 5 Incident,"POLYGON ((-114.53292 32.73687, -114.53267 32.7..."
3,36398,172,PALISADES,Wildfire Daily Fire Perimeter,IR Image Interpretation,23448.0,No,Public,Approved,Yes,...,IRWIN,0,,,{35C19D7C-FEDA-42F8-AD76-9B094E783801},2a11c9e3-3295-4623-8c53-4e9a192ae0f3,0.009267988,1.157614,Type 3 Incident,"MULTIPOLYGON (((-118.56093 34.04408, -118.5609..."
4,36400,211,Beaver Creek,Wildfire Daily Fire Perimeter,Hand Sketch,0.14,No,Public,Approved,Yes,...,IRWIN,0,,,{DDEAC0E4-5140-4276-9996-4013D16FB1A5},2330124e-6788-4ff2-a8f1-cf8f965b3a40,1.295034e-07,0.001818,,"POLYGON ((-93.87519 46.48887, -93.87496 46.489..."


In [3]:
from shapely.geometry import box

# Define bounding box for LA region
bbox = box(-119.25, 33.5, -117.5, 34.8)

# Filter perimeters that intersect this bbox
gdf_la = gdf[gdf.geometry.intersects(bbox)]
print("Fires in LA region:", len(gdf_la))


Fires in LA region: 8


In [4]:
gdf_la.explore(
    column="poly_GISAcres",
    cmap="Reds",
    tooltip=["poly_IncidentName", "poly_GISAcres"],
    popup=True,
    tiles="CartoDB positron"
)

In [10]:
import geopandas as gpd
from shapely.geometry import box
from pathlib import Path

# Paths
BASE = Path("/media/gisense/xihan/250812_CyberTraining_Team4")
RAW  = BASE / "data" / "raw"
OUT  = BASE / "data" / "interim"
OUT.mkdir(parents=True, exist_ok=True)

# Load
gdf = gpd.read_file(RAW / "wfigs_perimeters_ytd.geojson")

# Filter by LA bbox (lon/lat)
bbox_ll = box(-119.25, 33.5, -117.5, 34.8)
gdf_la = gdf.to_crs(4326)
gdf_la = gdf_la[gdf_la.geometry.intersects(bbox_ll)].copy()

# Add a simple ID so you can note selections later
if "poly_IncidentName" in gdf_la.columns:
    gdf_la["cand_id"] = range(1, len(gdf_la) + 1)
    tooltip_cols = ["cand_id", "poly_IncidentName", "poly_GISAcres"] if "poly_GISAcres" in gdf_la.columns else ["cand_id", "poly_IncidentName"]
else:
    gdf_la["cand_id"] = range(1, len(gdf_la) + 1)
    tooltip_cols = ["cand_id"]

# Uniform style (same color for all polygons)
m = gdf_la.explore(
    color="#e41a1c",                  # uniform stroke color (red)
    style_kwds={"fillOpacity": 0.20, "weight": 1},
    tooltip=tooltip_cols,
    popup=True,
    tiles="CartoDB positron"          # nice basemap
)

# Save interactive map
html_path = OUT / "la_fire_candidates_map.html"
m.save(html_path.as_posix())
print(f"Saved interactive map → {html_path}")


Saved interactive map → /media/gisense/xihan/250812_CyberTraining_Team4/data/interim/la_fire_candidates_map.html


In [13]:
# === From scratch: build 600 m grid intersecting cand_id 1 & 2 ===
import geopandas as gpd
import numpy as np
from shapely.geometry import box as sbox
from pathlib import Path
import math

# ---------------- Paths ----------------
BASE = Path("/media/gisense/xihan/250812_CyberTraining_Team4")
RAW  = BASE / "data" / "raw"
OUT  = BASE / "data" / "interim"
OUT.mkdir(parents=True, exist_ok=True)

perims = RAW / "wfigs_perimeters_ytd.geojson"

# ---------------- 1) Build LA candidates (same bbox as your map) ----------------
gdf = gpd.read_file(perims)

# Filter by LA bbox in WGS84
bbox_ll = sbox(-119.25, 33.5, -117.5, 34.8)
gdf_la = gdf.to_crs(4326)
gdf_la = gdf_la[gdf_la.geometry.intersects(bbox_ll)].copy()

# Assign cand_id exactly like before (sequential in current order)
gdf_la["cand_id"] = range(1, len(gdf_la) + 1)

# ---------------- 2) Keep cand_id 1 & 2 ----------------
keep_ids = [1, 2]
sel_ll = gdf_la[gdf_la["cand_id"].isin(keep_ids)].copy()
if sel_ll.empty:
    raise ValueError("No features for cand_id 1 & 2. Rebuild candidates and confirm IDs.")

# ---------------- 3) Reproject to UTM Zone 11N & validate ----------------
sel = sel_ll.to_crs(32611)
# Fix any invalid geometries if needed
try:
    from shapely.validation import make_valid
    sel["geometry"] = sel.geometry.map(make_valid)
except Exception:
    sel["geometry"] = sel.buffer(0)

sel = sel[~sel.geometry.is_empty & sel.geometry.is_valid].copy()
if sel.empty:
    raise ValueError("Selected polygons are empty/invalid after reprojection; check inputs.")

# Save the selected polygons (for reference)
sel_polys_path = OUT / "la_fire_selected_cand1_2_utm11.gpkg"
sel.to_file(sel_polys_path, layer="aoi_sel", driver="GPKG")
print(f"Saved AOI polygons → {sel_polys_path}")

# ---------------- 4) Build 600 m fishnet covering AOI extent ----------------
# Union for intersection tests + robust bounds
try:
    aoi_union = sel.union_all()     # GeoPandas >= 0.14
except AttributeError:
    aoi_union = sel.unary_union     # fallback

minx, miny, maxx, maxy = sel.total_bounds
for v in (minx, miny, maxx, maxy):
    if not np.isfinite(v):
        raise ValueError("AOI bounds are NaN/inf; selection likely empty.")

cell = 600.0  # meters

# Snap to a 600 m lattice to ensure full coverage
xmin = math.floor(minx / cell) * cell
ymin = math.floor(miny / cell) * cell
xmax = math.ceil (maxx / cell) * cell
ymax = math.ceil (maxy / cell) * cell

nx = int(round((xmax - xmin) / cell))
ny = int(round((ymax - ymin) / cell))
xs = xmin + cell * np.arange(nx)
ys = ymin + cell * np.arange(ny)

# Full extent grid (rectangles), then keep only those intersecting AOI
full_cells = [sbox(x, y, x + cell, y + cell) for x in xs for y in ys]
grid_full = gpd.GeoDataFrame({"cell_id": range(len(full_cells))}, geometry=full_cells, crs=32611)

grid_clip = grid_full[grid_full.geometry.intersects(aoi_union)].copy()
grid_clip.reset_index(drop=True, inplace=True)
grid_clip["cell_id"] = range(len(grid_clip))

# Optional centroids (projected + WGS84)
grid_clip["cx"] = grid_clip.geometry.centroid.x
grid_clip["cy"] = grid_clip.geometry.centroid.y
grid_clip_ll = grid_clip.to_crs(4326)
grid_clip["lon"] = grid_clip_ll.geometry.centroid.x
grid_clip["lat"] = grid_clip_ll.geometry.centroid.y

# ---------------- 5) Save outputs ----------------
grid_clip_path = OUT / "la_fire_grid_600m_clip_utm11.gpkg"
grid_clip.to_file(grid_clip_path, layer="grid_600m_clip", driver="GPKG")
print(f"Saved {len(grid_clip)} intersecting cells → {grid_clip_path}")

# ---------------- 6) Quick interactive preview ----------------
try:
    m = sel.to_crs(4326).explore(
        color="#e41a1c",
        style_kwds={"fillOpacity": 0.15, "weight": 2},
        name="Selected AOI (cand 1 & 2)"
    )
    grid_clip.to_crs(4326).explore(
        m=m,
        color="#444444",
        style_kwds={"fillOpacity": 0.0, "weight": 0.6},
        name="600 m boxes (intersecting)"
        # no tooltips to keep it light; add if needed
    )
    import folium
    folium.LayerControl().add_to(m)
    html_preview = OUT / "la_fire_grid_600m_clip_preview.html"
    m.save(html_preview.as_posix())
    print(f"Preview map → {html_preview}")
except Exception as e:
    print("Preview step skipped:", e)


Saved AOI polygons → /media/gisense/xihan/250812_CyberTraining_Team4/data/interim/la_fire_selected_cand1_2_utm11.gpkg
Saved 548 intersecting cells → /media/gisense/xihan/250812_CyberTraining_Team4/data/interim/la_fire_grid_600m_clip_utm11.gpkg
Preview map → /media/gisense/xihan/250812_CyberTraining_Team4/data/interim/la_fire_grid_600m_clip_preview.html



  grid_clip["lon"] = grid_clip_ll.geometry.centroid.x

  grid_clip["lat"] = grid_clip_ll.geometry.centroid.y


In [14]:
# From MAXAR generate the grids: 
from pathlib import Path
BASE = Path("/media/gisense/xihan/250812_CyberTraining_Team4")
OUT  = BASE / "data" / "interim"
CHIPS = BASE / "data" / "chips_600m"
CHIPS.mkdir(parents=True, exist_ok=True)

GRID_GPKG  = OUT / "la_fire_grid_600m_clip_utm11.gpkg"  # layer="grid_600m_clip"

# Maxar Open Data (Los Angeles Jan 2025 Wildfires – ARD zone 11)
BUCKET = "maxar-opendata"
PRE_PREFIX  = "events/WildFires-LosAngeles-Jan-2025/ard/11/pre-event/"
POST_PREFIX = "events/WildFires-LosAngeles-Jan-2025/ard/11/post-event/"


In [16]:
import geopandas as gpd
import mercantile, requests, rasterio
from rasterio.merge import merge
from rasterio.mask import mask
from pathlib import Path

GRIDS = "/media/gisense/xihan/250812_CyberTraining_Team4/data/interim/la_fire_grid_600m_clip_utm11.gpkg"
OUT   = Path("/media/gisense/xihan/250812_CyberTraining_Team4/data/derived/maxar_chips")
OUT.mkdir(parents=True, exist_ok=True)

# 1) Load grids, transform to EPSG:3857 for tiling
gr = gpd.read_file(GRIDS, layer="grid_600m_clip").to_crs(3857)

# 2) Choose dates/folders after you inspect S3 listing (e.g., 2025-01-10 for post; pick a pre date if available)
# Tip: verify available date folders under .../ard/<zoom>/<date> by listing S3.
BASE = "https://maxar-opendata.s3.amazonaws.com/events/WildFires-LosAngeles-Jan-2025/ard"
ZOOM = 19  # ~30 cm tiles
DATE_POST = "2025-01-10"  # example; verify exists
DATE_PRE  = "2025-01-08"  # example; verify exists

def grid_to_tiles(geom_3857, z):
    # mercantile works in WGS84; reproject bbox to 4326
    wgs = gpd.GeoSeries([geom_3857], crs=3857).to_crs(4326).geometry[0]
    west, south, east, north = wgs.bounds
    return list(mercantile.tiles(west, south, east, north, [z]))

def url_for(z, x, y, date):
    # Common Maxar ODP ARD tile pattern under the event; verify exact structure with aws s3 ls
    return f"{BASE}/{z}/{x}/{y}/{date}.tif"

def fetch_tile(url, dst):
    if dst.exists(): return dst
    r = requests.get(url, stream=True, timeout=120)
    if r.status_code == 200:
        with open(dst, "wb") as f:
            for chunk in r.iter_content(1<<20):
                f.write(chunk)
        return dst
    return None

for _, row in gr.iterrows():
    gid = int(row["cell_id"])
    geom = row.geometry
    tiles = grid_to_tiles(geom, ZOOM)

    for tag, date in [("pre", DATE_PRE), ("post", DATE_POST)]:
        folder = OUT / f"cell_{gid:05d}" / tag
        folder.mkdir(parents=True, exist_ok=True)

        # download tiles
        rasters = []
        for t in tiles:
            u = url_for(t.z, t.x, t.y, date)
            p = folder / f"{t.z}_{t.x}_{t.y}.tif"
            got = fetch_tile(u, p)
            if got:
                rasters.append(p)

        if not rasters:
            continue

        # mosaic and clip to exact 600 m polygon
        srcs = [rasterio.open(p) for p in rasters]
        mosaic, out_transform = merge(srcs)
        out_meta = srcs[0].meta.copy()
        for s in srcs: s.close()

        out_meta.update({
            "height": mosaic.shape[1],
            "width":  mosaic.shape[2],
            "transform": out_transform
        })

        tmp = folder / "mosaic_tmp.tif"
        with rasterio.open(tmp, "w", **out_meta) as dst:
            dst.write(mosaic)

        # clip to grid geometry (reproject geom to mosaic CRS)
        with rasterio.open(tmp) as src:
            grid_geom_src = gpd.GeoSeries([geom], crs=3857).to_crs(src.crs)
            out_img, out_t = mask(src, [grid_geom_src.geometry[0]], crop=True)
            meta = src.meta.copy()
            meta.update({"height": out_img.shape[1], "width": out_img.shape[2], "transform": out_t})

        out_chip = OUT / f"cell_{gid:05d}_{tag}.tif"
        with rasterio.open(out_chip, "w", **meta) as dst:
            dst.write(out_img)

        tmp.unlink(missing_ok=True)
