# Ingest Fire Hazard Severity Zones (FHSZ)

This notebook ingests CAL FIRE Fire Hazard Severity Zones (FHSZ) for both responsibility areas:
- **LRA** (Local Responsibility Area) — provided as a File Geodatabase (`.gdb`)
- **SRA** (State Responsibility Area) — provided as a shapefile (`.shp`)

It standardizes hazard codes into hazard classes, checks geometry validity (reporting null and invalid counts), and writes a canonical hazard layer to PostGIS.

## Inputs
- `data/raw/fhsz/lra/*.gdb`
- `data/raw/fhsz/sra/*.shp`

## Output
- PostGIS table: `hazard_fhsz`
  - Columns: `responsibility`, `hazard_code`, `hazard_class`, `source`, `geom`

## Imports

In [2]:
import os
from pathlib import Path
from urllib.parse import quote_plus

import fiona
import geopandas as gpd
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

## Define Paths

In [5]:
# Locate the project root so this cell works on a fresh kernel: walk up from the
# current working directory to the first folder that contains data/raw.
# If no such folder is found, fall back to the working directory; the
# "exists" checks printed below will then show False.
_cwd = Path.cwd().resolve()
ROOT = next(
    (candidate for candidate in (_cwd, *_cwd.parents) if (candidate / "data" / "raw").is_dir()),
    _cwd,
)

DATA_RAW = ROOT / "data" / "raw"
FHSZ_DIR = DATA_RAW / "fhsz"

LRA_DIR = FHSZ_DIR / "lra"
SRA_DIR = FHSZ_DIR / "sra"

print("ROOT:", ROOT)
print("DATA_RAW:", DATA_RAW)
print("LRA_DIR exists:", LRA_DIR.exists(), LRA_DIR)
print("SRA_DIR exists:", SRA_DIR.exists(), SRA_DIR)

ROOT: C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure
DATA_RAW: C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure\data\raw
LRA_DIR exists: True C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure\data\raw\fhsz\lra
SRA_DIR exists: True C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure\data\raw\fhsz\sra


## Database Connection

In [28]:
# Read connection settings from the project's .env file.
load_dotenv(ROOT / ".env")

DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "wildfire_grid")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

# Credentials have no defaults: fail early with a pointer to the .env file.
missing = [k for k, v in {"DB_USER": DB_USER, "DB_PASSWORD": DB_PASSWORD}.items() if not v]
if missing:
    raise ValueError(f"Missing env vars: {missing}. Check your .env at {ROOT/'.env'}")

# Percent-encode the credentials: characters such as '@', ':', '/' or '#' in a
# user name or password would otherwise be parsed as URL delimiters.
DB_URL = (
    f"postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(DB_URL)

# Connectivity probe only; nothing is written, so a plain connection suffices.
with engine.connect() as conn:
    conn.execute(text("SELECT 1;"))

print("Connected to:", DB_NAME)

Connected to: wildfire_grid


## Load LRA & SRA

In [37]:
# --- LRA: exactly one File Geodatabase is expected under LRA_DIR ---
lra_gdbs = sorted(LRA_DIR.glob("*.gdb"))
if not lra_gdbs:
    raise FileNotFoundError(
        f"No .gdb found under {LRA_DIR}\n"
        "Expected: data/raw/fhsz/lra/*.gdb"
    )
if len(lra_gdbs) != 1:
    raise ValueError(f"Multiple .gdb files found under {LRA_DIR}: {lra_gdbs}")

(LRA_GDB,) = lra_gdbs

# The first layer listed in the geodatabase is the one that gets read.
lra_layers = fiona.listlayers(LRA_GDB)
if not lra_layers:
    raise ValueError(f"GDB contains no layers: {LRA_GDB}")

LRA_LAYER = lra_layers[0]

for label, value in (
    ("LRA GDB:", LRA_GDB.name),
    ("LRA layers:", lra_layers),
    ("Using LRA layer:", LRA_LAYER),
):
    print(label, value)

gdf_lra_raw = gpd.read_file(LRA_GDB, layer=LRA_LAYER)

# --- SRA: exactly one shapefile is expected under SRA_DIR ---
sra_shps = sorted(SRA_DIR.glob("*.shp"))
if not sra_shps:
    raise FileNotFoundError(f"No .shp found under {SRA_DIR}")
if len(sra_shps) != 1:
    raise ValueError(f"Multiple .shp files found under {SRA_DIR}: {sra_shps}")

(SRA_SHP,) = sra_shps
print("SRA shapefile:", SRA_SHP.name)

gdf_sra_raw = gpd.read_file(SRA_SHP)

# Summary: row counts and CRS of both raw layers, then a short preview of each.
print("\nLRA rows:", len(gdf_lra_raw), "CRS:", gdf_lra_raw.crs)
print("SRA rows:", len(gdf_sra_raw), "CRS:", gdf_sra_raw.crs)

for raw_frame in (gdf_lra_raw, gdf_sra_raw):
    display(raw_frame.head(3))

LRA GDB: FHSZLRA25_1_All.gdb
LRA layers: ['FHSALRA25_v1_All']
Using LRA layer: FHSALRA25_v1_All
SRA shapefile: FHSZSRA_23_3.shp

LRA rows: 9752 CRS: EPSG:3310
SRA rows: 18423 CRS: EPSG:3310


Unnamed: 0,SRA,FHSZ,FHSZ_Description,Shape_Length,Shape_Area,geometry
0,LRA,-3,NonWildland,2781.759086,156847.5,"MULTIPOLYGON (((271020 -604422.652, 270940.566..."
1,LRA,-3,NonWildland,2327.045466,175893.4,"MULTIPOLYGON (((270480 -603390, 270480 -603360..."
2,LRA,-3,NonWildland,29612.610679,8974583.0,"MULTIPOLYGON (((286479.276 -600180, 286479.327..."


Unnamed: 0,SRA,FHSZ,FHSZ_Descr,Shape_Leng,Shape_Area,geometry
0,SRA,1,Moderate,172.42938,1572.066,"POLYGON Z ((288964.702 -601800 0, 288962.896 -..."
1,SRA,1,Moderate,543.383043,6692.749715,"POLYGON Z ((288961.993 -601710 0, 288961.921 -..."
2,SRA,1,Moderate,73.264096,198.790288,"POLYGON Z ((288953.826 -601440 0, 288953.372 -..."


## Standardize

In [39]:
# Numeric FHSZ hazard code -> hazard class label.
# standardize_fhsz() applies this with Series.map, so any code that is not
# listed here yields a missing (NaN) hazard_class rather than an error.
CODE_TO_CLASS = {
    -3: "NonWildland",
    1: "Moderate",
    2: "High",
    3: "Very High",
}

def force_2d_safe(geom):
    """Return ``geom`` with any Z coordinate removed.

    Parameters
    ----------
    geom : shapely geometry or None
        Geometry to flatten. ``None`` is passed through unchanged.

    Returns
    -------
    shapely geometry or None
        A 2D geometry, or ``None`` when ``geom`` is ``None``.

    Notes
    -----
    ``shapely.force_2d`` is used when it can be imported (it is a top-level
    function in Shapely 2.x). When it cannot, the Z coordinate is dropped with
    ``shapely.ops.transform`` instead, so the result is 2D either way. Errors
    raised while converting a geometry are no longer swallowed.
    """
    if geom is None:
        return None
    try:
        from shapely import force_2d
    except ImportError:
        # Older Shapely without a top-level force_2d: rebuild the geometry
        # from its (x, y) coordinates only. `z` defaults to None so the same
        # callable also accepts geometries that are already 2D.
        from shapely.ops import transform

        return transform(lambda x, y, z=None: (x, y), geom)
    return force_2d(geom)

def pick_hazard_col(gdf: gpd.GeoDataFrame) -> str:
    """Return the name of the column in ``gdf`` that holds the hazard code.

    Known column names are tried in a fixed priority order and the first one
    present in ``gdf.columns`` wins.

    Raises
    ------
    KeyError
        If none of the known names is a column of ``gdf``.
    """
    known_names = ("FHSZ", "HAZ_CODE", "HAZARD_CODE", "hazard_code")
    found = next((name for name in known_names if name in gdf.columns), None)
    if found is not None:
        return found

    raise KeyError(
        "Could not find hazard code column.\n"
        f"Columns available: {list(gdf.columns)}\n"
        "Update candidates in pick_hazard_col()."
    )

def standardize_fhsz(gdf_raw: gpd.GeoDataFrame, responsibility: str, source: str) -> gpd.GeoDataFrame:
    """Convert a raw FHSZ layer into the canonical hazard schema.

    Parameters
    ----------
    gdf_raw : GeoDataFrame
        Raw layer; must have a CRS and a hazard code column recognised by
        ``pick_hazard_col``. It is copied, not modified.
    responsibility : str
        Value written to the ``responsibility`` column of every row.
    source : str
        Value written to the ``source`` column of every row.

    Returns
    -------
    GeoDataFrame
        EPSG:4326 frame with columns ``responsibility``, ``hazard_code``,
        ``hazard_class``, ``source`` and ``geom`` (the active geometry).

    Raises
    ------
    KeyError
        If no hazard code column is found.
    ValueError
        If ``gdf_raw`` has no CRS.
    """
    working = gdf_raw.copy()
    code_column = pick_hazard_col(working)

    if working.crs is None:
        raise ValueError("Input CRS is missing; cannot standardize without a CRS.")

    working = working.to_crs("EPSG:4326")

    # Non-numeric codes become NaN instead of raising.
    hazard_codes = pd.to_numeric(working[code_column], errors="coerce")
    flat_geoms = working.geometry.map(force_2d_safe)

    columns = {
        "responsibility": responsibility,
        "hazard_code": hazard_codes,
        "source": source,
        "geom": flat_geoms,
    }
    standardized = gpd.GeoDataFrame(columns, geometry="geom", crs="EPSG:4326")
    standardized["hazard_class"] = standardized["hazard_code"].map(CODE_TO_CLASS)

    column_order = ["responsibility", "hazard_code", "hazard_class", "source", "geom"]
    return standardized[column_order]

# Standardize each responsibility area separately, tagging rows with their origin.
gdf_lra = standardize_fhsz(
    gdf_lra_raw,
    responsibility="LRA",
    source="CAL FIRE FHSZ (LRA)",
)
gdf_sra = standardize_fhsz(
    gdf_sra_raw,
    responsibility="SRA",
    source="CAL FIRE FHSZ (SRA)",
)

# Stack LRA above SRA with a fresh index, then re-wrap so the geometry column
# and CRS are set explicitly on the combined frame.
gdf_fhsz = gpd.GeoDataFrame(
    pd.concat([gdf_lra, gdf_sra], ignore_index=True),
    geometry="geom",
    crs="EPSG:4326",
)

print("Combined rows:", len(gdf_fhsz))
display(gdf_fhsz.head(5))

Combined rows: 28175


Unnamed: 0,responsibility,hazard_code,hazard_class,source,geom
0,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA),"MULTIPOLYGON (((-117.11935 32.53483, -117.1202..."
1,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA),"MULTIPOLYGON (((-117.12475 32.5443, -117.12474..."
2,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA),"MULTIPOLYGON (((-116.9537 32.56877, -116.9537 ..."
3,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA),"MULTIPOLYGON (((-117.02179 32.57684, -117.0218..."
4,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA),"MULTIPOLYGON (((-117.13211 32.58563, -117.1324..."


## Validation

In [40]:
# Row counts per category; missing values are counted too.
for heading, column in (
    ("By responsibility:", "responsibility"),
    ("\nBy hazard_class:", "hazard_class"),
):
    print(heading)
    display(gdf_fhsz[column].value_counts(dropna=False))

# Data-quality counters for the combined layer.
null_codes = int(gdf_fhsz["hazard_code"].isna().sum())
null_geoms = int(gdf_fhsz["geom"].isna().sum())
invalid_geoms = int((~gdf_fhsz.is_valid).sum())

print("\nNull hazard_code:", null_codes)
print("Null geom:", null_geoms)
print("Invalid geometries:", invalid_geoms)
print("CRS:", gdf_fhsz.crs)

By responsibility:


responsibility
SRA    18423
LRA     9752
Name: count, dtype: int64


By hazard_class:


hazard_class
Very High      9419
Moderate       8929
High           7873
NonWildland    1954
Name: count, dtype: int64


Null hazard_code: 0
Null geom: 0
Invalid geometries: 352
CRS: EPSG:4326


## Write to PostGIS

In [33]:
TABLE = "hazard_fhsz"

# if_exists="replace" already drops and recreates the table, so no separate
# DROP TABLE is issued first. A separately committed DROP would leave the
# database without the table if the upload below failed.
gdf_fhsz.to_postgis(TABLE, engine, if_exists="replace", index=False)

# Add a spatial index and a hazard_class index, then refresh planner statistics.
with engine.begin() as conn:
    conn.execute(text(f"CREATE INDEX IF NOT EXISTS idx_{TABLE}_geom_gist ON {TABLE} USING GIST (geom);"))
    conn.execute(text(f"CREATE INDEX IF NOT EXISTS idx_{TABLE}_class ON {TABLE} (hazard_class);"))
    conn.execute(text(f"ANALYZE {TABLE};"))

# Read back row counts per responsibility and class as a check on the write.
pd.read_sql(
    f"SELECT responsibility, hazard_class, COUNT(*) AS n FROM {TABLE} GROUP BY 1,2 ORDER BY 1,2;",
    engine
)

Unnamed: 0,responsibility,hazard_class,n
0,LRA,High,2049
1,LRA,Moderate,4741
2,LRA,NonWildland,1954
3,LRA,Very High,1008
4,SRA,High,5824
5,SRA,Moderate,4188
6,SRA,Very High,8411


## Preview

In [34]:
# Show the attribute columns of ten stored rows (geometry omitted).
preview_sql = "SELECT responsibility, hazard_code, hazard_class, source FROM hazard_fhsz LIMIT 10;"
pd.read_sql(preview_sql, engine)

Unnamed: 0,responsibility,hazard_code,hazard_class,source
0,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
1,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
2,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
3,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
4,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
5,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
6,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
7,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
8,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
9,LRA,-3,NonWildland,CAL FIRE FHSZ (LRA)
