# Ingest Historical Fire Perimeters (FRAP)

This notebook ingests CAL FIRE FRAP fire perimeter polygons, applies a year filter, validates geometry, and writes a canonical fire perimeter table to PostGIS. The resulting table is used later for screening-level spatial overlap with transmission lines.

## Inputs
- FRAP fire perimeter geodatabase (GDB)

## Outputs
- PostGIS table: `fire_perimeters`
  - Columns: fire_year, fire_name, source, geom

### Imports

In [11]:
import os
from pathlib import Path

import pandas as pd
import geopandas as gpd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from shapely.geometry import Polygon, MultiPolygon
import fiona

### Define Paths

In [12]:
CWD = Path.cwd()
ROOT = CWD.parent if CWD.name.lower() == "notebooks" else CWD

DATA_RAW = ROOT / "data" / "raw"
FIRE_DIR = DATA_RAW / "fire_perimeters" / "frap"

load_dotenv(ROOT / ".env")

print("ROOT:", ROOT)
print("FIRE_DIR:", FIRE_DIR)

ROOT: C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure
FIRE_DIR: C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure\data\raw\fire_perimeters\frap


## Database Connection

In [13]:
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "wildfire_grid")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

missing = [k for k, v in {"DB_USER": DB_USER, "DB_PASSWORD": DB_PASSWORD}.items() if not v]
if missing:
    raise ValueError(f"Missing env vars: {missing}. Check your .env at {ROOT/'.env'}")

db_url = f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
engine = create_engine(db_url)

with engine.begin() as conn:
    conn.execute(text("SELECT 1;"))

print("DB:", DB_NAME, "@", DB_HOST, DB_PORT)

DB: wildfire_grid @ localhost 5432


### Configure FRAP Input

In [14]:
FIRE_GDB = FIRE_DIR / "fire24_1.gdb"
if not FIRE_GDB.exists():
    raise FileNotFoundError(f"Missing FRAP GDB: {FIRE_GDB}")

YEAR_MIN = 1990
YEAR_MAX = 2024

print("FIRE_GDB:", FIRE_GDB)
print("Year window:", YEAR_MIN, "to", YEAR_MAX)

FIRE_GDB: C:\dev\wildfire\Wildfire-Exposure-of-California-Transmission-Infrastructure\data\raw\fire_perimeters\frap\fire24_1.gdb
Year window: 1990 to 2024


### Discover Layers

In [15]:
layers = fiona.listlayers(FIRE_GDB)
if not layers:
    raise ValueError(f"No layers found in GDB: {FIRE_GDB}")

print("Available FRAP layers:", layers)

# Default to the first layer (typical FRAP packaging). Change if needed.
FIRE_LAYER = layers[0]
print("Using FRAP layer:", FIRE_LAYER)

gdf_raw = gpd.read_file(FIRE_GDB, layer=FIRE_LAYER)

if gdf_raw.empty:
    raise ValueError("FRAP layer loaded but contains zero rows.")

print("Rows:", len(gdf_raw))
print("CRS:", gdf_raw.crs)
print("Geom types:", gdf_raw.geom_type.value_counts().to_dict())
print("Columns:", list(gdf_raw.columns))
gdf_raw.head(3)

Available FRAP layers: ['rxburn24_1', 'firep24_1']
Using FRAP layer: rxburn24_1
Rows: 10675
CRS: EPSG:3310
Geom types: {'MultiPolygon': 10675}
Columns: ['YEAR_', 'STATE', 'AGENCY', 'UNIT_ID', 'TREATMENT_ID', 'TREATMENT_NAME', 'TREATMENT_TYPE', 'START_DATE', 'END_DATE', 'TREATED_AC', 'GIS_ACRES', 'RX_CONSUM', 'PRE_CON_CLASS', 'POST_CON_CLASS', 'Shape_Length', 'Shape_Area', 'geometry']


Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,TREATMENT_ID,TREATMENT_NAME,TREATMENT_TYPE,START_DATE,END_DATE,TREATED_AC,GIS_ACRES,RX_CONSUM,PRE_CON_CLASS,POST_CON_CLASS,Shape_Length,Shape_Area,geometry
0,2024.0,CA,CSP,209,{90B173CB-5FBD-4649-B651-905B9853A323},Sonoma Coast SP Rx,1.0,2024-09-13 00:00:00+00:00,2024-09-20 00:00:00+00:00,35.854112,35.854256,,,,2294.333934,145097.026812,"MULTIPOLYGON (((-269883.341 51335.642, -269873..."
1,2024.0,CA,CSP,745,{C9462E66-6000-4EFE-BF16-4DBD11CA5B50},Harmony Headlands SP Rx,1.0,2024-10-24 00:00:00+00:00,2024-10-24 00:00:00+00:00,170.871449,170.872131,,,,4018.530429,691494.979357,"MULTIPOLYGON (((-90500.086 -281511.329, -90498..."
2,2024.0,CA,CSP,423,{E01035F3-66BF-47BB-A3B3-C788345DBBD6},Butano Rx,1.0,2024-10-29 00:00:00+00:00,2024-11-11 00:00:00+00:00,96.924613,96.925003,,,,7679.786271,392241.585996,"MULTIPOLYGON (((-206414.874 -86910.56, -206369..."


## Standardize

In [16]:
YEAR_CANDIDATES = ["YEAR_", "YEAR", "FIRE_YEAR", "FIREYEAR", "YEAR"]
NAME_CANDIDATES = ["FIRE_NAME", "FIRENAME", "INCIDENTNAME", "INCIDENT_NM", "NAME", "FIRE"]

year_col = next((c for c in YEAR_CANDIDATES if c in gdf_raw.columns), None)
if year_col is None:
    raise KeyError(f"Could not find a fire year column. Tried: {YEAR_CANDIDATES}")

name_col = next((c for c in NAME_CANDIDATES if c in gdf_raw.columns), None)

print("Detected year column:", year_col)
print("Detected name column:", name_col if name_col else "(none; will store NULL)")

Detected year column: YEAR_
Detected name column: (none; will store NULL)


### Additional Cleaning

In [17]:
def drop_z(geom):
    if geom is None:
        return None
    try:
        from shapely import force_2d
        return force_2d(geom)
    except Exception:
        return geom

def to_multipolygon(geom):
    if geom is None:
        return None
    if isinstance(geom, Polygon):
        return MultiPolygon([geom])
    if isinstance(geom, MultiPolygon):
        return geom
    return None

## Build GeoDataFrame

In [18]:
g = gdf_raw.copy()

if g.crs is None:
    raise ValueError("FRAP CRS missing; set correct CRS before proceeding.")

g = g[g.geometry.notna()].copy()
g["geometry"] = g["geometry"].apply(drop_z)

g = g[g.geom_type.isin(["Polygon", "MultiPolygon"])].copy()

g["fire_year"] = pd.to_numeric(g[year_col], errors="coerce").astype("Int64")

if name_col:
    g["fire_name"] = g[name_col].astype("string")
else:
    g["fire_name"] = pd.Series([None] * len(g), dtype="string")

g["source"] = "CAL FIRE FRAP Fire Perimeters"

g = g.to_crs(epsg=4326)

g["geom"] = g["geometry"].apply(to_multipolygon)
g = g[g["geom"].notna()].copy()

g = g[g["fire_year"].notna()].copy()
g = g[(g["fire_year"] >= YEAR_MIN) & (g["fire_year"] <= YEAR_MAX)].copy()

fire = gpd.GeoDataFrame(
    g[["fire_year", "fire_name", "source", "geom"]].copy(),
    geometry="geom",
    crs="EPSG:4326"
)

print("Standardized rows:", len(fire))
print("Fire year range:", int(fire["fire_year"].min()), int(fire["fire_year"].max()))
print("Geom type counts:", fire.geom_type.value_counts().to_dict())
fire.head(3)

Standardized rows: 9474
Fire year range: 1990 2024
Geom type counts: {'MultiPolygon': 9474}


Unnamed: 0,fire_year,fire_name,source,geom
0,2024,,CAL FIRE FRAP Fire Perimeters,"MULTIPOLYGON (((-123.09612 38.43852, -123.0960..."
1,2024,,CAL FIRE FRAP Fire Perimeters,"MULTIPOLYGON (((-120.99833 35.47906, -120.9983..."
2,2024,,CAL FIRE FRAP Fire Perimeters,"MULTIPOLYGON (((-122.32939 37.21174, -122.3288..."


## Validation

In [19]:
print("Rows:", len(fire))
print("CRS:", fire.crs)

print("Null fire_year:", int(fire["fire_year"].isna().sum()))
print("Null geom:", int(fire["geom"].isna().sum()))
print("Invalid geometries:", int((~fire.is_valid).sum()))

display(fire["fire_year"].value_counts().sort_index().head(10))
display(fire["fire_year"].value_counts().sort_index().tail(10))

Rows: 9474
CRS: EPSG:4326
Null fire_year: 0
Null geom: 0
Invalid geometries: 257


fire_year
1990    84
1991    58
1992    37
1993    65
1994    64
1995    81
1996    58
1997    77
1998    78
1999    66
Name: count, dtype: Int64

fire_year
2015     152
2016      53
2017     242
2018     446
2019    1034
2020     706
2021     692
2022     605
2023    1138
2024     903
Name: count, dtype: Int64

## Write to PostGIS

In [21]:
TABLE = "fire_perimeters"

with engine.begin() as conn:
    conn.execute(text(f"DROP TABLE IF EXISTS {TABLE};"))

fire.to_postgis(
    name=TABLE,
    con=engine,
    if_exists="replace",
    index=False
)

with engine.begin() as conn:
    conn.execute(text(f"CREATE INDEX IF NOT EXISTS idx_{TABLE}_geom_gist ON {TABLE} USING GIST (geom);"))
    conn.execute(text(f"CREATE INDEX IF NOT EXISTS idx_{TABLE}_year ON {TABLE} (fire_year);"))
    conn.execute(text(f"ANALYZE {TABLE};"))

print(f"Wrote PostGIS table: {TABLE} (rows={len(fire):,})")

Wrote PostGIS table: fire_perimeters (rows=9,474)


## Preview

In [22]:
cnt = pd.read_sql(f"SELECT COUNT(*) AS n FROM {TABLE};", engine)
display(cnt)

preview = pd.read_sql(
    f"SELECT fire_year, fire_name, source FROM {TABLE} ORDER BY fire_year DESC NULLS LAST LIMIT 10;",
    engine
)
preview

Unnamed: 0,n
0,9474


Unnamed: 0,fire_year,fire_name,source
0,2024,,CAL FIRE FRAP Fire Perimeters
1,2024,,CAL FIRE FRAP Fire Perimeters
2,2024,,CAL FIRE FRAP Fire Perimeters
3,2024,,CAL FIRE FRAP Fire Perimeters
4,2024,,CAL FIRE FRAP Fire Perimeters
5,2024,,CAL FIRE FRAP Fire Perimeters
6,2024,,CAL FIRE FRAP Fire Perimeters
7,2024,,CAL FIRE FRAP Fire Perimeters
8,2024,,CAL FIRE FRAP Fire Perimeters
9,2024,,CAL FIRE FRAP Fire Perimeters
