In [40]:
import geopandas as gpd
import pandas as pd
import xarray as xr
import xvec
from shapely import Point


Highways Data - Shape File from https://geodata.bts.gov/datasets/usdot::north-american-roads/about
North Spokane Corrdor - data pulled from openstreetmap in osm_highways.py

In [72]:
SPOKANE = {"geometry": [Point(-117.41, 47.65)]}
CRS = "EPSG:4326"
CONVERSION_CRS = "EPSG:2285"
METERS = 100 * 1609 # 100 miles in meters
gdf_spokane = gpd.GeoDataFrame(SPOKANE, crs=CRS)
gdf_spokane = gdf_spokane.to_crs(CONVERSION_CRS)
gdf_spokane["geometry"] = gdf_spokane.buffer(METERS)
gdf_spokane_50_mile_radius = gdf_spokane.to_crs(CRS)

gdf = gpd.read_file("/home/ubuntu/climate-risk-map/experimentation/student_projects/amazon_wildfire_risk_spring_2025/data/highways/NTAD_North_American_Roads_-6941702301048783378/North_American_Roads.shp")
gdf_wa = gdf.loc[(gdf["JURISNAME"]=="Washington")]
north_spokane_corridor = gpd.read_file("/home/ubuntu/climate-risk-map/experimentation/student_projects/amazon_wildfire_risk_spring_2025/data/highways/north_spokane_corridor.geojson")

gdfs = []

north_spokane_corridor["NAME"] = "north_spokane_corridor"
north_spokane_corridor["ID"] = north_spokane_corridor["id"]
gdfs.append(north_spokane_corridor[["ID", "NAME", "geometry"]])

highway_nums = ["I90", "U395", "U2"]
for highway in highway_nums:

    gdf_temp = gdf_wa.loc[gdf_wa["ROADNUM"]==highway].copy()
    gdf_temp["NAME"] = highway
    gdfs.append(gdf_temp[["ID", "NAME", "geometry"]])

gdf_highways = pd.concat(gdfs)
gdf_highways_spokane = gpd.sjoin(left_df=gdf_highways, right_df=gdf_spokane_50_mile_radius, how="inner")
gdf_highways_spokane = gdf_highways_spokane.drop(columns=["index_right"])
gdf_highways_spokane = gdf_highways_spokane.set_index("ID")

Skipping field node_ids: unsupported OGR type: 13


In [97]:
def zonal_aggregation_linestring(
    climate: xr.Dataset,
    climate_variable: str,
    infra: gpd.GeoDataFrame,
    x_dim: str,
    y_dim: str,
    time_col: str,
    aggregation: str
) -> pd.DataFrame:
    """Linestring cannot be zonally aggreated, so must be broken into points"""

    sampled_points = []
    for idx, row in infra.iterrows():
        line = row["geometry"]  # type == shapely.LineString
        points = list(line.coords)
        point_rows = [(idx, Point(point)) for point in points]
        sampled_points.extend(point_rows)

    if sampled_points:
        df_sampled_points = pd.DataFrame(
            sampled_points, columns=["ID", "geometry"]
        )
        gdf_sampled_points = gpd.GeoDataFrame(
            df_sampled_points, geometry="geometry", crs=infra.crs
        ).set_index("ID")
        ds_linestring_points = climate.xvec.extract_points(
            gdf_sampled_points.geometry, x_coords=x_dim, y_coords=y_dim, index=True
        )

        if time_col:
            df_linestring = (
                ds_linestring_points.stack(id_dim=("geometry", "decade_month"))
                .to_dataframe()
                .reset_index(drop=True)[
                    ["ID", "decade_month", "geometry"] + list(ds_linestring_points.data_vars)
                ]
            )
            df_linestring["decade"] = df_linestring["decade_month"].apply(lambda x: int(x[0:4]))
            df_linestring["month"] = df_linestring["decade_month"].apply(lambda x: int(x[-2:]))
            df_linestring.drop(columns=["decade_month"], inplace=True)
            group_by_columns = ["ID", "decade", "month"]
        else:
            df_linestring = ds_linestring_points.to_dataframe().reset_index(drop=True)[["ID"] + list(ds_linestring_points.data_vars)]
            group_by_columns = ["ID"]

        # TODO: At this step, we are left with OSM ids broken out into individual points.
        # Depending on the resolution of the climate dataset, there will be different exposure measures
        # along the entire linestring. Different segments of the same line will have different climate exposures,
        # while the entire line is considered one entity. For the time being, I am taking some simple means, mins, and max
        # to move forward with development. In the future, the ideal solution is to break up each osm_id into multiple line segments.
        # We can determine the segments by seeing where the mean, median, etc... values change along the points. We can group these series of points into multiple linestrings,
        # and then store the linestring segments in the database with their individual exposure values. A single osm id may be comprised of between 1 and N line segments.
        # The downside to this is extra segment geometries will need to be stored, possibly in their own tables. This also creates the eventual output dataset to the user more complicated
        # as a single entity may now have multiple records of exposure, one for each line segment.

        df_linestring = (
            df_linestring.drop_duplicates()
            .groupby(group_by_columns)
            .agg(
                {
                    climate_variable: aggregation,
                }
            )
            .reset_index()
        )
    else:
        df_linestring = pd.DataFrame()
    return df_linestring

In [98]:
ds_burn_probability = xr.open_dataset("s3://uw-crl/climate-risk-map/backend/climate/usda/BP_CONUS.zarr")
ds_burn_probability = ds_burn_probability.assign_coords({"lon": (((ds_burn_probability["lon"] + 180) % 360) - 180)})
ds_burn_probability = ds_burn_probability.sortby("lon")
ds_burn_probability = ds_burn_probability.sortby("lat")
ds_burn_probability = ds_burn_probability.sel({"lat": slice(44, 50), "lon": slice(-122, -115)})


ds_fwi = xr.open_dataset("s3://uw-crl/climate-risk-map/backend/climate/NEX-GDDP-CMIP6/DECADE_MONTH_ENSEMBLE/ssp370/fwi_decade_month_ssp370.zarr")
ds_fwi = ds_fwi.assign_coords({"lon": (((ds_fwi["lon"] + 180) % 360) - 180)})
ds_fwi = ds_fwi.sortby("lon")
ds_fwi = ds_fwi.sortby("lat")
ds_fwi = ds_fwi.sel({"lat": slice(44, 50), "lon": slice(-122, -115)})

In [99]:
df_burn_probability = zonal_aggregation_linestring(
    climate=ds_burn_probability,
    climate_variable="burn_probability",
    infra=gdf_highways_spokane,
    x_dim="lon",
    y_dim="lat",
    time_col=None,
    aggregation="max"
)
df_fwi = zonal_aggregation_linestring(
    climate=ds_fwi,
    climate_variable="ensemble_mean",
    infra=gdf_highways_spokane,
    x_dim="lon",
    y_dim="lat",
    time_col="decade_month",
    aggregation="mean"
)

In [100]:
gdf_highways_spokane.merge(df_burn_probability, on="ID", how="left")

Unnamed: 0,ID,NAME,geometry,burn_probability
0,44755221,north_spokane_corridor,"LINESTRING (-117.37032 47.77269, -117.37072 47...",0.000214
1,44755232,north_spokane_corridor,"LINESTRING (-117.37072 47.77313, -117.37265 47...",0.000534
2,44755402,north_spokane_corridor,"LINESTRING (-117.37118 47.77313, -117.37069 47...",0.000214
3,44755840,north_spokane_corridor,"LINESTRING (-117.37069 47.77259, -117.3699 47....",0.000232
4,44756053,north_spokane_corridor,"LINESTRING (-117.35958 47.75238, -117.35967 47...",0.000474
...,...,...,...,...
626,582136,U2,"LINESTRING (-117.36429 47.77863, -117.3641 47....",0.000473
627,582131,U2,"LINESTRING (-117.35925 47.78028, -117.35889 47...",0.000607
628,582142,U2,"LINESTRING (-117.35828 47.7806, -117.35793 47....",0.000607
629,582141,U2,"LINESTRING (-117.35512 47.78252, -117.35502 47...",0.000450


In [103]:
df_fwi

Unnamed: 0,ID,decade,month,ensemble_mean
0,480749,2010,1,0.399288
1,480749,2010,2,0.397792
2,480749,2010,3,0.695042
3,480749,2010,4,0.611033
4,480749,2010,5,1.071471
...,...,...,...,...
75715,1372535140,2100,8,8.729251
75716,1372535140,2100,9,6.219358
75717,1372535140,2100,10,2.123783
75718,1372535140,2100,11,0.385233
