# Making LOCA geodatabase

In [26]:
import geopandas as gpd
import xarray as xr
from shapely.geometry import Point
from time import time

import rioxarray  # for the extension to load
import rasterio

import numpy as np
import pandas as pd

crs = "EPSG:3310"

In [33]:
# Load census tract shapefile or GeoJSON
census_tracts = gpd.read_file(
    "CA_tiger_2023_tract/"
)  # GeoDataFrame of census tracts for LA County

### we want to convert the census tract polygons from lon-lat coordinates
### to x-y coordinates like we have for our WRF output.
### this is because sjoin_nearest() can better calculate the distances
### between grid points and polygons when we use an area-preserving projection,
### which WRF natively uses.

# subset geodataframe to the minimum of what we need
# so we save memory
census_tracts = census_tracts[["geometry", "GEOID"]]
census_tracts = census_tracts.to_crs(crs)

In [28]:
# get LOCA data - we just need the grid information from it so we take one time step
above_90 = xr.open_dataset("avg_extreme_heat_days_over_90.nc", engine="netcdf4")
ds = above_90.sel(time_slice_name="mid-century")


# we want to load the dataset into memory here; it will make future computations much faster
ds = ds.compute()
display(ds)

In [29]:
name = "avg annual # extreme heat days above 90 F"

In [34]:
# convert to geodataframe and spatially join
df = ds.squeeze().to_dataframe().reset_index().set_index("time_slice_name")
gdf = gpd.GeoDataFrame(data=df, geometry=gpd.points_from_xy(df.lat, df.lon)).set_crs(
    crs
)

# save the original WRF grid points to the geodataframe
gdf[["x", "y"]] = gdf.apply(
    lambda p: (p.geometry.x, p.geometry.y), axis=1, result_type="expand"
)
# spatially join the gridded WRF data to the census tract polygons
mapped_tracts = census_tracts.sjoin_nearest(gdf, how="left")
# subset to the columns we care about
mapped_tracts = mapped_tracts[["GEOID", "x", "y", "geometry"]]
display(mapped_tracts)

Unnamed: 0,GEOID,x,y,geometry
0,06001442700,32.546875,-124.390625,"POLYGON ((-177981.55 -51137.614, -177979.26 -5..."
1,06001442800,32.546875,-124.390625,"POLYGON ((-176688.486 -52219.888, -176682.583 ..."
2,06037204920,41.984375,-124.390625,"POLYGON ((165979.248 -442413.375, 165979.82 -4..."
3,06037205110,41.984375,-124.390625,"POLYGON ((164414.225 -441707.563, 164427.648 -..."
4,06037320101,41.984375,-124.390625,"POLYGON ((143676.272 -411800.427, 143812.312 -..."
...,...,...,...,...
9124,06059001303,41.984375,-124.390625,"POLYGON ((188706.771 -452501.282, 188733.86 -4..."
9125,06059001304,41.984375,-124.390625,"POLYGON ((188697.755 -452099.655, 188778.369 -..."
9126,06059001401,41.984375,-124.390625,"POLYGON ((189454.205 -450215.987, 189454.373 -..."
9127,06013367200,32.546875,-124.390625,"POLYGON ((-205775.612 -3326.14, -205774.55 -33..."


In [35]:
# set the geometry to the original WRF x-y grid points
mapped_tracts = mapped_tracts.set_geometry(
    gpd.points_from_xy(mapped_tracts.x, mapped_tracts.y)
)[["GEOID", "geometry"]]
display(mapped_tracts)

Unnamed: 0,GEOID,geometry
0,06001442700,POINT (32.547 -124.391)
1,06001442800,POINT (32.547 -124.391)
2,06037204920,POINT (41.984 -124.391)
3,06037205110,POINT (41.984 -124.391)
4,06037320101,POINT (41.984 -124.391)
...,...,...
9124,06059001303,POINT (41.984 -124.391)
9125,06059001304,POINT (41.984 -124.391)
9126,06059001401,POINT (41.984 -124.391)
9127,06013367200,POINT (32.547 -124.391)


In [37]:
mapped_tracts.to_file("loca_points_to_geoid.gdb")