In [None]:
import rioxarray
import geopandas as gpd
import xarray as xr
from xrspatial.zonal import stats
from shapely.geometry import box
from rasterio.features import rasterize



In [None]:
# Run configuration: which raster layer and which VPU (vector processing unit)
# to process. All paths are relative to the notebook's working directory.
layer = 'filldepth' # TODO make a list of raster layers to run (like the Control Table csv)
vpu_id = '1710' #TODO get all vpu_ids from national release GDB

# Both inputs are keyed by the VPU id, so build each path from `vpu_id`.
# Hard-coding the id in one path would let the catchments and the raster
# silently come from different VPUs once `vpu_id` is changed.
in_zone_data_path = f'../high_res_data/NHDPLUS_H_{vpu_id}_HU4_GDB.gdb'
raster_path = f'../high_res_data/NHDPLUS_H_{vpu_id}_HU4_RASTERS/HRNHDPlusRasters{vpu_id}/{layer}.tif'

# Output location; not referenced by the cells visible in this notebook section.
output_path = '../high_res_data/output'

In [3]:
# Open the GeoTIFF for the configured layer and keep band 1 only.
# drop=True discards the leftover scalar `band` coordinate, leaving a
# DataArray indexed by y and x.
# Options not currently passed to open_rasterio: chunks=True, lock=False
raster = rioxarray.open_rasterio(raster_path).sel(band=1, drop=True)
raster

In [None]:
# Read the catchment layer from the geodatabase; these polygons are the
# zones used further down.
vector_data = gpd.read_file(filename=in_zone_data_path, layer='NHDPlusCatchment')
vector_data.head(5)

In [5]:
# Reproject the catchments into the raster's CRS so both layers share one
# coordinate space.
vector_data = vector_data.to_crs(raster.rio.crs)

# Grid description of the raster, reused when the polygons are rasterized.
height = raster.sizes['y']
width = raster.sizes['x']
transform = raster.rio.transform()

# Keep only the catchments that intersect the raster's bounding rectangle.
bounds = raster.rio.bounds()
bbox = box(*bounds)
vector_data_clipped = vector_data.loc[vector_data.geometry.intersects(bbox)]

# (geometry, GridCode) pairs in the form rasterize() accepts.
shapes = list(zip(vector_data_clipped.geometry, vector_data_clipped['GridCode']))

# Preview nine of the pairs (indices 1-9; the pair at index 0 is not shown).
shapes[1:10]

[(<MULTIPOLYGON (((-2113895 3122625, -2113905 3122625, -2113905 3122635, -2113...>,
  26064),
 (<MULTIPOLYGON (((-2110595 3121775, -2110595 3121835, -2110615 3121835, -2110...>,
  3899),
 (<MULTIPOLYGON (((-2107015 3121775, -2107015 3121785, -2107025 3121785, -2107...>,
  584956),
 (<MULTIPOLYGON (((-2118615 3121765, -2118625 3121765, -2118625 3121795, -2118...>,
  22600),
 (<MULTIPOLYGON (((-2108525 3121765, -2108525 3121775, -2108535 3121775, -2108...>,
  4080),
 (<MULTIPOLYGON (((-2116245 3121755, -2116265 3121755, -2116265 3121765, -2116...>,
  784030),
 (<MULTIPOLYGON (((-2106425 3121755, -2106435 3121755, -2106435 3121765, -2106...>,
  529615),
 (<MULTIPOLYGON (((-2109455 3124195, -2109465 3124195, -2109465 3124205, -2109...>,
  20315),
 (<MULTIPOLYGON (((-2108465 3124195, -2108495 3124195, -2108495 3124205, -2108...>,
  15377)]

In [6]:
# Burn each catchment's GridCode into an integer grid that uses the raster's
# shape and affine transform.
zones = rasterize(
    shapes,
    out_shape=(height, width),
    fill=0,  # value given to cells not covered by any catchment
    transform=transform,
    dtype='int32',
)
zones

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [7]:
# Wrap the zone grid in a DataArray that carries the raster's y/x coordinates,
# so the zones and the values line up cell for cell.
zones_da = xr.DataArray(
    zones,
    coords={'y': raster.y, 'x': raster.x},
    dims=('y', 'x'),
)  # .chunk(raster.chunksizes)
zones_da

In [8]:
# Zonal statistics of the raster values, one row per catchment zone.
#
# The zone grid also holds the rasterize fill value (0) for cells outside
# every catchment. Left alone, stats() would report that background as a zone
# of its own, so restrict the calculation to the GridCode values that were
# rasterized.
# NOTE(review): assumes the installed xrspatial ignores ids in `zone_ids`
# that do not occur in the zone grid (e.g. a catchment fully overwritten by
# its neighbours) - confirm against the installed version.
catchment_ids = vector_data_clipped['GridCode'].unique().tolist()

stats_df = stats(
    zones=zones_da,
    values=raster,
    zone_ids=catchment_ids,
    stats_funcs=['mean', 'sum', 'min', 'max', 'count'],
    # Cells equal to the raster's declared nodata value are left out of the statistics.
    nodata_values=raster.rio.nodata,
)
stats_df

KeyboardInterrupt: 