In [None]:
import fsspec
import panel
from pynhd import NLDI, WaterData
import zarr
import xarray as xr
import pandas as pd
import dask.dataframe as dd
import dask_geopandas
import hvplot.pandas
import hvplot.xarray
import geopandas as gpd
from matplotlib import pyplot as plt
#from zarr.storage import KVStore
import math

In [None]:
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Parameters
    ----------
    size_bytes : int or float
        Non-negative number of bytes.

    Returns
    -------
    str
        ``"0B"`` when ``size_bytes`` is zero. Otherwise the value divided by
        the chosen power of 1024, rounded to two decimals and followed by a
        space and the unit (``B`` through ``YB``), e.g. ``"1.5 KB"``.
        Values too large for ``YB`` are still expressed in ``YB``.

    Raises
    ------
    ValueError
        If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError(f"size_bytes must be non-negative, got {size_bytes!r}")

    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    # Pick the unit by repeated division rather than floor(log(size, 1024)):
    #  * the index can never go negative (sub-byte values stay in "B"),
    #  * the index can never run past the last unit (huge values stay in "YB"),
    #  * dividing a float by 1024 is exact, so the choice of unit does not
    #    depend on how a floating-point logarithm happens to round.
    scaled = float(size_bytes)
    i = 0
    while scaled >= 1024 and i < len(size_name) - 1:
        scaled /= 1024
        i += 1

    return f"{round(scaled, 2)} {size_name[i]}"

In [None]:
# Object-storage keys (bucket/path) for the datasets used in this notebook.
# The two HUC12 files are read through the anonymous filesystem (fs_hytest);
# the NHM Zarr store is opened through the credentialed one (fs_hytest_internal).
huc12_geoparquet_path = 'hytest/wbd/huc12/huc12.geoparquet'
huc12_geopackage_path = 'hytest/wbd/huc12/huc12.gpkg'
zarr_path = 'hytest-internal/nhm_prms_v1_1_gridmet/nhm_prms_v1_1_gridmet.zarr'

In [None]:
# Anonymous (no credentials) S3 filesystem pointed at a custom endpoint
# instead of AWS; used for the HUC12 geoparquet and geopackage files.
fs_hytest = fsspec.filesystem(
    's3',
    anon=True,
    client_kwargs={'endpoint_url': 'https://usgs.osn.mghpcc.org'}
)

In [None]:
# Credentialed S3 filesystem on the same endpoint as fs_hytest; used to open
# the NHM Zarr store at zarr_path.
fs_hytest_internal = fsspec.filesystem(
    's3',
    profile='osn-hytest-internal',  # AWS profile name for the bucket holding the credentialed data
    client_kwargs={'endpoint_url': 'https://usgs.osn.mghpcc.org'}
)

# Read in HUC12 (2022-10-06 snapshot) from geoparquet on object storage

In [None]:
# Report the remote object's size in human-readable form before reading it.
print(f'size of huc12 geoparquet: {convert_size(fs_hytest.size(huc12_geoparquet_path))}')
# Open the object in binary mode and read it into a GeoDataFrame; the
# context manager closes the remote file handle once the read is done.
with fs_hytest.open(huc12_geoparquet_path, mode='rb') as f:
    huc12_basins_geoparquet = gpd.read_parquet(f)

# Read in HUC12 (2022-10-06 snapshot) from geopackage on object storage

In [None]:
# Report the remote object's size in human-readable form before reading it.
print(f'size of huc12 geopackage: {convert_size(fs_hytest.size(huc12_geopackage_path))}')
# Open the object in binary mode and read its 'huc12' layer with the GPKG
# driver; the context manager closes the remote file handle afterwards.
with fs_hytest.open(huc12_geopackage_path, mode='rb') as f:
    huc12_basins_geopackage = gpd.read_file(f, layer='huc12', driver="GPKG")  

# Read in NHM zarr from object storage

In [None]:
# Expose the Zarr store as a key-value mapping on the credentialed filesystem.
m = fs_hytest_internal.get_mapper(zarr_path)
# Open with the zarr engine using consolidated metadata. chunks={} asks xarray
# for dask-backed arrays (see the xarray.open_dataset documentation), which the
# later .to_dask_dataframe() call relies on.
nhm_ds = xr.open_dataset(m, engine='zarr', consolidated=True, chunks={})
# Example single-point query (not run); names follow the nhm_ds / .sel(time=...)
# usage elsewhere in this notebook:
# nhm_ds.sel(time='1983-01-01', huc12='010100020101').load()
# Bare expression so the notebook renders the dataset summary.
nhm_ds

# Choose which HUC12 GeoDataFrame to join the NHM data to

In [None]:
# Pick the HUC12 GeoDataFrame used for the join below. Candidates loaded above:
# huc12_basins_geoparquet, huc12_basins_geopackage.
# NOTE(review): huc12_basins_pynhd was also listed as an option here, but it is
# not defined in the cells visible in this notebook — confirm before selecting it.
huc12_gdf = huc12_basins_geoparquet

In [None]:
# Index the basins by their 'huc12' column and wrap them as a dask-geopandas
# frame split into 30 partitions, ready to be merged with the NHM data.
huc12_basins_ddf = dask_geopandas.from_geopandas(huc12_gdf.set_index('huc12'), npartitions=30)

# Choose variable and date to plot

In [None]:
# Name of the nhm_ds data variable to extract and map.
var_to_plot = 'gwres_flow'
# Value selected along the dataset's `time` dimension.
date_to_plot = '1983-01-01'

In [None]:
# Keep only the chosen variable (double brackets keep a Dataset), select the
# chosen date along `time`, and convert the result to a dask DataFrame.
nhm_ddf = nhm_ds[[var_to_plot]].sel(time=date_to_plot).to_dask_dataframe()

# Merge HUC12 data with NHM zarr and plot

In [None]:
# Join the basin geometries to the NHM values on 'huc12' and materialize the
# merged result in memory with .compute().
merged = huc12_basins_ddf.merge(nhm_ddf, on='huc12').compute()
# Rebuild a GeoDataFrame around the 'geometry' column so it can be mapped.
# NOTE(review): the CRS is hard-coded to EPSG:4326 — confirm it matches
# huc12_gdf.crs for whichever source frame was selected.
gdf = gpd.GeoDataFrame(merged, crs="EPSG:4326", geometry='geometry')

In [None]:
# One figure with a single Axes for the map.
fig, ax = plt.subplots(figsize=(15, 5))

# Tick marks and tick labels carry no information on this map, so hide them
# on both axes.
hidden_ticks = dict(bottom=False, left=False, labelbottom=False, labelleft=False)
ax.tick_params(axis='both', **hidden_ticks)

# Colour each HUC12 polygon by the selected variable and attach a legend.
gdf.plot(column=var_to_plot, legend=True, ax=ax)

# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title(f'{var_to_plot} for HUC12s', fontsize=20, pad=10);