# Geofabric v1.1 to parquet

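This notebook converts the GIS features of the Geospatial Fabric for the National Hydrologic Model, version 1.1 (https://doi.org/10.5066/P971JAGF) into parquet files and saves them to the OSN pod. Each run converts one layer of the geodatabase (chosen with the `layer` variable below) into one parquet file; re-run the notebook with a different `layer` to produce the others.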

In [None]:
import fsspec
import geopandas as gpd
import intake
import os
import yaml
import intake_sqlite

In [None]:
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Parameters
    ----------
    size_bytes : int or float
        Non-negative size in bytes.

    Returns
    -------
    str
        ``"0B"`` for a size of zero; otherwise ``"<value> <unit>"`` where the
        value is rounded to two decimals, e.g. ``"1.5 KB"``.

    Raises
    ------
    ValueError
        If ``size_bytes`` is negative.
    """
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Pick the unit with exact comparisons instead of a floating-point log:
    # this needs no `math` import, cannot misplace exact powers of 1024, and
    # stops at the largest unit instead of indexing past the end of the tuple.
    i = 0
    while i < len(size_name) - 1 and size_bytes >= 1024 ** (i + 1):
        i += 1
    # True division keeps the value a float, so 1024 formats as "1.0 KB".
    s = round(size_bytes / 1024 ** i, 2)
    return "%s %s" % (s, size_name[i])

In [None]:
# Open the intake catalog from its path relative to this notebook, then
# list the entry names it contains.
catalog_path = "../../../dataset_catalog/hytest_intake_catalog.yml"
hytest_cat = intake.open_catalog(catalog_path)
list(hytest_cat)

In [None]:
# Build the URL of the zipped GFv1.1 GDB on the OSN pod from its catalog entry.
gf_zip_entry = hytest_cat['geofabric_v1_1-zip-osn']
# Drop the 's3://' scheme so the bucket/key path can be appended to the endpoint.
fp = gf_zip_entry.urlpath.split('s3://')[1]
endpoint_url = gf_zip_entry.storage_options['client_kwargs']['endpoint_url']
# The 'zip+' prefix marks the target as a zip archive for the reader.
gdb_file = f'zip+{endpoint_url}/{fp}'

In [None]:
# select layer of GDP to write to parquet
layer = 'nhru_v1_1'
#layer = 'nhru_v1_1_simp'
#layer = 'nsegment_v1_1'
#layer = 'POIs_v1_1'
#layer = 'TBtoGFv1_POIs'
gf = gpd.read_file(gdb_file, layer=layer)

In [None]:
# Preview the first rows of the layer just read, as a sanity check before writing.
gf.head()

In [None]:
# S3 filesystem handle used below to write to the OSN pod.
# 'osn-hytest' is the aws profile name for the bucket you are writing data to.
osn_client_kwargs = {'endpoint_url': 'https://usgs.osn.mghpcc.org'}
fs_hytest = fsspec.filesystem('s3', profile='osn-hytest', client_kwargs=osn_client_kwargs)

In [None]:
# Destination object key; the layer name is embedded so each layer gets its own file.
fname = f'hytest/geofabric_v1_1/GFv1.1_{layer}.geoparquet'

# Write the layer as parquet straight into the opened object on the OSN pod.
with fs_hytest.open(fname, mode='wb') as parquet_out:
    gf.to_parquet(parquet_out)

In [None]:
# Look up the size of the object just written and report it in readable units.
parquet_bytes = fs_hytest.size(fname)
print(f'size of parquet: {convert_size(parquet_bytes)}')