In [1]:
import geopandas as gpd
import os
import pandas as pd

### Set-up

In [15]:
# Directory holding the HUC4 lookup CSV that is read further down
mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/'

In [6]:
# Bin edges handed to pd.cut for the WidthM_* columns:
# every 10 from 0 through 90, followed by the coarser upper breaks
bins = list(range(0, 100, 10)) + [100, 150, 200, 500, 1000]

In [16]:
# Force the identifier columns to be parsed as text so that codes with
# leading zeros are not turned into integers
dtype_dic = dict.fromkeys(['HUC4', 'HUC2', 'toBasin', 'level'], str)

# Load the HUC lookup table from the metadata directory
lookup_csv = os.path.join(mdata_path, 'HUC4_lookup_no_great_lakes.csv')
lookup = pd.read_csv(lookup_csv, dtype=dtype_dic)

In [23]:
# Largest slurm_index in the lookup table; the loops below iterate over
# range(max_slurm + 1) and use each value as a row label in `lookup`.
# NOTE(review): this assumes lookup's index labels coincide with slurm_index
# (0..max with no gaps) - confirm against the CSV.
max_slurm = lookup['slurm_index'].max()

### NHD_prepped

In [27]:
# Root of the prepped NHD parquet files; the loop below looks for them under
# <data_path>/HUC2_<xx>/NHDPLUS_H_<HUC4>_HU4_GDB_prepped.parquet
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped/'

In [31]:
# Add width-bin label columns to each prepped HUC4 parquet and overwrite the
# file in place.
# NOTE(review): rows are fetched with lookup.loc[i, ...], i.e. by index label;
# this presumably relies on the default 0..n-1 index matching slurm_index.
for i in range(max_slurm + 1):
    # HUC4 code for this row; its first two characters give the HUC2 folder
    huc4_id = lookup.loc[i, 'HUC4']
    huc2 = f'HUC2_{huc4_id[0:2]}'
    huc4 = f'NHDPLUS_H_{huc4_id}_HU4_GDB'

    # Parquet file for this basin
    file_path = os.path.join(data_path, huc2, f'{huc4}_prepped.parquet')

    # Load the basin's reaches
    basin = gpd.read_parquet(file_path)

    # Assign each reach to a width interval from `bins`; the interval labels
    # are cast to plain strings so the columns can be written to parquet.
    # NOTE(review): widths outside the bin range (including exactly 0, since
    # pd.cut defaults to right-closed bins) appear to end up as the literal
    # string 'nan' after astype(str) - confirm that is intended.
    for width_col, bin_col in (('WidthM_Min', 'Bin_Min'),
                               ('WidthM_Max', 'Bin_Max')):
        basin[bin_col] = pd.cut(basin[width_col], bins).astype(str)

    # Overwrite the source file with the augmented table
    basin.to_parquet(file_path)

### NHD_prepped_segmented

In [39]:
# Rebind data_path to the segmented outputs; the loop below looks for them under
# <data_path>/HUC2_<xx>/NHDPLUS_H_<HUC4>_HU4_GDB_prepped_segmented.parquet
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented/'

In [40]:
# Add width-bin label columns to each segmented HUC4 parquet and overwrite the
# file in place (same procedure as for NHD_prepped, different file suffix).
# NOTE(review): rows are fetched with lookup.loc[i, ...], i.e. by index label;
# this presumably relies on the default 0..n-1 index matching slurm_index.
for i in range(max_slurm + 1):
    # HUC4 code for this row; its first two characters give the HUC2 folder
    huc4_id = lookup.loc[i, 'HUC4']
    huc2 = f'HUC2_{huc4_id[0:2]}'
    huc4 = f'NHDPLUS_H_{huc4_id}_HU4_GDB'

    # Parquet file for this basin's segmented reaches
    file_path = os.path.join(data_path, huc2,
                             f'{huc4}_prepped_segmented.parquet')

    # Load the basin's segments
    basin = gpd.read_parquet(file_path)

    # Assign each segment to a width interval from `bins`; the interval labels
    # are cast to plain strings so the columns can be written to parquet.
    # NOTE(review): widths outside the bin range (including exactly 0, since
    # pd.cut defaults to right-closed bins) appear to end up as the literal
    # string 'nan' after astype(str) - confirm that is intended.
    for width_col, bin_col in (('WidthM_Min', 'Bin_Min'),
                               ('WidthM_Max', 'Bin_Max')):
        basin[bin_col] = pd.cut(basin[width_col], bins).astype(str)

    # Overwrite the source file with the augmented table
    basin.to_parquet(file_path)