#### mergeFlowlines

In [1]:
import geopandas as gpd
import os
import pandas as pd
import time

In [2]:
# Load the HUC4 lookup table from CSV into a DataFrame.
# NOTE(review): `look_up` is not referenced by any of the visible cells, and
# this path uses 'CONUS_ephemeral/data/' whereas `datapath` uses
# 'CONUS_ephemeral_data/' -- confirm the directory is the intended one.
look_up = pd.read_csv('/nas/cee-water/cjgleason/craig/CONUS_ephemeral/data/HUC4_lookup.csv')

In [3]:
# Root data directory; the merge loop joins it with 'HUC2_<code>' to find the
# per-region folders that contain the NHD geodatabases.
datapath = '/nas/cee-water/cjgleason/craig/CONUS_ephemeral_data/'

In [4]:
# Two-digit, zero-padded HUC2 region codes, '01' through '18'
codes_huc2 = ['{:02d}'.format(region) for region in range(1, 19)]

In [5]:
# Start from an empty GeoDataFrame; the merge cell below assigns the combined
# flowlines of every basin to this name.
all_basins = gpd.GeoDataFrame()

In [6]:
# Columns requested when reading the 'NHDFlowline' layer
fieldsF = [
    'GNIS_ID', 'GNIS_Name', 'LengthKM', 'FlowDir',
    'WBArea_Permanent_Identifier',
    'FType', 'FCode',
    'NHDPlusID', 'VPUID',
    'geometry',
]

# Columns requested when reading the 'NHDPlusFlowlineVAA' layer
fieldsVAA = [
    'NHDPlusID',
    'StreamOrde', 'FromNode', 'ToNode',
    'LevelPathI', 'TerminalFl', 'TotDASqKm',
    'VPUID',
]

# Columns requested when reading the 'NHDPlusEROMMA' layer
fieldsEROMMA = ['NHDPlusID', 'QBMA', 'VPUID']

In [None]:
# Read in each set of flowlines in every HUC2 basin, then merge the VAA and
# EROMMA attribute tables onto them by NHDPlusID / VPUID.
tic = time.time()

# Key columns shared by the flowline, VAA and EROMMA layers
merge_keys = ['NHDPlusID', 'VPUID']

# Collect the per-HUC4 frames and concatenate once at the end. Concatenating
# inside the loop re-copies everything read so far on each iteration, and
# re-running the cell would keep appending onto the previous result.
basin_frames = []

for huc2 in codes_huc2:
    huc2_dir = os.path.join(datapath, 'HUC2_' + huc2)

    # Get all HUC4 GDB folder names for the current HUC2 (exclude WBD).
    # Sorted so the row order does not depend on os.listdir's arbitrary order.
    sub_paths = sorted(fn for fn in os.listdir(huc2_dir)
                       if fn.startswith('NHD'))

    for huc4_name in sub_paths:
        # Each folder holds a geodatabase named after the folder itself
        path = os.path.join(huc2_dir, huc4_name, huc4_name + '.gdb')

        # Flowlines
        new_basin = gpd.read_file(filename=path, layer='NHDFlowline',
                                  columns=fieldsF)
        # VAA
        vaa = gpd.read_file(filename=path, layer='NHDPlusFlowlineVAA',
                            columns=fieldsVAA)
        # Merge on VAA (default inner join: flowlines with no matching
        # VAA record are dropped)
        new_basin = new_basin.merge(vaa, on=merge_keys)

        # EROMMA
        eromma = gpd.read_file(filename=path, layer='NHDPlusEROMMA',
                               columns=fieldsEROMMA)
        # Merge on EROMMA (inner join again, so unmatched reaches are dropped)
        new_basin = new_basin.merge(eromma, on=merge_keys)

        # Queue the merged basin for the single concatenation below
        basin_frames.append(new_basin)

# Build the combined table in one pass. Each basin keeps its own row index,
# as before. pd.concat raises on an empty list, so fall back to an empty
# GeoDataFrame when no geodatabases were found.
all_basins = pd.concat(basin_frames) if basin_frames else gpd.GeoDataFrame()

print('{:.2f}s to make merged shapefile.'.format(time.time()-tic))

In [22]:
# Preview the merged flowlines table (rendered as a truncated head/tail view)
all_basins

Unnamed: 0,FCode,FType,FlowDir,GNIS_ID,GNIS_Name,LengthKM,NHDPlusID,VPUID,WBArea_Permanent_Identifier,geometry,FromNode,LevelPathI,StreamOrde,TerminalFl,ToNode,TotDASqKm,QBMA
0,55800,558,1,,,1.678533,5.000100e+12,0101,{A44FF3D6-77AE-475F-ABF4-A69F85C427D6},"MULTILINESTRING Z ((-68.58938 48.05766 0, -68....",5.000100e+12,5.000100e+12,2,0,5.000100e+12,9.3662,6.942256
1,46006,460,1,,,3.339840,5.000100e+12,0101,,"MULTILINESTRING Z ((-67.8897 47.20717 0, -67.8...",5.000100e+12,5.000100e+12,1,0,5.000100e+12,4.3692,2.581971
2,46003,460,1,,,0.836389,5.000100e+12,0101,,"MULTILINESTRING Z ((-70.32724 46.44595 0, -70....",5.000100e+12,5.000100e+12,2,0,5.000100e+12,1.9543,1.616468
3,46006,460,1,-1,Burntland Brook,0.724000,5.000100e+12,0101,,"MULTILINESTRING Z ((-67.3152 46.26263 0, -67.3...",5.000100e+12,5.000100e+12,2,0,5.000100e+12,14.5836,10.966773
4,46006,460,1,-1,North Branch Becaguimec Stream,0.426000,5.000100e+12,0101,,"MULTILINESTRING Z ((-67.26483 46.33892 0, -67....",5.000100e+12,5.000100e+12,3,0,5.000100e+12,41.0453,36.687272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97665,46007,460,1,,,0.069000,5.000100e+13,1803,,"MULTILINESTRING Z ((-119.27876 36.99176 0, -11...",5.000100e+13,5.000100e+13,1,0,5.000100e+13,0.0018,0.000703
97666,46007,460,1,,,0.078000,5.000100e+13,1803,,"MULTILINESTRING Z ((-119.26894 36.99076 0, -11...",5.000100e+13,5.000100e+13,1,0,5.000100e+13,0.0025,0.000935
97667,46006,460,1,,,0.100112,5.000100e+13,1803,,"MULTILINESTRING Z ((-118.20324 36.41839 0, -11...",5.000100e+13,5.000100e+13,1,0,5.000100e+13,0.0147,0.003155
97668,46006,460,1,00272097,Little Kern River,2.937799,5.000100e+13,1803,,"MULTILINESTRING Z ((-118.43785 36.18062 0, -11...",5.000100e+13,5.000100e+13,5,0,5.000100e+13,338.7869,223.982190


**Divisions**

In [4]:
# Read in the shapefile of physiographic divisions as a GeoDataFrame
# Reference: https://www.sciencebase.gov/catalog/item/631405bbd34e36012efa304e
physio = gpd.read_file('/nas/cee-water/cjgleason/craig/CONUS_ephemeral_data/other_shapefiles/physio.shp')

In [5]:
# Inspect the raw physiographic polygons (one row per polygon, with
# DIVISION / PROVINCE / SECTION attributes) before dissolving
physio

Unnamed: 0,AREA,PERIMETER,PHYSIODD_,PHYSIODD_I,FCODE,FENCODE,DIVISION,PROVINCE,SECTION,PROVCODE,geometry
0,40.121,36.938,2,72,122,12b,INTERIOR PLAINS,CENTRAL LOWLAND,WESTERN LAKE,12,"POLYGON ((-103.00201 49.00395, -102.94103 49.0..."
1,21.976,39.951,3,59,131,13a,INTERIOR PLAINS,GREAT PLAINS,"MISSOURI PLATEAU, GLACIATED",13,"POLYGON ((-113.55362 48.99719, -112.19278 48.9..."
2,2.706,18.014,4,33,241,24a,PACIFIC MOUNTAIN SYSTEM,PACIFIC BORDER,PUGET TROUGH,24,"POLYGON ((-122.35828 47.59034, -122.33666 47.5..."
3,3.636,8.140,5,34,231,23a,PACIFIC MOUNTAIN SYSTEM,CASCADE-SIERRA MOUNTAINS,NORTHERN CASCADE MOUNTAINS,23,"POLYGON ((-122.14882 49.00173, -120.85696 48.9..."
4,30.059,28.208,6,48,190,19,ROCKY MOUNTAIN SYSTEM,NORTHERN ROCKY MOUNTAINS,,19,"POLYGON ((-119.67419 49.00147, -118.84358 48.9..."
...,...,...,...,...,...,...,...,...,...,...,...
496,0.001,0.123,498,496,33,3c,ATLANTIC PLAIN,COASTAL PLAIN,FLORIDIAN,3,"POLYGON ((-81.42094 24.645, -81.46506 24.66677..."
497,0.000,0.072,499,498,33,3c,ATLANTIC PLAIN,COASTAL PLAIN,FLORIDIAN,3,"POLYGON ((-81.42094 24.645, -81.41373 24.67262..."
498,0.002,0.231,500,499,33,3c,ATLANTIC PLAIN,COASTAL PLAIN,FLORIDIAN,3,"POLYGON ((-81.70815 24.58081, -81.70338 24.599..."
499,0.001,0.162,501,501,33,3c,ATLANTIC PLAIN,COASTAL PLAIN,FLORIDIAN,3,"POLYGON ((-82.10853 24.56015, -82.10152 24.575..."


In [6]:
# Dissolve provinces by division: polygons sharing a DIVISION value are merged
# into a single geometry and DIVISION becomes the index of the result.
# NOTE(review): the remaining attribute columns (AREA, PERIMETER, PROVINCE,
# SECTION, ...) appear to be carried over from a single source row per
# division (e.g. INTERIOR PLAINS keeps the AREA/PROVINCE values of input row 0
# in the output below), so they no longer describe the dissolved geometry.
physio = physio.dissolve(by='DIVISION')

In [7]:
# Inspect the dissolved result: one row per physiographic division
physio

Unnamed: 0_level_0,geometry,AREA,PERIMETER,PHYSIODD_,PHYSIODD_I,FCODE,FENCODE,PROVINCE,SECTION,PROVCODE
DIVISION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
APPALACHIAN HIGHLANDS,"MULTIPOLYGON (((-75.02058 40.10976, -75.05604 ...",11.837,36.376,37,57,92,9b,NEW ENGLAND,NEW ENGLAND UPLAND,9
ATLANTIC PLAIN,"MULTIPOLYGON (((-97.18027 26.07179, -97.2122 2...",0.104,3.401,159,167,31,3a,COASTAL PLAIN,EMBAYED,3
INTERIOR HIGHLANDS,"POLYGON ((-91.96892 34.99696, -92.00708 34.942...",11.628,15.892,246,22,141,14a,OZARK PLATEAUS,SPRINGFIELD-SALEM PLATEAUS,14
INTERIOR PLAINS,"MULTIPOLYGON (((-97.22617 32.50676, -97.24682 ...",40.121,36.938,2,72,122,12b,CENTRAL LOWLAND,WESTERN LAKE,12
INTERMONTANE PLATEAUS,"POLYGON ((-116.26199 33.20382, -116.22265 33.2...",13.383,21.026,25,56,201,20a,COLUMBIA PLATEAU,WALLA WALLA PLATEAU,20
LAURENTIAN UPLAND,"MULTIPOLYGON (((-92.08446 46.77362, -92.02456 ...",12.77,26.727,12,79,10,1,SUPERIOR UPLAND,,1
PACIFIC MOUNTAIN SYSTEM,"MULTIPOLYGON (((-121.96342 38.07818, -122.0020...",2.706,18.014,4,33,241,24a,PACIFIC BORDER,PUGET TROUGH,24
ROCKY MOUNTAIN SYSTEM,"POLYGON ((-106.29187 42.77171, -106.18233 42.7...",30.059,28.208,6,48,190,19,NORTHERN ROCKY MOUNTAINS,,19


In [None]:
# Reproject the division polygons to EPSG:3857.
# NOTE(review): this cell has no execution count in the saved notebook, so the
# reprojection may not have been applied in the recorded session.
physio = physio.to_crs(epsg=3857)

In [18]:
# Snap NHDPlusID to whole numbers. This may be unnecessary here: the problem
# Craig ran into came from pre-processing in QGIS.
all_basins['NHDPlusID'] = all_basins['NHDPlusID'].round(decimals=0)

In [19]:
# Write the merged flowlines out as a GeoPackage; the calculateWidths section
# reads this same file back in.
all_basins.to_file('/nas/cee-water/cjgleason/fiona/narrow_rivers/data/nhd_conus_flow_vaa.gpkg', driver='GPKG')

In [20]:
# all_basins.plot()

#### calculateWidths

In [1]:
import geopandas as gpd

In [2]:
# Load the merged flowline GeoPackage (same path the mergeFlowlines section
# writes to)
basins = gpd.read_file('/nas/cee-water/cjgleason/fiona/narrow_rivers/data/nhd_conus_flow_vaa.gpkg')

In [None]:
# Reproject the flowlines to EPSG:3857.
# NOTE(review): this cell has no execution count, and `subset.crs` further
# down still reports EPSG:4269, so the reprojection does not appear to have
# been applied before `subset` was taken in the recorded session.
basins = basins.to_crs(epsg=3857)

In [9]:
# Small working sample: the first five flowlines, all columns
subset = basins.iloc[:5]

In [10]:
# Display the five-row sample
subset

Unnamed: 0,FCode,FType,FlowDir,GNIS_ID,GNIS_Name,LengthKM,NHDPlusID,VPUID,WBArea_Permanent_Identifier,FromNode,LevelPathI,StreamOrde,TerminalFl,ToNode,TotDASqKm,QBMA,geometry
0,55800,558,1,,,1.678533,5000100000000.0,101,{A44FF3D6-77AE-475F-ABF4-A69F85C427D6},5000100000000.0,5000100000000.0,2,0,5000100000000.0,9.3662,6.942256,"MULTILINESTRING Z ((-68.58938 48.05766 0, -68...."
1,46006,460,1,,,3.33984,5000100000000.0,101,,5000100000000.0,5000100000000.0,1,0,5000100000000.0,4.3692,2.581971,"MULTILINESTRING Z ((-67.8897 47.20717 0, -67.8..."
2,46003,460,1,,,0.836389,5000100000000.0,101,,5000100000000.0,5000100000000.0,2,0,5000100000000.0,1.9543,1.616468,"MULTILINESTRING Z ((-70.32724 46.44595 0, -70...."
3,46006,460,1,-1.0,Burntland Brook,0.724,5000100000000.0,101,,5000100000000.0,5000100000000.0,2,0,5000100000000.0,14.5836,10.966773,"MULTILINESTRING Z ((-67.3152 46.26263 0, -67.3..."
4,46006,460,1,-1.0,North Branch Becaguimec Stream,0.426,5000100000000.0,101,,5000100000000.0,5000100000000.0,3,0,5000100000000.0,41.0453,36.687272,"MULTILINESTRING Z ((-67.26483 46.33892 0, -67...."


In [11]:
# Check the coordinate reference system of the sample (the recorded output
# is the geographic CRS EPSG:4269, i.e. coordinates in degrees)
subset.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands. British Virgin Islands

In [None]:
# Spatial inner join keeping rows of `gdf_wm` whose geometry is within a
# geometry of `nhd_wm`.
# NOTE(review): neither `gdf_wm` nor `nhd_wm` is defined in the visible
# notebook and this cell has no execution count; on a fresh kernel it would
# raise NameError. Confirm which frames were intended (the names suggest
# EPSG:3857 versions of other frames -- TODO verify).
test = gpd.sjoin(gdf_wm, nhd_wm, how='inner', predicate='within')

In [None]:
# Prep the data (per the recommendations of Craig Brinkerhoff)

In [23]:
# Restrict to stream-type and artificial-path reaches, identified by FCode
all_basins = all_basins.loc[
    all_basins['FCode'].isin([46000, 46003, 46006, 46007, 55800])
]

In [25]:
# Keep only flowlines that are not in lakes
## REMOVES THE CONNECTICUT
# all_basins =
# all_basins.loc[all_basins.WBArea_Permanent_Identifier.isnull()]

In [26]:
# Drop terminal paths: retain only rows whose TerminalFl flag equals 0
all_basins = all_basins.loc[all_basins['TerminalFl'].eq(0)]

In [28]:
# (rows, columns) of the table before the discharge filter below
all_basins.shape

(22261957, 17)

In [29]:
# Retain reaches whose QBMA discharge is strictly positive
all_basins = all_basins.loc[all_basins['QBMA'].gt(0)]

In [30]:
# (rows, columns) after the discharge filter; compare with the previous shape
# to see how many reaches were dropped
all_basins.shape

(22029705, 17)

In [31]:
# Retain reaches whose stream order (StreamOrde) is strictly positive
all_basins = all_basins.loc[all_basins['StreamOrde'].gt(0)]

In [32]:
# (rows, columns) after the stream-order filter. In the recorded run the row
# count is unchanged from the previous check, i.e. this filter removed nothing.
all_basins.shape

(22029705, 17)