# PA2 Notebook 2x: Aggregate Reach Net Loads by Cluster

WARNING: This approach succeeds at calculating net reach loads over each COMID,
BUT it can not be used for summing over other geographies.

Comparisons with previous HUC12 and HUC8 summaries show that values are:
- Too low if negative values are preserved
- Too high if negative values are assigned `nan`

Even comparison among HUC12 `tp_load_net` values from this notebook vs nb2b shows big differences for most HUC12 (although some are identical!)

# Installation and Setup

Carefully follow our **[Installation Instructions](README.md#get-started)**, especially including:
- Creating a virtual environment for this repository (step 3)

## Import Python Dependencies

In [2]:
from pathlib import Path
from importlib import reload

import numpy     as np
import pandas    as pd
import geopandas as gpd

# import hvplot.pandas
import holoviews as hv
# import geoviews as gv

In [3]:
# Custom functions for Pollution Assessment
import pollution_assessment as pa

## Set Paths


In [4]:
# Set your project directory to your local folder for your clone of this repository
project_path = Path.cwd().parent
project_path

PosixPath('/Users/aaufdenkampe/Documents/Python/pollution-assessment')

In [5]:
# Assign a path for the geographies folder.
geography_path = project_path / 'geography/'

In [6]:
# Assign a path for the data OUTPUT folder.
data_output_path = project_path / 'stage2/data_output/'

## Test Plotting

In [7]:
# create sample data
data = np.random.normal(size=[50, 2])
df = pd.DataFrame(data, columns=['col1', 'col2'])

# create holoviews graph
hv_plot = hv.Points(df)
hv_plot

# Import Data

## Open Files from Notebooks 2 & 2b

In [8]:
# Geograhies
huc12_outlets_drwi_gdf = gpd.read_parquet(geography_path /'huc12_outlets_drwi_gdf.parquet')
huc10_outlets_drwi_gdf = gpd.read_parquet(geography_path /'huc10_outlets_drwi_gdf.parquet')
huc08_outlets_drwi_gdf = gpd.read_parquet(geography_path /'huc08_outlets_drwi_gdf.parquet')

In [9]:
# Results by COMID
reach_concs_gdf = gpd.read_parquet(data_output_path /'reach_concs_gdf.parquet')
catch_loads_gdf = gpd.read_parquet(data_output_path /'catch_loads_gdf.parquet')

# Net loads over HUC12, using Method 2 from Notebook 2b
huc12_outlet_loads_gdf = gpd.read_parquet(
    data_output_path / 'huc12_outlet_loads_gdf.parquet'
)

In [10]:
reach_concs_gdf.info()
reach_concs_gdf.iloc[:,0:25]

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 19496 entries, 1748535 to 932040370
Data columns (total 56 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   catchment_hectares  19496 non-null  float64 
 1   watershed_hectares  19496 non-null  float64 
 2   maflowv             19496 non-null  float64 
 3   geometry            19494 non-null  geometry
 4   cluster             17358 non-null  category
 5   sub_focusarea       186 non-null    Int64   
 6   nord                18870 non-null  Int64   
 7   nordstop            18844 non-null  Int64   
 8   huc12               19496 non-null  category
 9   streamorder         19496 non-null  int64   
 10  headwater           19496 non-null  int64   
 11  phase               4082 non-null   category
 12  fa_name             4082 non-null   category
 13  in_drb              19496 non-null  boolean 
 14  huc08               19496 non-null  category
 15  huc10               194

Unnamed: 0_level_0,catchment_hectares,watershed_hectares,maflowv,geometry,cluster,sub_focusarea,nord,nordstop,huc12,streamorder,...,huc10,into_dr,tocomid,from_comids,ftype,gnis_name,gnis_id,streamleve,streamorde,Source
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1748535,6496.7052,6501.69,43.699,MULTILINESTRING Z ((-8295323.930 5214456.622 0...,drb,,74914,74914,020401020302,1,...,0204010203,False,1748709,,StreamRiver,East Branch Delaware River,949000,2,1,Reach Concentration
1748537,1663.1712,1664.46,11.189,MULTILINESTRING Z ((-8304623.226 5207684.737 0...,drb,,74913,74913,020401020302,1,...,0204010203,False,1748709,,StreamRiver,,,3,1,Reach Concentration
1748539,1639.4128,1640.70,11.223,MULTILINESTRING Z ((-8316446.558 5197994.113 0...,drb,,74921,74921,020401020305,1,...,0204010203,False,1748543,,StreamRiver,,,4,1,Reach Concentration
1748541,3013.8348,12912.30,86.528,MULTILINESTRING Z ((-8304282.841 5198049.613 0...,drb,,74911,74915,020401020302,2,...,0204010203,False,1748561,"[1748709, 1748711]",StreamRiver,East Branch Delaware River,949000,2,2,Reach Concentration
1748543,1151.0990,5232.87,35.389,MULTILINESTRING Z ((-8312991.936 5192442.779 0...,drb,,74920,74922,020401020305,2,...,0204010203,False,1748563,"[1748539, 1748713]",StreamRiver,Platte Kill,960689,3,2,Reach Concentration
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
932040366,2124.7248,2720941.47,17802.923,MULTILINESTRING Z ((-8400739.070 4831969.993 0...,drb,,65070,76964,020402060103,7,...,0204020601,False,24903456,"[932040355, 932040367]",ArtificialPath,Delaware River,217887,1,7,Reach Concentration
932040367,788.7859,2717821.26,17788.281,MULTILINESTRING Z ((-8399585.343 4833380.786 0...,drb,,65079,76964,020402060103,7,...,0204020601,False,932040366,"[932040356, 932040368]",ArtificialPath,Delaware River,217887,1,7,Reach Concentration
932040368,265.0275,2716120.08,17780.448,MULTILINESTRING Z ((-8398343.469 4834781.918 0...,drb,,65080,76960,020402060103,7,...,0204020601,False,932040367,"[24903436, 24903452]",ArtificialPath,Delaware River,217887,1,7,Reach Concentration
932040369,1106.5294,2889095.67,18624.999,MULTILINESTRING Z ((-8406760.425 4820639.687 0...,drb,,64232,76965,020402040000,7,...,0204020400,False,932040370,"[24903444, 24903448, 932040360]",ArtificialPath,Delaware River,217887,1,7,Reach Concentration


# Method 2b: Net loads by COMID

In [11]:
reach_loads_gdf = reach_concs_gdf.iloc[:,0:25].copy()

In [12]:
reach_loads_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 19496 entries, 1748535 to 932040370
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   catchment_hectares  19496 non-null  float64 
 1   watershed_hectares  19496 non-null  float64 
 2   maflowv             19496 non-null  float64 
 3   geometry            19494 non-null  geometry
 4   cluster             17358 non-null  category
 5   sub_focusarea       186 non-null    Int64   
 6   nord                18870 non-null  Int64   
 7   nordstop            18844 non-null  Int64   
 8   huc12               19496 non-null  category
 9   streamorder         19496 non-null  int64   
 10  headwater           19496 non-null  int64   
 11  phase               4082 non-null   category
 12  fa_name             4082 non-null   category
 13  in_drb              19496 non-null  boolean 
 14  huc08               19496 non-null  category
 15  huc10               194

In [13]:
reach_loads_gdf.from_comids

comid
1748535                                 None
1748537                                 None
1748539                                 None
1748541                   [1748709, 1748711]
1748543                   [1748539, 1748713]
                          ...               
932040366             [932040355, 932040367]
932040367             [932040356, 932040368]
932040368               [24903436, 24903452]
932040369    [24903444, 24903448, 932040360]
932040370             [932040361, 932040369]
Name: from_comids, Length: 19496, dtype: object

In [14]:
type(reach_loads_gdf.from_comids[932040369])

numpy.ndarray

### Back-Calculate Attenuated Loads at Outlets

Using Method 2 equation in header in `stage2/PA2_2b_AggregateAttenuated.ipynb`.

In [15]:
# Back calculate Loads (kg/y) from average annual concentrations (mg/L) 
# and mean annual flow (CFS))
for suffix in ['','_ps', '_xsnps', '_rem1', '_rem2', '_rem3', '_avoid']:
    for pollutant in ['tn', 'tp', 'tss']:
        reach_loads_gdf[f'{pollutant}_load{suffix}'] = (
            (reach_concs_gdf[f'{pollutant}_conc{suffix}'] * 28.3168 / 1000000)
            * reach_concs_gdf.maflowv * 31557600
        )

In [16]:
# Don't calculate load reductions
# We want to calculate load reductions from catchment loads, at their source

In [17]:
reach_loads_gdf.info()
reach_loads_gdf.head()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 19496 entries, 1748535 to 932040370
Data columns (total 46 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   catchment_hectares  19496 non-null  float64 
 1   watershed_hectares  19496 non-null  float64 
 2   maflowv             19496 non-null  float64 
 3   geometry            19494 non-null  geometry
 4   cluster             17358 non-null  category
 5   sub_focusarea       186 non-null    Int64   
 6   nord                18870 non-null  Int64   
 7   nordstop            18844 non-null  Int64   
 8   huc12               19496 non-null  category
 9   streamorder         19496 non-null  int64   
 10  headwater           19496 non-null  int64   
 11  phase               4082 non-null   category
 12  fa_name             4082 non-null   category
 13  in_drb              19496 non-null  boolean 
 14  huc08               19496 non-null  category
 15  huc10               194

Unnamed: 0_level_0,catchment_hectares,watershed_hectares,maflowv,geometry,cluster,sub_focusarea,nord,nordstop,huc12,streamorder,...,tss_load_rem1,tn_load_rem2,tp_load_rem2,tss_load_rem2,tn_load_rem3,tp_load_rem3,tss_load_rem3,tn_load_avoid,tp_load_avoid,tss_load_avoid
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1748535,6496.7052,6501.69,43.699,MULTILINESTRING Z ((-8295323.930 5214456.622 0...,drb,,74914,74914,20401020302,1,...,-8164923.0,-172025.360243,-2324.880448,-8164923.0,-172025.360243,-2324.880448,-8164923.0,0.0,0.0,0.0
1748537,1663.1712,1664.46,11.189,MULTILINESTRING Z ((-8304623.226 5207684.737 0...,drb,,74913,74913,20401020302,1,...,-2171335.0,-43522.069797,-536.50802,-2171335.0,-43522.069797,-536.50802,-2171335.0,0.0,0.0,0.0
1748539,1639.4128,1640.7,11.223,MULTILINESTRING Z ((-8316446.558 5197994.113 0...,drb,,74921,74921,20401020305,1,...,-2128476.0,-44303.681985,-545.380913,-2128476.0,-44303.681985,-545.380913,-2128476.0,0.0,0.0,0.0
1748541,3013.8348,12912.3,86.528,MULTILINESTRING Z ((-8304282.841 5198049.613 0...,drb,,74911,74915,20401020302,2,...,-16126280.0,-340092.990327,-4457.671727,-16126280.0,-340092.990327,-4457.671727,-16126280.0,0.0,0.0,0.0
1748543,1151.099,5232.87,35.389,MULTILINESTRING Z ((-8312991.936 5192442.779 0...,drb,,74920,74922,20401020305,2,...,-6821594.0,-140017.819276,-1834.914201,-6821594.0,-140017.819276,-1834.914201,-6821594.0,0.0,0.0,0.0


## Functions for net loads
Adapted from `stage2/PA2_2b_AggregateAttenuated.ipynb` to be more general.

In [89]:
reload(pa.calc)

<module 'pollution_assessment.calc' from '/Users/aaufdenkampe/Documents/Python/pollution-assessment/src/pollution_assessment/calc.py'>

In [18]:
# Moved to pa.calc module
# Adapted from `are_fromhucs_in_index()`
# def are_inlets_in_index(
#     df: pd.DataFrame,
#     inlets: np.ndarray,
# ) -> np.ndarray[bool]:
#     if type(inlets) == np.ndarray:
#         inarray = np.isin(inlets, df.index)
#     else:
#         inarray = False
#     return inarray

In [86]:
# Test function on a single good value
comid = 932040369
inlets = reach_loads_gdf.from_comids[comid]
pa.calc.are_inlets_in_index(reach_loads_gdf, inlets)

array([ True,  True,  True])

In [87]:
# Test function again on a bad value
comid = 1748535
inlets = reach_loads_gdf.from_comids[comid]
pa.calc.are_inlets_in_index(reach_loads_gdf, inlets)

False

In [90]:
# Moved to pa.calc module
# def get_inlet_loads(
#     df: pd.DataFrame, 
#     inlets_column: str ,
#     index_value: str | int,
#     var: str, 
# ) -> list:
#     """Fetches list of HUC12s that flow into a HUC12, if any.
#     var = f'{pollutant}_load{var_suffix}'
#     """
#     inlets_array = df.at[index_value,inlets_column]
#     if type(inlets_array) == np.ndarray:
#         inlets_mask = pa.calc.are_inlets_in_index(df, inlets_array)
#         ds = df[var][inlets_array[inlets_mask]]
#     else:
#         ds = []
#     return ds

# def calc_net_load(
#     df: pd.DataFrame, 
#     inlets_column: str ,
#     index_value: str | int,
#     var: str, 
# ) -> float:
#     """Calculates the net load of a HUC12, by subtracting inflow loads from outflow load.
#     var = f'{pollutant}_load{var_suffix}'
#     """
#     net = (df.at[index_value,var]
#         - sum(get_inlet_loads(df, inlets_column, index_value, var))
#     )
#     return net

## Confirm functions work

In [91]:
# Confirm functions work, with typical COMID
comid = 1748541
x = pa.calc.get_inlet_loads(
    df=reach_concs_gdf,
    inlets_column='from_comids',
    index_value=comid,
    var='tp_conc',
)
x

comid
1748709    0.031540
1748711    0.027446
Name: tp_conc, dtype: float64

In [23]:
sum(x)

0.0589858763088843

In [24]:
x.sum()

0.0589858763088843

In [25]:
reach_concs_gdf.at[comid,'tp_conc']

0.0323494736392782

In [26]:
calc_net_load(
    df=reach_concs_gdf,
    inlets_column='from_comids',
    index_value=comid,
    var='tp_conc',
)

-0.026636402669606096

In [27]:
# Confirm functions work, with COMID missing value
comid = 932040369
x = pa.calc.get_inlet_loads(
    df=reach_concs_gdf,
    inlets_column='from_comids',
    index_value=comid,
    var='tp_conc',
)
x

comid
24903444          NaN
24903448          NaN
932040360    0.437149
Name: tp_conc, dtype: float64

In [28]:
x.sum()

0.437148624560633

In [29]:
sum(x)

nan

In [30]:
reach_concs_gdf.at[comid,'tp_conc']

nan

In [31]:
calc_net_load(
    df=reach_concs_gdf,
    inlets_column='from_comids',
    index_value=comid,
    var='tp_conc',
)

nan

In [32]:
# Confirm functions work, with COMID with no inlets
comid = 1748535
x = pa.calc.get_inlet_loads(
    df=reach_concs_gdf,
    inlets_column='from_comids',
    index_value=comid,
    var='tp_conc',
)
x

[]

In [33]:
reach_concs_gdf.at[comid,'tp_conc']

0.0304638172253371

In [34]:
calc_net_load(
    df=reach_concs_gdf,
    inlets_column='from_comids',
    index_value=comid,
    var='tp_conc',
)

0.0304638172253371

## Add net loads

In [35]:
df = reach_loads_gdf

In [36]:
# Calc net flows
var = 'maflowv'
df[f'{var}_net'] = df.index.to_series().apply(
    lambda comid: pa.calc.calc_net_load(df, 'from_comids', comid, var)
)
# takes 8 sec

In [37]:
# Calc net pollution loads
for suffix in ['', '_ps', '_xsnps', '_rem1', '_rem2', '_rem3']:
    for pollutant in ['tn', 'tp', 'tss']:
        var = f'{pollutant}_load{suffix}'
        df[f'{var}_net'] = df.index.to_series().apply(
            lambda comid: pa.calc.calc_net_load(df, 'from_comids', comid, var)
        )
# takes 2 min

In [38]:
reach_loads_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 19496 entries, 1748535 to 932040370
Data columns (total 65 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   catchment_hectares  19496 non-null  float64 
 1   watershed_hectares  19496 non-null  float64 
 2   maflowv             19496 non-null  float64 
 3   geometry            19494 non-null  geometry
 4   cluster             17358 non-null  category
 5   sub_focusarea       186 non-null    Int64   
 6   nord                18870 non-null  Int64   
 7   nordstop            18844 non-null  Int64   
 8   huc12               19496 non-null  category
 9   streamorder         19496 non-null  int64   
 10  headwater           19496 non-null  int64   
 11  phase               4082 non-null   category
 12  fa_name             4082 non-null   category
 13  in_drb              19496 non-null  boolean 
 14  huc08               19496 non-null  category
 15  huc10               194

In [39]:
# How many HUC12s have a negative `maflowv_net`, due to water withdrawls
reach_loads_gdf.maflowv_net.lt(0).value_counts()

maflowv_net
False    19162
True       334
Name: count, dtype: int64

In [40]:
# This fraction (1.7%) is much lower than the 10.8% we had for HUC12s
334/19162

0.017430330863166685

In [41]:
net_neg_flow_mask = reach_loads_gdf.maflowv_net.lt(0)
net_neg_flow_list = net_neg_flow_mask[net_neg_flow_mask==True].index

In [42]:
# replace `net_neg_flow_mask` net loads with NaN
for suffix in ['', '_xsnps', '_rem1', '_rem2', '_rem3']:
    for pollutant in ['tn', 'tp', 'tss']:
        reach_loads_gdf.loc[net_neg_flow_mask,f'{pollutant}_load{suffix}_net'] = np.nan

# Sum by Geography

To compare with PA1-style catchment loads from `stage2/PA2_2_Analysis.ipynb`

In [43]:
# Create list of columns to aggregate
columns_to_aggregate = [
    'catchment_hectares',        # catcment area
    'tn_load_net', 'tp_load_net', 'tss_load_net', 
    'tn_load_ps_net', 'tp_load_ps_net', 'tss_load_ps_net',
    'tn_load_xsnps_net', 'tp_load_xsnps_net', 'tss_load_xsnps_net', 
    'tn_load_rem1_net', 'tp_load_rem1_net', 'tss_load_rem1_net', 
    'tn_load_rem2_net', 'tp_load_rem2_net', 'tss_load_rem2_net', 
    'tn_load_rem3_net', 'tp_load_rem3_net', 'tss_load_rem3_net'    
]

### Remove Negative Values

In [44]:
reach_loads_noneg_gdf = reach_loads_gdf.copy()

In [45]:
# True if negative
mask_negative = reach_loads_gdf[columns_to_aggregate].lt(0)

In [46]:
mask_negative

Unnamed: 0_level_0,catchment_hectares,tn_load_net,tp_load_net,tss_load_net,tn_load_ps_net,tp_load_ps_net,tss_load_ps_net,tn_load_xsnps_net,tp_load_xsnps_net,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1748535,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True
1748537,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True
1748539,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True
1748541,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True
1748543,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
932040366,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
932040367,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
932040368,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
932040369,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [47]:
reach_loads_noneg_gdf[columns_to_aggregate] = reach_loads_gdf[columns_to_aggregate].mask(mask_negative, np.nan)
reach_loads_noneg_gdf

Unnamed: 0_level_0,catchment_hectares,watershed_hectares,maflowv,geometry,cluster,sub_focusarea,nord,nordstop,huc12,streamorder,...,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1748535,6496.7052,6501.69,43.699,MULTILINESTRING Z ((-8295323.930 5214456.622 0...,drb,,74914,74914,020401020302,1,...,,,,,,,,,,
1748537,1663.1712,1664.46,11.189,MULTILINESTRING Z ((-8304623.226 5207684.737 0...,drb,,74913,74913,020401020302,1,...,,,,,,,,,,
1748539,1639.4128,1640.70,11.223,MULTILINESTRING Z ((-8316446.558 5197994.113 0...,drb,,74921,74921,020401020305,1,...,,,,,,,,,,
1748541,3013.8348,12912.30,86.528,MULTILINESTRING Z ((-8304282.841 5198049.613 0...,drb,,74911,74915,020401020302,2,...,,,,,,,,,,
1748543,1151.0990,5232.87,35.389,MULTILINESTRING Z ((-8312991.936 5192442.779 0...,drb,,74920,74922,020401020305,2,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
932040366,2124.7248,2720941.47,17802.923,MULTILINESTRING Z ((-8400739.070 4831969.993 0...,drb,,65070,76964,020402060103,7,...,,,,,,,,,,
932040367,788.7859,2717821.26,17788.281,MULTILINESTRING Z ((-8399585.343 4833380.786 0...,drb,,65079,76964,020402060103,7,...,,,,,,,,,,
932040368,265.0275,2716120.08,17780.448,MULTILINESTRING Z ((-8398343.469 4834781.918 0...,drb,,65080,76960,020402060103,7,...,,,,,,,,,,
932040369,1106.5294,2889095.67,18624.999,MULTILINESTRING Z ((-8406760.425 4820639.687 0...,drb,,64232,76965,020402040000,7,...,,,,,,,,,,


## Sum DRWI
There's no Excess non-point source pollution, when attenuation in streams is considered!

In [48]:
# Sum selected columns and move to a new dataframe
drwi_load_reach_net_df = reach_loads_gdf.loc[:,columns_to_aggregate].sum()
drwi_load_reach_net_df

catchment_hectares    3.786557e+06
tn_load_net           4.718194e+07
tp_load_net           2.430376e+06
tss_load_net          1.602294e+09
tn_load_ps_net        1.214248e+07
tp_load_ps_net        1.090868e+06
tss_load_ps_net       0.000000e+00
tn_load_xsnps_net    -1.052291e+08
tp_load_xsnps_net    -1.376601e+06
tss_load_xsnps_net   -5.436197e+09
tn_load_rem1_net     -1.053179e+08
tp_load_rem1_net     -1.393488e+06
tss_load_rem1_net    -5.450146e+09
tn_load_rem2_net     -1.053192e+08
tp_load_rem2_net     -1.393867e+06
tss_load_rem2_net    -5.450296e+09
tn_load_rem3_net     -1.053451e+08
tp_load_rem3_net     -1.408568e+06
tss_load_rem3_net    -5.452153e+09
dtype: float64

In [49]:
# Sum selected columns and move to a new dataframe
drwi_load_reach_net_df = reach_loads_noneg_gdf.loc[:,columns_to_aggregate].sum()
drwi_load_reach_net_df

catchment_hectares    3.786557e+06
tn_load_net           4.850582e+07
tp_load_net           3.538701e+06
tss_load_net          1.998281e+09
tn_load_ps_net        1.259555e+07
tp_load_ps_net        1.590658e+06
tss_load_ps_net       0.000000e+00
tn_load_xsnps_net     3.314412e+06
tp_load_xsnps_net     7.843502e+05
tss_load_xsnps_net    1.943771e+08
tn_load_rem1_net      3.289690e+06
tp_load_rem1_net      7.643023e+05
tss_load_rem1_net     1.939637e+08
tn_load_rem2_net      3.289509e+06
tp_load_rem2_net      7.639486e+05
tss_load_rem2_net     1.939501e+08
tn_load_rem3_net      3.286660e+06
tp_load_rem3_net      7.488173e+05
tss_load_rem3_net     1.937208e+08
dtype: float64

### DRWI not in Clusters

In [50]:
mask = reach_loads_gdf['cluster'].isnull()

drwi_load_noClus_reach_net_df = reach_loads_gdf[mask].loc[:,
    columns_to_aggregate
].sum()

In [51]:
drwi_load_noClus_reach_net_df

catchment_hectares    1.403987e+05
tn_load_net           7.242151e+05
tp_load_net           3.314974e+04
tss_load_net          1.193807e+08
tn_load_ps_net        0.000000e+00
tp_load_ps_net        0.000000e+00
tss_load_ps_net       0.000000e+00
tn_load_xsnps_net    -2.375052e+06
tp_load_xsnps_net    -2.582151e+04
tss_load_xsnps_net   -3.610686e+07
tn_load_rem1_net     -2.375052e+06
tp_load_rem1_net     -2.582151e+04
tss_load_rem1_net    -3.610686e+07
tn_load_rem2_net     -2.375052e+06
tp_load_rem2_net     -2.582151e+04
tss_load_rem2_net    -3.610686e+07
tn_load_rem3_net     -2.375175e+06
tp_load_rem3_net     -2.583315e+04
tss_load_rem3_net    -3.611502e+07
dtype: float64

### DRWI loads in DRB

In [52]:
# Sum loads for DRB, excluding Clusters via mask
mask = reach_loads_gdf['in_drb']

# Preselect colums to keep
# Exclude non-summable dtypes (object, category, geometry)
drwi_load_drb_reach_net_df = reach_loads_gdf[mask].loc[:,
    columns_to_aggregate
].sum()

## Sum by Cluster

In [53]:
# Sum loads by Cluster categories
groupby_column = 'cluster'

cluster_load_reach_net_df = reach_loads_gdf.loc[:,
    [groupby_column, *columns_to_aggregate.copy()]
].groupby(groupby_column, observed=True).sum()

In [54]:
cluster_load_reach_net_df

Unnamed: 0_level_0,catchment_hectares,tn_load_net,tp_load_net,tss_load_net,tn_load_ps_net,tp_load_ps_net,tss_load_ps_net,tn_load_xsnps_net,tp_load_xsnps_net,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Brandywine and Christina,145739.1,3392404.0,78543.95,88089760.0,390158.1,11834.549253,0.0,-2422393.0,-38014.434358,-184053400.0,-2452631.0,-42692.166619,-192047000.0,-2452631.0,-42692.166619,-192047000.0,-2456007.0,-43853.36675,-192270000.0
Kirkwood - Cohansey Aquifer,550179.6,4312180.0,109949.4,134831800.0,938854.5,44623.529615,0.0,-11040010.0,-209432.330203,-588136500.0,-11041880.0,-209656.507681,-588321200.0,-11042370.0,-209834.374429,-588393400.0,-11042680.0,-209907.225297,-588409100.0
Middle Schuylkill,202958.6,8607999.0,554589.3,184580200.0,3411378.0,250914.860963,0.0,-1497842.0,175852.480785,-151259500.0,-1525712.0,168249.71645,-154158800.0,-1525712.0,168249.71645,-154158800.0,-1530397.0,164382.967742,-154780400.0
New Jersey Highlands,178647.1,1756611.0,87330.8,60787950.0,349288.2,19761.329435,0.0,-3662105.0,-29036.13802,-193530200.0,-3679739.0,-33454.749843,-196657300.0,-3680070.0,-33622.106081,-196712300.0,-3680307.0,-33825.373714,-196724000.0
Poconos and Kittatinny,342462.1,1268144.0,36216.96,52421880.0,109183.6,3320.791739,0.0,-13148880.0,-240477.426583,-665375700.0,-13148880.0,-240474.84186,-665375300.0,-13148880.0,-240474.84186,-665375300.0,-13149350.0,-240820.738025,-665414800.0
Schuylkill Highlands,44855.11,734229.0,12747.59,12442510.0,152149.4,-1350.245775,0.0,-999157.4,-22011.889817,-66604960.0,-999590.9,-21760.155071,-66528070.0,-999590.9,-21760.155071,-66528070.0,-1000144.0,-22006.608749,-66536190.0
Upper Lehigh,198029.8,1083662.0,66385.32,57720780.0,128226.4,10551.168894,0.0,-11292480.0,-178125.365516,-556725600.0,-11292480.0,-178125.365516,-556725600.0,-11292480.0,-178125.365516,-556725600.0,-11293730.0,-179060.371802,-556929700.0
Upstream Suburban Philadelphia,37411.09,510962.9,23475.76,29455720.0,88313.01,7581.729357,0.0,-582787.2,-3243.317226,-20985920.0,-583945.3,-3569.426781,-21257100.0,-584150.6,-3591.132081,-21274660.0,-584356.9,-3754.20013,-21283490.0
drb,1945876.0,24791530.0,1427987.0,862582400.0,6574933.0,743630.578988,0.0,-58208430.0,-806291.40929,-2973418000.0,-58217980.0,-806183.372879,-2972969000.0,-58218220.0,-806194.88001,-2972974000.0,-58233000.0,-813889.608042,-2973690000.0


In [55]:
cluster_load_reach_net_df.tp_load_xsnps_net

cluster
Brandywine and Christina          -38014.434358
Kirkwood - Cohansey Aquifer      -209432.330203
Middle Schuylkill                 175852.480785
New Jersey Highlands              -29036.138020
Poconos and Kittatinny           -240477.426583
Schuylkill Highlands              -22011.889817
Upper Lehigh                     -178125.365516
Upstream Suburban Philadelphia     -3243.317226
drb                              -806291.409290
Name: tp_load_xsnps_net, dtype: float64

## Sum by Focus Area within Clusters

In [56]:
# Create merged name for Focus Area by Phase
reach_loads_gdf['fa_name_phase'] = (
    reach_loads_gdf.phase.dropna().astype('str') 
    + ' ' 
    + reach_loads_gdf.fa_name.dropna().astype('str')
    )
# Change type to category
reach_loads_gdf['fa_name_phase'] = reach_loads_gdf['fa_name_phase'].astype('category')

In [57]:
# Sum loads by Focus Area categories
groupby_column = 'fa_name_phase'

focusarea_load_reach_net_df = reach_loads_gdf.loc[:,
    [groupby_column, *columns_to_aggregate.copy()]
].groupby(groupby_column, observed=True).sum()

In [58]:
# Add back categoricals that were dropped
columns_to_restore = ['fa_name_phase','cluster', 'phase','fa_name']

left = focusarea_load_reach_net_df.copy()
right = reach_loads_gdf.loc[:,columns_to_restore].dropna().drop_duplicates()
focusarea_load_reach_net_df = pd.merge(left,right, on=groupby_column).set_index(groupby_column)

In [59]:
focusarea_load_reach_net_df.head()

Unnamed: 0_level_0,catchment_hectares,tn_load_net,tp_load_net,tss_load_net,tn_load_ps_net,tp_load_ps_net,tss_load_ps_net,tn_load_xsnps_net,tp_load_xsnps_net,tss_load_xsnps_net,...,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net,cluster,phase,fa_name
fa_name_phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Phase 1 Bear Creek,7167.2606,20630.96,955.362713,1013590.0,-160.962189,-134.15993,0.0,-226622.8,-3679.143405,-11395160.0,...,-11395160.0,-226622.8,-3679.143405,-11395160.0,-226622.9,-3676.557552,-11395150.0,Upper Lehigh,Phase 1,Bear Creek
Phase 1 Bush Kill/Hornbecks Creek,44484.944,96103.84,4213.489947,5786433.0,3188.721102,344.569986,0.0,-1410800.0,-24747.78122,-69653580.0,...,-69653580.0,-1410800.0,-24747.78122,-69653580.0,-1410854.0,-24801.212699,-69653990.0,Poconos and Kittatinny,Phase 1,Bush Kill/Hornbecks Creek
Phase 1 Cohansey-Maurice,79845.4069,1096014.0,8723.968306,6509784.0,410685.592634,9251.354181,0.0,-1317101.0,-38791.337544,-93823640.0,...,-93842870.0,-1317914.0,-38783.769849,-93842870.0,-1317945.0,-38781.657789,-93843530.0,Kirkwood - Cohansey Aquifer,Phase 1,Cohansey-Maurice
Phase 1 Core Pine Barrens,131694.8683,239728.8,979.687215,9673978.0,4973.304437,406.606569,0.0,-3679294.0,-73901.442919,-186690500.0,...,-186690500.0,-3679294.0,-73901.442919,-186690500.0,-3679298.0,-73901.582145,-186690600.0,Kirkwood - Cohansey Aquifer,Phase 1,Core Pine Barrens
Phase 1 French Creek Headwaters,4599.8253,64478.03,2730.972426,1900736.0,1.859789,36.154652,0.0,-49380.51,528.411642,-3811355.0,...,-3811188.0,-49424.79,514.947715,-3811188.0,-49463.38,485.155731,-3814250.0,Schuylkill Highlands,Phase 1,French Creek Headwaters


In [60]:
focusarea_load_reach_net_df.tp_load_xsnps_net

fa_name_phase
Phase 1 Bear Creek                   -3679.143405
Phase 1 Bush Kill/Hornbecks Creek   -24747.781220
Phase 1 Cohansey-Maurice            -38791.337544
Phase 1 Core Pine Barrens           -73901.442919
Phase 1 French Creek Headwaters        528.411642
                                         ...     
Phase 2 Upper Musconetcong           -3950.337561
Phase 2 Upper Neversink              -4990.067108
Phase 2 Upper Salem River             4115.857289
Phase 2 Welkinweir/Beaver Run           26.540242
Phase 2 White Clay Creek              3879.757836
Name: tp_load_xsnps_net, Length: 98, dtype: float64

## Cluster loads NOT IN Focus Area (noFA)
To add to cluster summary by focus area, below

In [61]:
# Develop mask
mask = reach_loads_gdf['fa_name_phase'].isnull()
mask.value_counts()

fa_name_phase
True     15414
False     4082
Name: count, dtype: int64

In [62]:
# Sum loads by Cluster categories, excluding Focus Areas via mask
groupby_column = 'cluster'

cluster_load_noFA_reach_net_df = reach_loads_gdf[mask].loc[:,
    [groupby_column, *columns_to_aggregate.copy()]
].groupby(groupby_column, observed=True).sum()

In [63]:
cluster_load_noFA_reach_net_df.head()

Unnamed: 0_level_0,catchment_hectares,tn_load_net,tp_load_net,tss_load_net,tn_load_ps_net,tp_load_ps_net,tss_load_ps_net,tn_load_xsnps_net,tp_load_xsnps_net,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Brandywine and Christina,126864.4144,2961545.0,55178.875903,74554890.0,381539.9,11259.155365,0.0,-2357587.0,-51536.831167,-173153500.0,-2373991.0,-51423.487216,-175700700.0,-2373991.0,-51423.487216,-175700700.0,-2376772.0,-52186.340526,-175753100.0
Kirkwood - Cohansey Aquifer,229360.6413,1303226.0,31439.729348,69356060.0,76091.0,479.261968,0.0,-4677368.0,-81400.348916,-226865400.0,-4677369.0,-81400.374773,-226865000.0,-4677367.0,-81400.25483,-226864400.0,-4677489.0,-81421.687859,-226871700.0
Middle Schuylkill,172185.0158,7937341.0,497285.247431,160112900.0,3407278.0,249196.633922,0.0,-1342408.0,135910.46812,-134488200.0,-1358582.0,132651.478126,-136462100.0,-1358582.0,132651.478126,-136462100.0,-1362792.0,129277.969612,-137019500.0
New Jersey Highlands,97953.7623,996465.9,50666.298438,29004260.0,157542.5,10978.693714,0.0,-1841228.0,-11412.522343,-105455000.0,-1841659.0,-11225.827358,-105401700.0,-1841658.0,-11223.314875,-105400900.0,-1841866.0,-11415.275379,-105410300.0
Poconos and Kittatinny,182634.065,588238.5,26256.577586,30711310.0,72249.32,5488.424946,0.0,-5887516.0,-101135.84069,-290548200.0,-5887516.0,-101134.425017,-290547900.0,-5887516.0,-101134.425017,-290547900.0,-5887802.0,-101314.82968,-290581400.0


## Sum by HUC8 in DRB

In [64]:
# Initialize GDF with HUC names other info
huc08_load_reach_net_gdf = huc08_outlets_drwi_gdf.copy()

In [65]:
# Sum loads by HUC
groupby_column = 'huc08'
# Append to existing df
for column in columns_to_aggregate:
    columns = [column, groupby_column]
    huc08_load_reach_net_gdf[column] = reach_loads_gdf.loc[:,
        columns
    ].groupby(groupby_column, observed=True).sum()

In [66]:
huc08_load_reach_net_gdf

Unnamed: 0_level_0,huc08_name,geometry,comid,nord,in_drb,catchment_hectares,tn_load_net,tp_load_net,tss_load_net,tn_load_ps_net,...,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
huc08,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2040101,Upper Delaware,"POLYGON ((-8304228.499 5229843.998, -8304203.8...",2619256,73297,True,308103.3782,952092.4,35757.10028,27205960.0,40100.48,...,-581189100.0,-11213870.0,-195129.651902,-581194400.0,-11213870.0,-195129.651902,-581194400.0,-11214350.0,-195382.235163,-581225800.0
2040102,East Branch Delaware,"POLYGON ((-8294284.604 5213730.686, -8294297.2...",1752159,74523,True,217471.8713,617257.8,34529.261632,27091060.0,9887.94,...,-534090400.0,-10578430.0,-180329.5405,-534090400.0,-10578430.0,-180329.5405,-534090400.0,-10578430.0,-180329.5405,-534090400.0
2040103,Lackawaxen,"POLYGON ((-8395173.834 5131609.270, -8395131.1...",2741462,72447,True,154757.2607,1300035.0,45820.913799,46658770.0,205786.3,...,-645102800.0,-12694880.0,-225291.882217,-645102800.0,-12694880.0,-225291.882217,-645102800.0,-12698020.0,-226248.088658,-645251700.0
2040104,Middle Delaware-Mongaup-Brodhead,"POLYGON ((-8290255.809 5165720.405, -8290181.2...",4154510,70222,True,395876.6918,1585567.0,76129.014901,79859780.0,153537.8,...,-794918600.0,-16004600.0,-275252.696538,-794918600.0,-16004600.0,-275252.696538,-794918600.0,-16005180.0,-275746.053256,-794960200.0
2040105,Middle Delaware-Musconetcong,"POLYGON ((-8318518.543 5039392.409, -8318515.1...",4481949,68818,True,351714.7924,3313779.0,233323.902527,149351800.0,702893.7,...,-335489700.0,-7071629.0,-24044.958715,-338625800.0,-7071961.0,-24212.314953,-338680900.0,-7074367.0,-26380.600108,-338800800.0
2040106,Lehigh,"POLYGON ((-8394794.527 5054626.737, -8394728.2...",4188251,74985,True,352414.9584,4541033.0,391484.859901,182147400.0,1491897.0,...,-790299100.0,-16300910.0,-162990.964326,-790403200.0,-16301160.0,-163013.379525,-790414600.0,-16304630.0,-165768.656214,-790861100.0
2040201,Crosswicks-Neshaminy,"POLYGON ((-8361112.101 4923606.956, -8361020.3...",4485575,68274,True,140175.697,1645240.0,86971.70484,89637970.0,638777.5,...,-99455410.0,-2766403.0,-29396.912204,-99455410.0,-2766403.0,-29396.912204,-99455410.0,-2767842.0,-30130.333177,-99490230.0
2040202,Lower Delaware,"POLYGON ((-8354536.834 4895108.912, -8354450.1...",24903452,65081,True,298967.9128,5734168.0,454886.2791,280430100.0,2547794.0,...,-132039600.0,-5037046.0,-56066.735608,-132173500.0,-5037244.0,-56077.609116,-132185600.0,-5038473.0,-56854.724552,-132230900.0
2040203,Schuylkill,"POLYGON ((-8453246.777 4995405.859, -8453181.3...",4784841,65459,True,494849.1119,14368870.0,739024.181293,308701300.0,4905242.0,...,-585014500.0,-8386807.0,31337.621626,-587425900.0,-8386807.0,31337.621626,-587425900.0,-8396228.0,26025.73096,-588167100.0
2040204,Delaware Bay,"POLYGON ((-8404100.919 4824476.609, -8404179.3...",24903800,63468,True,16366.2201,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
huc08_load_reach_net_gdf.tp_load_xsnps_net

huc08
02040101   -195106.703918
02040102   -180329.540500
02040103   -225291.882217
02040104   -275252.696538
02040105    -19555.483571
02040106   -162811.415815
02040201    -29396.912204
02040202    -55893.819685
02040203     38465.496417
02040204         0.000000
02040205    -40929.080840
02040206    -64014.214163
02040207     -6381.883721
02040301   -104590.249189
02040302    -55512.955909
Name: tp_load_xsnps_net, dtype: float64

In [68]:
# No negative values
huc08_load_reach_net_noneg_gdf = huc08_outlets_drwi_gdf.copy()
# Sum loads by HUC
groupby_column = 'huc08'
# Append to existing df
for column in columns_to_aggregate:
    columns = [column, groupby_column]
    huc08_load_reach_net_noneg_gdf[column] = reach_loads_noneg_gdf.loc[:,
        columns
    ].groupby(groupby_column, observed=True).sum()

In [69]:
huc08_load_reach_net_noneg_gdf.tp_load_xsnps_net

huc08
02040101       981.948471
02040102       239.518105
02040103      1733.486786
02040104      1394.418504
02040105     57084.683427
02040106     88839.313933
02040201     16267.680197
02040202     31581.645266
02040203    365185.500054
02040204         0.000000
02040205     56188.088117
02040206     32686.484866
02040207    105294.618157
02040301     22254.882048
02040302      4617.972809
Name: tp_load_xsnps_net, dtype: float64

## Sum by HUC10 in DRB

In [70]:
# Initialize GDF with HUC names other info
huc10_load_reach_net_gdf = huc10_outlets_drwi_gdf.copy()

In [71]:
# Sum loads by HUC at selected level
groupby_column = 'huc10'
# Append to existing df
for column in columns_to_aggregate:
    columns = [column, groupby_column]
    huc10_load_reach_net_gdf[column] = reach_loads_gdf.loc[:,
        columns
    ].groupby(groupby_column, observed=True).sum()

In [72]:
huc10_load_reach_net_gdf.head()

Unnamed: 0_level_0,huc10_name,geometry,comid,nord,in_drb,huc08,catchment_hectares,tn_load_net,tp_load_net,tss_load_net,...,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
huc10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
204010101,Upper West Branch Delaware River,"POLYGON ((-8304262.020 5228828.467, -8304276.2...",2612826,74277,True,2040101,51001.7886,155995.114089,11731.349791,7658411.0,...,-66244000.0,-1317070.0,-16297.379027,-66244000.0,-1317070.0,-16297.379027,-66244000.0,-1317070.0,-16297.379027,-66244000.0
204010102,Middle West Branch Delaware River,"POLYGON ((-8346041.487 5210211.202, -8345991.9...",2614138,74141,True,2040101,66948.2854,229407.642333,4838.545514,5090761.0,...,-97706940.0,-1848474.0,-34014.810588,-97706940.0,-1848474.0,-34014.810588,-97706940.0,-1848474.0,-34014.810588,-97706940.0
204010103,Lower West Branch Delaware River,"POLYGON ((-8386125.641 5192313.974, -8386205.0...",2617290,73934,True,2040101,54778.443,209207.239607,9532.192239,4631452.0,...,-128253500.0,-2439675.0,-40940.255315,-128259900.0,-2439675.0,-40940.255315,-128259900.0,-2439717.0,-40973.877047,-128263400.0
204010104,Upper Delaware River,"POLYGON ((-8358825.391 5150856.311, -8358625.2...",2616816,73702,True,2040101,42273.9588,118567.010649,1885.550003,1891606.0,...,-81964190.0,-1560786.0,-30452.810871,-81963760.0,-1560786.0,-30452.810871,-81963760.0,-1560873.0,-30503.912111,-81970150.0
204010105,Middle Delaware River,"POLYGON ((-8329217.537 5136231.813, -8329266.5...",2617486,73565,True,2040101,46119.2838,144201.091456,9603.89545,7008700.0,...,-101397200.0,-2016201.0,-31353.425132,-101397000.0,-2016201.0,-31353.425132,-101397000.0,-2016305.0,-31411.660627,-101403400.0


In [73]:
huc08_select = huc10_load_reach_net_gdf.huc08=='02040205' # Brandywine-Christina

huc10_load_reach_net_gdf.loc[huc08_select].tp_load_xsnps_net

huc10
0204020501     -576.781283
0204020502    14659.615472
0204020503    -2788.478230
0204020504   -24244.549870
0204020505   -25064.240447
0204020506     -799.675962
0204020507      755.884349
0204020508    -2870.854870
Name: tp_load_xsnps_net, dtype: float64

## Sum by HUC12

In [74]:
# Initialize GDF with HUC names other info
huc12_load_reach_net_gdf = huc12_outlets_drwi_gdf.copy()

In [75]:
# Sum loads by HUC at selected level
groupby_column = 'huc12'
# Append to existing df
for column in columns_to_aggregate:
    columns = [column, groupby_column]
    huc12_load_reach_net_gdf[column] = reach_loads_gdf.loc[:,
        columns
    ].groupby(groupby_column, observed=True).sum()

In [76]:
huc12_load_reach_net_gdf.head()

Unnamed: 0_level_0,huc12_name,geometry,centroid_xy,comid,nord,to_huc12,outlet_comid,from_huc12s,inlet_comids,outlet_comids,...,tss_load_xsnps_net,tn_load_rem1_net,tp_load_rem1_net,tss_load_rem1_net,tn_load_rem2_net,tp_load_rem2_net,tss_load_rem2_net,tn_load_rem3_net,tp_load_rem3_net,tss_load_rem3_net
huc12,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20401010101,Town Brook-Headwaters West Brach Delaware River,"POLYGON ((-8303725.462 5224646.990, -8303761.0...","[-74.62155936289159, 42.387091234041016]",2612792,74293,20401010102,2612792,,,[2612792],...,-10644440.0,-217020.271812,-2425.331527,-10644440.0,-217020.271812,-2425.331527,-10644440.0,-217020.271812,-2425.331527,-10644440.0
20401010102,Betty Brook-Headwaters West Brach Delaware River,"POLYGON ((-8315136.657 5225191.846, -8315097.2...","[-74.71393635968639, 42.38194565669812]",2612800,74290,20401010103,2612800,[020401010101],[2612792],"[2612800, 2612922]",...,-7759074.0,-151787.598208,-1695.274463,-7759074.0,-151787.598208,-1695.274463,-7759074.0,-151787.598208,-1695.274463,-7759074.0
20401010103,Rose Brook-Headwaters West Brach Delaware River,"POLYGON ((-8323990.577 5217953.339, -8323948.6...","[-74.71097819143394, 42.330665690562654]",2612808,74288,20401010104,2612808,[020401010102],[2612800],[2612808],...,-8760651.0,-149108.708569,-2015.23851,-8760651.0,-149108.708569,-2015.23851,-8760651.0,-149108.708569,-2015.23851,-8760651.0
20401010104,Elk Creek-Headwaters West Brach Delaware River,"POLYGON ((-8326727.279 5222215.417, -8326605.6...","[-74.82334627464569, 42.34506256688788]",2612820,74282,20401010106,2612820,[020401010103],[2612808],[2612820],...,-8984379.0,-166545.680774,-2106.386684,-8984379.0,-166545.680774,-2106.386684,-8984379.0,-166545.680774,-2106.386684,-8984379.0
20401010105,Upper Little Delaware River,"POLYGON ((-8319654.283 5208307.086, -8319607.8...","[-74.78436638151948, 42.27096486797448]",2612842,74311,20401010106,2612842,,,[2612842],...,-18794170.0,-390839.064157,-5253.900561,-18794170.0,-390839.064157,-5253.900561,-18794170.0,-390839.064157,-5253.900561,-18794170.0


In [77]:
huc08_select = huc12_load_reach_net_gdf.huc08=='02040205' # Brandywine-Christina
huc12_load_reach_net_gdf.loc[huc08_select].tp_load_net

huc12
020402050101     5370.242922
020402050102     3533.289396
020402050103    13477.489261
020402050104     7867.213518
020402050105     3441.894963
020402050201     2244.079011
020402050202    29550.836046
020402050203     6142.618227
020402050204     7291.936900
020402050205    -1917.106865
020402050301     4117.050407
020402050302     2099.136924
020402050303     8216.648375
020402050304     5535.408466
020402050305     1894.088814
020402050306      327.531313
020402050307     1705.518109
020402050308    -6688.196190
020402050401   -11588.859632
020402050402    -6339.361002
020402050403      283.673572
020402050501      894.299160
020402050502     6193.331526
020402050503    -5674.992486
020402050504      283.425800
020402050505      282.753437
020402050601      691.564171
020402050602     2537.790166
020402050701     2061.429644
020402050702    -1166.304158
020402050703    35584.631855
020402050704     1171.753504
020402050705     3203.117046
020402050801      812.429811
02040205

In [78]:
# No negative values
huc12_load_reach_net_noneg_gdf = huc12_outlets_drwi_gdf.copy()
# Sum loads by HUC
groupby_column = 'huc12'
# Append to existing df
for column in columns_to_aggregate:
    columns = [column, groupby_column]
    huc12_load_reach_net_noneg_gdf[column] = reach_loads_noneg_gdf.loc[:,
        columns
    ].groupby(groupby_column, observed=True).sum()

In [79]:
huc12_load_reach_net_noneg_gdf.loc[huc08_select].tp_load_net

huc12
020402050101     6849.568559
020402050102     4504.184383
020402050103    14719.436591
020402050104     8011.566876
020402050105    15258.144412
020402050201     2244.079011
020402050202    31137.688963
020402050203     6243.497394
020402050204     7291.936900
020402050205     3503.413726
020402050301     4117.050407
020402050302     2258.782338
020402050303     8665.769302
020402050304     5535.408466
020402050305     1894.088814
020402050306     1882.578393
020402050307     2030.581923
020402050308     2298.321743
020402050401     5462.396544
020402050402     2775.309292
020402050403      931.707133
020402050501      912.335037
020402050502     6195.599291
020402050503      642.530964
020402050504     1620.634763
020402050505    10782.385251
020402050601      707.932229
020402050602     2582.201776
020402050701     2064.477538
020402050702     1036.009086
020402050703    50370.677271
020402050704     1215.151961
020402050705     3811.405546
020402050801     1470.699632
02040205

# Save Reach Net Load Results

In [81]:

# Results by COMID
reach_loads_gdf.to_parquet(data_output_path /'reach_loads_gdf.parquet',compression='brotli')

# Aggregate by DRWI Geographies, for comparison to Pollution Assessment Stage 1 (PA1)
# As CSV files for easy import into Excel for final analysis similar to PA1.
drwi_load_reach_net_df.to_csv(data_output_path /'drwi_load_all_reach_net.csv')
drwi_load_noClus_reach_net_df.to_csv(data_output_path /'drwi_load_noClus_reach_net.csv')
drwi_load_drb_reach_net_df.to_csv(data_output_path /'drwi_load_drb_reach_net.csv')
cluster_load_reach_net_df.to_csv(data_output_path /'cluster_loads_reach_net.csv')
focusarea_load_reach_net_df.sort_values('cluster').to_csv(data_output_path /'focusarea_loads_byCluster_reach_net.csv')
cluster_load_noFA_reach_net_df.to_csv(data_output_path /'cluster_load_noFA_reach_net.csv')

# Aggregation by HUC, using Method 1 (Sum of Local Loads) similar to PA1
huc12_load_reach_net_gdf.to_parquet(data_output_path /'huc12_load_reach_net_gdf.parquet',compression='brotli')
huc10_load_reach_net_gdf.to_parquet(data_output_path /'huc10_load_reach_net_gdf.parquet',compression='brotli')
huc08_load_reach_net_gdf.to_parquet(data_output_path /'huc08_load_reach_net_gdf.parquet',compression='brotli')