PA2 Sandbox Notebook
===

A "sandbox" for developing and testing functions in the `src` directory.

Functions are organized in reverse chronological order of development, so that restarting the kernel and reruning to the current cell is as efficient as possible.

# Installation and Setup

Carefully follow our **[Installation Instructions](README.md#get-started)**.

## Import Python Dependencies

In [1]:
from pathlib import Path

import pandas    as pd
import geopandas as gpd

import warnings

# # geo packages
# from shapely.geometry import Polygon
# import contextily as ctx

# # packages for viz 
# import matplotlib
# import matplotlib.pyplot as plt
# from matplotlib_scalebar.scalebar import ScaleBar
# from  matplotlib.colors import LogNorm

In [2]:
# Confirm that this repo is in your Python Path
# !conda-develop /Users/aaufdenkampe/Documents/Python/pollution-assessment/src

In [3]:
# Custom functions for Pollution Assessment
import pollution_assessment as pa
import pollution_assessment.calc
import pollution_assessment.plot

## Set Paths and Open Files from Notebook 1


In [4]:
# Set your project directory to your local folder for your clone of this repository
project_path = Path.cwd().parent
project_path

PosixPath('/Users/aaufdenkampe/Documents/Python/pollution-assessment')

In [5]:
# Assign relative paths for the data OUTPUT folder.
pa2_data_output_folder = Path('stage2/data_output')

data_output_path = project_path /pa2_data_output_folder

## Read Notebook 1 Outputs

In [6]:
%%time
# read geometry data from GeoParquet files
reach_gdf = gpd.read_parquet(data_output_path /'reach_gdf.parquet')
catch_gdf = gpd.read_parquet(data_output_path /'catch_gdf.parquet')

CPU times: user 1.13 s, sys: 119 ms, total: 1.25 s
Wall time: 1.23 s


In [7]:
%%time
# read results data from Parquet files
reach_concs_df = pd.read_parquet(data_output_path /'reach_concs_df.parquet')
catch_loads_df = pd.read_parquet(data_output_path /'catch_loads_df.parquet')

CPU times: user 81.5 ms, sys: 19.5 ms, total: 101 ms
Wall time: 38.1 ms


# `pa.calc.add_xsnps()`

In [14]:
def add_xsnps(
    comid_type: str, 
    gdf: gpd.GeoDataFrame,
    df: pd.DataFrame,
) -> gpd.GeoDataFrame:
    """ Calculate excess non-point source pollution and add to the combined 
    PA2 results GeoDataFrame.
    
    excess nonpoint source pollution = excess pollution 
                                   – point source pollution

    Args:
        gdf: PA2 results GeoDataFrame with geometries for mapping 

    Returns:
        The input GeoDataFrame with three extra `_xs` columns added .
    """

    calc_suffix = 'xsnps'
    input_suffix = 'xs'

    if comid_type == 'reach':
        quantity_type = 'conc'
        ps_name = 'Point Source Derived Concentration'
    elif comid_type == 'catch':
        quantity_type = 'loadrate'
        ps_name = 'Point Sources'
    else:
        print("Error: comid_type must be 'reach' or 'catch'")

    df = select_run(comid_type, df, pa.calc.run_groups[0], ps=True)

    # for pollutant in pollutants.values():
    #     gdf[f'{pollutant}_{quantity_type}_{calc_suffix}'] = (
    #         gdf[f'{pollutant}_{quantity_type}'] 
    #         - targets[pollutant][f'{quantity_type}_target']
    #     )

    return df

In [None]:
add_xsnps('reach', reach_concs_gdf, reach_concs_df)


# `pa.calc.add_excess()`

In [8]:
# Open dictionary of Target Values
pa.calc.targets

{'tn': {'loadrate_target': 17.07, 'conc_target': 4.73},
 'tp': {'loadrate_target': 0.31, 'conc_target': 0.09},
 'tss': {'loadrate_target': 923.8, 'conc_target': 237.3}}

In [15]:
add_excess('catch', catch_loads_gdf)

NameError: name 'catch_loads_gdf' is not defined

# `pa.calc.add_loadrate()`

In [None]:
pa.calc.pollutants

{'TotalN': 'tn', 'TotalP': 'tp', 'Sediment': 'tss'}

In [None]:
pa.calc.pollutants['TotalP']

'tp'

In [None]:
catch_loads_gdf = pa.calc.join_results('catch', catch_gdf, catch_loads_df, 
                                       pa.calc.run_groups[0], ps=False)

pa.calc.add_loadrate(catch_loads_gdf)
catch_loads_gdf

Unnamed: 0_level_0,catchment_hectares,watershed_hectares,maflowv,geom_catchment,cluster,sub_focusarea,nord,nordstop,huc12,streamorder,...,Source,Sediment,TotalN,TotalP,run_group,funding_sources,with_attenuation,tn_loadrate,tp_loadrate,tss_loadrate
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1748535,6496.7052,6501.69,43.699,"MULTIPOLYGON (((-8301340.781 5199034.787, -830...",drb,,74914,74914,020401020302,1,...,Total Local Load,1.101612e+06,12680.544786,1189.608231,No restoration or protection,,True,1.951842,0.183109,169.564734
1748537,1663.1712,1664.46,11.189,"MULTIPOLYGON (((-8304909.314 5200051.727, -830...",drb,,74913,74913,020401020302,1,...,Total Local Load,2.013339e+05,3771.332143,363.366436,No restoration or protection,,True,2.267555,0.218478,121.054237
1748539,1639.4128,1640.70,11.223,"MULTIPOLYGON (((-8315191.630 5191704.467, -831...",drb,,74921,74921,020401020305,1,...,Total Local Load,2.514024e+05,3133.430355,357.227990,No restoration or protection,,True,1.911313,0.217900,153.349047
1748541,3013.8348,12912.30,86.528,"MULTIPOLYGON (((-8309824.403 5193427.492, -830...",drb,,74911,74915,020401020302,2,...,Total Local Load,6.177144e+05,6409.514442,668.969079,No restoration or protection,,True,2.126697,0.221966,204.959595
1748543,1151.0990,5232.87,35.389,"MULTIPOLYGON (((-8312514.529 5185023.831, -831...",drb,,74920,74922,020401020305,2,...,Total Local Load,1.989546e+05,2918.236825,317.461447,No restoration or protection,,True,2.535174,0.275790,172.838845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
932040366,2124.7248,2720941.47,17802.923,"MULTIPOLYGON (((-8403944.327 4826463.781, -840...",drb,,65070,76964,020402060103,7,...,Total Local Load,1.424324e+06,90140.391034,7976.492910,No restoration or protection,,True,42.424502,3.754130,670.356920
932040367,788.7859,2717821.26,17788.281,"MULTIPOLYGON (((-8400739.269 4832000.931, -840...",drb,,65079,76964,020402060103,7,...,Total Local Load,4.208693e+05,20931.087768,1732.330415,No restoration or protection,,True,26.535829,2.196199,533.565995
932040368,265.0275,2716120.08,17780.448,"MULTIPOLYGON (((-8399608.027 4833463.133, -839...",drb,,65080,76960,020402060103,7,...,Total Local Load,7.518927e+04,188.869267,19.006829,No restoration or protection,,True,0.712640,0.071716,283.703659
932040369,1106.5294,2889095.67,18624.999,"MULTIPOLYGON (((-8409371.984 4816335.622, -840...",drb,,64232,76965,020402040000,7,...,Total Local Load,2.122482e+06,1234.178951,222.587694,No restoration or protection,,True,1.115360,0.201158,1918.143399


In [None]:
warnings.filterwarnings(
    'ignore', 
    message='.*Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0.*'
)
warnings.filterwarnings(
    'ignore', 
    message='.*__len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0.*'
)

In [None]:
catch_loads_gdf.loc[4648450]

catchment_hectares                                             263.4373
watershed_hectares                                               263.61
maflowv                                                           1.412
geom_catchment        (POLYGON ((-8449229.677149855 4880762.40560297...
cluster                                        Brandywine and Christina
sub_focusarea                                                      <NA>
nord                                                              64639
nordstop                                                          64639
huc12                                                      020402050202
streamorder                                                           1
headwater                                                             1
phase                                                           Phase 2
fa_name                                     Brandywine Creek Headwaters
Source                                                 Total Loc

# `pa.calc.join_results()`

In [None]:
pa.calc.join_results('reach', reach_gdf, reach_concs_df, 
                     pa.calc.run_groups[0], ps=True)

Unnamed: 0_level_0,catchment_hectares,watershed_hectares,maflowv,geom,cluster,sub_focusarea,nord,nordstop,huc12,streamorder,headwater,phase,fa_name,Source,Sediment,TotalN,TotalP,run_group,funding_sources,with_attenuation
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1748535,6496.7052,6501.69,43.699,MULTILINESTRING Z ((-8295323.930 5214456.622 0...,drb,,74914,74914,020401020302,1,1,,,Point Source Derived Concentration,0.0,0.0,0.0,No restoration or protection,,True
1748537,1663.1712,1664.46,11.189,MULTILINESTRING Z ((-8304623.226 5207684.737 0...,drb,,74913,74913,020401020302,1,1,,,Point Source Derived Concentration,0.0,0.0,0.0,No restoration or protection,,True
1748539,1639.4128,1640.70,11.223,MULTILINESTRING Z ((-8316446.558 5197994.113 0...,drb,,74921,74921,020401020305,1,1,,,Point Source Derived Concentration,0.0,0.0,0.0,No restoration or protection,,True
1748541,3013.8348,12912.30,86.528,MULTILINESTRING Z ((-8304282.841 5198049.613 0...,drb,,74911,74915,020401020302,2,0,,,Point Source Derived Concentration,0.0,0.0,0.0,No restoration or protection,,True
1748543,1151.0990,5232.87,35.389,MULTILINESTRING Z ((-8312991.936 5192442.779 0...,drb,,74920,74922,020401020305,2,0,,,Point Source Derived Concentration,0.0,0.0,0.0,No restoration or protection,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
932040366,2124.7248,2720941.47,17802.923,MULTILINESTRING Z ((-8400739.070 4831969.993 0...,drb,,65070,76964,020402060103,7,0,,,Point Source Derived Concentration,,,,No restoration or protection,,True
932040367,788.7859,2717821.26,17788.281,MULTILINESTRING Z ((-8399585.343 4833380.786 0...,drb,,65079,76964,020402060103,7,0,,,Point Source Derived Concentration,,,,No restoration or protection,,True
932040368,265.0275,2716120.08,17780.448,MULTILINESTRING Z ((-8398343.469 4834781.918 0...,drb,,65080,76960,020402060103,7,0,,,Point Source Derived Concentration,,,,No restoration or protection,,True
932040369,1106.5294,2889095.67,18624.999,MULTILINESTRING Z ((-8406760.425 4820639.687 0...,drb,,64232,76965,020402040000,7,0,,,Point Source Derived Concentration,,,,No restoration or protection,,True


In [None]:
pa.calc.join_results('catch', catch_gdf, catch_loads_df, 
                       pa.calc.run_groups[0], ps=False)

Unnamed: 0_level_0,catchment_hectares,watershed_hectares,maflowv,geom_catchment,cluster,sub_focusarea,nord,nordstop,huc12,streamorder,headwater,phase,fa_name,Source,Sediment,TotalN,TotalP,run_group,funding_sources,with_attenuation
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1748535,6496.7052,6501.69,43.699,"MULTIPOLYGON (((-8301340.781 5199034.787, -830...",drb,,74914,74914,020401020302,1,1,,,Total Local Load,1.101612e+06,12680.544786,1189.608231,No restoration or protection,,True
1748537,1663.1712,1664.46,11.189,"MULTIPOLYGON (((-8304909.314 5200051.727, -830...",drb,,74913,74913,020401020302,1,1,,,Total Local Load,2.013339e+05,3771.332143,363.366436,No restoration or protection,,True
1748539,1639.4128,1640.70,11.223,"MULTIPOLYGON (((-8315191.630 5191704.467, -831...",drb,,74921,74921,020401020305,1,1,,,Total Local Load,2.514024e+05,3133.430355,357.227990,No restoration or protection,,True
1748541,3013.8348,12912.30,86.528,"MULTIPOLYGON (((-8309824.403 5193427.492, -830...",drb,,74911,74915,020401020302,2,0,,,Total Local Load,6.177144e+05,6409.514442,668.969079,No restoration or protection,,True
1748543,1151.0990,5232.87,35.389,"MULTIPOLYGON (((-8312514.529 5185023.831, -831...",drb,,74920,74922,020401020305,2,0,,,Total Local Load,1.989546e+05,2918.236825,317.461447,No restoration or protection,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
932040366,2124.7248,2720941.47,17802.923,"MULTIPOLYGON (((-8403944.327 4826463.781, -840...",drb,,65070,76964,020402060103,7,0,,,Total Local Load,1.424324e+06,90140.391034,7976.492910,No restoration or protection,,True
932040367,788.7859,2717821.26,17788.281,"MULTIPOLYGON (((-8400739.269 4832000.931, -840...",drb,,65079,76964,020402060103,7,0,,,Total Local Load,4.208693e+05,20931.087768,1732.330415,No restoration or protection,,True
932040368,265.0275,2716120.08,17780.448,"MULTIPOLYGON (((-8399608.027 4833463.133, -839...",drb,,65080,76960,020402060103,7,0,,,Total Local Load,7.518927e+04,188.869267,19.006829,No restoration or protection,,True
932040369,1106.5294,2889095.67,18624.999,"MULTIPOLYGON (((-8409371.984 4816335.622, -840...",drb,,64232,76965,020402040000,7,0,,,Total Local Load,2.122482e+06,1234.178951,222.587694,No restoration or protection,,True


# `pa.calc.select_run()`

In [None]:
pa.calc.run_groups

{0: 'No restoration or protection',
 1: 'Direct WPF Restoration',
 2: 'Direct and Indirect WPF Restoration',
 3: 'All Restoration',
 4: 'Direct WPF Protection'}

In [None]:
pa.calc.run_groups[0]

'No restoration or protection'

In [None]:
for group in pa.calc.run_groups.values():
    print(group)

No restoration or protection
Direct WPF Restoration
Direct and Indirect WPF Restoration
All Restoration
Direct WPF Protection


In [None]:
pa.calc.select_run('catch', catch_loads_df, pa.calc.run_groups[0], ps=False)

Unnamed: 0_level_0,Source,Sediment,TotalN,TotalP,huc,gwlfe_endpoint,huc_level,run_group,funding_sources,with_attenuation
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2612780,Total Local Load,30060.129179,413.591250,39.349002,020401010101,wikiSRAT,12,No restoration or protection,,True
2612782,Total Local Load,62416.843740,941.392349,70.116676,020401010101,wikiSRAT,12,No restoration or protection,,True
2612792,Total Local Load,24783.144045,475.462392,56.373437,020401010101,wikiSRAT,12,No restoration or protection,,True
2612794,Total Local Load,504556.037460,10732.339874,1024.816694,020401010101,wikiSRAT,12,No restoration or protection,,True
2612920,Total Local Load,729581.146093,9437.277029,925.627843,020401010101,wikiSRAT,12,No restoration or protection,,True
...,...,...,...,...,...,...,...,...,...,...
9891532,Total Local Load,20732.138827,319.088502,14.142305,020403040501,wikiSRAT,12,No restoration or protection,,True
10466473,Total Local Load,33825.045694,62.848284,3.391234,020403040501,wikiSRAT,12,No restoration or protection,,True
10466475,Total Local Load,157200.311620,321.131969,17.447391,020403040501,wikiSRAT,12,No restoration or protection,,True
10466691,Total Local Load,1225.353589,1.340191,0.146257,020403040501,wikiSRAT,12,No restoration or protection,,True


In [None]:
pa.calc.select_run('reach', reach_concs_df, pa.calc.run_groups[0], ps=True)

Unnamed: 0_level_0,Source,Sediment,TotalN,TotalP,huc,gwlfe_endpoint,huc_level,run_group,funding_sources,with_attenuation
comid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2612780,Point Source Derived Concentration,0.0,0.0,0.0,020401010101,wikiSRAT,12,No restoration or protection,,True
2612782,Point Source Derived Concentration,0.0,0.0,0.0,020401010101,wikiSRAT,12,No restoration or protection,,True
2612792,Point Source Derived Concentration,0.0,0.0,0.0,020401010101,wikiSRAT,12,No restoration or protection,,True
2612794,Point Source Derived Concentration,0.0,0.0,0.0,020401010101,wikiSRAT,12,No restoration or protection,,True
2612920,Point Source Derived Concentration,0.0,0.0,0.0,020401010101,wikiSRAT,12,No restoration or protection,,True
...,...,...,...,...,...,...,...,...,...,...
9891532,Point Source Derived Concentration,0.0,0.0,0.0,020403040501,wikiSRAT,12,No restoration or protection,,True
10466473,Point Source Derived Concentration,0.0,0.0,0.0,020403040501,wikiSRAT,12,No restoration or protection,,True
10466475,Point Source Derived Concentration,0.0,0.0,0.0,020403040501,wikiSRAT,12,No restoration or protection,,True
10466691,Point Source Derived Concentration,0.0,0.0,0.0,020403040501,wikiSRAT,12,No restoration or protection,,True
