# Days exceedence by basin

In [1]:
import sys
sys.path.insert(1, '../scripts/')
import xarray
import yaml
import numpy as np
import pandas
import pathlib
import time
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib as mpl
# load functions from my scripts file "ssm_utils"
from ssm_utils import get_nearest_node, reshape_fvcom3D, calc_fvcom_stat, extract_fvcom_level

# Updated method using KC results

In [2]:
with open('../etc/SSM_config.yaml', 'r') as file:
    ssm = yaml.safe_load(file)
# get shapefile path    
shp = ssm['paths']['shapefile']
# load shapefile into geopandas dataframe
gdf = gpd.read_file(shp)
DO_thresh = gdf['DO_std']
DO_thresh.shape

(7494,)

In [3]:
np_operator='min'
model_var='DOXG'
processed_netcdf_dir = pathlib.Path(ssm['paths']['processed_output'])/model_var

In [4]:
import os
dir_list = os.listdir(processed_netcdf_dir)

# Load all bottom DO values into dictionary

In [5]:
MinBottomDO={}
for run_dir in dir_list:
    try: 
        run_file=processed_netcdf_dir/run_dir/'bottom'/f'daily_{np_operator}_{model_var}_bottom.nc'
        with xarray.open_dataset(run_file) as ds:
            print([*ds])
            MinBottomDO[run_dir]=ds[f'{model_var}_daily_{np_operator}_bott']
            print(MinBottomDO[run_dir].shape)
    except FileNotFoundError:
        print(f'File Not Found: {run_file}')
        # create empty placeholder for now
        MinBottomDO[run_dir]=np.ones((366, 16012))*9999

['DOXG_daily_min_bott']
(366, 16012)
['DOXG_daily_min_bott']
(366, 16012)
['DOXG_daily_min_bott']
(366, 16012)
['DOXG_daily_min_bott']
(366, 16012)
['DOXG_daily_min_bott']
(366, 16012)
['DOXG_daily_min_bott']
(366, 16012)
['DOXG_daily_min_bott']
(366, 16012)


## Sub-sample basin footprint

In [6]:
%%time
MinBottDO_basins={}
for run_tag in dir_list:
    MinBottDO_basins[run_tag]=MinBottomDO[run_tag][:,gdf['node_id']-1]
    if np.min(MinBottDO_basins[run_tag])==9999:
        print(f'{run_tag} min value: {np.min(MinBottDO_basins[run_tag])}')
    else:
        print(f'{run_tag} min value: {np.min(MinBottDO_basins[run_tag].values)}')
    

1c_all_sog_riv_off min value: 2.3595900984219043e-06
2b_sog_river_2times min value: 2.3576199055241887e-06
wqm_baseline min value: 2.358590108997305e-06
2a_sog_river_0.5times min value: 2.3590901037096046e-06
1d_small_sog_wwtp_off min value: 2.358619894948788e-06
1e_med_sog_wwtp_off min value: 2.358679921599105e-06
1b_all_sog_wwtp_off min value: 2.358699930482544e-06
CPU times: user 8.54 s, sys: 1min 43s, total: 1min 52s
Wall time: 1min 53s


## Create boolean where MinDO < threshold

In [7]:
# nodataruns=[
#     '1c_all_sog_riv_off',
#     '2b_sog_river_2times',
#     '1d_small_sog_wwtp_off',
#     '1e_med_sog_wwtp_off'
# ]
nodataruns=[]

In [8]:
nnodes = len(DO_thresh)
ndays=366
AnoxicDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh2D = np.ones((nnodes,ndays))*np.array(DO_thresh).reshape(nnodes,1)
# (7494,365) x (7494,1) => element-wise multiplication
for run_type in dir_list:
    if run_type not in nodataruns:
        print(run_type)
        Anoxic = MinBottDO_basins[run_type].transpose()<=DO_thresh2D
        AnoxicDays[run_type]=Anoxic.sum(axis=1)

1c_all_sog_riv_off
2b_sog_river_2times
wqm_baseline
2a_sog_river_0.5times
1d_small_sog_wwtp_off
1e_med_sog_wwtp_off
1b_all_sog_wwtp_off


## Create pandas dataframe with Anoxic days by basin

In [9]:
TotalAnoxic={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
for run_type in dir_list:
    if run_type not in nodataruns:
        TotalAnoxic[run_type]={}
        for basin in basins:
            TotalAnoxic[run_type][basin]=np.array(AnoxicDays[run_type])[
                (gdf['Basin']==basin) &
                (gdf['included_i']==1)
            ].sum()

In [10]:
TotalAnoxic_df=pandas.DataFrame(TotalAnoxic)
TotalAnoxic_df

Unnamed: 0,1c_all_sog_riv_off,2b_sog_river_2times,wqm_baseline,2a_sog_river_0.5times,1d_small_sog_wwtp_off,1e_med_sog_wwtp_off,1b_all_sog_wwtp_off
Hood_Canal,96063,96061,96059,96061,96059,96058,96057
Main_Basin,162307,162361,162333,162322,162333,162330,162328
SJF_Admiralty,227685,227680,227686,227687,227686,227685,227685
SOG_Bays,118743,119093,118918,118840,118915,118878,118876
South_Sound,129102,129223,129152,129125,129148,129143,129138
Whidbey_Basin,76321,76392,76355,76343,76354,76350,76350


## Create a dataframe showing percent change in days below threshold

In [11]:
# create empty dataframe to place baseline value and percent change for all other runs
PercentChangeAnoxic = pandas.DataFrame().reindex_like(TotalAnoxic_df)
PercentChangeAnoxic['wqm_baseline']=TotalAnoxic_df['wqm_baseline']

# create a list of the runs for which to calculate % change
change_list=TotalAnoxic_df.columns.to_list()
change_list.remove('wqm_baseline')
# calculate percent change
baseline=ssm['run_information']['baseline'] #defined as 'wqm_baseline'
for change in change_list:
    PercentChangeAnoxic[change]=100*(
        (TotalAnoxic_df[change]-TotalAnoxic_df[baseline])/TotalAnoxic_df[baseline]
    )
# PercentChangeAnoxic['1b_all_sog_wwtp_off']=100*(
#     (TotalAnoxic_df['1b_all_sog_wwtp_off']-TotalAnoxic_df['wqm_baseline'])/TotalAnoxic_df['wqm_baseline']
# )
PercentChangeAnoxic

Unnamed: 0,1c_all_sog_riv_off,2b_sog_river_2times,wqm_baseline,2a_sog_river_0.5times,1d_small_sog_wwtp_off,1e_med_sog_wwtp_off,1b_all_sog_wwtp_off
Hood_Canal,0.004164,0.002082,96059,0.002082,0.0,-0.001041,-0.002082
Main_Basin,-0.016016,0.017248,162333,-0.006776,0.0,-0.001848,-0.00308
SJF_Admiralty,-0.000439,-0.002635,227686,0.000439,0.0,-0.000439,-0.000439
SOG_Bays,-0.14716,0.14716,118918,-0.065591,-0.002523,-0.033637,-0.035318
South_Sound,-0.038714,0.054974,129152,-0.020906,-0.003097,-0.006969,-0.01084
Whidbey_Basin,-0.044529,0.048458,76355,-0.015716,-0.00131,-0.006548,-0.006548


In [12]:
PercentChangeAnoxic=PercentChangeAnoxic.rename(columns=ssm['run_information']['run_tag'])
PercentChangeAnoxic = PercentChangeAnoxic.reindex(columns=['Baseline','1b','1c','1d','1e','2a','2b'])
PercentChangeAnoxic

Unnamed: 0,Baseline,1b,1c,1d,1e,2a,2b
Hood_Canal,96059,-0.002082,0.004164,0.0,-0.001041,0.002082,0.002082
Main_Basin,162333,-0.00308,-0.016016,0.0,-0.001848,-0.006776,0.017248
SJF_Admiralty,227686,-0.000439,-0.000439,0.0,-0.000439,0.000439,-0.002635
SOG_Bays,118918,-0.035318,-0.14716,-0.002523,-0.033637,-0.065591,0.14716
South_Sound,129152,-0.01084,-0.038714,-0.003097,-0.006969,-0.020906,0.054974
Whidbey_Basin,76355,-0.006548,-0.044529,-0.00131,-0.006548,-0.015716,0.048458


In [13]:
col_short=PercentChangeAnoxic.columns.to_list()
col_short_wpercent=[f"{name} [% change]" for name in col_short if name!="Baseline"]
col_short_wpercent.insert(0,"Baseline")
col_title = {col_short[i]: col_short_wpercent[i] for i in range(len(col_short))}
col_title

{'Baseline': 'Baseline',
 '1b': '1b [% change]',
 '1c': '1c [% change]',
 '1d': '1d [% change]',
 '1e': '1e [% change]',
 '2a': '2a [% change]',
 '2b': '2b [% change]'}

In [14]:
PercentChangeAnoxic = PercentChangeAnoxic.rename(columns=col_title)
PercentChangeAnoxic

Unnamed: 0,Baseline,1b [% change],1c [% change],1d [% change],1e [% change],2a [% change],2b [% change]
Hood_Canal,96059,-0.002082,0.004164,0.0,-0.001041,0.002082,0.002082
Main_Basin,162333,-0.00308,-0.016016,0.0,-0.001848,-0.006776,0.017248
SJF_Admiralty,227686,-0.000439,-0.000439,0.0,-0.000439,0.000439,-0.002635
SOG_Bays,118918,-0.035318,-0.14716,-0.002523,-0.033637,-0.065591,0.14716
South_Sound,129152,-0.01084,-0.038714,-0.003097,-0.006969,-0.020906,0.054974
Whidbey_Basin,76355,-0.006548,-0.044529,-0.00131,-0.006548,-0.015716,0.048458


In [15]:
col_short=PercentChangeAnoxic.columns.to_list()
col_short_wpercent=[f'{name} [% change]' for name in col_short if name!="Baseline"]
col_short_wpercent.insert(0,"Baseline")
col_short_wpercent

['Baseline',
 '1b [% change] [% change]',
 '1c [% change] [% change]',
 '1d [% change] [% change]',
 '1e [% change] [% change]',
 '2a [% change] [% change]',
 '2b [% change] [% change]']

# Method using 2014 output below

In [16]:
output_dir = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data')
graphics_directory = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/graphics')

# Load netcdf of min daily DO on bottom level
These netcdf were output from `dev_TS_ExistRef_graphic.ipynb`

In [17]:
%%time
MinDO={}
idx=0
# loop through comparison cases and get timeseries from model output
for run_type in ['deviation','baseline']:
    print(run_type)
    xr=xarray.open_dataset(
        output_dir/f'dailyDO_tmin_bottom_{run_type}.nc'
    )
    MinDO[run_type]=xr.DailyMinBottomDO

deviation
baseline
CPU times: user 8.81 ms, sys: 7.25 ms, total: 16.1 ms
Wall time: 90.1 ms


In [18]:
MinDO[run_type].shape

(365, 16012)

# Load DO threshold information

In [19]:
# with open('../etc/SSM_config.yaml', 'r') as file:
#     ssm = yaml.safe_load(file)
# # get shapefile path    
# shp = ssm['shapefile_path']
# # load shapefile into geopandas dataframe
# gdf = gpd.read_file(shp)
# gdf.head(1)

KeyError: 'shapefile_path'

In [None]:
# DO_thresh = gdf['DO_std']
# DO_thresh.shape

# Sub-sample model output with shapefile nodes
Assumption: array index = node_id -1
Why?  node_id = [1,16012], array index = [0,16012)

In [20]:
MinDO[run_type].shape

(365, 16012)

In [21]:
%%time
MinDO_basins={}
for run_type in ['deviation','baseline']:
    run_type
    MinDO_basins[run_type]=MinDO[run_type][:,gdf['node_id']-1]

CPU times: user 2.01 ms, sys: 70 µs, total: 2.07 ms
Wall time: 2.11 ms


In [22]:
MinDO_basins[run_type].shape

(365, 7494)

# Create boolean where MinDO < threshold

In [23]:
nnodes = len(DO_thresh)
ndays=365
AnoxicDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh2D = np.ones((nnodes,ndays))*np.array(DO_thresh).reshape(nnodes,1)
# (7494,365) x (7494,1) => element-wise multiplication
for run_type in ['deviation','baseline']:
    run_type
    Anoxic = MinDO_basins[run_type].transpose()<=DO_thresh2D
    AnoxicDays[run_type]=Anoxic.sum(axis=1)

In [24]:
AnoxicDays[run_type].median()

# Create pandas dataframe with Anoxic days by basin

In [25]:
gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()

['Hood_Canal',
 'Main_Basin',
 'SJF_Admiralty',
 'SOG_Bays',
 'South_Sound',
 'Whidbey_Basin']

In [26]:
TotalAnoxic={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
for run_type in ['deviation','baseline']:
    TotalAnoxic[run_type]={}
    for basin in basins:
        TotalAnoxic[run_type][basin]=np.array(AnoxicDays[run_type])[gdf['Basin']==basin].sum()

In [27]:
pandas.DataFrame(TotalAnoxic)

Unnamed: 0,deviation,baseline
Hood_Canal,222270,220923
Main_Basin,215083,203937
SJF_Admiralty,320815,320257
SOG_Bays,199846,199058
South_Sound,185479,173016
Whidbey_Basin,157313,147070


#### The above is total days where bottom DO < threshold, by basin, with "deviation" reflecting "existing" and "baseline" as "reference"

# Calculate volume days exceedence

In [28]:
%%time
# specify variable to plot
graphic_var = 'DO'
# get correspoding model variable name, Var_10, for "DO"
ssm_var_name = ssm['var_name'][graphic_var]

for run_type in ['deviation','baseline']:
    # input netcdf filename
    index = ssm['run_index'][run_type]
    path=pathlib.Path(
        ssm['output_paths'][index])/'s_hy_base000_pnnl007_nodes.nc'
    print(path.as_posix())
    # load variable into xarray and calculate daily min.
    with xarray.open_dataset(path) as ds:
        dailyDO = reshape_fvcom(
            ds[ssm_var_name][:,:].data, 
            'days'
        ) #return (365x24xnodes)
    # calculate daily minimum (365 x nodes)
    dailyDO_tmin = calc_fvcom_stat(dailyDO, 'min', axis=1)
    # reshape to levels
    dailyDO_tmin_rshp = reshape_fvcom(dailyDO_tmin, 'levels')
    # save to file
    xr_minDO=xarray.DataArray(dailyDO_tmin_rshp, name='DailyMinDO')
    xr_minDO.to_netcdf(output_dir/f'dailyDO_24hr_min_{run_type}.nc')

KeyError: 'var_name'

In [None]:
%%time
minDailyDO={}
for run_type in ['deviation','baseline']:
    netcdf_in = output_dir/f'dailyDO_24hr_min_{run_type}.nc'
    minDailyDO[run_type]=xarray.open_dataset(netcdf_in)

In [None]:
minDailyDO[run_type].DailyMinDO.shape

## Use `gdf[node_id]-1` to get array indices 

In [None]:
%%time
minDailyDO_shp={}
for run_type in ['deviation','baseline']:
    minDailyDO_shp[run_type]=minDailyDO[run_type].DailyMinDO[:,gdf['node_id']-1,:]

In [None]:
minDailyDO_shp[run_type].shape

In [None]:
minDailyDO_shp[run_type]

In [None]:
minDailyDO_shp[run_type].transpose('dim_1','dim_0','dim_2').shape

## Calculate days exceedence for all layers

In [None]:
DO_thresh = gdf['DO_std']
DO_thresh3D = np.ones((nnodes,ndays,10))*np.array(DO_thresh).reshape(nnodes,1,1)

In [None]:
DO_thresh3D.shape

In [None]:
DO_data = (DO_thresh3D>0)
DO_data.shape

In [None]:
DO_data = (DO_thresh3D>0)
for depth in range(0,10):
    plt.plot(DO_thresh3D[:,1,depth][DO_data[:,1,depth]],'*')

In [None]:
minDailyDO_shp[run_type].shape

In [None]:
DO_thresh3D=DO_thresh3D.reshape(365,7494,10)

In [None]:
DO_thresh3D.shape

In [None]:
%%time
nnodes = len(DO_thresh)
ndays=365
AnoxicDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh3D = np.ones((nnodes,ndays,10))*np.array(DO_thresh).reshape(nnodes,1,1)# (7494,365) x (7494,1) => element-wise multiplication
DO_thresh3D=DO_thresh3D.reshape(365,7494,10)
for run_type in ['deviation','baseline']:
    print(run_type)
    Anoxic = minDailyDO_shp[run_type]<=DO_thresh3D
    AnoxicDays[run_type]=Anoxic.sum(axis=0)

### Take the max boolean value across depth (i.e. 1 if there is(are) an anoxic level(s)) 

In [None]:
np.sum(ssm['siglev_diff'])

In [None]:
lyr_volume_np=(np.ones((10,7494))*gdf['volume'].values).T*ssm['siglev_diff']/100

In [None]:
lyr_volume_time_np=np.ones((365,7494,10))*lyr_volume_np

In [None]:
lyr_volume_time_np.shape

In [None]:
%%time
nnodes = len(DO_thresh)
ndays=365
AnoxicVolDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh3D = np.ones((nnodes,ndays,10))*np.array(DO_thresh).reshape(nnodes,1,1)# (7494,365) x (7494,1) => element-wise multiplication
DO_thresh3D=DO_thresh3D.reshape(365,7494,10)
for run_type in ['deviation','baseline']:
    print(run_type)
    Anoxic = np.squeeze([minDailyDO_shp[run_type]<=DO_thresh3D]*lyr_volume_time_np)
    AnoxicVolDays[run_type]=Anoxic.sum(axis=0).sum(axis=1)

In [None]:
AnoxicVolDays[run_type].shape

In [None]:
TotalVolAnoxicDays={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
for run_type in ['deviation','baseline']:
    TotalVolAnoxicDays[run_type]={}
    for basin in basins:
        TotalVolAnoxicDays[run_type][basin]=np.array(AnoxicVolDays[run_type])[gdf['Basin']==basin].sum()

In [None]:
pandas.DataFrame(TotalVolAnoxicDays).rename(columns={'deviation':'existing','baseline':'reference'})

###  Total volume [m^3] of depth levels with DO < DO_std in 2014

In [None]:
np.array(AnoxicDays[run_type])[test].sum()

In [None]:
np.array(AnoxicDays[run_type])[gdf['Basin']==basin].shape