# Days exceedence by basin

In [1]:
import sys
sys.path.insert(1, '../scripts/')
import xarray
import yaml
import numpy as np
import pandas
import pathlib
import time
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib as mpl
# load functions from my scripts file "ssm_utils"
from ssm_utils import get_nearest_node, reshape_fvcom3D, calc_fvcom_stat, extract_fvcom_level

# Updated method using KC results

In [2]:
processed_netcdf_dir = pathlib.Path(
    '/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data/slurm_array/DOXG'
) 

In [3]:
import os
dir_list = os.listdir(processed_netcdf_dir)

# Load all bottom DO values into dictionary

In [4]:
MinBottomDO={}
for run_dir in dir_list:
    try: 
        run_file=processed_netcdf_dir/run_dir/'bottom'/'dailyDO_24hrmin_bottom.nc'
        with xarray.open_dataset(run_file) as ds:
            print([*ds])
            MinBottomDO[run_dir]=ds.DOXG24hrMinBott
            print(MinBottomDO[run_dir].shape)
    except FileNotFoundError:
        print(f'File Not Found: {run_file}')
        # create empty placeholder for now
        MinBottomDO[run_dir]=np.ones((366, 16012))*9999

File Not Found: /mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data/slurm_array/DOXG/1c_all_sog_riv_off/bottom/dailyDO_24hrmin_bottom.nc
File Not Found: /mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data/slurm_array/DOXG/2b_sog_river_2times/bottom/dailyDO_24hrmin_bottom.nc
['DOXG24hrMinBott']
(366, 16012)
['DOXG24hrMinBott']
(366, 16012)
File Not Found: /mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data/slurm_array/DOXG/1d_small_sog_wwtp_off/bottom/dailyDO_24hrmin_bottom.nc
File Not Found: /mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data/slurm_array/DOXG/1e_med_sog_wwtp_off/bottom/dailyDO_24hrmin_bottom.nc
['DOXG24hrMinBott']
(366, 16012)


## Load DO threshold information

In [5]:
with open('../etc/SSM_config.yaml', 'r') as file:
    ssm = yaml.safe_load(file)
# get shapefile path    
shp = ssm['shapefile_path']
# load shapefile into geopandas dataframe
gdf = gpd.read_file(shp)
DO_thresh = gdf['DO_std']
DO_thresh.shape

(7494,)

In [6]:
## Sub-sample basin footprint

In [7]:
%%time
MinBottDO_basins={}
for run_tag in dir_list:
    MinBottDO_basins[run_tag]=MinBottomDO[run_tag][:,gdf['node_id']-1]
    if np.min(MinBottDO_basins[run_tag])==9999:
        print(f'{run_tag} min value: {np.min(MinBottDO_basins[run_tag])}')
    else:
        print(f'{run_tag} min value: {np.min(MinBottDO_basins[run_tag].values)}')
    

1c_all_sog_riv_off min value: 9999.0
2b_sog_river_2times min value: 9999.0
wqm_baseline min value: 2.358590108997305e-06
2a_sog_river_0.5times min value: 2.3590901037096046e-06
1d_small_sog_wwtp_off min value: 9999.0
1e_med_sog_wwtp_off min value: 9999.0
1b_all_sog_wwtp_off min value: 2.358699930482544e-06
CPU times: user 2.1 s, sys: 25.4 s, total: 27.5 s
Wall time: 27.6 s


## Create boolean where MinDO < threshold

In [8]:
nodataruns=[
    '1c_all_sog_riv_off',
    '2b_sog_river_2times',
    '1d_small_sog_wwtp_off',
    '1e_med_sog_wwtp_off'
]

In [9]:
nnodes = len(DO_thresh)
ndays=366
AnoxicDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh2D = np.ones((nnodes,ndays))*np.array(DO_thresh).reshape(nnodes,1)
# (7494,365) x (7494,1) => element-wise multiplication
for run_type in dir_list:
    if run_type not in nodataruns:
        print(run_type)
        Anoxic = MinBottDO_basins[run_type].transpose()<=DO_thresh2D
        AnoxicDays[run_type]=Anoxic.sum(axis=1)

wqm_baseline
2a_sog_river_0.5times
1b_all_sog_wwtp_off


## Create pandas dataframe with Anoxic days by basin

In [10]:
TotalAnoxic={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
for run_type in dir_list:
    if run_type not in nodataruns:
        TotalAnoxic[run_type]={}
        for basin in basins:
            TotalAnoxic[run_type][basin]=np.array(AnoxicDays[run_type])[gdf['Basin']==basin].sum()

In [11]:
TotalAnoxic_df=pandas.DataFrame(TotalAnoxic)
TotalAnoxic_df

Unnamed: 0,wqm_baseline,2a_sog_river_0.5times,1b_all_sog_wwtp_off
Hood_Canal,162388,162389,162385
Main_Basin,201416,201396,201407
SJF_Admiralty,276530,276532,276529
SOG_Bays,163319,163082,163222
South_Sound,183009,182968,182993
Whidbey_Basin,152093,152052,152076


## Create a dataframe showing percent change in days below threshold

In [12]:
PercentChangeAnoxic = pandas.DataFrame().reindex_like(TotalAnoxic_df)
PercentChangeAnoxic['wqm_baseline']=TotalAnoxic_df['wqm_baseline']
PercentChangeAnoxic['2a_sog_river_0.5times']=100*(
    (TotalAnoxic_df['2a_sog_river_0.5times']-TotalAnoxic_df['wqm_baseline'])/TotalAnoxic_df['wqm_baseline']
)
PercentChangeAnoxic['1b_all_sog_wwtp_off']=100*(
    (TotalAnoxic_df['1b_all_sog_wwtp_off']-TotalAnoxic_df['wqm_baseline'])/TotalAnoxic_df['wqm_baseline']
)
PercentChangeAnoxic=PercentChangeAnoxic.rename(columns={'wqm_baseline':'Baseline','2a_sog_river_0.5times':'2a[% change]','1b_all_sog_wwtp_off':'1b[% change]'})
PercentChangeAnoxic

Unnamed: 0,Baseline,2a[% change],1b[% change]
Hood_Canal,162388,0.000616,-0.001847
Main_Basin,201416,-0.00993,-0.004468
SJF_Admiralty,276530,0.000723,-0.000362
SOG_Bays,163319,-0.145115,-0.059393
South_Sound,183009,-0.022403,-0.008743
Whidbey_Basin,152093,-0.026957,-0.011177


# Method using 2014 output below

In [13]:
output_dir = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data')
graphics_directory = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/graphics')

# Load netcdf of min daily DO on bottom level
These netcdf were output from `dev_TS_ExistRef_graphic.ipynb`

In [16]:
%%time
MinDO={}
idx=0
# loop through comparison cases and get timeseries from model output
for run_type in ['deviation','baseline']:
    print(run_type)
    xr=xarray.open_dataset(
        output_dir/f'dailyDO_tmin_bottom_{run_type}.nc'
    )
    MinDO[run_type]=xr.DailyMinBottomDO

deviation
baseline
CPU times: user 10.4 ms, sys: 6.28 ms, total: 16.6 ms
Wall time: 105 ms


In [17]:
MinDO[run_type].shape

(365, 16012)

# Load DO threshold information

In [18]:
with open('../etc/SSM_config.yaml', 'r') as file:
    ssm = yaml.safe_load(file)
# get shapefile path    
shp = ssm['shapefile_path']
# load shapefile into geopandas dataframe
gdf = gpd.read_file(shp)
gdf.head(1)

Unnamed: 0,node_id,Basin,uncategori,x,y,x_shp,y_shp,lat,lon,depth,included_i,DO_std,volume,area,Shape_Leng,Shape_Area,geometry
0,1249,SJF_Admiralty,,380473.98,5365288.5,380500.860294,5365280.0,48.429395,-124.615801,160.628998,1,7,1824350000.0,11357538.53,13249.778795,11357540.0,"POLYGON ((378570.860 5364434.875, 378213.530 5..."


In [19]:
DO_thresh = gdf['DO_std']
DO_thresh.shape

(7494,)

# Sub-sample model output with shapefile nodes
Assumption: array index = node_id -1
Why?  node_id = [1,16012], array index = [0,16012)

In [20]:
MinDO[run_type].shape

(365, 16012)

In [21]:
%%time
MinDO_basins={}
for run_type in ['deviation','baseline']:
    run_type
    MinDO_basins[run_type]=MinDO[run_type][:,gdf['node_id']-1]

CPU times: user 2.02 ms, sys: 101 µs, total: 2.12 ms
Wall time: 2.15 ms


In [22]:
MinDO_basins[run_type].shape

(365, 7494)

# Create boolean where MinDO < threshold

In [23]:
nnodes = len(DO_thresh)
ndays=365
AnoxicDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh2D = np.ones((nnodes,ndays))*np.array(DO_thresh).reshape(nnodes,1)
# (7494,365) x (7494,1) => element-wise multiplication
for run_type in ['deviation','baseline']:
    run_type
    Anoxic = MinDO_basins[run_type].transpose()<=DO_thresh2D
    AnoxicDays[run_type]=Anoxic.sum(axis=1)

In [24]:
AnoxicDays[run_type].median()

# Create pandas dataframe with Anoxic days by basin

In [25]:
gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()

['Hood_Canal',
 'Main_Basin',
 'SJF_Admiralty',
 'SOG_Bays',
 'South_Sound',
 'Whidbey_Basin']

In [26]:
TotalAnoxic={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
for run_type in ['deviation','baseline']:
    TotalAnoxic[run_type]={}
    for basin in basins:
        TotalAnoxic[run_type][basin]=np.array(AnoxicDays[run_type])[gdf['Basin']==basin].sum()

In [27]:
pandas.DataFrame(TotalAnoxic)

Unnamed: 0,deviation,baseline
Hood_Canal,222270,220923
Main_Basin,215083,203937
SJF_Admiralty,320815,320257
SOG_Bays,199846,199058
South_Sound,185479,173016
Whidbey_Basin,157313,147070


#### The above is total days where bottom DO < threshold, by basin, with "deviation" reflecting "existing" and "baseline" as "reference"

# Calculate volume days exceedence

In [28]:
%%time
# specify variable to plot
graphic_var = 'DO'
# get correspoding model variable name, Var_10, for "DO"
ssm_var_name = ssm['var_name'][graphic_var]

for run_type in ['deviation','baseline']:
    # input netcdf filename
    index = ssm['run_index'][run_type]
    path=pathlib.Path(
        ssm['output_paths'][index])/'s_hy_base000_pnnl007_nodes.nc'
    print(path.as_posix())
    # load variable into xarray and calculate daily min.
    with xarray.open_dataset(path) as ds:
        dailyDO = reshape_fvcom(
            ds[ssm_var_name][:,:].data, 
            'days'
        ) #return (365x24xnodes)
    # calculate daily minimum (365 x nodes)
    dailyDO_tmin = calc_fvcom_stat(dailyDO, 'min', axis=1)
    # reshape to levels
    dailyDO_tmin_rshp = reshape_fvcom(dailyDO_tmin, 'levels')
    # save to file
    xr_minDO=xarray.DataArray(dailyDO_tmin_rshp, name='DailyMinDO')
    xr_minDO.to_netcdf(output_dir/f'dailyDO_24hr_min_{run_type}.nc')

KeyError: 'var_name'

In [29]:
%%time
minDailyDO={}
for run_type in ['deviation','baseline']:
    netcdf_in = output_dir/f'dailyDO_24hr_min_{run_type}.nc'
    minDailyDO[run_type]=xarray.open_dataset(netcdf_in)

CPU times: user 7.63 ms, sys: 7.16 ms, total: 14.8 ms
Wall time: 100 ms


In [30]:
minDailyDO[run_type].DailyMinDO.shape

(365, 16012, 10)

## Use `gdf[node_id]-1` to get array indices 

In [None]:
%%time
minDailyDO_shp={}
for run_type in ['deviation','baseline']:
    minDailyDO_shp[run_type]=minDailyDO[run_type].DailyMinDO[:,gdf['node_id']-1,:]

In [None]:
minDailyDO_shp[run_type].shape

In [None]:
minDailyDO_shp[run_type]

In [None]:
minDailyDO_shp[run_type].transpose('dim_1','dim_0','dim_2').shape

## Calculate days exceedence for all layers

In [None]:
DO_thresh = gdf['DO_std']
DO_thresh3D = np.ones((nnodes,ndays,10))*np.array(DO_thresh).reshape(nnodes,1,1)

In [None]:
DO_thresh3D.shape

In [None]:
DO_data = (DO_thresh3D>0)
DO_data.shape

In [None]:
DO_data = (DO_thresh3D>0)
for depth in range(0,10):
    plt.plot(DO_thresh3D[:,1,depth][DO_data[:,1,depth]],'*')

In [None]:
minDailyDO_shp[run_type].shape

In [None]:
DO_thresh3D=DO_thresh3D.reshape(365,7494,10)

In [None]:
DO_thresh3D.shape

In [None]:
%%time
nnodes = len(DO_thresh)
ndays=365
AnoxicDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh3D = np.ones((nnodes,ndays,10))*np.array(DO_thresh).reshape(nnodes,1,1)# (7494,365) x (7494,1) => element-wise multiplication
DO_thresh3D=DO_thresh3D.reshape(365,7494,10)
for run_type in ['deviation','baseline']:
    print(run_type)
    Anoxic = minDailyDO_shp[run_type]<=DO_thresh3D
    AnoxicDays[run_type]=Anoxic.sum(axis=0)

### Take the max boolean value across depth (i.e. 1 if there is(are) an anoxic level(s)) 

In [None]:
np.sum(ssm['siglev_diff'])

In [None]:
lyr_volume_np=(np.ones((10,7494))*gdf['volume'].values).T*ssm['siglev_diff']/100

In [None]:
lyr_volume_time_np=np.ones((365,7494,10))*lyr_volume_np

In [None]:
lyr_volume_time_np.shape

In [None]:
%%time
nnodes = len(DO_thresh)
ndays=365
AnoxicVolDays={}
# create array of DO_threshold values to determine number of days of noncompliance
DO_thresh3D = np.ones((nnodes,ndays,10))*np.array(DO_thresh).reshape(nnodes,1,1)# (7494,365) x (7494,1) => element-wise multiplication
DO_thresh3D=DO_thresh3D.reshape(365,7494,10)
for run_type in ['deviation','baseline']:
    print(run_type)
    Anoxic = np.squeeze([minDailyDO_shp[run_type]<=DO_thresh3D]*lyr_volume_time_np)
    AnoxicVolDays[run_type]=Anoxic.sum(axis=0).sum(axis=1)

In [None]:
AnoxicVolDays[run_type].shape

In [None]:
TotalVolAnoxicDays={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
for run_type in ['deviation','baseline']:
    TotalVolAnoxicDays[run_type]={}
    for basin in basins:
        TotalVolAnoxicDays[run_type][basin]=np.array(AnoxicVolDays[run_type])[gdf['Basin']==basin].sum()

In [None]:
pandas.DataFrame(TotalVolAnoxicDays).rename(columns={'deviation':'existing','baseline':'reference'})

###  Total volume [m^3] of depth levels with DO < DO_std in 2014

In [None]:
np.array(AnoxicDays[run_type])[test].sum()

In [None]:
np.array(AnoxicDays[run_type])[gdf['Basin']==basin].shape