# Days of exceedance by basin

In [1]:
import sys
sys.path.insert(1, '../scripts/')
import xarray
import yaml
import numpy as np
import pandas
import pathlib
import time
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib as mpl
# load functions from my scripts file "ssm_utils"
from ssm_utils import get_nearest_node, reshape_fvcom, calc_fvcom_stat, extract_fvcom_level

In [2]:
# Project root on the cluster file system; change it here to relocate both
# the data and the graphics directories at once.
PROJECT_ROOT = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty')
# directory holding the netCDF files read by this notebook
output_dir = PROJECT_ROOT/'data'
# directory for figures
graphics_directory = PROJECT_ROOT/'graphics'

# Load netCDF files of minimum daily DO on the bottom level
These netCDF files were output by `dev_TS_ExistRef_graphic.ipynb`.

In [3]:
%%time
# Map each run type to its DailyMinBottomDO variable.
MinDO={}
# A plain for-loop is used on purpose: later cells read `run_type` after the
# loop has finished.
for run_type in ['deviation','baseline']:
    print(run_type)
    # NOTE(review): the opened dataset is never closed; confirm whether later
    # cells need the file to stay open before adding a close/with here.
    dataset=xarray.open_dataset(
        output_dir/f'dailyDO_tmin_bottom_{run_type}.nc'
    )
    MinDO[run_type]=dataset.DailyMinBottomDO

deviation
baseline
CPU times: user 21.5 ms, sys: 26.5 ms, total: 48 ms
Wall time: 113 ms


In [4]:
# Shape of the baseline array; keyed explicitly instead of relying on the
# value `run_type` happened to hold after the loading loop.
MinDO['baseline'].shape

(365, 16012)

# Load DO threshold information

In [5]:
# Parse the SSM configuration file
with pathlib.Path('../etc/SSM_config.yaml').open('r') as config_file:
    ssm = yaml.safe_load(config_file)
# the configuration stores the location of the node shapefile
shp = ssm['shapefile_path']
# read the shapefile into a geopandas dataframe and preview the first row
gdf = gpd.read_file(shp)
gdf.head(1)

Unnamed: 0,node_id,Basin,uncategori,x,y,x_shp,y_shp,lat,lon,depth,included_i,DO_std,volume,area,Shape_Leng,Shape_Area,geometry
0,1249,SJF_Admiralty,,380473.98,5365288.5,380500.860294,5365280.0,48.429395,-124.615801,160.628998,1,7,1824350000.0,11357538.53,13249.778795,11357540.0,"POLYGON ((378570.860 5364434.875, 378213.530 5..."


In [39]:
# DO standard of every shapefile node, taken from the 'DO_std' column
DO_thresh = gdf.loc[:, 'DO_std']
DO_thresh.shape

(7494,)

# Sub-sample model output with shapefile nodes
Assumption: array index = node_id - 1.
Why? `node_id` is 1-based and runs from 1 to 16012, while the array index is 0-based and runs from 0 to 16011.

In [11]:
# Shape of the baseline array before sub-sampling; keyed explicitly instead
# of relying on a loop variable left over from an earlier cell.
MinDO['baseline'].shape

(365, 16012)

In [17]:
%%time
# Keep only the model nodes that are present in the shapefile.
# node_id is 1-based, so subtracting 1 gives the 0-based column position;
# this is the same for every run type, so it is computed once.
node_index = gdf['node_id']-1
MinDO_basins={}
# A plain for-loop is used on purpose: later cells read `run_type` after the
# loop has finished.
for run_type in ['deviation','baseline']:
    MinDO_basins[run_type]=MinDO[run_type][:,node_index]

CPU times: user 2.41 ms, sys: 0 ns, total: 2.41 ms
Wall time: 2.44 ms


In [18]:
# Shape of the sub-sampled baseline array; keyed explicitly instead of
# relying on the leaked loop variable.
MinDO_basins['baseline'].shape

(365, 7494)

# Create boolean where MinDO <= threshold

In [48]:
# Count, for every shapefile node, the days on which the daily-minimum bottom
# DO is at or below that node's DO standard (DO_thresh). Despite the "Anoxic"
# naming, the test is against the DO_std threshold.
nnodes = len(DO_thresh)
# Take the number of days from the data instead of hard-coding 365, so runs
# of a different length (e.g. a leap year) still work.
ndays = MinDO_basins['baseline'].shape[0]
AnoxicDays={}
# Repeat each node's threshold across all days via broadcasting:
# (nnodes, ndays) array of ones * (nnodes, 1) column => (nnodes, ndays)
DO_thresh2D = np.ones((nnodes,ndays))*np.array(DO_thresh).reshape(nnodes,1)
# A plain for-loop is used on purpose: later cells read `run_type` after the
# loop has finished.
for run_type in ['deviation','baseline']:
    # transpose to (node, day) so the array lines up with DO_thresh2D
    Anoxic = MinDO_basins[run_type].transpose()<=DO_thresh2D
    # sum along the day axis => number of flagged days per node
    AnoxicDays[run_type]=Anoxic.sum(axis=1)

In [50]:
# Median per-node count of flagged days for the baseline run; keyed
# explicitly instead of relying on the leaked loop variable.
AnoxicDays['baseline'].median()

# Create pandas dataframe with Anoxic days by basin

In [61]:
# Sorted list of the basin names present in the shapefile
gdf.groupby('Basin').size().index.to_list()

['Hood_Canal',
 'Main_Basin',
 'SJF_Admiralty',
 'SOG_Bays',
 'South_Sound',
 'Whidbey_Basin']

In [76]:
# For each run type and basin, add up the per-node flagged-day counts over
# all nodes in that basin (i.e. total node-days per basin).
TotalAnoxic={}
basins = gdf[['node_id','Basin']].groupby('Basin').count().index.to_list()
# The basin membership masks do not depend on the run type: build them once.
basin_masks = {name: (gdf['Basin']==name).to_numpy() for name in basins}
# Plain for-loops are used on purpose: later cells read `run_type` and
# `basin` after the loops have finished.
for run_type in ['deviation','baseline']:
    # convert once per run type instead of once per basin
    days_per_node = np.array(AnoxicDays[run_type])
    TotalAnoxic[run_type]={}
    for basin in basins:
        TotalAnoxic[run_type][basin]=days_per_node[basin_masks[basin]].sum()

In [77]:
# Tabulate the totals: one column per run type, one row per basin
pandas.DataFrame.from_dict(TotalAnoxic)

Unnamed: 0,deviation,baseline
Hood_Canal,222270,220923
Main_Basin,215083,203937
SJF_Admiralty,320815,320257
SOG_Bays,199846,199058
South_Sound,185479,173016
Whidbey_Basin,157313,147070


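#### The table above gives, for each basin, the total node-days (days summed over all nodes in the basin) on which daily-minimum bottom DO <= threshold. "deviation" corresponds to the "existing" case and "baseline" to the "reference" case.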

In [65]:
# Spot check of the table above: boolean mask selecting the Hood_Canal nodes.
# The basin is named explicitly; after a fresh run the leaked loop variable
# `basin` would hold the last basin in the list instead.
test = (gdf['Basin']=='Hood_Canal')
test.shape

(7494,)

In [74]:
# Spot check: total flagged node-days for the masked nodes in the baseline
# run; keyed explicitly instead of relying on the leaked loop variable.
np.array(AnoxicDays['baseline'])[test].sum()

220923

In [None]:
# Spot check: number of Hood_Canal nodes in the baseline result; run type and
# basin are named explicitly instead of relying on leaked loop variables.
np.array(AnoxicDays['baseline'])[gdf['Basin']=='Hood_Canal'].shape