In [1]:
import sys
import os
import yaml
import numpy as np
import pandas
import pathlib
import time
from datetime import date
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
from matplotlib.ticker import MaxNLocator
import matplotlib as mpl
import cmocean as cm

import xarray
from matplotlib.offsetbox import AnchoredText
sys.path.insert(1, '../scripts/')
from ssm_utils import reshape_fvcom

In [None]:
output_dir = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/data')

In [2]:
# Read the King County SSM configuration file.
with open('../etc/SSM_config_KingCounty.yaml', 'r') as config_file:
    ssm = yaml.safe_load(config_file)

# Shapefile location, taken from the config's paths -> shapefile entry.
shp = ssm['paths']['shapefile']


# Notebook-wide matplotlib defaults, grouped by rc category.
# Sizes are given as absolute point values; matplotlib also accepts relative
# names ("xx-small", "small", "medium", "large", "x-large", ...).
rc_defaults = {
    'font': dict(size=11, family='sans-serif', weight='normal', style='normal'),
    'xtick': dict(labelsize=12),
    'ytick': dict(labelsize=12),
    'legend': dict(fontsize=12),
    'axes': dict(titlesize=16, labelsize=12),
    'figure': dict(titlesize=16),
    'text': dict(usetex=False),  # do not hand text rendering to LaTeX
}
for rc_group, rc_settings in rc_defaults.items():
    mpl.rc(rc_group, **rc_settings)

# Define dimension sizes and load shapefile
# Load the node shapefile and expose its 'region_inf' column as 'Regions'.
gdf = gpd.read_file(shp).rename(columns={'region_inf': 'Regions'})

# One row per region name (grouping on 'Regions'); the index of that table
# is the list of region names.
nodes_per_region = gdf[['node_id', 'Regions']].groupby('Regions').count()
regions = nodes_per_region.index.to_list()

# 'Other' is not treated as a region of interest.
regions.remove('Other')

In [3]:
%%time

# Node of interest (author's note: "Bbay low DO").
node_id = 6369

# Model output variables for which statistics are computed.
model_var = ['NO3', 'NH4', 'salinity', 'temp', 'DOXG']

# Model runs being compared: short label -> netCDF output file.
run_files = {
    'base': '/mmfs1/gscratch/ssmc/USRS/PSI/Sukyong/kingcounty/wqm_baseline/ssm_output.nc',
    'ref': '/mmfs1/gscratch/ssmc/USRS/PSI/Sukyong/kingcounty/wqm_reference/ssm_output.nc',
}
# Parallel lists: case[i] is the label of the run stored at paths[i].
case = list(run_files.keys())
paths = list(run_files.values())


CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 4.53 µs


In [4]:
%%time
# Summary statistics for every run and model variable, stored as
# daily_<stat>[run label][variable name].
surface_vars = ['NO3', 'NH4', 'salinity', 'temp']
daily_median, daily_std, daily_min, daily_max = {}, {}, {}, {}

for run_idx, nc_path in enumerate(paths):
    run = case[run_idx]  # label that goes with this output file
    with xarray.open_dataset(nc_path, engine='netcdf4') as ds:
        for store in (daily_median, daily_std, daily_min, daily_max):
            store[run] = {}
        for name in model_var:
            # Index along the second axis: 0 = surface, 9 = bottom.
            level = 0 if name in surface_vars else 9
            daily = ds[name][:, level, :]
            # Reduce along axis 0 (presumably time -- TODO confirm against
            # the dimension order of the output file).
            daily_median[run][name] = np.median(daily, axis=0)
            daily_std[run][name] = np.std(daily, axis=0)
            daily_min[run][name] = np.min(daily, axis=0)
            daily_max[run][name] = np.max(daily, axis=0)

CPU times: user 21.8 s, sys: 6.73 s, total: 28.5 s
Wall time: 1min 51s


In [5]:
%%time
# Open both runs for the percent-change calculation.
# `case` labels the entries of `paths`: 'base' is the 2014 baseline run and
# 'ref' is the reference run. Look the files up by label so that ds_b really
# is the baseline and ds_r really is the reference; the two were previously
# swapped, which made 100*(ds_b-ds_r)/ds_r disagree with the README's stated
# formula 100*(2014-ref)/ref.
ds_b = xarray.open_dataset(paths[case.index('base')], engine='netcdf4')
ds_r = xarray.open_dataset(paths[case.index('ref')], engine='netcdf4')
# ds_diff = ds_b-ds_r

CPU times: user 38 ms, sys: 21.1 ms, total: 59.1 ms
Wall time: 1.31 s


In [8]:
%%time
# Statistics of the percent change of ds_b relative to ds_r,
# stored as daily_<stat>_err[variable name].
surface_vars = ['NO3', 'NH4', 'salinity', 'temp']
daily_median_err, daily_std_err, daily_min_err, daily_max_err = {}, {}, {}, {}

for name in model_var:
    # Index along the second axis: 0 = surface, 9 = bottom.
    level = 0 if name in surface_vars else 9
    change = ds_b[name][:, level, :] - ds_r[name][:, level, :]
    # Percent change relative to ds_r.
    daily = 100 * change / ds_r[name][:, level, :]
    # Reduce along axis 0 (presumably time -- TODO confirm against the
    # dimension order of the output file).
    daily_median_err[name] = np.median(daily, axis=0)
    daily_std_err[name] = np.std(daily, axis=0)
    daily_min_err[name] = np.min(daily, axis=0)
    daily_max_err[name] = np.max(daily, axis=0)

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


CPU times: user 13.1 s, sys: 9.03 s, total: 22.1 s
Wall time: 2min 30s


# Create README tab for Excel files

In [10]:
# Build the README table that accompanies every Excel workbook.
# The two HYPERLINK strings are Excel formulas (URL, display text).
this_file = '=HYPERLINK("https://github.com/RachaelDMueller/KingCounty-Rachael/blob/main/notebooks/calc_2014_ref_stats.py","calc_2014_ref_stats.py")'
run_description = '=HYPERLINK("https://github.com/RachaelDMueller/KingCounty-Rachael/blob/main/docs/supporting/KingCounty_Model_Runs.xlsx","KingCounty_Model_Runs.xlsx")'
parameter_notes="NO3, NH4, salinity, and temperature stats are calculated from surface level, hourly-output. DO stats are calculated from bottom level, hourly output"
err="The relative error, or percent change, was calculated as 100*(2014-ref)/ref"
created_by = 'Rachael D. Mueller'
created_at = 'Puget Sound Institute'
created_from = 'Model results produced by Su Kyong Yun at the Salish Sea Modeling Center'
created_on = date.today().strftime("%B %d, %Y")  # e.g. "January 05, 2024"
contact = 'Rachael D Mueller (rdmseas@uw.edu)'

# (row label, value) pairs in the order they appear in the README.
readme_rows = [
    ('Created by', created_by),
    ('Created at', created_at),
    ('Created on', created_on),
    ('Created with', this_file),
    ('Contact', contact),
    ('Modeling by', created_from),
    ('Model Run Overview', run_description),
    ('Description', parameter_notes),
    ('Relative Error', err),
]

# Single column named ' ' holding the values; the labels become the index.
header = {' ': [value for _, value in readme_rows]}
header_df = pandas.DataFrame(header, index=[label for label, _ in readme_rows])

# Save % change stats to Excel

In [12]:
# One table per statistic of the percent change; each key of the
# daily_*_err dictionaries (a model variable) becomes a column.
median_df = pandas.DataFrame.from_dict(daily_median_err)
std_df = pandas.DataFrame.from_dict(daily_std_err)
min_df = pandas.DataFrame.from_dict(daily_min_err)
max_df = pandas.DataFrame.from_dict(daily_max_err)

# Write every statistic to its own sheet of a single workbook.
with pandas.ExcelWriter(output_dir/"2014_reference_percentChange_stats.xlsx") as writer:
    # Statistic sheets are written without the row index.
    median_df.to_excel(writer, sheet_name='%change median', index=False)
    std_df.to_excel(writer, sheet_name='%change std', index=False)
    min_df.to_excel(writer, sheet_name='%change min', index=False)
    max_df.to_excel(writer, sheet_name='%change max', index=False)
    # README sheet: the row labels ('Created by', 'Contact', ...) are stored
    # in header_df's index, so the index has to be written; with index=False
    # the sheet is an unlabelled list of values.
    header_df.to_excel(writer, sheet_name='README')

# Save 2014 stats to Excel

In [13]:
# One table per statistic for the 'base' (2014) run; each key of the
# daily_*['base'] dictionaries (a model variable) becomes a column.
median_df = pandas.DataFrame.from_dict(daily_median['base'])
std_df = pandas.DataFrame.from_dict(daily_std['base'])
min_df = pandas.DataFrame.from_dict(daily_min['base'])
max_df = pandas.DataFrame.from_dict(daily_max['base'])

# Write every statistic to its own sheet of a single workbook.
with pandas.ExcelWriter(output_dir/"2014_stats.xlsx") as writer:
    # Statistic sheets are written without the row index.
    median_df.to_excel(writer, sheet_name='2014 median', index=False)
    std_df.to_excel(writer, sheet_name='2014 std', index=False)
    min_df.to_excel(writer, sheet_name='2014 min', index=False)
    max_df.to_excel(writer, sheet_name='2014 max', index=False)
    # README sheet: the row labels ('Created by', 'Contact', ...) are stored
    # in header_df's index, so the index has to be written; with index=False
    # the sheet is an unlabelled list of values.
    header_df.to_excel(writer, sheet_name='README')

# Save reference stats to Excel

In [14]:
# One table per statistic for the 'ref' (reference) run; each key of the
# daily_*['ref'] dictionaries (a model variable) becomes a column.
median_df = pandas.DataFrame.from_dict(daily_median['ref'])
std_df = pandas.DataFrame.from_dict(daily_std['ref'])
min_df = pandas.DataFrame.from_dict(daily_min['ref'])
max_df = pandas.DataFrame.from_dict(daily_max['ref'])

# Write every statistic to its own sheet of a single workbook.
with pandas.ExcelWriter(output_dir/"Reference_stats.xlsx") as writer:
    # Statistic sheets are written without the row index.
    median_df.to_excel(writer, sheet_name='ref median', index=False)
    std_df.to_excel(writer, sheet_name='ref std', index=False)
    min_df.to_excel(writer, sheet_name='ref min', index=False)
    max_df.to_excel(writer, sheet_name='ref max', index=False)
    # README sheet: the row labels ('Created by', 'Contact', ...) are stored
    # in header_df's index, so the index has to be written; with index=False
    # the sheet is an unlabelled list of values.
    header_df.to_excel(writer, sheet_name='README')

In [15]:
# Read the saved statistics back in as stat[statistic][dataset].
#   '2014'    -> 2014_stats.xlsx                          (written above)
#   'ref'     -> Reference_stats.xlsx                     (written above)
#   '%change' -> 2014_reference_percentChange_stats.xlsx  (written above)
#   'diff'    -> 2014_minus_reference_stats.xlsx          (NOT written by this
#                notebook; only loaded when a copy already exists on disk)
diff_file = output_dir/'2014_minus_reference_stats.xlsx'
stat={}
for stat_type in ['median','min','max','std']:
    print(stat_type)
    stat[stat_type]={}
    stat[stat_type]['2014'] = pandas.read_excel(
        output_dir/'2014_stats.xlsx', sheet_name=f'2014 {stat_type}')
    stat[stat_type]['ref'] = pandas.read_excel(
        output_dir/'Reference_stats.xlsx', sheet_name=f'ref {stat_type}')
    stat[stat_type]['%change'] = pandas.read_excel(
        output_dir/'2014_reference_percentChange_stats.xlsx',
        sheet_name=f'%change {stat_type}')
    if diff_file.exists():
        stat[stat_type]['diff'] = pandas.read_excel(
            diff_file, sheet_name=f'2014-ref {stat_type}')
    else:
        # Keep a fresh "Restart & Run All" from failing on a file that this
        # notebook does not produce.
        print(f"  skipping 'diff': {diff_file.name} not found")
    

median
min
max
std


In [None]:
# Display the '2014 median' sheet that was read back from 2014_stats.xlsx.
stat['median']['2014']

DataFrame.to_excel(excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=_NoDefault.no_default, inf_rep='inf', verbose=_NoDefault.no_default, freeze_panes=None, storage_options=None)[source]

# Plot results

In [None]:
# Hard-coded shapefile path for the plotting section.
# NOTE(review): this overwrites the `shp` value read from the YAML config in
# the setup cell -- confirm whether the config entry should be used instead.
shp='/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/SalishSeaModel-grid/shapefiles/SSMGrid2_tce_ecy_node_info_v2_10102022/SSMGrid2_tce_ecy_node_info_v2_10102022.shp'

In [None]:
# gdf = gpd.read_file(shp)
# gdf.shape

In [None]:
# for var in model_var:
#     gdf[f'{var}_median']=daily_median_diff[var]
#     gdf[f'{var}_std']=daily_std_diff[var]
#     gdf[f'{var}_min']=daily_min_diff[var]
#     gdf[f'{var}_max']=daily_max_diff[var]
#     for idx in [0,1]:
#         gdf[f'{var}_median_{case[idx]}']=daily_median[case[idx]][var]
#         gdf[f'{var}_std_{case[idx]}']=daily_std[case[idx]][var]
#         gdf[f'{var}_min_{case[idx]}']=daily_min[case[idx]][var]
#         gdf[f'{var}_max_{case[idx]}']=daily_max[case[idx]][var]

In [None]:
# from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
# cax={}
# var='salinity'
# run_type = case[0] #base case
# stat = ['median','min','max','std']

# fig, axs = plt.subplots(
#     1,4, figsize = (8,6),
#        gridspec_kw={
#        'width_ratios': [1,1,1,1],
#        'height_ratios': [1],
#    'wspace': 0.5,
#    'hspace': 0})
# for idx in [0,1,2,3]:
#     # create `cax` for the colorbar
#     divider = make_axes_locatable(axs[idx])
#     cax[idx] = divider.append_axes("right", size="5%", pad=0.1)

#     # median
#     gdf.plot(f'{var}_median_{run_type}', ax=axs[idx], cax=cax[idx], legend=True)
#     cax[idx].set_title(f'{stat[idx]} surface {var}', fontsize=14, fontdict={ha:'center'})
