# Explore sediment parameters

The model output files used in this notebook were created using [make_netcdf_output_klone.sh](https://github.com/UWModeling/SalishSeaModel-analysis/blob/main/bash_scripts/BenRoberts_postprocessing/make_netcdf_output_klone.sh)

In [1]:
import sys
import os
import yaml
import numpy as np
import pandas
import pathlib
import time
from datetime import date
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
from matplotlib.ticker import MaxNLocator
import matplotlib as mpl
import cmocean as cm

import xarray
from matplotlib.offsetbox import AnchoredText
sys.path.insert(1, '../scripts/')
from ssm_utils import reshape_fvcom

In [2]:
baseline  = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/SalishSeaModel-analysis/SSM_model_output')/'ssm_output_sediments_WQM.nc'
reference = pathlib.Path('/mmfs1/gscratch/ssmc/USRS/PSI/Rachael/projects/KingCounty/SalishSeaModel-analysis/SSM_model_output')/'ssm_output_sediments_WQM_REF.nc'

# List variables included in `ssm_station.out`

### Convert list of variables given in `ssm_station.out` from horizontal list to vertical for excel



In [2]:
var_list=["StationID","Node","Layer","depth(m)","DO","NO3","NH4","Alg1","Alg2","LDOC","RDOC","LPOC","RPOC","PO4","DIC","TALK","pH","pCO2","T","S","P1","P2","BM1","BM2","NL1","NL2","PL1","PL2","FI1","FI2","B1SZ","B2SZ","B1LZ","B2LZ","PR1","PR2","IAVG","DICUPT","DICBMP","DICPRD","DICMNL","DICDEN","DICGAS","DICSED","DICADV","DICVDIF","ALKNH4","ALKNO3","ALKNIT","ALKDEN","ALKREM","ALKNH4SED","ALKNO3SED","ALKADV","ALKVDIF","Jcin1","Jcin2","Jcin3","Jnin1","Jnin2","Jnin3","Jpin1","Jpin2","Jpin3","Jsin","O20","Depth","Tw","NH30","NO30","PO40","SI0","CH40","SALw","SOD","Jnh4","Jno3","JDenitT","Jch4","Jch4g","Jhs","Jpo4","Jsi","NH31","NH32","NO31","NO32","PO41","PO42","Si1","Si2","CH41","CH42","HS1","HS2","POC21","POC22","POC23","PON21","PON22","PON23","POP21","POP22","POP23","POS2","H1","BEN_STR"]

In [3]:
var_list

['StationID',
 'Node',
 'Layer',
 'depth(m)',
 'DO',
 'NO3',
 'NH4',
 'Alg1',
 'Alg2',
 'LDOC',
 'RDOC',
 'LPOC',
 'RPOC',
 'PO4',
 'DIC',
 'TALK',
 'pH',
 'pCO2',
 'T',
 'S',
 'P1',
 'P2',
 'BM1',
 'BM2',
 'NL1',
 'NL2',
 'PL1',
 'PL2',
 'FI1',
 'FI2',
 'B1SZ',
 'B2SZ',
 'B1LZ',
 'B2LZ',
 'PR1',
 'PR2',
 'IAVG',
 'DICUPT',
 'DICBMP',
 'DICPRD',
 'DICMNL',
 'DICDEN',
 'DICGAS',
 'DICSED',
 'DICADV',
 'DICVDIF',
 'ALKNH4',
 'ALKNO3',
 'ALKNIT',
 'ALKDEN',
 'ALKREM',
 'ALKNH4SED',
 'ALKNO3SED',
 'ALKADV',
 'ALKVDIF',
 'Jcin1',
 'Jcin2',
 'Jcin3',
 'Jnin1',
 'Jnin2',
 'Jnin3',
 'Jpin1',
 'Jpin2',
 'Jpin3',
 'Jsin',
 'O20',
 'Depth',
 'Tw',
 'NH30',
 'NO30',
 'PO40',
 'SI0',
 'CH40',
 'SALw',
 'SOD',
 'Jnh4',
 'Jno3',
 'JDenitT',
 'Jch4',
 'Jch4g',
 'Jhs',
 'Jpo4',
 'Jsi',
 'NH31',
 'NH32',
 'NO31',
 'NO32',
 'PO41',
 'PO42',
 'Si1',
 'Si2',
 'CH41',
 'CH42',
 'HS1',
 'HS2',
 'POC21',
 'POC22',
 'POC23',
 'PON21',
 'PON22',
 'PON23',
 'POP21',
 'POP22',
 'POP23',
 'POS2',
 'H1',
 'BEN_STR'

In [3]:
with open('../etc/SSM_config_SOG.yaml', 'r') as file:
    ssm = yaml.safe_load(file)
    # get shapefile path    
    shp = ssm['paths']['shapefile']


mpl.rc('font', size=11)
# some of the following may be repetetive but can also be set relative to the font value above 
#    (eg "xx-small, x-small,small, medium, large, x-large, xx-large, larger, or smaller"; see link above for details)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
mpl.rc('legend', fontsize=12)
mpl.rc('axes', titlesize=16)
mpl.rc('axes', labelsize=12)
mpl.rc('figure', titlesize=16)
mpl.rc('text', usetex=False)
mpl.rc('font', family='sans-serif', weight='normal', style='normal')

# Define dimension sizes and load shapefile
gdf = gpd.read_file(shp)
gdf = gdf.rename(columns={'region_inf':'Regions'})
regions = gdf[['node_id','Regions']].groupby(
    'Regions').count().index.to_list()
regions.remove('Other')

In [4]:
baseline_xr = xarray.open_dataset(baseline)
reference_xr = xarray.open_dataset(reference)

In [5]:
[*baseline_xr]

['h',
 'zeta',
 'NH4',
 'NO3',
 'netPP',
 'JPON1',
 'JPON2',
 'JPON3',
 'SODTM1S',
 'JNH4',
 'JNO3',
 'sed_NH41',
 'sed_NH42',
 'sed_NO31',
 'sed_NO32',
 'sed_CPON1',
 'sed_CPON2',
 'sed_CPON3']

In [7]:
for variable in [*baseline_xr]:
    print(f"{variable}: MIN = {baseline_xr[variable].min().item():.2f}, MAX = {baseline_xr[variable].max().item():.2f}")

h: MIN = 1.12, MAX = 627.26
zeta: MIN = -2.87, MAX = 55553.60
NH4: MIN = 0.00, MAX = 7.42
NO3: MIN = 0.00, MAX = 3.77
netPP: MIN = -180.00, MAX = 18.12
JPON1: MIN = 0.00, MAX = 1.02
JPON2: MIN = 0.00, MAX = 0.61
JPON3: MIN = 0.00, MAX = 0.41
SODTM1S: MIN = 0.00, MAX = 5.85
JNH4: MIN = -0.21, MAX = 0.34
JNO3: MIN = -0.21, MAX = 0.02
sed_NH41: MIN = 0.00, MAX = 6.14
sed_NH42: MIN = 0.00, MAX = 5.51
sed_NO31: MIN = 0.00, MAX = 2.08
sed_NO32: MIN = 0.00, MAX = 0.50
sed_CPON1: MIN = 0.02, MAX = 160.65
sed_CPON2: MIN = 0.80, MAX = 529.29
sed_CPON3: MIN = 4.22, MAX = 451.82


## Try again, using the include_indicator flag

In [8]:
[*gdf]

['tce',
 'node_id',
 'x',
 'y',
 'lat',
 'lon',
 'DO_std',
 'depth',
 'km2',
 'volume',
 'basin_info',
 'Regions',
 'included_i',
 'Shape_Leng',
 'Shape_Le_1',
 'Shape_Area',
 'geometry']

In [9]:
gdf.shape

(4144, 17)

In [10]:
gdf.loc[gdf['included_i']==0].shape

(0, 17)

In [11]:
baseline_xr['NH4'].shape

(8784, 1, 16012)

## Only want the 4144 values where included_i==1
This output netcdf doesn't have node_id in the netcdf, so I need a shapefile that has 16012 nodes

In [12]:
baseline_xr

#### Load Ben Robert's full SSM shapefile

In [13]:
full_ssm_gdf = gpd.read_file('../../ssm-analysis/gis/ssm full grid.shp')

In [14]:
full_ssm_gdf.shape

(16012, 3)

In [15]:
[*full_ssm_gdf]

['node_id', 'depth', 'geometry']

#### Use node_id to create a boolean array for subsampling sediment output to exclude masked regions

In [16]:
notmasked=(full_ssm_gdf['node_id'].isin(gdf['node_id']))

In [17]:
notmasked.shape

(16012,)

In [18]:
notmasked.sum()

4144

## Create sub-sampled variable list and print out min/max values

In [19]:
 baseline_xr[variable]

In [20]:
 baseline_xr[variable][0,notmasked].shape

(4144,)

#### Need to create a 8784x16012 boolean array

In [21]:
notmasked2d = np.tile(notmasked,(8784,1))
notmasked2d.shape

(8784, 16012)

In [22]:
len(baseline_xr['sed_CPON3'].shape)

2

# Print the min/max of variables at un-masked nodes

In [23]:
print("****  Print the min/max of variables at UN-MASKED nodes ***")
for variable in [*baseline_xr]:
    if len(baseline_xr[variable].shape)==2:
        masked_variable = np.ma.MaskedArray(baseline_xr[variable],notmasked2d)
        print(f"{variable}: MIN = {masked_variable.min().item():.2f}, MAX = {masked_variable.max().item():.2f}")
print("****  Print the min/max of variables at ALL nodes ***")
for variable in [*baseline_xr]:
    print(f"{variable}: MIN = {baseline_xr[variable].min().item():.2f}, MAX = {baseline_xr[variable].max().item():.2f}")

****  Print the min/max of variables at UN-MASKED nodes ***
zeta: MIN = -2.87, MAX = 55553.60
netPP: MIN = -180.00, MAX = 17.98
JPON1: MIN = 0.00, MAX = 1.01
JPON2: MIN = 0.00, MAX = 0.61
JPON3: MIN = 0.00, MAX = 0.41
JNH4: MIN = -0.21, MAX = 0.34
JNO3: MIN = -0.21, MAX = 0.02
sed_NH41: MIN = 0.00, MAX = 6.14
sed_NH42: MIN = 0.00, MAX = 5.51
sed_NO31: MIN = 0.00, MAX = 2.08
sed_NO32: MIN = 0.00, MAX = 0.50
sed_CPON1: MIN = 0.02, MAX = 160.65
sed_CPON2: MIN = 0.80, MAX = 529.29
sed_CPON3: MIN = 4.22, MAX = 451.82
****  Print the min/max of variables at ALL nodes ***
h: MIN = 1.12, MAX = 627.26
zeta: MIN = -2.87, MAX = 55553.60
NH4: MIN = 0.00, MAX = 7.42
NO3: MIN = 0.00, MAX = 3.77
netPP: MIN = -180.00, MAX = 18.12
JPON1: MIN = 0.00, MAX = 1.02
JPON2: MIN = 0.00, MAX = 0.61
JPON3: MIN = 0.00, MAX = 0.41
SODTM1S: MIN = 0.00, MAX = 5.85
JNH4: MIN = -0.21, MAX = 0.34
JNO3: MIN = -0.21, MAX = 0.02
sed_NH41: MIN = 0.00, MAX = 6.14
sed_NH42: MIN = 0.00, MAX = 5.51
sed_NO31: MIN = 0.00, MAX = 

In [24]:
for variable in [*baseline_xr]:
    if len(baseline_xr[variable].shape)==2:
        masked_variable = np.ma.MaskedArray(baseline_xr[variable],notmasked2d)
        print(f"{variable},{masked_variable.min().item():.2f},{masked_variable.max().item():.2f}")

zeta,-2.87,55553.60
netPP,-180.00,17.98
JPON1,0.00,1.01


KeyboardInterrupt: 

In [31]:
print("****  Print the min/max of variables at ALL nodes ***")
for variable in [*baseline_xr]:
    print(f"{variable},\
          {baseline_xr[variable].min().item():.2f}, \
          {baseline_xr[variable].max().item():.2f},\
          {baseline_xr[variable].mean().item():.2f},\
          {baseline_xr[variable].median().item():.2f}")

****  Print the min/max of variables at ALL nodes ***
h,          1.12,           627.26,          67.87,          49.32
zeta,          -2.87,           55553.60,          5.78,          1.36
NH4,          0.00,           7.42,          0.03,          0.02
NO3,          0.00,           3.77,          0.31,          0.33
netPP,          -180.00,           18.12,          0.56,          -0.01
JPON1,          0.00,           1.02,          0.03,          0.02
JPON2,          0.00,           0.61,          0.02,          0.01
JPON3,          0.00,           0.41,          0.01,          0.01
SODTM1S,          0.00,           5.85,          0.58,          0.48
JNH4,          -0.21,           0.34,          0.02,          0.01
JNO3,          -0.21,           0.02,          -0.01,          -0.01
sed_NH41,          0.00,           6.14,          0.22,          0.22
sed_NH42,          0.00,           5.51,          0.67,          0.61
sed_NO31,          0.00,           2.08,          0.19,     