In [4]:
import scipy.stats as stats
import geopandas as gpd
from pathlib import Path
import numpy as np
import geoviews as gv
import xarray as xr

import networkx as nx
import geonetworkx as gnx

import warnings
warnings.filterwarnings("ignore")

In [5]:
def get_stats(obs, mod):
    """Compute goodness-of-fit statistics between observed and modeled series.

    Parameters
    ----------
    obs : array-like
        Observed values. Must be one-dimensional and the same length as ``mod``.
    mod : array-like
        Modeled values paired element-wise with ``obs``.

    Returns
    -------
    dict
        Keys: ``'pearson-r'``, ``'pearson-r p-val'``, ``'nse'``, ``'nse1'``,
        ``'rmse'``, ``'norm_rmse'``, ``'mae'``, ``'norm_mae'``, ``'kge'``.
        The same dictionary is also printed.

    Notes
    -----
    ``norm_rmse`` and ``norm_mae`` are normalized by the range of ``obs``
    (max - min). NaNs are not filtered here; callers are expected to drop them.
    """
    # Coerce to float arrays so plain lists/tuples work and the arithmetic
    # below is always element-wise.
    obs = np.asarray(obs, dtype=float)
    mod = np.asarray(mod, dtype=float)

    n_samples = len(mod)
    residuals = obs - mod
    obs_anomaly = obs - np.mean(obs)
    obs_range = np.max(obs) - np.min(obs)

    corr, corr_p = stats.pearsonr(obs, mod)
    nse = 1 - (np.sum(residuals**2)/np.sum(obs_anomaly**2))
    # NSE1 uses absolute instead of squared deviations
    nse1 = 1 - (np.sum(np.abs(residuals))/np.sum(np.abs(obs_anomaly)))
    rmse = np.sqrt(np.sum(residuals**2)/n_samples)
    norm_rmse = rmse/obs_range
    mae = np.sum(np.abs(residuals))/n_samples
    norm_mae = mae/obs_range
    # KGE combines correlation, variability ratio and bias ratio
    kge = 1 - np.sqrt(
        (corr - 1)**2
        + (np.std(mod)/np.std(obs) - 1)**2
        + (np.mean(mod)/np.mean(obs) - 1)**2
    )

    result = {'pearson-r': corr, 'pearson-r p-val': corr_p, 'nse': nse, 'nse1': nse1, 'rmse': rmse, 'norm_rmse': norm_rmse, 'mae': mae, 'norm_mae': norm_mae, 'kge': kge}

    print(result)

    return result

In [6]:
# Line (river reach) and point (station/reservoir) layers of the river network
river_network_fn = Path('../../data-cumberland/cumberland_rivreg/cumberland_rivreg.geojson')
river_network_pts_fn = Path('../../data-cumberland/cumberland_rivreg/cumberland_rivreg_pts.geojson')

# Load both layers as GeoDataFrames. The former mid-cell `to_xarray()` call was
# dropped: its result was neither displayed nor assigned.
river_network = gpd.read_file(river_network_fn)
river_network_pts = gpd.read_file(river_network_pts_fn)

# Build the directed network graph from the point and line files
G = gnx.read_geofiles(river_network_pts_fn, river_network_fn, directed=True)

In [7]:
def plot_graph(G):
    """Build a directed GeoViews graph element from the nodes and edges of ``G``.

    Each node of ``G`` is read for its ``x``, ``y`` and ``name`` attributes, and
    the node key itself is converted to ``int`` to serve as the node index.
    Returns the ``gv.Graph`` element with plot options already applied.
    """
    # Collect one (x, y, index, label) record per node
    node_rows = []
    for node_id in G.nodes:
        attrs = G.nodes[node_id]
        node_rows.append(
            (float(attrs['x']), float(attrs['y']), int(node_id), attrs['name'])
        )

    xs = [row[0] for row in node_rows]
    ys = [row[1] for row in node_rows]
    indices = [row[2] for row in node_rows]
    labels = [row[3] for row in node_rows]

    # Edge endpoints: first element is the source, second the target
    edge_list = list(G.edges)
    source = [edge[0] for edge in edge_list]
    target = [edge[1] for edge in edge_list]

    nodes = gv.Nodes((xs, ys, indices, labels), vdims='Type')

    graph_options = dict(
        height=400,
        width=400,
        xlabel='lon (°)',
        ylabel='lat (°)',
        arrowhead_length=0.02,
        directed=True,
        aspect='equal',
    )
    return gv.Graph(((source, target), nodes)).opts(**graph_options)

# Display the network graph combined (`*`) with the OSM tile source and the
# rivers and coastline features from GeoViews.
plot_graph(G) * gv.tile_sources.OSM * gv.feature.rivers * gv.feature.coastline

# Calculate statistics

In [26]:
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere
# without editing it. Consider deriving it from a configurable data directory.
fn = "/water2/pdas47/2023_01_24-river-regulation/data-era5-2010_2021/regulation/regulation_data.insitu.obs_outflow.monthly-2.ERA5.nc"

# Time window used for the statistics, and the label stored in the
# `run_type` dimension of the results.
subset_start_time = '2011-01-01'
subset_end_time = '2016-03-01'
run_type = 'monthly'

# Open the dataset and keep only the selected time window
ds = xr.open_dataset(fn).sel(time=slice(subset_start_time, subset_end_time))
ds

In [6]:
import datetime

import pandas as pd

dims = ['run_type', 'station', 'flow_type']
stat_vars = ['pearson-r', 'pearson-r p-val', 'nse', 'nse1', 'rmse', 'norm_rmse', 'mae', 'norm_mae', 'kge']

results_stats_data = {k: [] for k in dims + stat_vars} # make dictionary to store results


def _paired_stats(observed, modeled):
    """Return get_stats() over the time steps where both series are non-NaN.

    When 2 or fewer paired samples remain, every statistic is NaN. This keeps a
    station with too little data from inheriting the statistics computed for a
    previously processed station (or raising NameError on the first one).
    """
    paired = xr.merge([modeled, observed]).dropna(dim="time")
    if paired.count('time')[observed.name] > 2:
        return get_stats(paired[observed.name].data, paired[modeled.name].data)
    return {var: np.nan for var in stat_vars}


for node in G.nodes:
    station = G.nodes[node]['name']

    modeled_inflow = ds['inflow'].sel(node=node)
    tnr = ds['theoretical_natural_runoff'].sel(node=node)
    observed_inflow = ds['obs_inflow'].sel(node=node)

    # statistics are always (re)computed for the current station
    mod_inflow_stats = _paired_stats(observed_inflow, modeled_inflow)
    nat_inflow_stats = _paired_stats(observed_inflow, tnr)

    # store results for regulated flow, then for unregulated flow
    for flow_type, flow_stats in (('regulated', mod_inflow_stats), ('unregulated', nat_inflow_stats)):
        results_stats_data['run_type'].append(run_type)
        results_stats_data['station'].append(station)
        results_stats_data['flow_type'].append(flow_type)
        for var in stat_vars:
            results_stats_data[var].append(flow_stats[var])

# convert results into dataset
resds = pd.DataFrame(results_stats_data).set_index(dims).to_xarray()

# add attributes
resds['pearson-r'].attrs = {'long_name': 'Pearson correlation coefficient', 'min': -1, 'max': 1, 'unit': '1'}
resds['pearson-r p-val'].attrs = {'long_name': 'Pearson correlation coefficient p-value', 'min': 0, 'max': 1, 'unit': '1'}
resds['nse'].attrs = {'long_name': 'Nash-Sutcliffe efficiency', 'min': -np.inf, 'max': 1, 'unit': '1'}
resds['nse1'].attrs = {'long_name': 'Nash-Sutcliffe efficiency (NSE1)', 'min': -np.inf, 'max': 1, 'unit': '1'}
resds['rmse'].attrs = {'long_name': 'Root mean squared error', 'min': 0, 'max': np.inf, 'unit': '1'}
resds['norm_rmse'].attrs = {'long_name': 'Normalized root mean squared error', 'min': 0, 'max': 1, 'unit': '1'}
resds['mae'].attrs = {'long_name': 'Mean absolute error', 'min': 0, 'max': np.inf, 'unit': '1'}
resds['norm_mae'].attrs = {'long_name': 'Normalized mean absolute error', 'min': 0, 'max': 1, 'unit': '1'}
resds['kge'].attrs = {'long_name': 'Kling-Gupta efficiency', 'min': -np.inf, 'max': 1, 'unit': '1'}

# add coordinate attributes
resds.coords['run_type'].attrs = {'long_name': 'Run type', 'description': 'Type of run to compare between different model runs. For example: `ERA5`, `IMERG-Late`, `IMERG-Final` etc. denoting which type of precipitation product used.'}
resds.coords['station'].attrs = {'long_name': 'Station/reservoir name', 'description': 'Name of station/reservoir'}
resds.coords['flow_type'].attrs = {'long_name': 'Flow type', 'description': 'Regulated or Natural flow'}

# add dataset attributes
resds.attrs = {
    'description': 'Statistics for inflow at each station/reservoir for the specifed time period.', 
    'date_created': str(datetime.datetime.now())
}

# Recording the covered period is best-effort: a failure must not abort the
# cell, but it is reported instead of being silently swallowed.
try:
    START_DATE = str(ds['time'].min().values)
    END_DATE = str(ds['time'].max().values)
    resds.attrs['from_date'] = START_DATE
    resds.attrs['to_date'] = END_DATE
except Exception as err:
    print(f"Could not record from_date/to_date attributes: {err!r}")

resds

{'pearson-r': 0.9544016108254294, 'pearson-r p-val': 3.384857915856277e-33, 'nse': 0.9045320842666728, 'nse1': 0.7109403891525616, 'rmse': 460848998.7235985, 'norm_rmse': 0.08033728142346279, 'mae': 359799702.6553723, 'norm_mae': 0.06272191118644332, 'kge': 0.9341481932257645}
{'pearson-r': 0.9123625072231579, 'pearson-r p-val': 5.988154737130709e-25, 'nse': 0.7560070582564887, 'nse1': 0.5356600157716894, 'rmse': 736747310.7772682, 'norm_rmse': 0.1284331227968923, 'mae': 577975552.4009314, 'norm_mae': 0.10075531190849761, 'kge': 0.8081297058835899}
{'pearson-r': 0.8663146789099077, 'pearson-r p-val': 9.619174336359105e-20, 'nse': 0.7143375188734422, 'nse1': 0.5419747074517562, 'rmse': 17316495.26080557, 'norm_rmse': 0.13665141818086857, 'mae': 12630475.78246715, 'norm_mae': 0.09967215663321001, 'kge': 0.6870877282825305}
{'pearson-r': 0.8663146789099077, 'pearson-r p-val': 9.619174336359105e-20, 'nse': 0.7143375188734422, 'nse1': 0.5419747074517562, 'rmse': 17316495.26080557, 'norm_rms

In [7]:
# save resulting statistics 
save_dir = Path("../../results/rr_stats")
save_dir.mkdir(exist_ok=True, parents=True)
# nc
save_fp = save_dir / f"{str(run_type)}.nc"
resds.to_netcdf(save_fp)
# csv
save_fp = save_dir / f"{str(run_type)}.csv"
resds.to_dataframe().to_csv(save_fp)

# Plot results

In [1]:
import holoviews as hv
import numpy as np
import xarray as xr
import panel as pn
import hvplot.xarray
from holoviews import opts
from bokeh.models import NumeralTickFormatter

# Select the bokeh plotting backend and set default options for Bars elements
hv.extension('bokeh')
opts.defaults(
    opts.Bars(cmap='Pastel1', width=400, height=250, show_legend=False),
)

In [2]:
# Open the statistics dataset for plotting.
# NOTE(review): this rebinds `fn` and `ds`, which earlier cells use for the
# regulation dataset; re-running those cells after this one picks up the
# wrong `ds`. The path is also absolute and machine-specific.
fn = "/water2/pdas47/2023_01_24-river-regulation/results/rr_stats/monthly.nc"
ds = xr.open_dataset(fn)
# First entry of the `run_type` coordinate; used to select data and name outputs
RUN_TYPE = ds['run_type'].values[0]
ds

In [8]:
# filter to only include stations which have upstream dams
# NOTE(review): `nx.degree` on a directed graph counts incoming and outgoing
# edges together, so `val > 1` keeps nodes with at least two incident edges.
# A terminal node with exactly one upstream neighbour would be dropped;
# confirm whether an in-degree based test matches the intent better.
deg = nx.degree(G)
nodes_with_upstreams = [G.nodes[k]['name'] for k, val in deg if val > 1]
ds_subset = ds.sel(run_type=RUN_TYPE, station=nodes_with_upstreams)
ds_subset

In [9]:
# Horizontal bar chart of the `nse` variable, split by `flow_type`
nse_hv = ds_subset.hvplot.barh(
    by='flow_type',
    y='nse'
).opts(multi_level=False, title='Nash-Sutcliffe Efficiency (NSE)')

nse_hv

In [10]:
# Horizontal bar chart of the `pearson-r` variable, split by `flow_type`
pearsonr_hv = ds_subset.hvplot.barh(
    by='flow_type',
    y='pearson-r'
).opts(multi_level=False, title='Pearson correlation coefficient')
pearsonr_hv

In [20]:
# NOTE(review): `percent_formatter` is created but not passed to any plot in
# the cells visible here; either apply it to the normalized-error axes or remove it.
percent_formatter = NumeralTickFormatter(format='0.0%')
# Horizontal bar chart of the `norm_rmse` variable, split by `flow_type`
normrmse_hv = ds_subset.hvplot.barh(
    by='flow_type',
    y='norm_rmse'
).opts(multi_level=False, title='Normalized root mean squared error')
normrmse_hv

In [21]:
# Horizontal bar chart of the `norm_mae` variable, split by `flow_type`
normmae_hv = ds_subset.hvplot.barh(
    by='flow_type',
    y='norm_mae'
).opts(multi_level=False, title='Normalized mean absolute error')
normmae_hv

In [22]:
# Horizontal bar chart of the `kge` variable, split by `flow_type`
kge_hv = ds_subset.hvplot.barh(
    by='flow_type',
    y='kge'
).opts(multi_level=False, title='Kling-Gupta Efficiency')
kge_hv

In [23]:
# Horizontal bar chart of the `nse1` variable, split by `flow_type`
nse1_hv = ds_subset.hvplot.barh(
    by='flow_type',
    y='nse1'
).opts(multi_level=False, title='NSE1')
nse1_hv

In [24]:
# Arrange the six charts in a 3x2 grid. Only the Pearson-r chart turns its
# legend on; the Bars defaults set earlier use show_legend=False.
layout = pn.Column(
    pn.Row(pearsonr_hv.opts(show_legend=True, legend_position='top_right'), nse_hv),
    pn.Row(kge_hv, nse1_hv),
    pn.Row(normrmse_hv, normmae_hv),
)
layout

BokehModel(combine_events=True, render_bundle={'docs_json': {'5e141ed0-e2de-4ea7-a1ae-0f07e5c89c58': {'version…

In [18]:
from bokeh.resources import INLINE

# save layout
# Written as an HTML file named after RUN_TYPE, passing bokeh's INLINE resources.
save_dir = Path("../../results/rr_stats")
save_dir.mkdir(exist_ok=True, parents=True)
save_fp = save_dir / f"{str(RUN_TYPE)}-stats.html"

layout.save(save_fp, resources=INLINE)