# Cascaded calibration of subcatchments defined by multiple gauges

In [None]:
from swift2.doc_helper import pkg_versions_info

print(pkg_versions_info("This document was generated from a jupyter notebook"))

## Use case

**2021-01: this vignette works structurally, but is confined to overly short (and possibly difficult) data to keep runtime low**

This vignette demonstrates how one can calibrate a catchment using multiple gauging points available within this catchment. Instead of setting up a whole-of-catchment calibration definition, it can make sense to calibrate subcatchments separately, at least in a system where subareas above a gauging point do not have a behavior dependent on other catchment processes (meaning, mostly, no managed reservoirs). SWIFT offers capabilities to calibrate such subcatchments sequentially, feeding the input flow of upstream and already calibrated subcatchments to other subcatchments, thus cutting down on the complexity and runtime of the overall catchment calibration. 

In [None]:
import datetime as dt
from collections import OrderedDict

import numpy as np

In [None]:
import swift2.doc_helper as std
import swift2.parameteriser as sp

In [None]:
from cinterop.timeseries import xr_ts_end, xr_ts_start
from swift2.classes import CompositeParameteriser, ObjectiveEvaluator, Simulation
from swift2.const import CATCHMENT_FLOWRATE_VARID
from swift2.vis import plot_two_series

In [None]:
%matplotlib inline

## Data

The sample data that comes with the package contains a model definition for the South Esk catchment, including a short subset of the climate and flow record data.

In [None]:
model_id = 'GR4J'
site_id = 'South_Esk'
simulation = std.sample_catchment_model(site_id=site_id, config_id='catchment')
simulation = simulation.swap_model('LagAndRoute', 'channel_routing')

A visual of the catchment structure (note: may not render yet through GitHub)

In [None]:
# import swift2.wrap.swift_wrap_generated as swg
# dot_graph = swg.GetCatchmentDOTGraph_py(simulation)
# import graphviz
# # Using graphviz package directly
# graph = graphviz.Source(dot_graph)
# graph  # This will display the graph in a Jupyter Notebook

In [None]:
# Other possible visualisation resources:
# https://towardsdatascience.com/visualizing-networks-in-python-d70f4cbeb259
# https://medium.com/@ludvig.hult/drawing-graphs-with-python-in-2019-bdd42bf9d5db

In [None]:
# def loadSwiftV1TextDef(controlFile, dataDir):
#     import swift2.wrap.swift_wrap_generated as swg
#     # controlFile = mkPathToPlatform(controlFile)
#     # dataDir = mkPathToPlatform(dataDir)
#     return swg.LoadVersionOneControlFile_py(controlFile, dataDir)


# ctrl_file = '/home/per202/mnt/hydrofct/work/common/Staff/per202/sample_data/South_Esk/201601/SWIFT_Control.txt')
# stopifnot(file.exists(ctrl_file))
# ms <- loadSwiftV1TextDef(ctrl_file, 'dummy')
# ms <- swapModel(ms, 'MuskingumNonLinear', 'channel_routing')



In [None]:
se_climate = std.sample_series(site_id=site_id, var_name='climate')
se_flows = std.sample_series(site_id=site_id, var_name='flow')

In [None]:
se_climate["subcatchment.4.P"].plot();

The names of the climate series are already set to the climate input identifiers of the model simulation, so setting them as inputs is easy:

In [None]:
se_climate.head(3)

In [None]:
simulation.play_input(se_climate)
simulation.set_simulation_span(xr_ts_start(se_climate), xr_ts_end(se_climate))
simulation.set_simulation_time_step('hourly')

The `doc_helper` submodule has helper functions to configure the GR4J model such that it is fit to run on hourly data:

In [None]:
std.configure_hourly_gr4j(simulation)

## Parameterisation

We define a function creating a realistic feasible parameter space. This is not the main object of this vignette, so we do not describe it in detail. 

In [None]:
import swift2.helpers as hlp
import swift2.parameteriser as sp
from swift2.utils import as_xarray_series, c, paste0, rep

def create_meta_parameteriser(simulation: Simulation, ref_area=250, time_span=3600):
    """Assemble the parameteriser used in this vignette.

    Three parameterisers are chained together, in this order:

    1. the scaled GR4J parameters from ``std.define_gr4j_scaled_parameter``;
    2. a state initialiser derived from the ``S0`` / ``R0`` linear parameteriser;
    3. the lag-and-route parameters from ``hlp.parameteriser_lag_and_route``.

    Before the last step, the two lag-and-route set-up helpers of
    ``swift2.helpers`` are called on ``simulation``.

    Args:
        simulation: simulation passed to the lag-and-route set-up helpers.
        ref_area: forwarded to ``std.define_gr4j_scaled_parameter``
            (presumably a reference area -- TODO confirm units).
        time_span: converted to ``int`` and forwarded to
            ``std.define_gr4j_scaled_parameter`` (presumably seconds, given
            the 3600 default and the hourly time step -- TODO confirm).

    Returns:
        The concatenated parameteriser.
    """
    gr4j_params = std.define_gr4j_scaled_parameter(ref_area, int(time_span))

    # S0 and R0 are defined such that, for each GR4J model instance,
    # S = S0 * x1 and R = R0 * x3.
    state_scaling = sp.linear_parameteriser(
        param_name=c("S0", "R0"),
        state_name=c("S", "R"),
        scaling_var_name=c("x1", "x3"),
        min_p_val=c(0.0, 0.0),
        max_p_val=c(1.0, 1.0),
        value=c(0.9, 0.9),
        selector_type='each subarea',
    )
    gr4j_and_states = sp.concatenate_parameterisers(
        gr4j_params, state_scaling.make_state_init_parameteriser()
    )

    # Set up lag-and-route on the simulation before building its parameteriser.
    hlp.lag_and_route_linear_storage_type(simulation)
    hlp.set_reach_lengths_lag_n_route(simulation)

    return CompositeParameteriser.concatenate(
        gr4j_and_states, hlp.parameteriser_lag_and_route(), strategy=''
    )

In [None]:
parameteriser = create_meta_parameteriser(simulation)
parameteriser.as_dataframe()

Now, checking that a default parameter set works structurally on the simulation:

In [None]:
parameteriser.set_parameter_value('asinh_x2', 0)
parameteriser.apply_sys_config(simulation)
simulation.exec_simulation()

We are now ready to enter the main topic of this vignette, subsetting the catchment into subcatchments for calibration purposes.

## Splitting the catchment in subcatchments

The sample gauge flow data contains identifiers that are of course distinct from the network node identifiers. We create a map between them (note - this information used to be in the NodeLink file in swiftv1), and we use these nodes as splitting points to derive subcatchments.

In [None]:
# Gauge identifiers and the node identifiers they map to, listed in matching order.
gauges = c('92106', '592002', '18311', '93044', '25', '181')
node_ids = paste0('node.', c('7', '12', '25', '30', '40', '43'))
# Ordered map from node identifier to gauge identifier.
node_gauges = OrderedDict(zip(node_ids, gauges))
# names(gauges) = node_ids

### Test running and recording streamflows

In [None]:
simulation.get_variable_ids(node_ids[0])

In [None]:
simulation.record_state(paste0(node_ids, ".OutflowRate"))

In [None]:
simulation.exec_simulation()

In [None]:
modelled = simulation.get_all_recorded()

In [None]:
modelled

In [None]:
modelled.sel(variable_identifiers='node.7.OutflowRate').plot()

In [None]:
se_flows[gauges[3]].plot()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def plot_multivariate_time_series(df, cols_wrap=3):
    """
    Plots all columns of a Pandas DataFrame (time series) in a grid using Seaborn.

    One line plot is drawn per column, with the DataFrame index on the x-axis
    and the column name as the subplot title. Grid cells left over on the last
    row are removed. A DataFrame without columns produces no figure.

    Args:
        df (pd.DataFrame): DataFrame with a DatetimeIndex.
        cols_wrap (int): Number of columns in the grid.  Defaults to 3.

    Raises:
        ValueError: If ``cols_wrap`` is less than 1.
    """
    if cols_wrap < 1:
        raise ValueError(f"cols_wrap must be at least 1, got {cols_wrap}")

    num_cols = len(df.columns)
    if num_cols == 0:
        # Nothing to draw; a grid with zero rows cannot be created.
        return

    num_rows = (num_cols + cols_wrap - 1) // cols_wrap  # Ceiling division

    # squeeze=False makes `axes` a 2D array whatever the grid shape; without it
    # a 1x1 grid comes back as a bare Axes object, which has no `flatten`.
    fig, axes = plt.subplots(
        num_rows, cols_wrap, figsize=(15, 5 * num_rows), squeeze=False
    )
    axes = axes.flatten()  # Flatten the axes array for easy indexing

    for ax, col in zip(axes, df.columns):
        sns.lineplot(x=df.index, y=df[col], ax=ax)
        ax.set_title(col)
        ax.tick_params(axis='x', rotation=45)  # Rotate x-axis labels for readability

    # Remove any unused subplots
    for ax in axes[num_cols:]:
        fig.delaxes(ax)

    plt.tight_layout()  # Adjust layout to prevent overlapping titles/labels
    plt.show()

# Example usage (assuming you have a DataFrame called 'se_flows')
plot_multivariate_time_series(se_flows)

In [None]:
split_element_ids = node_ids
sub_cats = simulation.split_to_subcatchments(split_element_ids)
sub_cats

The resulting list of subcatchment simulations is already ordered in an upstream to downstream order by SWIFT.

If we are to set up the first step of the sequential calibration:

In [None]:
sub_cats['node.40'].describe()

In [None]:
def first(d: OrderedDict):
    """Return the first ``(key, value)`` pair of the mapping ``d``.

    Args:
        d: mapping whose iteration order defines what "first" means.

    Returns:
        A ``(key, value)`` tuple for the first item of ``d``.

    Raises:
        IndexError: if ``d`` is empty.
    """
    # Read from the argument itself, not from a notebook-level global.
    for item in d.items():
        return item
    raise IndexError("cannot take the first item of an empty mapping")

In [None]:
element_id = first(sub_cats)[0]
element_id

In [None]:
gaugeId = node_gauges[element_id]
gaugeId

In [None]:
gauge_flow = se_flows[[gaugeId]]
gauge_flow.head()

In [None]:
sc = sub_cats[element_id]
sc

In [None]:
parameteriser.apply_sys_config(sc)
var_id = CATCHMENT_FLOWRATE_VARID
sc.record_state(var_id)

In [None]:
# DiagrammeR(getCatchmentDotGraph(sc))

Let's view the default, uncalibrated output 

In [None]:
simulation.get_simulation_span()

In [None]:
def plot_obs_vs_calc(obs, calc, ylab="streamflow (m3/s)"):
    """Plot ``obs`` and ``calc`` together over the time span covered by ``obs``.

    Args:
        obs: observed series; its start and end define the plotted period.
        calc: calculated series to plot against ``obs``.
        ylab: accepted but not currently forwarded to ``plot_two_series``.
    """
    obs_start = xr_ts_start(obs)
    obs_end = xr_ts_end(obs)
    plot_two_series(obs, calc, start_time=obs_start, end_time=obs_end)

In [None]:
gauge_flow = as_xarray_series(gauge_flow)

In [None]:
sc.exec_simulation()
plot_obs_vs_calc(gauge_flow, sc.get_recorded(var_id))

Now, setting up an objective (NSE) and optimiser:

In [None]:
objectiveId = 'NSE'
objective = sc.create_objective(var_id, gauge_flow, objectiveId, xr_ts_start(se_flows), xr_ts_end(se_flows))
score = objective.get_score(parameteriser)  

In [None]:
# termination = getMarginalTermination( tolerance = 1e-04, cutoff_no_improvement = 30, max_hours = 2/60) 
termination = sp.create_sce_termination_wila('relative standard deviation', c('0.05','0.0167'))
sce_params = sp.get_default_sce_parameters()
params = parameteriser.as_dataframe()

In [None]:
np.count_nonzero(abs(params.Max-params.Min)>0)

In [None]:
npars = np.count_nonzero(abs(params.Max-params.Min)>0)
sce_params = std.sce_parameter(npars)
optimiser = objective.create_sce_optim_swift(termination_criterion = termination, population_initialiser = parameteriser,sce_params = sce_params)
calib_logger = optimiser.set_calibration_logger("dummy")

In [None]:
%%time
calib_results = optimiser.execute_optimisation()

And the resulting hydrograph follows. The NSE score is decent, but the magnitude of the peak is not well represented. We used a uniform value for the routing parameters; having a scaling based on link properties may be a line of enquiry.

In [None]:
sorted_results = calib_results.sort_by_score('NSE')
d = sorted_results.as_dataframe()
d.head()

In [None]:
d.tail()

In [None]:
p = sorted_results.get_parameters_at_index(1)
p

In [None]:
p.apply_sys_config(sc)
sc.exec_simulation()
plot_obs_vs_calc(gauge_flow, sc.get_recorded(var_id))

We can create a subcatchment parameteriser, such that when applied to the whole of the South Esk, only the states of the subareas, links and nodes of the subcatchment are potentially affected.

In [None]:
# Deliberately not named `sp`: that name is this notebook's alias for the
# swift2.parameteriser module, and rebinding it breaks re-running earlier cells.
subcat_parameteriser = p.subcatchment_parameteriser(sc)
subcat_parameteriser.apply_sys_config(simulation)
simulation.get_state_value(paste0('subarea.', np.arange(34,stop=41), '.x2'))
# saIds = get_subarea_ids(simulation)

In [None]:
# TODO
# spFile = tempfile()
# SaveParameterizer_R(sp, spFile)
# # Following fails 2020-06, see https://jira.csiro.au/browse/WIRADA-631 
# # sp2 = LoadParameterizer_R(spFile)

# if(file.exists(spFile)) { file.remove(spFile) }

In [None]:
p = sorted_results.get_parameters_at_index(1)
p.as_dataframe()

In [None]:
# swoop(sc, p, param_name, from, to, num=10, var_id) {
#   if(missing(from)) { from = GetParameterMinValue_R(p, param_name)}
#   if(missing(to))   { to = GetParameterMaxValue_R(p, param_name)}
#   oat(sc, p, param_name, from=from, to=to, num=num, var_id) 
# }

# testp(sim, p, ...) {
#   q = CloneHypercubeParameterizer_R(p)
#   execSimulation(sim)
#   params = list(...)
#   for(pname in names(params)) {set_parameter_value(q, pname, params[[pname]])}
#   plot_obs_vs_calc(gaugeFlow, getRecorded(sim, var_id))
# }

# flows = swoop(sc, p, 'log_x4', var_id=var_id)

# flows = swoop('log_x1')
# flows = swoop('Alpha')
# flows = merge(flows, gaugeFlow)
# zoo::plot.zoo(flows, plot.type='single')
# col=c('orange', 'black','blue','red')

# f(...) {
# params = list(...)
# params
# set_parameter_value(p, names(params), as.numeric(params))
# applySysConfig(p, sc)
# execSimulation(sc)
# plot_obs_vs_calc(gaugeFlow, getRecorded(sc, var_id))
# }

## Whole of catchment calibration combining point gauges

In [None]:
# Gauge identifiers and the node identifiers they map to, listed in matching order.
gauges = c('92106', '592002', '18311', '93044', '25', '181')
node_ids = paste0('node.', c('7', '12', '25', '30', '40', '43'))
# Ordered map from node identifier to gauge identifier.
node_gauges = OrderedDict(zip(node_ids, gauges))
# names(gauges) = node_ids

In [None]:
calibNodes = paste0('node.', ["7","12"])



In [None]:
element_id = first(sub_cats)[0]
element_id

In [None]:
gaugeId = [node_gauges[k] for k in calibNodes]
gauge_flow = se_flows[gaugeId]

In [None]:
sc = sub_cats[element_id]
parameteriser.apply_sys_config(sc)

var_id = paste0(calibNodes, '.OutflowRate')
simulation.record_state(var_id)

In [None]:
objectiveId = 'NSE'

def create_obj_station(i: int):
    """Create the objective for the ``i``-th calibration station.

    The observations are the ``gaugeId[i]`` column of ``gauge_flow``, and the
    modelled variable is ``var_id[i]`` on the whole-of-catchment ``simulation``.
    The objective is evaluated over the period spanned by ``se_flows``.

    Args:
        i: position of the station in ``gaugeId`` / ``var_id``.

    Returns:
        The objective returned by ``simulation.create_objective``.
    """
    station_obs = as_xarray_series(gauge_flow[[gaugeId[i]]])
    return simulation.create_objective(
        var_id[i],
        station_obs,
        objectiveId,
        xr_ts_start(se_flows),
        xr_ts_end(se_flows),
    )

objectives = [create_obj_station(i) for i in [0,1]]

co = ObjectiveEvaluator.create_composite_objective(objectives, [1.0,1.0], var_id[:2])

In [None]:
score = co.get_score(parameteriser) 
# scoresAsDataFrame(score)

In [None]:
score