# Error correction models - ERRIS

## About this document


In [None]:
from swift2.doc_helper import pkg_versions_info

# Print the banner produced by pkg_versions_info for this notebook.
versions_banner = pkg_versions_info("This document was generated from a jupyter notebook")
print(versions_banner)

[Li, Ming; Wang, QJ; Bennett, James; Robertson, David. Error reduction and representation in stages (ERRIS) in hydrological modelling for ensemble streamflow forecasting. Hydrology and Earth System Sciences. 2016; 20:3561-3579. https://doi.org/10.5194/hess-20-3561-2016 ](https://doi.org/10.5194/hess-20-3561-2016)

## Calibrating ERRIS

### Model structure

We use sample hourly data from the Adelaide catchment ([presumably the Adelaide River catchment in the Northern Territory, TBC](https://en.wikipedia.org/wiki/Adelaide_River)). The catchment model setup is not the key point of this vignette, so we do not comment on that section:

In [None]:
import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
import seaborn as sns

In [None]:
import swift2
import swift2.wrap.swift_wrap_custom as swc

In [None]:
# Only temporary, under construction...
import swift2.wrap.swift_wrap_generated as swg
import xarray as xr

In [None]:
from cinterop.timeseries import (
    TIME_DIMNAME,
    pd_series_to_xr_series,
    slice_xr_time_series,
    xr_ts_end,
    xr_ts_start,
)

In [None]:
from swift2.doc_helper import *

# from swift2.prototypes import extractOptimizationLog
from swift2.model_definitions import *
from swift2.parameteriser import *
from swift2.play_record import *
from swift2.simulation import *
from swift2.statistics import *
from swift2.system import *
from swift2.utils import *
from swift2.vis import OptimisationPlots
from uchronia.time_series import get_item, mk_date

In [None]:
%matplotlib inline

In [None]:
# Model identifiers used when assembling the simulation.
hydromodel = "GR4J"
channel_routing = "MuskingumNonLinear"
hydroModelRainfallId = "P"
hydroModelEvapId = "E"

# Sample catchment structure for the "Adelaide" site.
catchmentStructure = sample_catchment_model(
    site_id="Adelaide",
    config_id="catchment",
)

from swift2.classes import HypercubeParameteriser, Parameteriser, Simulation

In [None]:
# Swap in the runoff model first, then the channel routing model on the result.
insimulation = swap_model(catchmentStructure, model_id=hydromodel, what="runoff")
simulation: Simulation = swap_model(
    insimulation,
    model_id=channel_routing,
    what="channel_routing",
)

# Subarea identifiers of the resulting simulation.
saId = get_subarea_ids(simulation)

In [None]:
# Take the first subarea identifier straight from the simulation. Re-querying
# (instead of overwriting `saId` with `saId[0]`) keeps this cell idempotent:
# running it a second time no longer indexes into the already-extracted element.
# HACK: need to test robustness to different time series passed to player functions
saId = get_subarea_ids(simulation)[0]

# Sample series for the "Adelaide" site.
precipTs = sample_series(site_id="Adelaide", var_name="rain")
evapTs = sample_series(site_id="Adelaide", var_name="evap")
flowRateTs = sample_series(site_id="Adelaide", var_name="flow")

# Play rainfall and evaporation into the subarea's 'P' and 'E' inputs.
play_input(simulation, precipTs, mk_full_data_id('subarea', saId, hydroModelRainfallId))
play_input(simulation, evapTs, mk_full_data_id('subarea', saId, hydroModelEvapId))
configure_hourly_gr4j(simulation)
set_simulation_time_step(simulation, 'hourly')

# Small time interval only, to reduce runtimes in this vignette
simstart = mk_date(2010, 12, 1)
simend = mk_date(2011, 6, 30, 23)
simwarmup = simstart

set_simulation_span(simulation, simstart, simend)

In [None]:
# Inspect the "flow" sample series loaded for the "Adelaide" site.
flowRateTs

In [None]:
# Query the "subarea.1.areaKm2" state on the configured simulation.
get_state_value(simulation, "subarea.1.areaKm2")
# getCatchmentStructure(simulation)

In [None]:
# Same state, queried on the catchment structure the simulation was derived from.
get_state_value(catchmentStructure, "subarea.1.areaKm2")

In [None]:
def templateHydroParameterizer(simulation):
    """Build the GR4J + Muskingum parameteriser template for ``simulation``.

    Thin wrapper around ``define_parameteriser_gr4j_muskingum`` using the fixed
    settings of this notebook: ``ref_area`` 250.0, ``time_span`` 3600, the "NSE"
    objective, ``delta_t`` 1.0 and ``param_name_k`` set to 'Alpha'.
    """
    template_settings = dict(
        ref_area=250.0,
        time_span=3600,
        simulation=simulation,
        objfun="NSE",
        delta_t=1.0,
        param_name_k='Alpha',
    )
    return define_parameteriser_gr4j_muskingum(**template_settings)

# Record the "OutflowRate" variable of node "node.2" during simulation runs.
nodeId = "node.2"
flowId = mk_full_data_id(nodeId, "OutflowRate")
record_state(simulation, flowId)

We use pre-calibrated hydrologic parameters (reproducible with doc/error_correction_doc_preparation.r in this package structure) 

In [None]:
p = templateHydroParameterizer(simulation)

# Feasible bounds for R0 and S0.
set_min_parameter_value(p, 'R0', 0.0)
set_max_parameter_value(p, 'R0', 1.0)
set_min_parameter_value(p, 'S0', 0.0)
set_max_parameter_value(p, 'S0', 1.0)

# Pre-calibrated parameter values.
set_parameter_value(p, 'log_x4', 1.017730e+00)
set_parameter_value(p, 'log_x1', 2.071974e+00)
set_parameter_value(p, 'log_x3', 1.797909e+00)
set_parameter_value(p, 'asinh_x2', -1.653842e+00)
set_parameter_value(p, 'R0', 2.201930e-11)
set_parameter_value(p, 'S0', 3.104968e-11)
set_parameter_value(p, 'X', 6.595537e-03)  # Gotcha: needs to be set before alpha is changed.
set_parameter_value(p, 'Alpha', 6.670534e-01)

# This is not the last expression of the cell, so a bare `p.as_dataframe()`
# would be evaluated and silently discarded; print it so the table shows up.
print(p.as_dataframe())

    
# Bounds of the time window used when plotting series.
sViz = mk_date(2010, 12, 1)
eViz = mk_date(2011, 4, 30, 23)

def one_wet_season(tts):
    """Return ``tts`` restricted to the ``sViz`` .. ``eViz`` window."""
    from cinterop.timeseries import ts_window

    window_bounds = dict(from_date=sViz, to_date=eViz)
    return ts_window(tts, **window_bounds)

def plot_obs_vs_calc(obs, calc, ylab="flow (m3/s)"):
    """Plot an observed series against a calculated one over the plotting window.

    Both series are windowed with ``one_wet_season``; the observed one is also
    converted with ``as_xarray_series``. The plot spans the start and end of
    the windowed observations.

    NOTE(review): ``ylab`` is accepted but not forwarded to ``plot_two_series``.
    """
    from swift2.vis import plot_two_series

    obs_windowed = as_xarray_series(one_wet_season(obs))
    calc_windowed = one_wet_season(calc)
    plot_two_series(
        obs_windowed,
        calc_windowed,
        start_time=xr_ts_start(obs_windowed),
        end_time=xr_ts_end(obs_windowed),
    )

In [None]:
# Apply the parameter set `p` to the simulation.
p.apply_sys_config(simulation)

In [None]:
# Run the simulation with the parameters applied so far.
simulation.exec_simulation()

In [None]:
# Compare the flow series with the recorded output for `flowId`.
plot_obs_vs_calc(flowRateTs, simulation.get_recorded(flowId))

In [None]:
# Display the flow series again after the simulation run.
flowRateTs

### Set up the error correction model

In [None]:
# List node identifiers and node names, to choose where the error model is attached.
simulation.get_node_ids(), simulation.get_node_names()

In [None]:
# Attach an 'ERRIS' error correction model to node 'node.2'.
errorModelElementId = 'node.2'
simulation.set_error_correction_model('ERRIS', errorModelElementId, length=-1, seed=0)

# Work on an independent copy. A plain assignment would only alias `flowRateTs`,
# so any in-place edit of `flowRateTsGapped` would also alter `flowRateTs`,
# which is still used as the full series elsewhere in the notebook.
flowRateTsGapped = flowRateTs.copy()

In [None]:
# Blank out February 2011 to create a gap in the series.
flowRateTsGapped['2011-02'] = np.nan

# plot(flowRateTsGapped)

# Feed the gapped series to the error model's "Observation" input.
play_input(
    simulation,
    flowRateTsGapped,
    var_ids=mk_full_data_id(errorModelElementId, "ec", "Observation"),
)

Now, prepare a model with error correction, and set up for generation

In [None]:
from swift2.prototypes import *

# Clone the simulation; the clone is the one run with error correction.
ecs = simulation.clone()

# Switch the "Generating" state of the error model off on the clone.
ecs.set_state_value(mk_full_data_id(nodeId, "ec", "Generating"), False)

# Record both the "Updated" and the "Input" variables of the error model.
updatedFlowVarID = mk_full_data_id(nodeId, "ec", "Updated")
inputFlowVarID = mk_full_data_id(nodeId, "ec", "Input")
ecs.record_state(var_ids=c(updatedFlowVarID, inputFlowVarID))

### ERRIS calibration in stages

In [None]:
#termination = getMaxRuntimeTermination(0.005)
# Termination criterion shared by the calibration stages.
termination = create_sce_termination_wila(
    'relative standard deviation',
    c('0.05', '0.0167'),
)

We could set up a four-stage estimation in one go, but we will instead work through each stage separately for didactic purposes.

In [None]:
%%time 
censOpt = 0.0

# ERRIS parameter estimator at `errorModelElementId`, over the simulation span.
estimator = create_erris_parameter_estimator(
    simulation,
    flowRateTs,
    errorModelElementId,
    estimation_start=simstart,
    estimation_end=simend,
    cens_thr=0.0,
    cens_opt=censOpt,
    termination_condition=termination,
    restriction_on=True,
    weighted_least_square=False,
)

# Stage one of the calibration.
stageOnePset = swg.CalibrateERRISStageOne_py(estimator)
print(stageOnePset.as_dataframe())

#### Stage 2

Stage two can be logged:

In [None]:
%%time 
# Turn on verbose calibration so this stage can be logged, then run stage two
# starting from the stage-one parameter set.
swg.SetERRISVerboseCalibration_py(estimator, True)
stageTwoPset = swg.CalibrateERRISStageTwo_py(estimator, stageOnePset)
print(stageTwoPset.as_dataframe())

In [None]:
def mkEcIds(p: HypercubeParameteriser) -> HypercubeParameteriser:
    """Re-key a parameter set onto the error correction model at ``nodeId``.

    The ``Name`` column of the parameter table is replaced with the result of
    ``mk_full_data_id(nodeId, 'ec', <names>)`` and a new 'Generic'
    parameteriser is created from the modified table.
    """
    params_table = p.as_dataframe()
    short_names = params_table.Name
    params_table.Name = mk_full_data_id(nodeId, 'ec', short_names)
    return create_parameteriser('Generic', params_table)

# Apply the stage-two parameters to `ecs`, rerun it, and compare the recorded
# "Updated" flow with the gapped series.
mkEcIds(stageTwoPset).apply_sys_config(ecs)
ecs.exec_simulation()
plot_obs_vs_calc(flowRateTsGapped, ecs.get_recorded(updatedFlowVarID))

A helper function to process the calibration log:

In [None]:
# def prepOptimLog(estimator, fitness_name = "Log.likelihood"):
#     optimLog = getLoggerContent(estimator)
#     # head(optimLog)
#     # optimLog$PointNumber = 1:nrow(optimLog)   
#     logMh = mkOptimLog(optimLog, fitness = fitness_name, messages = "Message", categories = "Category") 
#     geom_ops = mhplot::subset_by_message(logMh)
#     d = list(data=logMh, geom_ops=geom_ops)
# }

In [None]:
# Show the Python type and string representation of the estimator object.
type(estimator), str(estimator)

In [None]:
# Extract the optimisation log, with "Log-likelihood" as the fitness column name.
opt_log = estimator.extract_optimisation_log(fitness_name = "Log-likelihood")

In [None]:
# Keep only log entries whose message matches the given pattern.
geom_ops = opt_log.subset_by_message(pattern= 'Initial.*|Reflec.*|Contrac.*|Add.*') 

In [None]:
def get_max_ll():
    """Return the largest "Log-likelihood" value in the current ``geom_ops`` log.

    Reads the module-level ``geom_ops`` at call time, so the result follows
    whichever log subset was assigned most recently.
    """
    log_likelihoods = geom_ops._data["Log-likelihood"]
    return np.max(log_likelihoods)

In [None]:
# Plot how 'Sigma1_Rising' evolved, with limits from 0 to the maximum
# log-likelihood in the log, then enlarge the figure.
v = OptimisationPlots(geom_ops)
g = v.parameter_evolution('Sigma1_Rising', c(0, get_max_ll()))
plt.gcf().set_size_inches(10,8)

#### Stage 3

In [None]:
%%time
# Stage three, starting from the stage-two parameter set.
stageThreePset = swg.CalibrateERRISStageThree_py(estimator, stageTwoPset)
print(stageThreePset.as_dataframe())

In [None]:
# Re-extract the optimisation log after stage three.
opt_log = estimator.extract_optimisation_log(fitness_name = "Log-likelihood")

In [None]:
# Keep only log entries whose message matches the given pattern.
geom_ops = opt_log.subset_by_message(pattern= 'Initial.*|Reflec.*|Contrac.*|Add.*') 

In [None]:
# Plot how 'Rho' evolved, with limits from 0 to the maximum log-likelihood.
v = OptimisationPlots(geom_ops)
g = v.parameter_evolution('Rho', c(0, get_max_ll()))
plt.gcf().set_size_inches(10,8)

#### Stage 3a, generating and fitting M and S if free

In [None]:
%%time
# Stage 3a, starting from the stage-three parameter set.
stageThreePsetMS = swg.CalibrateERRISStageThreeMS_py(estimator, stageThreePset)
print(stageThreePsetMS.as_dataframe())

In [None]:
# Apply the stage-3a parameters to `ecs`, rerun it, and compare the recorded
# "Updated" flow with the gapped series.
mkEcIds(stageThreePsetMS).apply_sys_config(ecs)
ecs.exec_simulation()
plot_obs_vs_calc(flowRateTsGapped, ecs.get_recorded(updatedFlowVarID))

#### Stage 4, rising limb 

In [None]:
%%time
# Stage four with useRising=True, starting from the stage-3a parameter set.
stageFourPsetRising = swg.CalibrateERRISStageFour_py(estimator, stageThreePsetMS, useRising = True)
print(stageFourPsetRising.as_dataframe())

In [None]:
# Re-extract the optimisation log after the rising-limb stage.
opt_log = estimator.extract_optimisation_log(fitness_name = "Log-likelihood")

In [None]:
# Keep only log entries whose message matches the given pattern.
geom_ops = opt_log.subset_by_message(pattern= 'Initial.*|Reflec.*|Contrac.*|Add.*') 

In [None]:
# Plot how 'Weight_Rising' evolved, with limits from 0 to the maximum log-likelihood.
v = OptimisationPlots(geom_ops)
g = v.parameter_evolution('Weight_Rising', c(0, get_max_ll()))
plt.gcf().set_size_inches(10,8)

In [None]:
# Apply the rising-limb stage-four parameters to `ecs`, rerun it, and compare
# the recorded "Updated" flow with the gapped series.
mkEcIds(stageFourPsetRising).apply_sys_config(ecs)
ecs.exec_simulation()
plot_obs_vs_calc(flowRateTsGapped, ecs.get_recorded(updatedFlowVarID))

#### Stage 4, falling limbs

In [None]:
%%time
# Stage four with useRising=False, again starting from the stage-3a parameter set.
stageFourPsetFalling = swg.CalibrateERRISStageFour_py(estimator, stageThreePsetMS, useRising = False)
print(stageFourPsetFalling.as_dataframe())

In [None]:
# Re-extract the optimisation log after the falling-limb stage.
opt_log = estimator.extract_optimisation_log(fitness_name = "Log-likelihood")

In [None]:
# Keep only log entries whose message matches the given pattern.
geom_ops = opt_log.subset_by_message(pattern= 'Initial.*|Reflec.*|Contrac.*|Add.*') 

In [None]:
v = OptimisationPlots(geom_ops)
# This section is the falling-limb calibration (useRising=False), so plot the
# falling-limb weight rather than repeating the rising-limb plot.
# NOTE(review): 'Weight_Falling' follows the '<Name>_Rising' naming convention;
# confirm it against the columns of stageFourPsetFalling.as_dataframe().
g = v.parameter_evolution('Weight_Falling', c(0, get_max_ll()))
plt.gcf().set_size_inches(10, 8)

#### Final consolidated parameter set

In [None]:
# Merge the per-stage results into one parameter set; stage 3 contributes its
# "MS" variant and stage 4 both the rising and falling results.
finalPset = swg.ConcatenateERRISStagesParameters_py(
    estimator,
    hydroParams=create_parameteriser(),
    stage1_result=stageOnePset,
    stage2_result=stageTwoPset,
    stage3_result=stageThreePsetMS,
    stage4a_result=stageFourPsetRising,
    stage4b_result=stageFourPsetFalling,
    toLongParameterName=False,
)

print(finalPset.as_dataframe())

### Legacy call

Check that the previous "one stop shop" call gives the same results.

In [None]:
%%time

from swift2.prototypes import estimate_erris_parameters

censOpt = 0.0
termination = create_sce_termination_wila(
    'relative standard deviation',
    c('0.05', '0.0167'),
)

# Placeholder date for the warmup/exclusion arguments, both of which are disabled.
dummyDate = simstart

psetFullEstimate = estimate_erris_parameters(
    simulation,
    flowRateTs,
    errorModelElementId,
    warmup_start=dummyDate,
    warmup_end=dummyDate,
    warmup=False,
    estimation_start=simstart,
    estimation_end=simend,
    cens_thr=0.0,
    cens_opt=censOpt,
    exclusion_start=dummyDate,
    exclusion_end=dummyDate,
    exclusion=False,
    termination_condition=termination,
    hydro_params=None,
    erris_params=None,
    restriction_on=True,
    weighted_least_square=False,
)

print(psetFullEstimate.as_dataframe())