From 7e3861ba2d041251920ce5c01570501560c55b9f Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Fri, 8 Dec 2023 22:13:06 -0500 Subject: [PATCH 01/29] adding xhydro calibration and modelling --- environment-dev.yml | 2 + environment.yml | 2 + tests/test_calibration.py | 50 +++++++++++ xhydro/calibration.py | 174 +++++++++++++++++++++++++++++++++++++ xhydro/test_calibration.py | 11 +++ 5 files changed, 239 insertions(+) create mode 100644 tests/test_calibration.py create mode 100644 xhydro/calibration.py create mode 100644 xhydro/test_calibration.py diff --git a/environment-dev.yml b/environment-dev.yml index 1cb530d4..a3b430fb 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -12,6 +12,8 @@ dependencies: - xscen >=0.7.1 - pip - pip: + - hydroeval + - spotpy - xdatasets # Dev - bump-my-version >=0.12.0 diff --git a/environment.yml b/environment.yml index ffbcea69..abdc1f03 100644 --- a/environment.yml +++ b/environment.yml @@ -12,4 +12,6 @@ dependencies: - xscen >=0.7.1 - pip - pip: + - hydroeval + - spotpy - xdatasets diff --git a/tests/test_calibration.py b/tests/test_calibration.py new file mode 100644 index 00000000..6dec99a5 --- /dev/null +++ b/tests/test_calibration.py @@ -0,0 +1,50 @@ +import pytest + +from xhydro.calibration import (perform_calibration, + get_objective_function_value, + dummy_model, + spot_setup) +import numpy as np +import spotpy + + + +def test_spotpy_calibration(): + + """ + Make sure the calibration works for a few test cases + """ + bounds_low = np.array([0,0,0]) + bounds_high = np.array([10,10,10]) + + model_config={ + 'precip':np.array([10,11,12,13,14,15]), + 'temperature':np.array([10,3,-5,1,15,0]), + 'Qobs':np.array([120,130,140,150,160,170]), + 'drainage_area':np.array([10]), + 'model_name':'Dummy', + } + + best_parameters, best_simulation, best_objfun = perform_calibration(model_config, 'nse', maximize = True, bounds_low=bounds_low, bounds_high=bounds_high, evaluations=1000, algorithm="DDS") + + # Test that the results have the same size as expected (number of parameters) + assert len(best_parameters) == len(bounds_high) + + # Test that the objective function is calculated correctly + objfun = get_objective_function_value(model_config['Qobs'], best_simulation) + assert objfun == best_objfun + + # Test Spotpy setup and sampler + spotSetup = spot_setup(model_config, bounds_high, bounds_low, obj_func="nse") + sampler = spotpy.algorithms.dds(spotSetup, dbname="tmp", dbformat="ram", save_sim=False) + sampler.sample(20, trials=1) + assert spotSetup.diagnostics is not None + + # Test dummy model response + model_config['parameters']=[5,5,5] + Qsim = dummy_model(model_config) + assert Qsim[3]==10.000 # Todo: update when xhydro is installed. + + + + \ No newline at end of file diff --git a/xhydro/calibration.py b/xhydro/calibration.py new file mode 100644 index 00000000..51f9b26f --- /dev/null +++ b/xhydro/calibration.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Dec 8 16:48:34 2023 + +@author: ets +""" + +import numpy as np +from spotpy.objectivefunctions import rmse +from spotpy import analyser +from spotpy.parameter import Uniform +import spotpy +import hydroeval as he + +class spot_setup(object): + + + def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): + + self.model_config = model_config + self.obj_func = obj_func + + + # Create the sampler for each parameter based on the bounds + self.parameters=[Uniform("param"+str(i), bounds_low[i], bounds_high[i]) for i in range(0,len(bounds_high))] + + def simulation(self, x): + + self.model_config['parameters'] = x + + return hydrological_model_selector(self.model_config) + + + def evaluation(self): + + return self.model_config['Qobs'] + + + def objectivefunction(self, simulation, evaluation, params=None, transform=None, epsilon=None): + + return get_objective_function_value(evaluation, simulation, params, transform, epsilon, self.obj_func) + + +def perform_calibration(model_config, evaluation_metric, maximize, bounds_high, bounds_low, evaluations, algorithm="DDS"): + + """ + 'model_config' is the model configuration object (dict type) that contains all info to run the model. + The model function called to run this model should always use this object and read-in data it requires. + It will be up to the user to provide the data that the model requires. + It should also contain parameters, even NaN or dummy ones, and the optimizer will update these. + """ + + spotpy_setup = spot_setup(model_config, bounds_high=bounds_high, bounds_low=bounds_low, obj_func=evaluation_metric) + + if algorithm == "DDS": + sampler = spotpy.algorithms.dds(spotpy_setup, dbname="DDS_optim", dbformat='ram', save_sim=False) + sampler.sample(evaluations, trials=1) + + elif algorithm == "SCEUA": + sampler = spotpy.algorithms.sceua(spotpy_setup, dbname="SCEUA_optim", dbformat='ram', save_sim=False) + sampler.sample(evaluations, ngs=7, kstop=3, peps=0.1, pcento=0.1) + + results = sampler.getdata() + + best_parameters = analyser.get_best_parameterset(results,like_index=1, maximize=maximize) + best_parameters=[best_parameters[0][i] for i in range(0,len(best_parameters[0]))] + + if maximize==True: + bestindex, bestobjf = analyser.get_maxlikeindex(results) + else: + bestindex, bestobjf = analyser.get_minlikeindex(results) + + best_model_run = results[bestindex] + + model_config['parameters'] = best_parameters + + Qsim = hydrological_model_selector(model_config) + + #objfun = get_objective_function_value(model_config['Qobs'], Qsim, best_parameters, obj_func=evaluation_metric) + + return best_parameters, Qsim, bestobjf + + +def dummy_model(model_config): + + """ + Dummy model to show the implementation we should be aiming for. Each model + will have its own required data that users can pass. We could envision a setup + where a class generates the model_config object for all model formats making it + possible to replace models on the fly + """ + + precip = model_config['precip'] + temperature = model_config['temperature'] + drainage_area = model_config['drainage_area'] + parameters = model_config['parameters'] + + Q = np.empty(len(precip)) + + for t in range(0,len(precip)): + + Q[t]= (precip[t] * parameters[0] + abs(temperature[t])*parameters[1])* parameters[2] * drainage_area + + + return Q + + + + +def transform_flows(Qobs, Qsim, transform=None, epsilon=None): + + # This subset of code is taken from the hydroeval package: + # https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py + # + # generate a subset of simulation and evaluation series where evaluation + # data is available + Qsim = Qsim[~np.isnan(Qobs[:, 0]), :] + Qobs = Qobs[~np.isnan(Qobs[:, 0]), :] + + # transform the flow series if required + if transform == 'log': # log transformation + if not epsilon: + # determine an epsilon value to avoid zero divide + # (following recommendation in Pushpalatha et al. (2012)) + epsilon = 0.01 * np.mean(Qobs) + Qobs, Qsim = np.log(Qobs + epsilon), np.log(Qsim + epsilon) + + elif transform == 'inv': # inverse transformation + if not epsilon: + # determine an epsilon value to avoid zero divide + # (following recommendation in Pushpalatha et al. (2012)) + epsilon = 0.01 * np.mean(Qobs) + Qobs, Qsim = 1.0 / (Qobs + epsilon), 1.0 / (Qsim + epsilon) + + elif transform == 'sqrt': # square root transformation + Qobs, Qsim = np.sqrt(Qobs), np.sqrt(Qsim) + + return Qobs, Qsim + + + +def hydrological_model_selector(model_config): + + if model_config['model_name'] == 'Dummy': + Qsim = dummy_model(model_config) + + # ADD OTHER MODELS HERE + + return Qsim + + +def get_objective_function_value(Qobs, Qsim, params, transform=None, epsilon=None, obj_func=None): + + # Transform flows if needed + if transform: + Qobs, Qsim = transform_flows(Qobs, Qsim, transform, epsilon) + + # Default objective function if none provided by the user + if not obj_func: + like = rmse(Qobs,Qsim) + + # Popular ones we can use hydroeval package + elif obj_func.lower() in ['nse','kge','kgeprime','kgenp','rmse','mare','pbias']: + + # Get the correct function from the package + like = he.evaluator(getattr(he,obj_func.lower()), simulations=Qsim, evaluation=Qobs) + + # In neither of these cases, use the obj_func method from spotpy + else: + like = obj_func(Qobs, Qsim) + + # Return results + return like \ No newline at end of file diff --git a/xhydro/test_calibration.py b/xhydro/test_calibration.py new file mode 100644 index 00000000..b2335a22 --- /dev/null +++ b/xhydro/test_calibration.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Dec 8 19:36:09 2023 + +@author: ets +""" + + + + From 6667a896a5cbf07003234850c53b4cc4f2210dd8 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Fri, 8 Dec 2023 23:18:01 -0500 Subject: [PATCH 02/29] update calibration tests and packages for tests --- environment-dev.yml | 1 + environment.yml | 1 + pyproject.toml | 1 + tests/test_calibration.py | 14 +++----------- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index a3b430fb..9d6aebb4 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -5,6 +5,7 @@ dependencies: - python >=3.9,<3.12 # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages + - esmpy - numpy - statsmodels - xarray diff --git a/environment.yml b/environment.yml index abdc1f03..86662a74 100644 --- a/environment.yml +++ b/environment.yml @@ -5,6 +5,7 @@ dependencies: - python >=3.9,<3.12 # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages + - esmf - numpy - statsmodels - xarray diff --git a/pyproject.toml b/pyproject.toml index 059c9f12..d7d1e775 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ classifiers = [ dynamic = ["description", "version"] dependencies = [ # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! + "esmpy", "numpy", "statsmodels", "xarray", diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 6dec99a5..4ec08f1f 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -5,8 +5,6 @@ dummy_model, spot_setup) import numpy as np -import spotpy - def test_spotpy_calibration(): @@ -31,19 +29,13 @@ def test_spotpy_calibration(): assert len(best_parameters) == len(bounds_high) # Test that the objective function is calculated correctly - objfun = get_objective_function_value(model_config['Qobs'], best_simulation) + objfun = get_objective_function_value(model_config['Qobs'], best_simulation,best_parameters, obj_func='nse') assert objfun == best_objfun - - # Test Spotpy setup and sampler - spotSetup = spot_setup(model_config, bounds_high, bounds_low, obj_func="nse") - sampler = spotpy.algorithms.dds(spotSetup, dbname="tmp", dbformat="ram", save_sim=False) - sampler.sample(20, trials=1) - assert spotSetup.diagnostics is not None - + # Test dummy model response model_config['parameters']=[5,5,5] Qsim = dummy_model(model_config) - assert Qsim[3]==10.000 # Todo: update when xhydro is installed. + assert Qsim[3]==3500.00 From dbe412b7923f9a92f58e9c00357d3cea859e4b38 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Sat, 9 Dec 2023 00:02:58 -0500 Subject: [PATCH 03/29] added hydrological modelling basis --- xhydro/hydrological_modelling.py | 37 ++++++++++++++++++++++++++++++++ xhydro/test_calibration.py | 11 ---------- 2 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 xhydro/hydrological_modelling.py delete mode 100644 xhydro/test_calibration.py diff --git a/xhydro/hydrological_modelling.py b/xhydro/hydrological_modelling.py new file mode 100644 index 00000000..13dfba19 --- /dev/null +++ b/xhydro/hydrological_modelling.py @@ -0,0 +1,37 @@ +import numpy as np + + +def hydrological_model_selector(model_config): + + if model_config['model_name'] == 'Dummy': + Qsim = dummy_model(model_config) + + # ADD OTHER MODELS HERE + + return Qsim + + +def dummy_model(model_config): + + """ + Dummy model to show the implementation we should be aiming for. Each model + will have its own required data that users can pass. We could envision a setup + where a class generates the model_config object for all model formats making it + possible to replace models on the fly + """ + + precip = model_config['precip'] + temperature = model_config['temperature'] + drainage_area = model_config['drainage_area'] + parameters = model_config['parameters'] + + Q = np.empty(len(precip)) + + for t in range(0,len(precip)): + + Q[t]= (precip[t] * parameters[0] + abs(temperature[t])*parameters[1])* parameters[2] * drainage_area + + + return Q + + diff --git a/xhydro/test_calibration.py b/xhydro/test_calibration.py deleted file mode 100644 index b2335a22..00000000 --- a/xhydro/test_calibration.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri Dec 8 19:36:09 2023 - -@author: ets -""" - - - - From f46847e9e689e69c86c87d16e2c6345fbdd37d2b Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Sat, 9 Dec 2023 00:05:15 -0500 Subject: [PATCH 04/29] cleaned calibration --- xhydro/calibration.py | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/xhydro/calibration.py b/xhydro/calibration.py index 51f9b26f..d5028d15 100644 --- a/xhydro/calibration.py +++ b/xhydro/calibration.py @@ -12,6 +12,7 @@ from spotpy.parameter import Uniform import spotpy import hydroeval as he +from xhydro.hydrological_modelling import hydrological_model_selector class spot_setup(object): @@ -80,32 +81,7 @@ def perform_calibration(model_config, evaluation_metric, maximize, bounds_high, #objfun = get_objective_function_value(model_config['Qobs'], Qsim, best_parameters, obj_func=evaluation_metric) return best_parameters, Qsim, bestobjf - - -def dummy_model(model_config): - - """ - Dummy model to show the implementation we should be aiming for. Each model - will have its own required data that users can pass. We could envision a setup - where a class generates the model_config object for all model formats making it - possible to replace models on the fly - """ - - precip = model_config['precip'] - temperature = model_config['temperature'] - drainage_area = model_config['drainage_area'] - parameters = model_config['parameters'] - - Q = np.empty(len(precip)) - - for t in range(0,len(precip)): - - Q[t]= (precip[t] * parameters[0] + abs(temperature[t])*parameters[1])* parameters[2] * drainage_area - - - return Q - - + def transform_flows(Qobs, Qsim, transform=None, epsilon=None): @@ -139,17 +115,6 @@ def transform_flows(Qobs, Qsim, transform=None, epsilon=None): return Qobs, Qsim - -def hydrological_model_selector(model_config): - - if model_config['model_name'] == 'Dummy': - Qsim = dummy_model(model_config) - - # ADD OTHER MODELS HERE - - return Qsim - - def get_objective_function_value(Qobs, Qsim, params, transform=None, epsilon=None, obj_func=None): # Transform flows if needed From 8ae97e80b07436797a8d772b2798487753bbd719 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Tue, 12 Dec 2023 21:30:07 -0500 Subject: [PATCH 05/29] Documented calibration and hydromodel packages --- tests/test_calibration.py | 17 +- xhydro/calibration.py | 266 ++++++++++++++++++++++++++----- xhydro/hydrological_modelling.py | 71 +++++++-- 3 files changed, 294 insertions(+), 60 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 4ec08f1f..8229916f 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -1,11 +1,18 @@ +""" +Import packages +""" import pytest -from xhydro.calibration import (perform_calibration, - get_objective_function_value, - dummy_model, - spot_setup) import numpy as np - +from xhydro.hydrological_modelling import dummy_model +from xhydro.calibration import (perform_calibration, + get_objective_function_value + ) + +""" +Test suite for the calibration algorithm in calibration.py. +Also tests the dummy model implementation. +""" def test_spotpy_calibration(): diff --git a/xhydro/calibration.py b/xhydro/calibration.py index d5028d15..594d19e3 100644 --- a/xhydro/calibration.py +++ b/xhydro/calibration.py @@ -1,100 +1,270 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ Created on Fri Dec 8 16:48:34 2023 -@author: ets +@author: Richard Arsenault """ +""" +This package contains the main framework for hydrological model calibration. It +uses the spotpy calibration package applied on a "model_config" object. This +object is meant to be a container that can be used as needed by any hydrologic +model. For example, it can store datasets directly, paths to datasets (nc files +or other), csv files, basically anything that can be stored in a dictionary. + +It then becomes the user's responsibility to ensure that required data for a +given model be provided in the model_config object both in the data preparation +stage and in the hydrological model implementation. This can be addressed by +a set of pre-defined codes for given model structures. + +For example, for GR4J, only small datasets are required and can be stored +directly in the model_config dictionary. However, for Hydrotel or Raven models, +maybe it is better to pass paths to netcdf files which can be passed to the +models. This will require pre- and post-processing, but this can easily be +handled at the stage where we create a hydrological model and prepare the data. + +The calibration aspect then becomes trivial: + + 1. A model_config object is passed to the calibrator. + 2. Lower and upper bounds for calibration parameters are defined and passed + 3. An objective function, optimizer and hyperparameters are also passed. + 4. The calibrator uses this information to develop parameter sets that are + then passed as inputs to the "model_config" object. + 5. The calibrator launches the desired hydrological model with the + model_config object (now containing the parameter set) as input. + 6. The appropriate hydrological model function then parses "model_config", + takes the parameters and required data, launches a simulation and + returns simulated flow (Qsim). + 7. The calibration package then compares Qobs and Qsim and computes the + objective function value, and returns this to the sampler that will then + repeat the process to find optimal parameter sets. + 8. The code returns the best parameter set, objective function value, and + we also return the simulated streamflow on the calibration period for + user convenience. + +This system has the advantage of being extremely flexible, robust, and +efficient as all data can be either in-memory or only the reference to the +required datasets on disk is passed around the callstack. + +Currently, the model_config object has 3 mandatory keywords for the package to +run correctly in all instances: + + - model_config["Qobs"]: Contains the observed streamflow used as the + calibration target. + + - model_config["model_name"]: Contains a string refering to the + hydrological model to be run. + + - model_config["parameters"]: While not necessary to provide this, it is + a reserved keyword used by the optimizer. + +Any comments are welcome! +""" + +""" +Import packages +""" +import spotpy import numpy as np +import hydroeval as he +from xhydro.hydrological_modelling import hydrological_model_selector from spotpy.objectivefunctions import rmse from spotpy import analyser from spotpy.parameter import Uniform -import spotpy -import hydroeval as he -from xhydro.hydrological_modelling import hydrological_model_selector + class spot_setup(object): - - + """ + This class is used to create the spotpy calibration system that is used for + hydrological model calibration. + """ + def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): + + """ + The initialization of the spot_setup object includes a generic + "model_config" object containing hydrological modelling data required, + low and high parameter bounds, as well as an objective function. + Depending on the objective function, either spotpy or hydroeval will + compute the value, since some functions are found only in one package. + accepted obj_func values: + Computed by hydroeval: ["nse", "kge", "kgeprime", "kgenp", "rmse", + "mare", "pbias"] + Computed by spotpy: ["bias","nashsutcliffe", "lognashsutcliffe", + "log_p", "correlationcoefficient", "rsquared", + "mse", "mae", "rrmse", "agreementindex", + "covariance", "decomposed_mse", "rsr", + "volume_error"] + """ + + # Gather the model_config dictionary and obj_func string. self.model_config = model_config self.obj_func = obj_func - # Create the sampler for each parameter based on the bounds - self.parameters=[Uniform("param"+str(i), bounds_low[i], bounds_high[i]) for i in range(0,len(bounds_high))] + self.parameters=[ + Uniform( + "param"+str(i), bounds_low[i], bounds_high[i] + ) + for i in range(0,len(bounds_high)) + ] def simulation(self, x): - self.model_config['parameters'] = x - + """ + This is where the optimizer generates a parameter set from within the + given bounds and generates the simulation results. We add the parameter + "x" that is generated by spotpy to the model_config object at the + reserved keyword "parameters". + """ + self.model_config.update({'parameters': x}) + + # Run the model and return Qsim, with model_config containing the + # tested parameter set. return hydrological_model_selector(self.model_config) def evaluation(self): + """ + Here is where we get the observed streamflow and make it available to + compare the simulation and compute an objective function. It has to be + in the Qobs keyword, although with some small changes + model_config['Qobs'] could be a string to a file. Probably more + efficient to load it into memory during preprocessing anyways to + prevent recurring input/output and associated overhead. Currently, the + package supposes that Qobs and Qsim have the same length, but this can + be changed in the model_config parameterization and adding conditions + here. + """ return self.model_config['Qobs'] - def objectivefunction(self, simulation, evaluation, params=None, transform=None, epsilon=None): + def objectivefunction(self, simulation, evaluation, params=None, + transform=None, epsilon=None + ): - return get_objective_function_value(evaluation, simulation, params, transform, epsilon, self.obj_func) + """ + This function is where spotpy computes the objective function. Note + that there are other possible inputs: + - params: if we force a parameter set, this function can be used to + compute the objective function easily. + - transform: Possibility to transform flows before computing the + objective function. "inv" takes 1/Q, "log" takes log(Q) and + "sqrt" takes sqrt(Q) before computing. + - epsilon: a small value to adjust flows before transforming to + prevent log(0) or 1/0 computations when flow is zero. + """ + return get_objective_function_value(evaluation, simulation, params, + transform, epsilon, self.obj_func + ) -def perform_calibration(model_config, evaluation_metric, maximize, bounds_high, bounds_low, evaluations, algorithm="DDS"): +def perform_calibration(model_config, evaluation_metric, maximize, bounds_high, + bounds_low, evaluations, algorithm="DDS" + ): + """ + TODO: maximize is not automatically defined. We should remove this option + and force the algo/obj_fun to be coordinated to return the best value. """ - 'model_config' is the model configuration object (dict type) that contains all info to run the model. - The model function called to run this model should always use this object and read-in data it requires. - It will be up to the user to provide the data that the model requires. - It should also contain parameters, even NaN or dummy ones, and the optimizer will update these. + + """ + This is the entrypoint for the model calibration. After setting-up the + model_config object and other arguments, calling "perform_calibration" will + return the optimal parameter set, objective function value and simulated + flows on the calibration period. + + Inputs are as follows: + + - 'model_config' is the model configuration object (dict type) that + contains all info to run the model. The model function called to run this + model should always use this object and read-in data it requires. It will + be up to the user to provide the data that the model requires. + + - evaluation_metric: The objective function (string) used for calibrating. + + - maximize: a Boolean flag to indicate that the objective function should + be maximized (ex: "nse", "kge") instead of minimized (ex: "rmse", "mae"). + + - bounds_high, bounds_low: high and low bounds respectively for the model + parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters + to calibrate. These are in np.array type. + + - evaluations: Maximum number of model evaluations (calibration budget) to + perform before stopping the calibration process (int). + + - algorithm: The optimization algorithm to use. Currently, "DDS" and + "SCEUA" are available, but it would be trivial to add more. + """ - - spotpy_setup = spot_setup(model_config, bounds_high=bounds_high, bounds_low=bounds_low, obj_func=evaluation_metric) + # Setup the spotpy object to prepare the calibration + spotpy_setup = spot_setup(model_config, bounds_high=bounds_high, + bounds_low=bounds_low, obj_func=evaluation_metric + ) + + # Select an optimization algorithm and parameterize it, then run the + # optimization process. if algorithm == "DDS": - sampler = spotpy.algorithms.dds(spotpy_setup, dbname="DDS_optim", dbformat='ram', save_sim=False) + sampler = spotpy.algorithms.dds(spotpy_setup, dbname="DDS_optim", + dbformat='ram', save_sim=False + ) sampler.sample(evaluations, trials=1) elif algorithm == "SCEUA": - sampler = spotpy.algorithms.sceua(spotpy_setup, dbname="SCEUA_optim", dbformat='ram', save_sim=False) + sampler = spotpy.algorithms.sceua(spotpy_setup, dbname="SCEUA_optim", + dbformat='ram', save_sim=False + ) sampler.sample(evaluations, ngs=7, kstop=3, peps=0.1, pcento=0.1) + # Gather optimization results results = sampler.getdata() - best_parameters = analyser.get_best_parameterset(results,like_index=1, maximize=maximize) - best_parameters=[best_parameters[0][i] for i in range(0,len(best_parameters[0]))] + # Get the best parameter set + best_parameters = analyser.get_best_parameterset( + results,like_index=1, + maximize=maximize + ) + best_parameters=[ + best_parameters[0][i] for i in range(0,len(best_parameters[0])) + ] + # Get the best objective function as well depending if maximized or + # minimized if maximize==True: bestindex, bestobjf = analyser.get_maxlikeindex(results) else: bestindex, bestobjf = analyser.get_minlikeindex(results) - best_model_run = results[bestindex] - - model_config['parameters'] = best_parameters + # Update the parameter set to put the best parameters in model_config... + model_config.update({'parameters': best_parameters}) + #... which can be used to run the hydrological model and get the best Qsim. Qsim = hydrological_model_selector(model_config) - #objfun = get_objective_function_value(model_config['Qobs'], Qsim, best_parameters, obj_func=evaluation_metric) - + # Return the best parameters, Qsim and best objective function value. return best_parameters, Qsim, bestobjf - + def transform_flows(Qobs, Qsim, transform=None, epsilon=None): + """ + This subset of code is taken from the hydroeval package: + https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py - # This subset of code is taken from the hydroeval package: - # https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py - # - # generate a subset of simulation and evaluation series where evaluation - # data is available + It is used to transform flows such that the objective function is computed + on a transformed flow metric rather than on the original units of flow + (ex: inverse, log-transformed, square-root) + """ + + #Generate a subset of simulation and evaluation series where evaluation + #data is available Qsim = Qsim[~np.isnan(Qobs[:, 0]), :] Qobs = Qobs[~np.isnan(Qobs[:, 0]), :] - # transform the flow series if required + # Transform the flow series if required if transform == 'log': # log transformation if not epsilon: # determine an epsilon value to avoid zero divide @@ -112,11 +282,20 @@ def transform_flows(Qobs, Qsim, transform=None, epsilon=None): elif transform == 'sqrt': # square root transformation Qobs, Qsim = np.sqrt(Qobs), np.sqrt(Qsim) + # Return the flows after transformation (or original if no transform) return Qobs, Qsim -def get_objective_function_value(Qobs, Qsim, params, transform=None, epsilon=None, obj_func=None): +def get_objective_function_value(Qobs, Qsim, params, transform=None, + epsilon=None, obj_func=None + ): + """ + This code returns the objective function as determined by the user, after + transforming (if required) using the Qobs provided by the user and the Qsim + simulated by the model controled by spotpy during the calibration process. + """ + # Transform flows if needed if transform: Qobs, Qsim = transform_flows(Qobs, Qsim, transform, epsilon) @@ -125,11 +304,16 @@ def get_objective_function_value(Qobs, Qsim, params, transform=None, epsilon=Non if not obj_func: like = rmse(Qobs,Qsim) - # Popular ones we can use hydroeval package - elif obj_func.lower() in ['nse','kge','kgeprime','kgenp','rmse','mare','pbias']: + # Popular ones we can use the hydroeval package + elif obj_func.lower() in [ + 'nse','kge','kgeprime','kgenp','rmse','mare','pbias' + ]: # Get the correct function from the package - like = he.evaluator(getattr(he,obj_func.lower()), simulations=Qsim, evaluation=Qobs) + like = he.evaluator( + getattr(he,obj_func.lower()), simulations=Qsim, + evaluation=Qobs + ) # In neither of these cases, use the obj_func method from spotpy else: diff --git a/xhydro/hydrological_modelling.py b/xhydro/hydrological_modelling.py index 13dfba19..516d980d 100644 --- a/xhydro/hydrological_modelling.py +++ b/xhydro/hydrological_modelling.py @@ -1,13 +1,57 @@ import numpy as np +""" +This collection of functions should serve as the main entry point for +hydrological modelling. The entire framework is based on the "model_config" +object. This object is meant to be a container that can be used as needed by +any hydrologic model. For example, it can store datasets directly, paths to +datasets (nc files or other), csv files, basically anything that can be stored +in a dictionary. + +It then becomes the user's responsibility to ensure that required data for a +given model be provided in the model_config object both in the data preparation +stage and in the hydrological model implementation. This can be addressed by +a set of pre-defined codes for given model structures. This present package +(hydrological_modelling.py) should contain the logic to: + + 1. From the model_config["model_name"] key, select the appropriate function + (hydrological model) to run. + 2. Pass the model_config object to the correct hydrological modelling + function. + 3. Parse the model_config object to exract required data for the given + model, such as: + - parameters + - meteorological data + - paths to input files + - catchment characteristics as required + 4. Run the hydrological model with the given data + 5. Return the streamflow (Qsim). + +This will make it very easy to add hydrological models, no matter their +complexity. It will also make it easy for newer Python users to implement +models as the logic is clearly exposed and does not make use of complex classes +or other dynamic objects. Models can be added here, and a specific routine +should also be defined to produce the required model_config for each model. + +Once this is accomplished, running the model from a model_config object becomes +trivial and allows for easy calibration, regionalization, analysis and any +other type of interaction. +""" def hydrological_model_selector(model_config): + """ + This is the main hydrological model selector. This is the code that looks + at the "model_config["model_name"]" keyword and calls the appropriate + hydrological model function. + """ if model_config['model_name'] == 'Dummy': Qsim = dummy_model(model_config) - + + elif model_config['model_name'] == "ADD_OTHER_HERE": # ADD OTHER MODELS HERE - + Qsim=0; + return Qsim @@ -15,23 +59,22 @@ def dummy_model(model_config): """ Dummy model to show the implementation we should be aiming for. Each model - will have its own required data that users can pass. We could envision a setup - where a class generates the model_config object for all model formats making it - possible to replace models on the fly + will have its own required data that users can pass. """ + # Parse the model_config object to extract required information precip = model_config['precip'] temperature = model_config['temperature'] - drainage_area = model_config['drainage_area'] - parameters = model_config['parameters'] - - Q = np.empty(len(precip)) + area = model_config['drainage_area'] + X = model_config['parameters'] + # Run the dummy model using these data. Keeping it simple to calculate by + # hand to ensure the calibration algorithm is working correctly and data + # are handled correctly + Qsim = np.empty(len(precip)) for t in range(0,len(precip)): - - Q[t]= (precip[t] * parameters[0] + abs(temperature[t])*parameters[1])* parameters[2] * drainage_area - - - return Q + Qsim[t]= (precip[t] * X[0] + abs(temperature[t])*X[1])* X[2] * area + + return Qsim From 4553a95fc44588e2e577e805ec232ac181986616 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Dec 2023 02:40:04 +0000 Subject: [PATCH 06/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_calibration.py | 58 ++++--- xhydro/calibration.py | 282 +++++++++++++++---------------- xhydro/hydrological_modelling.py | 65 ++++--- 3 files changed, 203 insertions(+), 202 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 8229916f..c2178b2e 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -1,49 +1,53 @@ """ Import packages """ +import numpy as np import pytest -import numpy as np +from xhydro.calibration import get_objective_function_value, perform_calibration from xhydro.hydrological_modelling import dummy_model -from xhydro.calibration import (perform_calibration, - get_objective_function_value - ) - + """ -Test suite for the calibration algorithm in calibration.py. +Test suite for the calibration algorithm in calibration.py. Also tests the dummy model implementation. """ + def test_spotpy_calibration(): - """ Make sure the calibration works for a few test cases """ - bounds_low = np.array([0,0,0]) - bounds_high = np.array([10,10,10]) - - model_config={ - 'precip':np.array([10,11,12,13,14,15]), - 'temperature':np.array([10,3,-5,1,15,0]), - 'Qobs':np.array([120,130,140,150,160,170]), - 'drainage_area':np.array([10]), - 'model_name':'Dummy', + bounds_low = np.array([0, 0, 0]) + bounds_high = np.array([10, 10, 10]) + + model_config = { + "precip": np.array([10, 11, 12, 13, 14, 15]), + "temperature": np.array([10, 3, -5, 1, 15, 0]), + "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "drainage_area": np.array([10]), + "model_name": "Dummy", } - - best_parameters, best_simulation, best_objfun = perform_calibration(model_config, 'nse', maximize = True, bounds_low=bounds_low, bounds_high=bounds_high, evaluations=1000, algorithm="DDS") + + best_parameters, best_simulation, best_objfun = perform_calibration( + model_config, + "nse", + maximize=True, + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=1000, + algorithm="DDS", + ) # Test that the results have the same size as expected (number of parameters) assert len(best_parameters) == len(bounds_high) - + # Test that the objective function is calculated correctly - objfun = get_objective_function_value(model_config['Qobs'], best_simulation,best_parameters, obj_func='nse') + objfun = get_objective_function_value( + model_config["Qobs"], best_simulation, best_parameters, obj_func="nse" + ) assert objfun == best_objfun - + # Test dummy model response - model_config['parameters']=[5,5,5] + model_config["parameters"] = [5, 5, 5] Qsim = dummy_model(model_config) - assert Qsim[3]==3500.00 - - - - \ No newline at end of file + assert Qsim[3] == 3500.00 diff --git a/xhydro/calibration.py b/xhydro/calibration.py index 594d19e3..92b7c575 100644 --- a/xhydro/calibration.py +++ b/xhydro/calibration.py @@ -5,319 +5,319 @@ """ """ -This package contains the main framework for hydrological model calibration. It -uses the spotpy calibration package applied on a "model_config" object. This +This package contains the main framework for hydrological model calibration. It +uses the spotpy calibration package applied on a "model_config" object. This object is meant to be a container that can be used as needed by any hydrologic model. For example, it can store datasets directly, paths to datasets (nc files -or other), csv files, basically anything that can be stored in a dictionary. +or other), csv files, basically anything that can be stored in a dictionary. -It then becomes the user's responsibility to ensure that required data for a +It then becomes the user's responsibility to ensure that required data for a given model be provided in the model_config object both in the data preparation stage and in the hydrological model implementation. This can be addressed by a set of pre-defined codes for given model structures. -For example, for GR4J, only small datasets are required and can be stored +For example, for GR4J, only small datasets are required and can be stored directly in the model_config dictionary. However, for Hydrotel or Raven models, -maybe it is better to pass paths to netcdf files which can be passed to the +maybe it is better to pass paths to netcdf files which can be passed to the models. This will require pre- and post-processing, but this can easily be handled at the stage where we create a hydrological model and prepare the data. The calibration aspect then becomes trivial: - - 1. A model_config object is passed to the calibrator. + + 1. A model_config object is passed to the calibrator. 2. Lower and upper bounds for calibration parameters are defined and passed 3. An objective function, optimizer and hyperparameters are also passed. 4. The calibrator uses this information to develop parameter sets that are - then passed as inputs to the "model_config" object. + then passed as inputs to the "model_config" object. 5. The calibrator launches the desired hydrological model with the model_config object (now containing the parameter set) as input. 6. The appropriate hydrological model function then parses "model_config", - takes the parameters and required data, launches a simulation and + takes the parameters and required data, launches a simulation and returns simulated flow (Qsim). - 7. The calibration package then compares Qobs and Qsim and computes the + 7. The calibration package then compares Qobs and Qsim and computes the objective function value, and returns this to the sampler that will then repeat the process to find optimal parameter sets. - 8. The code returns the best parameter set, objective function value, and - we also return the simulated streamflow on the calibration period for + 8. The code returns the best parameter set, objective function value, and + we also return the simulated streamflow on the calibration period for user convenience. - + This system has the advantage of being extremely flexible, robust, and efficient as all data can be either in-memory or only the reference to the required datasets on disk is passed around the callstack. Currently, the model_config object has 3 mandatory keywords for the package to run correctly in all instances: - - - model_config["Qobs"]: Contains the observed streamflow used as the + + - model_config["Qobs"]: Contains the observed streamflow used as the calibration target. - - - model_config["model_name"]: Contains a string refering to the + + - model_config["model_name"]: Contains a string refering to the hydrological model to be run. - + - model_config["parameters"]: While not necessary to provide this, it is a reserved keyword used by the optimizer. - + Any comments are welcome! """ """ Import packages """ -import spotpy -import numpy as np import hydroeval as he -from xhydro.hydrological_modelling import hydrological_model_selector -from spotpy.objectivefunctions import rmse +import numpy as np +import spotpy from spotpy import analyser +from spotpy.objectivefunctions import rmse from spotpy.parameter import Uniform +from xhydro.hydrological_modelling import hydrological_model_selector + -class spot_setup(object): +class spot_setup: """ - This class is used to create the spotpy calibration system that is used for + This class is used to create the spotpy calibration system that is used for hydrological model calibration. """ - + def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): - """ The initialization of the spot_setup object includes a generic "model_config" object containing hydrological modelling data required, - low and high parameter bounds, as well as an objective function. - Depending on the objective function, either spotpy or hydroeval will + low and high parameter bounds, as well as an objective function. + Depending on the objective function, either spotpy or hydroeval will compute the value, since some functions are found only in one package. - + accepted obj_func values: Computed by hydroeval: ["nse", "kge", "kgeprime", "kgenp", "rmse", "mare", "pbias"] Computed by spotpy: ["bias","nashsutcliffe", "lognashsutcliffe", "log_p", "correlationcoefficient", "rsquared", - "mse", "mae", "rrmse", "agreementindex", + "mse", "mae", "rrmse", "agreementindex", "covariance", "decomposed_mse", "rsr", "volume_error"] """ - + # Gather the model_config dictionary and obj_func string. self.model_config = model_config self.obj_func = obj_func - + # Create the sampler for each parameter based on the bounds - self.parameters=[ - Uniform( - "param"+str(i), bounds_low[i], bounds_high[i] - ) - for i in range(0,len(bounds_high)) - ] - + self.parameters = [ + Uniform("param" + str(i), bounds_low[i], bounds_high[i]) + for i in range(0, len(bounds_high)) + ] + def simulation(self, x): - """ - This is where the optimizer generates a parameter set from within the + This is where the optimizer generates a parameter set from within the given bounds and generates the simulation results. We add the parameter - "x" that is generated by spotpy to the model_config object at the + "x" that is generated by spotpy to the model_config object at the reserved keyword "parameters". """ - self.model_config.update({'parameters': x}) - + self.model_config.update({"parameters": x}) + # Run the model and return Qsim, with model_config containing the # tested parameter set. return hydrological_model_selector(self.model_config) - - + def evaluation(self): - """ - Here is where we get the observed streamflow and make it available to + Here is where we get the observed streamflow and make it available to compare the simulation and compute an objective function. It has to be - in the Qobs keyword, although with some small changes + in the Qobs keyword, although with some small changes model_config['Qobs'] could be a string to a file. Probably more efficient to load it into memory during preprocessing anyways to prevent recurring input/output and associated overhead. Currently, the - package supposes that Qobs and Qsim have the same length, but this can + package supposes that Qobs and Qsim have the same length, but this can be changed in the model_config parameterization and adding conditions here. """ - return self.model_config['Qobs'] - - - def objectivefunction(self, simulation, evaluation, params=None, - transform=None, epsilon=None - ): - + return self.model_config["Qobs"] + + def objectivefunction( + self, simulation, evaluation, params=None, transform=None, epsilon=None + ): """ This function is where spotpy computes the objective function. Note - that there are other possible inputs: + that there are other possible inputs: - params: if we force a parameter set, this function can be used to compute the objective function easily. - - transform: Possibility to transform flows before computing the + - transform: Possibility to transform flows before computing the objective function. "inv" takes 1/Q, "log" takes log(Q) and "sqrt" takes sqrt(Q) before computing. - - epsilon: a small value to adjust flows before transforming to + - epsilon: a small value to adjust flows before transforming to prevent log(0) or 1/0 computations when flow is zero. """ - return get_objective_function_value(evaluation, simulation, params, - transform, epsilon, self.obj_func - ) - - -def perform_calibration(model_config, evaluation_metric, maximize, bounds_high, - bounds_low, evaluations, algorithm="DDS" - ): - - """ + return get_objective_function_value( + evaluation, simulation, params, transform, epsilon, self.obj_func + ) + + +def perform_calibration( + model_config, + evaluation_metric, + maximize, + bounds_high, + bounds_low, + evaluations, + algorithm="DDS", +): + """ TODO: maximize is not automatically defined. We should remove this option and force the algo/obj_fun to be coordinated to return the best value. """ - + """ - This is the entrypoint for the model calibration. After setting-up the + This is the entrypoint for the model calibration. After setting-up the model_config object and other arguments, calling "perform_calibration" will return the optimal parameter set, objective function value and simulated flows on the calibration period. - + Inputs are as follows: - - - 'model_config' is the model configuration object (dict type) that + + - 'model_config' is the model configuration object (dict type) that contains all info to run the model. The model function called to run this model should always use this object and read-in data it requires. It will be up to the user to provide the data that the model requires. - + - evaluation_metric: The objective function (string) used for calibrating. - + - maximize: a Boolean flag to indicate that the objective function should be maximized (ex: "nse", "kge") instead of minimized (ex: "rmse", "mae"). - + - bounds_high, bounds_low: high and low bounds respectively for the model parameters to be calibrated. Spotpy will sample parameter sets from within these bounds. The size must be equal to the number of parameters to calibrate. These are in np.array type. - + - evaluations: Maximum number of model evaluations (calibration budget) to - perform before stopping the calibration process (int). - + perform before stopping the calibration process (int). + - algorithm: The optimization algorithm to use. Currently, "DDS" and "SCEUA" are available, but it would be trivial to add more. - + """ - + # Setup the spotpy object to prepare the calibration - spotpy_setup = spot_setup(model_config, bounds_high=bounds_high, - bounds_low=bounds_low, obj_func=evaluation_metric - ) - + spotpy_setup = spot_setup( + model_config, + bounds_high=bounds_high, + bounds_low=bounds_low, + obj_func=evaluation_metric, + ) + # Select an optimization algorithm and parameterize it, then run the # optimization process. if algorithm == "DDS": - sampler = spotpy.algorithms.dds(spotpy_setup, dbname="DDS_optim", - dbformat='ram', save_sim=False - ) + sampler = spotpy.algorithms.dds( + spotpy_setup, dbname="DDS_optim", dbformat="ram", save_sim=False + ) sampler.sample(evaluations, trials=1) - + elif algorithm == "SCEUA": - sampler = spotpy.algorithms.sceua(spotpy_setup, dbname="SCEUA_optim", - dbformat='ram', save_sim=False - ) + sampler = spotpy.algorithms.sceua( + spotpy_setup, dbname="SCEUA_optim", dbformat="ram", save_sim=False + ) sampler.sample(evaluations, ngs=7, kstop=3, peps=0.1, pcento=0.1) - + # Gather optimization results results = sampler.getdata() - + # Get the best parameter set best_parameters = analyser.get_best_parameterset( - results,like_index=1, - maximize=maximize - ) - best_parameters=[ - best_parameters[0][i] for i in range(0,len(best_parameters[0])) - ] + results, like_index=1, maximize=maximize + ) + best_parameters = [best_parameters[0][i] for i in range(0, len(best_parameters[0]))] - # Get the best objective function as well depending if maximized or + # Get the best objective function as well depending if maximized or # minimized - if maximize==True: + if maximize == True: bestindex, bestobjf = analyser.get_maxlikeindex(results) else: bestindex, bestobjf = analyser.get_minlikeindex(results) - + # Update the parameter set to put the best parameters in model_config... - model_config.update({'parameters': best_parameters}) - - #... which can be used to run the hydrological model and get the best Qsim. + model_config.update({"parameters": best_parameters}) + + # ... which can be used to run the hydrological model and get the best Qsim. Qsim = hydrological_model_selector(model_config) - + # Return the best parameters, Qsim and best objective function value. return best_parameters, Qsim, bestobjf - - + def transform_flows(Qobs, Qsim, transform=None, epsilon=None): """ This subset of code is taken from the hydroeval package: https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py - - It is used to transform flows such that the objective function is computed + + It is used to transform flows such that the objective function is computed on a transformed flow metric rather than on the original units of flow (ex: inverse, log-transformed, square-root) """ - #Generate a subset of simulation and evaluation series where evaluation - #data is available + # Generate a subset of simulation and evaluation series where evaluation + # data is available Qsim = Qsim[~np.isnan(Qobs[:, 0]), :] Qobs = Qobs[~np.isnan(Qobs[:, 0]), :] - + # Transform the flow series if required - if transform == 'log': # log transformation + if transform == "log": # log transformation if not epsilon: # determine an epsilon value to avoid zero divide - # (following recommendation in Pushpalatha et al. (2012)) + # (following recommendation in Pushpalatha et al. (2012)) epsilon = 0.01 * np.mean(Qobs) Qobs, Qsim = np.log(Qobs + epsilon), np.log(Qsim + epsilon) - elif transform == 'inv': # inverse transformation - if not epsilon: + elif transform == "inv": # inverse transformation + if not epsilon: # determine an epsilon value to avoid zero divide # (following recommendation in Pushpalatha et al. (2012)) epsilon = 0.01 * np.mean(Qobs) Qobs, Qsim = 1.0 / (Qobs + epsilon), 1.0 / (Qsim + epsilon) - - elif transform == 'sqrt': # square root transformation + + elif transform == "sqrt": # square root transformation Qobs, Qsim = np.sqrt(Qobs), np.sqrt(Qsim) - + # Return the flows after transformation (or original if no transform) return Qobs, Qsim -def get_objective_function_value(Qobs, Qsim, params, transform=None, - epsilon=None, obj_func=None - ): - +def get_objective_function_value( + Qobs, Qsim, params, transform=None, epsilon=None, obj_func=None +): """ This code returns the objective function as determined by the user, after transforming (if required) using the Qobs provided by the user and the Qsim simulated by the model controled by spotpy during the calibration process. """ - + # Transform flows if needed if transform: Qobs, Qsim = transform_flows(Qobs, Qsim, transform, epsilon) # Default objective function if none provided by the user if not obj_func: - like = rmse(Qobs,Qsim) - + like = rmse(Qobs, Qsim) + # Popular ones we can use the hydroeval package elif obj_func.lower() in [ - 'nse','kge','kgeprime','kgenp','rmse','mare','pbias' - ]: - + "nse", + "kge", + "kgeprime", + "kgenp", + "rmse", + "mare", + "pbias", + ]: # Get the correct function from the package like = he.evaluator( - getattr(he,obj_func.lower()), simulations=Qsim, - evaluation=Qobs - ) - + getattr(he, obj_func.lower()), simulations=Qsim, evaluation=Qobs + ) + # In neither of these cases, use the obj_func method from spotpy else: like = obj_func(Qobs, Qsim) - + # Return results - return like \ No newline at end of file + return like diff --git a/xhydro/hydrological_modelling.py b/xhydro/hydrological_modelling.py index 516d980d..17b1b56c 100644 --- a/xhydro/hydrological_modelling.py +++ b/xhydro/hydrological_modelling.py @@ -1,22 +1,22 @@ import numpy as np -""" -This collection of functions should serve as the main entry point for -hydrological modelling. The entire framework is based on the "model_config" -object. This object is meant to be a container that can be used as needed by -any hydrologic model. For example, it can store datasets directly, paths to -datasets (nc files or other), csv files, basically anything that can be stored -in a dictionary. +""" +This collection of functions should serve as the main entry point for +hydrological modelling. The entire framework is based on the "model_config" +object. This object is meant to be a container that can be used as needed by +any hydrologic model. For example, it can store datasets directly, paths to +datasets (nc files or other), csv files, basically anything that can be stored +in a dictionary. -It then becomes the user's responsibility to ensure that required data for a +It then becomes the user's responsibility to ensure that required data for a given model be provided in the model_config object both in the data preparation stage and in the hydrological model implementation. This can be addressed by -a set of pre-defined codes for given model structures. This present package +a set of pre-defined codes for given model structures. This present package (hydrological_modelling.py) should contain the logic to: - + 1. From the model_config["model_name"] key, select the appropriate function (hydrological model) to run. - 2. Pass the model_config object to the correct hydrological modelling + 2. Pass the model_config object to the correct hydrological modelling function. 3. Parse the model_config object to exract required data for the given model, such as: @@ -28,7 +28,7 @@ 5. Return the streamflow (Qsim). This will make it very easy to add hydrological models, no matter their -complexity. It will also make it easy for newer Python users to implement +complexity. It will also make it easy for newer Python users to implement models as the logic is clearly exposed and does not make use of complex classes or other dynamic objects. Models can be added here, and a specific routine should also be defined to produce the required model_config for each model. @@ -38,43 +38,40 @@ other type of interaction. """ -def hydrological_model_selector(model_config): +def hydrological_model_selector(model_config): """ This is the main hydrological model selector. This is the code that looks - at the "model_config["model_name"]" keyword and calls the appropriate + at the "model_config["model_name"]" keyword and calls the appropriate hydrological model function. """ - if model_config['model_name'] == 'Dummy': + if model_config["model_name"] == "Dummy": Qsim = dummy_model(model_config) - - elif model_config['model_name'] == "ADD_OTHER_HERE": + + elif model_config["model_name"] == "ADD_OTHER_HERE": # ADD OTHER MODELS HERE - Qsim=0; - + Qsim = 0 + return Qsim -def dummy_model(model_config): - +def dummy_model(model_config): """ - Dummy model to show the implementation we should be aiming for. Each model + Dummy model to show the implementation we should be aiming for. Each model will have its own required data that users can pass. """ - + # Parse the model_config object to extract required information - precip = model_config['precip'] - temperature = model_config['temperature'] - area = model_config['drainage_area'] - X = model_config['parameters'] - + precip = model_config["precip"] + temperature = model_config["temperature"] + area = model_config["drainage_area"] + X = model_config["parameters"] + # Run the dummy model using these data. Keeping it simple to calculate by - # hand to ensure the calibration algorithm is working correctly and data + # hand to ensure the calibration algorithm is working correctly and data # are handled correctly Qsim = np.empty(len(precip)) - for t in range(0,len(precip)): - Qsim[t]= (precip[t] * X[0] + abs(temperature[t])*X[1])* X[2] * area - - return Qsim - + for t in range(0, len(precip)): + Qsim[t] = (precip[t] * X[0] + abs(temperature[t]) * X[1]) * X[2] * area + return Qsim From 6f116bdaccfe3c3b96e8221191918159306cd85f Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Dec 2023 11:59:57 -0500 Subject: [PATCH 07/29] Docstring adjustments --- xhydro/calibration.py | 158 ++++++++++++++++--------------- xhydro/hydrological_modelling.py | 24 ++--- 2 files changed, 94 insertions(+), 88 deletions(-) diff --git a/xhydro/calibration.py b/xhydro/calibration.py index 92b7c575..75c0a4b3 100644 --- a/xhydro/calibration.py +++ b/xhydro/calibration.py @@ -1,10 +1,7 @@ -""" -Created on Fri Dec 8 16:48:34 2023 - -@author: Richard Arsenault -""" +# Created on Fri Dec 8 16:48:34 2023 +# @author: Richard Arsenault +"""Calibration package for hydrological models. -""" This package contains the main framework for hydrological model calibration. It uses the spotpy calibration package applied on a "model_config" object. This object is meant to be a container that can be used as needed by any hydrologic @@ -51,7 +48,7 @@ - model_config["Qobs"]: Contains the observed streamflow used as the calibration target. - - model_config["model_name"]: Contains a string refering to the + - model_config["model_name"]: Contains a string referring to the hydrological model to be run. - model_config["parameters"]: While not necessary to provide this, it is @@ -59,10 +56,9 @@ Any comments are welcome! """ +from typing import Optional -""" -Import packages -""" +# Import packages import hydroeval as he import numpy as np import spotpy @@ -74,29 +70,24 @@ class spot_setup: - """ - This class is used to create the spotpy calibration system that is used for - hydrological model calibration. - """ + """Create the spotpy calibration system that is used for hydrological model calibration.""" def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): - """ + """Initialize the spot_setup object. + The initialization of the spot_setup object includes a generic "model_config" object containing hydrological modelling data required, low and high parameter bounds, as well as an objective function. Depending on the objective function, either spotpy or hydroeval will compute the value, since some functions are found only in one package. - accepted obj_func values: - Computed by hydroeval: ["nse", "kge", "kgeprime", "kgenp", "rmse", - "mare", "pbias"] - Computed by spotpy: ["bias","nashsutcliffe", "lognashsutcliffe", - "log_p", "correlationcoefficient", "rsquared", - "mse", "mae", "rrmse", "agreementindex", - "covariance", "decomposed_mse", "rsr", - "volume_error"] + Notes + ----- + Accepted obj_func values: + Computed by hydroeval: ["nse", "kge", "kgeprime", "kgenp", "rmse", "mare", "pbias"] + Computed by spotpy: ["bias","nashsutcliffe", "lognashsutcliffe", "log_p", "correlationcoefficient", + "rsquared", "mse", "mae", "rrmse", "agreementindex","covariance", "decomposed_mse", "rsr", "volume_error"] """ - # Gather the model_config dictionary and obj_func string. self.model_config = model_config self.obj_func = obj_func @@ -108,7 +99,8 @@ def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): ] def simulation(self, x): - """ + """Simulation function for spotpy. + This is where the optimizer generates a parameter set from within the given bounds and generates the simulation results. We add the parameter "x" that is generated by spotpy to the model_config object at the @@ -121,12 +113,13 @@ def simulation(self, x): return hydrological_model_selector(self.model_config) def evaluation(self): - """ + """Evaluation function for spotpy. + Here is where we get the observed streamflow and make it available to compare the simulation and compute an objective function. It has to be in the Qobs keyword, although with some small changes model_config['Qobs'] could be a string to a file. Probably more - efficient to load it into memory during preprocessing anyways to + efficient to load it into memory during preprocessing anyway to prevent recurring input/output and associated overhead. Currently, the package supposes that Qobs and Qsim have the same length, but this can be changed in the model_config parameterization and adding conditions @@ -135,11 +128,20 @@ def evaluation(self): return self.model_config["Qobs"] def objectivefunction( - self, simulation, evaluation, params=None, transform=None, epsilon=None + self, + simulation, + evaluation, + params=None, + transform: Optional[str] = None, + epsilon: Optional[float] = None, ): - """ - This function is where spotpy computes the objective function. Note - that there are other possible inputs: + """Objective function for spotpy. + + This function is where spotpy computes the objective function. + + Notes + ----- + There are other possible inputs: - params: if we force a parameter set, this function can be used to compute the objective function easily. - transform: Possibility to transform flows before computing the @@ -154,51 +156,47 @@ def objectivefunction( def perform_calibration( - model_config, - evaluation_metric, - maximize, - bounds_high, - bounds_low, - evaluations, - algorithm="DDS", + model_config: dict, + evaluation_metric: str, + maximize: bool, + bounds_high: np.array, + bounds_low: np.array, + evaluations: int, + algorithm: str = "DDS", ): - """ - TODO: maximize is not automatically defined. We should remove this option - and force the algo/obj_fun to be coordinated to return the best value. - """ + """Perform calibration using spotpy. - """ This is the entrypoint for the model calibration. After setting-up the model_config object and other arguments, calling "perform_calibration" will return the optimal parameter set, objective function value and simulated flows on the calibration period. - Inputs are as follows: - - - 'model_config' is the model configuration object (dict type) that - contains all info to run the model. The model function called to run this - model should always use this object and read-in data it requires. It will - be up to the user to provide the data that the model requires. - - - evaluation_metric: The objective function (string) used for calibrating. - - - maximize: a Boolean flag to indicate that the objective function should - be maximized (ex: "nse", "kge") instead of minimized (ex: "rmse", "mae"). - - - bounds_high, bounds_low: high and low bounds respectively for the model - parameters to be calibrated. Spotpy will sample parameter sets from - within these bounds. The size must be equal to the number of parameters - to calibrate. These are in np.array type. - - - evaluations: Maximum number of model evaluations (calibration budget) to - perform before stopping the calibration process (int). - - - algorithm: The optimization algorithm to use. Currently, "DDS" and - "SCEUA" are available, but it would be trivial to add more. - + Parameters + ---------- + model_config : dict + The model configuration object that contains all info to run the model. + The model function called to run this model should always use this object and read-in data it requires. + It will be up to the user to provide the data that the model requires. + evaluation_metric : str + The objective function used for calibrating. + maximize : bool + A flag to indicate that the objective function should be maximized (ex: "nse", "kge") + instead of minimized (ex: "rmse", "mae"). + bounds_high : np.array + High bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters to calibrate. + bounds_low : np.array + Low bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters to calibrate. + evaluations : int + Maximum number of model evaluations (calibration budget) to perform before stopping the calibration process. + algorithm : str + The optimization algorithm to use. Currently, "DDS" and "SCEUA" are available, but more can be easily added. """ + # TODO: maximize is not automatically defined. + # We should remove this option and force the algo/obj_fun to be coordinated to return the best value. - # Setup the spotpy object to prepare the calibration + # Set up the spotpy object to prepare the calibration spotpy_setup = spot_setup( model_config, bounds_high=bounds_high, @@ -231,7 +229,7 @@ def perform_calibration( # Get the best objective function as well depending if maximized or # minimized - if maximize == True: + if maximize: bestindex, bestobjf = analyser.get_maxlikeindex(results) else: bestindex, bestobjf = analyser.get_minlikeindex(results) @@ -247,15 +245,17 @@ def perform_calibration( def transform_flows(Qobs, Qsim, transform=None, epsilon=None): - """ - This subset of code is taken from the hydroeval package: - https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py + """Transform flows before computing the objective function. It is used to transform flows such that the objective function is computed on a transformed flow metric rather than on the original units of flow (ex: inverse, log-transformed, square-root) - """ + Notes + ----- + This subset of code is taken from the hydroeval package: + https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py + """ # Generate a subset of simulation and evaluation series where evaluation # data is available Qsim = Qsim[~np.isnan(Qobs[:, 0]), :] @@ -284,14 +284,19 @@ def transform_flows(Qobs, Qsim, transform=None, epsilon=None): def get_objective_function_value( - Qobs, Qsim, params, transform=None, epsilon=None, obj_func=None + Qobs, + Qsim, + params, + transform=None, + epsilon: Optional[float] = None, + obj_func: Optional[str] = None, ): - """ + """Get the objective function value. + This code returns the objective function as determined by the user, after transforming (if required) using the Qobs provided by the user and the Qsim - simulated by the model controled by spotpy during the calibration process. + simulated by the model controlled by spotpy during the calibration process. """ - # Transform flows if needed if transform: Qobs, Qsim = transform_flows(Qobs, Qsim, transform, epsilon) @@ -317,6 +322,7 @@ def get_objective_function_value( # In neither of these cases, use the obj_func method from spotpy else: + # FIXME: What is the type of obj_func? like = obj_func(Qobs, Qsim) # Return results diff --git a/xhydro/hydrological_modelling.py b/xhydro/hydrological_modelling.py index 17b1b56c..163cb178 100644 --- a/xhydro/hydrological_modelling.py +++ b/xhydro/hydrological_modelling.py @@ -1,6 +1,5 @@ -import numpy as np +"""Hydrological modelling framework. -""" This collection of functions should serve as the main entry point for hydrological modelling. The entire framework is based on the "model_config" object. This object is meant to be a container that can be used as needed by @@ -18,12 +17,8 @@ (hydrological model) to run. 2. Pass the model_config object to the correct hydrological modelling function. - 3. Parse the model_config object to exract required data for the given - model, such as: - - parameters - - meteorological data - - paths to input files - - catchment characteristics as required + 3. Parse the model_config object to extract required data for the given + model, such as: parameters, meteorological data, paths to input files, and catchment characteristics as required 4. Run the hydrological model with the given data 5. Return the streamflow (Qsim). @@ -34,13 +29,16 @@ should also be defined to produce the required model_config for each model. Once this is accomplished, running the model from a model_config object becomes -trivial and allows for easy calibration, regionalization, analysis and any +trivial and allows for easy calibration, regionalisation, analysis and any other type of interaction. """ +import numpy as np + def hydrological_model_selector(model_config): - """ + """Hydrological model selector. + This is the main hydrological model selector. This is the code that looks at the "model_config["model_name"]" keyword and calls the appropriate hydrological model function. @@ -51,16 +49,18 @@ def hydrological_model_selector(model_config): elif model_config["model_name"] == "ADD_OTHER_HERE": # ADD OTHER MODELS HERE Qsim = 0 + else: + raise NotImplementedError() return Qsim def dummy_model(model_config): - """ + """Dummy model. + Dummy model to show the implementation we should be aiming for. Each model will have its own required data that users can pass. """ - # Parse the model_config object to extract required information precip = model_config["precip"] temperature = model_config["temperature"] From 978d35d9cb1b575077cc705aad3bb0ea662594fc Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:32:26 -0500 Subject: [PATCH 08/29] adjust test --- tests/test_calibration.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index c2178b2e..c638e56e 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -1,22 +1,15 @@ -""" -Import packages -""" +"""Test suite for the calibration algorithm in calibration.py.""" + +# Also tests the dummy model implementation. + import numpy as np -import pytest from xhydro.calibration import get_objective_function_value, perform_calibration from xhydro.hydrological_modelling import dummy_model -""" -Test suite for the calibration algorithm in calibration.py. -Also tests the dummy model implementation. -""" - def test_spotpy_calibration(): - """ - Make sure the calibration works for a few test cases - """ + """Make sure the calibration works for a few test cases.""" bounds_low = np.array([0, 0, 0]) bounds_high = np.array([10, 10, 10]) From 679b6ef3bf410e03d13f5b9c8d77aad7c1313c01 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:33:20 -0500 Subject: [PATCH 09/29] remove esmpy/esmf, add spotpy --- environment-dev.yml | 3 +-- environment.yml | 3 +-- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 9d6aebb4..d2f464fd 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -5,8 +5,8 @@ dependencies: - python >=3.9,<3.12 # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - - esmpy - numpy + - spotpy - statsmodels - xarray - xclim >=0.45.0 @@ -14,7 +14,6 @@ dependencies: - pip - pip: - hydroeval - - spotpy - xdatasets # Dev - bump-my-version >=0.12.0 diff --git a/environment.yml b/environment.yml index 86662a74..7e21b4c1 100644 --- a/environment.yml +++ b/environment.yml @@ -5,8 +5,8 @@ dependencies: - python >=3.9,<3.12 # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - - esmf - numpy + - spotpy - statsmodels - xarray - xclim >=0.45.0 @@ -14,5 +14,4 @@ dependencies: - pip - pip: - hydroeval - - spotpy - xdatasets diff --git a/pyproject.toml b/pyproject.toml index d7d1e775..7b53fb9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,8 +36,8 @@ classifiers = [ dynamic = ["description", "version"] dependencies = [ # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! - "esmpy", "numpy", + "spotpy", "statsmodels", "xarray", "xclim>=0.45.0", From 179f6aa4d6aa81a1a87caf33c631fe46477bb39b Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 21 Dec 2023 21:01:53 -0500 Subject: [PATCH 10/29] rewrote objective function tools and added mask/transform --- environment-dev.yml | 1 - environment.yml | 1 - tests/test_calibration.py | 25 +- xhydro/{ => modelling}/calibration.py | 239 +++--- .../{ => modelling}/hydrological_modelling.py | 0 xhydro/modelling/obj_funcs.py | 762 ++++++++++++++++++ 6 files changed, 894 insertions(+), 134 deletions(-) rename xhydro/{ => modelling}/calibration.py (61%) rename xhydro/{ => modelling}/hydrological_modelling.py (100%) create mode 100644 xhydro/modelling/obj_funcs.py diff --git a/environment-dev.yml b/environment-dev.yml index d7d01e5c..17b32528 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -13,7 +13,6 @@ dependencies: - xscen >=0.7.1 - pip - pip: - - hydroeval - xdatasets # Dev - bump-my-version >=0.12.0 diff --git a/environment.yml b/environment.yml index 7e21b4c1..fe309fe6 100644 --- a/environment.yml +++ b/environment.yml @@ -13,5 +13,4 @@ dependencies: - xscen >=0.7.1 - pip - pip: - - hydroeval - xdatasets diff --git a/tests/test_calibration.py b/tests/test_calibration.py index c638e56e..eacbbb46 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -4,8 +4,9 @@ import numpy as np -from xhydro.calibration import get_objective_function_value, perform_calibration -from xhydro.hydrological_modelling import dummy_model +from xhydro.modelling.calibration import perform_calibration +from xhydro.modelling.hydrological_modelling import dummy_model +from xhydro.modelling.obj_funcs import get_objective_function def test_spotpy_calibration(): @@ -20,27 +21,33 @@ def test_spotpy_calibration(): "drainage_area": np.array([10]), "model_name": "Dummy", } - + + mask = np.array([0, 0, 0, 0, 1, 1]) + best_parameters, best_simulation, best_objfun = perform_calibration( model_config, - "nse", - maximize=True, + "mae", bounds_low=bounds_low, bounds_high=bounds_high, evaluations=1000, algorithm="DDS", + mask=mask, ) # Test that the results have the same size as expected (number of parameters) assert len(best_parameters) == len(bounds_high) # Test that the objective function is calculated correctly - objfun = get_objective_function_value( - model_config["Qobs"], best_simulation, best_parameters, obj_func="nse" - ) + objfun = get_objective_function( + model_config["Qobs"], + best_simulation, + obj_func="mae", + mask=mask, + ) + assert objfun == best_objfun # Test dummy model response model_config["parameters"] = [5, 5, 5] Qsim = dummy_model(model_config) - assert Qsim[3] == 3500.00 + assert Qsim[3] == 3500.00 \ No newline at end of file diff --git a/xhydro/calibration.py b/xhydro/modelling/calibration.py similarity index 61% rename from xhydro/calibration.py rename to xhydro/modelling/calibration.py index 75c0a4b3..5c6feb9e 100644 --- a/xhydro/calibration.py +++ b/xhydro/modelling/calibration.py @@ -59,20 +59,32 @@ from typing import Optional # Import packages -import hydroeval as he import numpy as np import spotpy from spotpy import analyser -from spotpy.objectivefunctions import rmse from spotpy.parameter import Uniform -from xhydro.hydrological_modelling import hydrological_model_selector +from xhydro.modelling.hydrological_modelling import hydrological_model_selector +from xhydro.modelling.obj_funcs import (get_objective_function, + get_objfun_minimize_or_maximize, + get_optimizer_minimize_or_maximize, + ) class spot_setup: """Create the spotpy calibration system that is used for hydrological model calibration.""" - def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): + def __init__(self, + model_config, + bounds_high, + bounds_low, + obj_func=None, + take_negative=False, + mask=None, + transform=None, + epsilon=None, + ): + """Initialize the spot_setup object. The initialization of the spot_setup object includes a generic @@ -84,13 +96,34 @@ def __init__(self, model_config, bounds_high, bounds_low, obj_func=None): Notes ----- Accepted obj_func values: - Computed by hydroeval: ["nse", "kge", "kgeprime", "kgenp", "rmse", "mare", "pbias"] - Computed by spotpy: ["bias","nashsutcliffe", "lognashsutcliffe", "log_p", "correlationcoefficient", - "rsquared", "mse", "mae", "rrmse", "agreementindex","covariance", "decomposed_mse", "rsr", "volume_error"] + - "abs_bias" : Absolute value of the "bias" metric + - "abs_pbias": Absolute value of the "pbias" metric + - "abs_volume_error" : Absolute value of the volume_error metric + - "agreement_index": Index of agreement + - "bias" : Bias metric + - "correlation_coeff": Correlation coefficient + - "kge" : Kling Gupta Efficiency metric (2009 version) + - "kge_mod" : Kling Gupta Efficiency metric (2012 version) + - "mae": Mean Absolute Error metric + - "mare": Mean Absolute Relative Error metric + - "mse" : Mean Square Error metric + - "nse": Nash-Sutcliffe Efficiency metric + - "pbias" : Percent bias (relative bias) + - "r2" : r-squared, i.e. square of correlation_coeff. + - "rmse" : Root Mean Square Error + - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) + - "rsr" : Ratio of RMSE to standard deviation. + - "volume_error": Total volume error over the period. """ - # Gather the model_config dictionary and obj_func string. + + # Gather the model_config dictionary and obj_func string, and other + # optional arguments. self.model_config = model_config self.obj_func = obj_func + self.mask = mask + self.transform = transform + self.epsilon = epsilon + self.take_negative = take_negative # Create the sampler for each parameter based on the bounds self.parameters = [ @@ -131,9 +164,6 @@ def objectivefunction( self, simulation, evaluation, - params=None, - transform: Optional[str] = None, - epsilon: Optional[float] = None, ): """Objective function for spotpy. @@ -141,28 +171,34 @@ def objectivefunction( Notes ----- - There are other possible inputs: - - params: if we force a parameter set, this function can be used to - compute the objective function easily. - - transform: Possibility to transform flows before computing the - objective function. "inv" takes 1/Q, "log" takes log(Q) and - "sqrt" takes sqrt(Q) before computing. - - epsilon: a small value to adjust flows before transforming to - prevent log(0) or 1/0 computations when flow is zero. + The inputs are: + - model_config object (dict type) with all required information, + - simulation, observation : vectors of streamflow used to compute + the objective function """ - return get_objective_function_value( - evaluation, simulation, params, transform, epsilon, self.obj_func - ) + + obj_fun_val = get_objective_function(evaluation, + simulation, + obj_func=self.obj_func, + take_negative=self.take_negative, + mask=self.mask, + transform=self.transform, + epsilon=self.epsilon, + ) + + return obj_fun_val def perform_calibration( model_config: dict, - evaluation_metric: str, - maximize: bool, + obj_func: str, bounds_high: np.array, bounds_low: np.array, evaluations: int, algorithm: str = "DDS", + mask: np.array = None, + transform: str = None, + epsilon: float = 0.01, ): """Perform calibration using spotpy. @@ -177,11 +213,25 @@ def perform_calibration( The model configuration object that contains all info to run the model. The model function called to run this model should always use this object and read-in data it requires. It will be up to the user to provide the data that the model requires. - evaluation_metric : str - The objective function used for calibrating. - maximize : bool - A flag to indicate that the objective function should be maximized (ex: "nse", "kge") - instead of minimized (ex: "rmse", "mae"). + obj_func : str + The objective function used for calibrating. Can be any one of these: + + - "abs_bias" : Absolute value of the "bias" metric + - "abs_pbias": Absolute value of the "pbias" metric + - "abs_volume_error" : Absolute value of the volume_error metric + - "agreement_index": Index of agreement + - "correlation_coeff": Correlation coefficient + - "kge" : Kling Gupta Efficiency metric (2009 version) + - "kge_mod" : Kling Gupta Efficiency metric (2012 version) + - "mae": Mean Absolute Error metric + - "mare": Mean Absolute Relative Error metric + - "mse" : Mean Square Error metric + - "nse": Nash-Sutcliffe Efficiency metric + - "r2" : r-squared, i.e. square of correlation_coeff. + - "rmse" : Root Mean Square Error + - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) + - "rsr" : Ratio of RMSE to standard deviation. + bounds_high : np.array High bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from within these bounds. The size must be equal to the number of parameters to calibrate. @@ -192,18 +242,42 @@ def perform_calibration( Maximum number of model evaluations (calibration budget) to perform before stopping the calibration process. algorithm : str The optimization algorithm to use. Currently, "DDS" and "SCEUA" are available, but more can be easily added. + mask : np.array + A vector indicating which values to preserve/remove from the objective function computation. 0=remove, 1=preserve. + transform : str + The method to transform streamflow prior to computing the objective function. Can be one of: + Square root ('sqrt'), inverse ('inv'), or logarithmic ('log') transformation. + epsilon : scalar float + Used to add a small delta to observations for log and inverse transforms, to eliminate errors + caused by zero flow days (1/0 and log(0)). The added perturbation is equal to the mean observed streamflow + times this value of epsilon. """ - # TODO: maximize is not automatically defined. - # We should remove this option and force the algo/obj_fun to be coordinated to return the best value. + # Get objective function and algo optimal convregence direction. Necessary + # to ensure that the algorithm is optimizing in the correct direction + # (maximizing or minimizing). This code determines the required direction + # for the objective function and the working direction of the algorithm. + of_maximize = get_objfun_minimize_or_maximize(obj_func) + algo_maximize = get_optimizer_minimize_or_maximize(algorithm) + + # They are not working in the same direction. Take the negative of the OF. + if of_maximize != algo_maximize: + take_negative = True + else: + take_negative = False + # Set up the spotpy object to prepare the calibration spotpy_setup = spot_setup( model_config, bounds_high=bounds_high, bounds_low=bounds_low, - obj_func=evaluation_metric, + obj_func=obj_func, + take_negative=take_negative, + mask=mask, + transform=transform, + epsilon=epsilon, ) - + # Select an optimization algorithm and parameterize it, then run the # optimization process. if algorithm == "DDS": @@ -223,17 +297,21 @@ def perform_calibration( # Get the best parameter set best_parameters = analyser.get_best_parameterset( - results, like_index=1, maximize=maximize + results, like_index=1, maximize=of_maximize ) best_parameters = [best_parameters[0][i] for i in range(0, len(best_parameters[0]))] # Get the best objective function as well depending if maximized or # minimized - if maximize: - bestindex, bestobjf = analyser.get_maxlikeindex(results) + if of_maximize: + _, bestobjf = analyser.get_maxlikeindex(results) else: - bestindex, bestobjf = analyser.get_minlikeindex(results) + _, bestobjf = analyser.get_minlikeindex(results) + # Reconvert objective function if required. + if take_negative: + bestobjf = bestobjf * -1 + # Update the parameter set to put the best parameters in model_config... model_config.update({"parameters": best_parameters}) @@ -241,89 +319,4 @@ def perform_calibration( Qsim = hydrological_model_selector(model_config) # Return the best parameters, Qsim and best objective function value. - return best_parameters, Qsim, bestobjf - - -def transform_flows(Qobs, Qsim, transform=None, epsilon=None): - """Transform flows before computing the objective function. - - It is used to transform flows such that the objective function is computed - on a transformed flow metric rather than on the original units of flow - (ex: inverse, log-transformed, square-root) - - Notes - ----- - This subset of code is taken from the hydroeval package: - https://github.com/ThibHlln/hydroeval/blob/main/hydroeval/hydroeval.py - """ - # Generate a subset of simulation and evaluation series where evaluation - # data is available - Qsim = Qsim[~np.isnan(Qobs[:, 0]), :] - Qobs = Qobs[~np.isnan(Qobs[:, 0]), :] - - # Transform the flow series if required - if transform == "log": # log transformation - if not epsilon: - # determine an epsilon value to avoid zero divide - # (following recommendation in Pushpalatha et al. (2012)) - epsilon = 0.01 * np.mean(Qobs) - Qobs, Qsim = np.log(Qobs + epsilon), np.log(Qsim + epsilon) - - elif transform == "inv": # inverse transformation - if not epsilon: - # determine an epsilon value to avoid zero divide - # (following recommendation in Pushpalatha et al. (2012)) - epsilon = 0.01 * np.mean(Qobs) - Qobs, Qsim = 1.0 / (Qobs + epsilon), 1.0 / (Qsim + epsilon) - - elif transform == "sqrt": # square root transformation - Qobs, Qsim = np.sqrt(Qobs), np.sqrt(Qsim) - - # Return the flows after transformation (or original if no transform) - return Qobs, Qsim - - -def get_objective_function_value( - Qobs, - Qsim, - params, - transform=None, - epsilon: Optional[float] = None, - obj_func: Optional[str] = None, -): - """Get the objective function value. - - This code returns the objective function as determined by the user, after - transforming (if required) using the Qobs provided by the user and the Qsim - simulated by the model controlled by spotpy during the calibration process. - """ - # Transform flows if needed - if transform: - Qobs, Qsim = transform_flows(Qobs, Qsim, transform, epsilon) - - # Default objective function if none provided by the user - if not obj_func: - like = rmse(Qobs, Qsim) - - # Popular ones we can use the hydroeval package - elif obj_func.lower() in [ - "nse", - "kge", - "kgeprime", - "kgenp", - "rmse", - "mare", - "pbias", - ]: - # Get the correct function from the package - like = he.evaluator( - getattr(he, obj_func.lower()), simulations=Qsim, evaluation=Qobs - ) - - # In neither of these cases, use the obj_func method from spotpy - else: - # FIXME: What is the type of obj_func? - like = obj_func(Qobs, Qsim) - - # Return results - return like + return best_parameters, Qsim, bestobjf \ No newline at end of file diff --git a/xhydro/hydrological_modelling.py b/xhydro/modelling/hydrological_modelling.py similarity index 100% rename from xhydro/hydrological_modelling.py rename to xhydro/modelling/hydrological_modelling.py diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py new file mode 100644 index 00000000..766028ee --- /dev/null +++ b/xhydro/modelling/obj_funcs.py @@ -0,0 +1,762 @@ +# Created on Tue Dec 12 21:55:25 2023 +# @author: Richard Arsenault +""" Objective function package for xhydro, for calibration and model evaluation + +This package provides a flexible suite of popular objective function metrics in +hydrological modelling and hydrological model calibration. The main function +'get_objective_function' returns the value of the desired objective function +while allowing users to customize many aspects: + + 1- Select the objective function to run; + 2- Allow providing a mask to remove certain elements from the objective + function calculation (e.g. for odd/even year calibration, or calibration + on high or low flows only, or any custom setup). + 3- Apply a transformation on the flows to modify the behaviour of the + objective function calculation (e.g taking the log, inverse or square + root transform of the flows before computing the objective function). + +This function also contains some tools and inputs reserved for the calibration +toolbox, such as the ability to take the negative of the objective function to +maximize instead of minimize a metric according to the needs of the optimizing +algorithm. +""" + +# Import packages +import numpy as np +import sys + +def get_objective_function(Qobs, + Qsim, + obj_func='rmse', + take_negative=False, + mask=None, + transform=None, + epsilon=None, + ): + """ + This function is the entrypoint for objective function calculation. More + can be added by adding the function to this file and adding the option in + this function. + + NOTE: All data corresponding to NaN values in the observation set are + removed from the calculation. If a mask is passed, it must be the + same size as the Qsim and Qobs vectors. If any NaNs are present in + the Qobs dataset, all corresponding data in the Qobs, Qsim and mask + will be removed prior to passing to the processing function. + + Parameters + ---------- + Qobs: numpy array of size n + Vector containing the Observed streamflow to be used in the objective + function calculation. It is the target to attain. + + Qsim: numpy array of size n + Vector containing the Simulated streamflow as generated by the hydro- + logical model. It is modified by changing parameters and resumulating + the hydrological model. + + obj_func : string, optional + String representing the objective function to use in the calibration. + Options must be one of the accepted objective functions: + + - "abs_bias" : Absolute value of the "bias" metric + - "abs_pbias": Absolute value of the "pbias" metric + - "abs_volume_error" : Absolute value of the volume_error metric + - "agreement_index": Index of agreement + - "bias" : Bias metric + - "correlation_coeff": Correlation coefficient + - "kge" : Kling Gupta Efficiency metric (2009 version) + - "kge_mod" : Kling Gupta Efficiency metric (2012 version) + - "mae": Mean Absolute Error metric + - "mare": Mean Absolute Relative Error metric + - "mse" : Mean Square Error metric + - "nse": Nash-Sutcliffe Efficiency metric + - "pbias" : Percent bias (relative bias) + - "r2" : r-squared, i.e. square of correlation_coeff. + - "rmse" : Root Mean Square Error + - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) + - "rsr" : Ratio of RMSE to standard deviation. + - "volume_error": Total volume error over the period. + + The default is 'rmse'. + + take_negative : Boolean used to force the objective function to be + multiplied by minus one (-1) such that it is possible to maximize it + if the optimizer is a minimizer and vice-versa. Should always be set + to False unless required by an optimization setup, which is handled + internally and transparently to the user. + + The default is False. + + mask : numpy array (vector) of size n, optional + array of 0 or 1 on which the objective function should be applied. + Values of 1 indicate that the value is included in the calculation, and + values of 0 indicate that the value is excluded and will have no impact + on the objective function calculation. This can be useful for specific + optimization strategies such as odd/even year calibration, seasonal + calibration or calibration based on high/low flows. + + The default is None, and all data are preserved. + + transform : string indicating the type of transformation required. Can be + one of the following values: + + - "sqrt" : Square root transformation of the flows [sqrt(Q)] + - "log" : Logarithmic transformation of the flows [log(Q)] + - "inv" : Inverse transformation of the flows [1/Q] + + The default value is "None", by which no transformation is performed. + + epsilon: scalar float indicating the perturbation to add to the flow + time series during a transformation to avoid division by zero and + logarithmic transformation. The perturbation is equal to: + + perturbation = epsilon * mean(Qobs) + + The default value is 0.01. + + + NOTE: All data corresponding to NaN values in the observation set are + removed from the calculation + + Returns + ------- + obj_fun_val: scalar float representing the value of the selected objective + function (obj_fun). + """ + + # List of available objective functions + obj_func_list = ["abs_bias", + "abs_pbias", + "abs_volume_error", + "agreement_index", + "bias", + "correlation_coeff", + "kge", + "kge_mod", + "mae", + "mare", + "mse", + "nse", + "pbias", + "r2", + "rmse", + "rrmse", + "rsr", + "volume_error", + ] + + # Basic error checking + if Qobs.shape[0] != Qsim.shape[0]: + sys.exit("Observed and Simulated flows are not of the same size.") + + # Check mask size and contents + if mask is not None: + + # Size + if Qobs.shape[0]!= mask.shape[0]: + sys.exit("Mask is not of the same size as the streamflow data.") + + # All zero or one? + if not np.setdiff1d(np.unique(mask),np.array([0, 1])).size == 0: + sys.exit("Mask contains values other 0 or 1. Please modify.") + + # Check that the objective function is in the list of available methods + if obj_func not in obj_func_list: + sys.exit("Selected objective function is currently unavailable. " + + "Consider contributing to our project at: " + + "github.com/hydrologie/xhydro") + + # Ensure there are no NaNs in the dataset + if mask is not None: + mask = mask[~np.isnan(Qobs)] + Qsim = Qsim[~np.isnan(Qobs)] + Qobs = Qobs[~np.isnan(Qobs)] + + # Apply mask before trasform + if mask is not None: + Qsim=Qsim[mask==1] + Qobs=Qobs[mask==1] + mask=mask[mask==1] + + # Transform data if needed + if transform is not None: + Qsim, Qobs = transform_flows(Qsim, Qobs, transform, epsilon) + + # Compute objective function by switching to the correct algorithm + match obj_func: + case "abs_bias": + obj_fun_val = abs_bias(Qsim, Qobs) + case "abs_pbias": + obj_fun_val = abs_pbias(Qsim, Qobs) + case "abs_volume_error": + obj_fun_val = abs_volume_error(Qsim, Qobs) + case "agreement_index": + obj_fun_val = agreement_index(Qsim, Qobs) + case "bias": + obj_fun_val = bias(Qsim, Qobs) + case "correlation_coeff": + obj_fun_val = correlation_coeff(Qsim, Qobs) + case "kge": + obj_fun_val = kge(Qsim, Qobs) + case "kge_mod": + obj_fun_val = kge_mod(Qsim, Qobs) + case "mae": + obj_fun_val = mae(Qsim, Qobs) + case "mare": + obj_fun_val = mare(Qsim, Qobs) + case "mse": + obj_fun_val = mse(Qsim, Qobs) + case "nse": + obj_fun_val = nse(Qsim, Qobs) + case "pbias": + obj_fun_val = pbias(Qsim, Qobs) + case "r2": + obj_fun_val = r2(Qsim, Qobs) + case "rmse": + obj_fun_val = rmse(Qsim, Qobs) + case "rrmse": + obj_fun_val = rrmse(Qsim, Qobs) + case "rsr": + obj_fun_val = rsr(Qsim, Qobs) + case "volume_error": + obj_fun_val = volume_error(Qsim, Qobs) + + # Take the negative value of the Objective Function to return to the + # optimizer. + if take_negative: + obj_fun_val = obj_fun_val * -1 + + print(obj_fun_val) + + return obj_fun_val + + +def get_objfun_minimize_or_maximize(obj_func): + """ + This function checks the name of the objective function and returns whether + it should be maximized or minimized. Returns a boolean value, where True + means it should be maximized, and Flase means that it should be minimized. + Objective functions other than those programmed here will raise an error. + + Inputs: + obj_func: string containing the label for the desired objective + function. + """ + + # Define metrics that need to be maximized: + if obj_func in ["agreement_index", + "correlation_coeff", + "kge", + "kge_mod", + "nse", + "r2" + ]: + maximize = True + + # Define the metrics that need to be minimized: + elif obj_func in ["abs_bias", + "abs_pbias", + "abs_volume_error", + "mae", + "mare", + "mse", + "rmse", + "rrmse", + "rsr", + ]: + maximize = False + + # Check for the metrics that exist but cannot be used for optimization + elif obj_func in ["bias","pbias","volume_error"]: + sys.exit("The bias, pbias and volume_error metrics cannot be minimized or maximized. \ + Please use the abs_bias, abs_pbias and abs_volume_error instead.") + else: + sys.exit("The objective function is unknown.") + + return maximize + + +def get_optimizer_minimize_or_maximize(algorithm): + """ + This function finds the direction in which the optimizer searches. Some + optimizers try to maximize the objective function value, and others try to + minimize it. Since our objective functions include some that need to be + maximized and others minimized, it is imperative to ensure that the + optimizer/objective-function pair work in tandem. + + Inputs: + algorithm: string containing the direction of the optimizer search + """ + + # Define metrics that need to be maximized: + if algorithm in ["DDS", + ]: + + maximize = True + + # Define the metrics that need to be minimized: + elif algorithm in ["SCEUA", + ]: + + maximize = False + + # Any other optimizer at this date + else: + sys.exit("The optimization algorithm is unknown.") + + return maximize + + +def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): + """ + Transform flows before computing the objective function. + + It is used to transform flows such that the objective function is computed + on a transformed flow metric rather than on the original units of flow + (ex: inverse, log-transformed, square-root) + + Parameters + ---------- + + Qsim : Simulated streamflow vector (numpy array) + + Qobs : Observed streamflow vector (numpy array) + + transform : string indicating the type of transformation required. Can be + one of the following values: + + - "sqrt" : Square root transformation of the flows [sqrt(Q)] + - "log" : Logarithmic transformation of the flows [log(Q)] + - "inv" : Inverse transformation of the flows [1/Q] + + The default value is "None", by which no transformation is performed. + + epsilon: scalar float indicating the perturbation to add to the flow + time series during a transformation to avoid division by zero and + logarithmic transformation. The perturbation is equal to: + + perturbation = epsilon * mean(Qobs) + + The default value is 0.01. + + Returns + ------- + Qsim, Qobs transformed according to the transformation function requested + by the user in "transform". Qsim and Qobs are numpy arrays. + + """ + + # Quick check + if transform is not None: + if transform not in ['log','inv','sqrt']: + sys.exit("Flow transformation method not recognized.") + + # Transform the flow series if required + if transform == "log": # log transformation + epsilon = epsilon * np.nanmean(Qobs) + Qobs, Qsim = np.log(Qobs + epsilon), np.log(Qsim + epsilon) + + elif transform == "inv": # inverse transformation + epsilon = epsilon * np.nanmean(Qobs) + Qobs, Qsim = 1.0 / (Qobs + epsilon), 1.0 / (Qsim + epsilon) + + elif transform == "sqrt": # square root transformation + Qobs, Qsim = np.sqrt(Qobs), np.sqrt(Qsim) + + # Return the flows after transformation (or original if no transform) + return Qsim, Qobs + + +""" +BEGIN OBJECTIVE FUNCTIONS DEFINITIONS +""" + +def abs_bias(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + abs_bias: positive scalar float representing the absolute value of the + "bias" metric. This metric is useful when calibrating on the bias, because + bias should aim to be 0 but can take large positive or negative values. + Taking the absolute value of the bias will let the optimizer minimize + the value to zero. + + The abs_bias should be MINIMIZED. + """ + return np.abs(bias(Qsim, Qobs)) + + +def abs_pbias(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + abs_pbias: positive scalar float representing the absolute value of the + "pbias" metric. This metric is useful when calibrating on the pbias, + because pbias should aim to be 0 but can take large positive or negative + values. Taking the absolute value of the pbias will let the optimizer + minimize the value to zero. + + The abs_pbias should be MINIMIZED. + """ + return np.abs(bias(Qsim, Qobs)) + + +def abs_volume_error(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + abs_volume_error: positive scalar float representing the absolute value of + the "volume_error" metric. This metric is useful when calibrating on the + volume_error, because volume_error should aim to be 0 but can take large + positive or negative values. Taking the absolute value of the volume_error + will let the optimizer minimize the value to zero. + + The abs_volume_error should be MINIMIZED. + """ + return np.abs(volume_error(Qsim, Qobs)) + + +def agreement_index(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + agreement_index: scalar float representing the agreement index of Willmott + (1981). Varies between 0 and 1. + + The Agreement index should be MAXIMIZED. + """ + + #Decompose into clearer chunks + a = np.sum((Qobs-Qsim)**2) + b = np.abs(Qsim - np.mean(Qobs)) + np.abs(Qobs-np.mean(Qobs)) + c = np.sum(b**2) + + return (1 - (a/c)) + + +def bias(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + bias: scalar float representing the bias in the simulation. Can be negative + or positive and gives the average error between the observed and simulated + flows. This interpretation uses the definition that a positive bias value + means that the simulation overestimates the true value (as opposed to many + other sources on bias calculations that use the contrary interpretation). + + BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR + CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE. + """ + return np.mean(Qsim - Qobs) + + +def correlation_coeff(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + correlation_coeff: scalar float representing the correlation coefficient. + + The correlation_coeff should be MAXIMIZED. + """ + return np.corrcoef(Qobs, Qsim)[0,1] + + +def kge(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + kge: scalar float representing the Kling-Gupta Efficiency (KGE) metric of + 2009. It can take values from -inf to 1 (best case). + + The KGE should be MAXIMIZED. + """ + # This pops up a lot, precalculate. + Qsim_mean = np.mean(Qsim) + Qobs_mean = np.mean(Qobs) + + # Calculate the components of KGE + r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) + r_den = np.sqrt( + np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2) + ) + r = r_num / r_den + a = np.std(Qsim) / np.std(Qobs) + b = np.sum(Qsim) / np.sum(Qobs) + + # Calculate the KGE + kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2) + + return kge + + +def kge_mod(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + kge_mod: scalar float representing the modified Kling-Gupta Efficiency + (KGE) metric of 2012. It can take values from -inf to 1 (best case). + + The kge_mod should be MAXIMIZED. + """ + + # These pop up a lot, precalculate + Qsim_mean = np.mean(Qsim) + Qobs_mean = np.mean(Qobs) + + # Calc KGE components + r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) + r_den = np.sqrt( + np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2) + ) + r = r_num / r_den + g = ((np.std(Qsim) / Qsim_mean) / (np.std(Qobs) / Qobs_mean)) + b = (np.mean(Qsim) / np.mean(Qobs)) + + # Calc the modified KGE metric + kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2) + + return kge_mod + + +def mae(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + mae: scalar float representing the Mean Absolute Error. It can be + interpreted as the average error (absolute) between observations and + simulations for any time step. + + The mae should be MINIMIZED. + """ + return np.mean(np.abs(Qsim-Qobs)) + + +def mare(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + mare: scalar float representing the Mean Absolute Relative Error. For + streamflow, where Qobs is always zero or positive, the MARE is always + positive. + + The mare should be MINIMIZED. + """ + + return (np.sum(np.abs(Qobs - Qsim)) / np.sum(Qobs)) + + +def mse(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + mse: scalar float representing the Mean Square Error. It is the sum of + squared errors for each day divided by the total number of days. Units are + thus squared units, and the best possible value is 0. + + The mse should be MINIMIZED. + """ + return np.mean((Qobs-Qsim)**2) + + +def nse(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + nse: scalar float representing the Nash-Sutcliffe Efficiency (NSE) metric. + It can take values from -inf to 1, with 0 being as good as using the mean + observed flow as the estimator. + + The nse should be MAXIMIZED. + """ + + num = np.sum((Qobs - Qsim) ** 2) + den = np.sum((Qobs - np.mean(Qobs)) ** 2) + + return (1 - (num / den)) + + +def pbias(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + pbias: scalar float representing the Percent bias. Can be negative or + positive and gives the average relative error between the observed and + simulated flows. This interpretation uses the definition that a positive + bias value means that the simulation overestimates the true value (as + opposed to many other sources on bias calculations that use the contrary + interpretation). + + PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR + CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE. + """ + + return (np.sum(Qsim - Qobs)/ np.sum(Qobs)) * 100 + + +def r2(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + r2: scalar float representing the r-squared (R2) metric equal to the square + of the correlation coefficient. + + The r2 should be MAXIMIZED. + """ + return correlation_coeff(Qsim, Qobs) ** 2 + + +def rmse(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + rmse: scalar float representing the Root Mean Square Error. Units are the + same as the timeseries data (ex. m3/s). It can take zero or positive + values. + + The rmse should be MINIMIZED. + """ + + return np.sqrt(np.mean((Qobs - Qsim) ** 2)) + + +def rrmse(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + rrmse: scalar float representing the ratio of the RMSE to the mean of the + observations. It allows scaling RMSE values to compare results between + time series of different magnitudes (ex. flows from small and large + watersheds). Also known as the CVRMSE. + + The rrmse should be MINIMIZED. + """ + return rmse(Qsim, Qobs) / np.mean(Qobs) + + +def rsr(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + rsr: scalar float representing the Root Mean Square Error (RMSE) divided by + the standard deviation of the observations. Also known as the "Ratio of the + Root Mean Square Error to the Standard Deviation of Observations". + + The rsr should be MINIMIZED. + """ + return rmse(Qobs, Qsim) / np.std(Qobs) + + +def volume_error(Qsim, Qobs): + """ + Parameters + ---------- + Qsim : Simulated streamflow vector (numpy array) + Qobs : Observed streamflow vector (numpy array) + + Returns + ------- + volume_error: scalar float representing the total error in terms of volume + over the entire period. Expressed in terms of the same units as input data, + so for flow rates it is important to multiply by the duration of the time- + step to obtain actual volumes. + + The volume_error should be MINIMIZED. + """ + return np.sum(Qsim-Qobs) / np.sum(Qobs) + + + +""" +ADD OBJECTIVE FUNCTIONS HERE +""" + + + + + \ No newline at end of file From 19efaf46726d6b7e2c74364a8f74b390609f3ee7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Dec 2023 02:02:08 +0000 Subject: [PATCH 11/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_calibration.py | 20 +- xhydro/modelling/calibration.py | 68 ++--- xhydro/modelling/obj_funcs.py | 448 ++++++++++++++++---------------- 3 files changed, 269 insertions(+), 267 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index eacbbb46..f77596b8 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -21,9 +21,9 @@ def test_spotpy_calibration(): "drainage_area": np.array([10]), "model_name": "Dummy", } - + mask = np.array([0, 0, 0, 0, 1, 1]) - + best_parameters, best_simulation, best_objfun = perform_calibration( model_config, "mae", @@ -38,16 +38,16 @@ def test_spotpy_calibration(): assert len(best_parameters) == len(bounds_high) # Test that the objective function is calculated correctly - objfun = get_objective_function( - model_config["Qobs"], - best_simulation, - obj_func="mae", - mask=mask, - ) - + objfun = get_objective_function( + model_config["Qobs"], + best_simulation, + obj_func="mae", + mask=mask, + ) + assert objfun == best_objfun # Test dummy model response model_config["parameters"] = [5, 5, 5] Qsim = dummy_model(model_config) - assert Qsim[3] == 3500.00 \ No newline at end of file + assert Qsim[3] == 3500.00 diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index 5c6feb9e..50fd9a2a 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -65,26 +65,27 @@ from spotpy.parameter import Uniform from xhydro.modelling.hydrological_modelling import hydrological_model_selector -from xhydro.modelling.obj_funcs import (get_objective_function, - get_objfun_minimize_or_maximize, - get_optimizer_minimize_or_maximize, - ) +from xhydro.modelling.obj_funcs import ( + get_objective_function, + get_objfun_minimize_or_maximize, + get_optimizer_minimize_or_maximize, +) class spot_setup: """Create the spotpy calibration system that is used for hydrological model calibration.""" - def __init__(self, - model_config, - bounds_high, - bounds_low, - obj_func=None, - take_negative=False, - mask=None, - transform=None, - epsilon=None, - ): - + def __init__( + self, + model_config, + bounds_high, + bounds_low, + obj_func=None, + take_negative=False, + mask=None, + transform=None, + epsilon=None, + ): """Initialize the spot_setup object. The initialization of the spot_setup object includes a generic @@ -115,7 +116,7 @@ def __init__(self, - "rsr" : Ratio of RMSE to standard deviation. - "volume_error": Total volume error over the period. """ - + # Gather the model_config dictionary and obj_func string, and other # optional arguments. self.model_config = model_config @@ -176,16 +177,17 @@ def objectivefunction( - simulation, observation : vectors of streamflow used to compute the objective function """ - - obj_fun_val = get_objective_function(evaluation, - simulation, - obj_func=self.obj_func, - take_negative=self.take_negative, - mask=self.mask, - transform=self.transform, - epsilon=self.epsilon, - ) - + + obj_fun_val = get_objective_function( + evaluation, + simulation, + obj_func=self.obj_func, + take_negative=self.take_negative, + mask=self.mask, + transform=self.transform, + epsilon=self.epsilon, + ) + return obj_fun_val @@ -215,7 +217,7 @@ def perform_calibration( It will be up to the user to provide the data that the model requires. obj_func : str The objective function used for calibrating. Can be any one of these: - + - "abs_bias" : Absolute value of the "bias" metric - "abs_pbias": Absolute value of the "pbias" metric - "abs_volume_error" : Absolute value of the volume_error metric @@ -231,7 +233,7 @@ def perform_calibration( - "rmse" : Root Mean Square Error - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) - "rsr" : Ratio of RMSE to standard deviation. - + bounds_high : np.array High bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from within these bounds. The size must be equal to the number of parameters to calibrate. @@ -259,13 +261,13 @@ def perform_calibration( # for the objective function and the working direction of the algorithm. of_maximize = get_objfun_minimize_or_maximize(obj_func) algo_maximize = get_optimizer_minimize_or_maximize(algorithm) - + # They are not working in the same direction. Take the negative of the OF. if of_maximize != algo_maximize: take_negative = True else: take_negative = False - + # Set up the spotpy object to prepare the calibration spotpy_setup = spot_setup( model_config, @@ -277,7 +279,7 @@ def perform_calibration( transform=transform, epsilon=epsilon, ) - + # Select an optimization algorithm and parameterize it, then run the # optimization process. if algorithm == "DDS": @@ -311,7 +313,7 @@ def perform_calibration( # Reconvert objective function if required. if take_negative: bestobjf = bestobjf * -1 - + # Update the parameter set to put the best parameters in model_config... model_config.update({"parameters": best_parameters}) @@ -319,4 +321,4 @@ def perform_calibration( Qsim = hydrological_model_selector(model_config) # Return the best parameters, Qsim and best objective function value. - return best_parameters, Qsim, bestobjf \ No newline at end of file + return best_parameters, Qsim, bestobjf diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index 766028ee..6b469b20 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -6,59 +6,62 @@ hydrological modelling and hydrological model calibration. The main function 'get_objective_function' returns the value of the desired objective function while allowing users to customize many aspects: - + 1- Select the objective function to run; 2- Allow providing a mask to remove certain elements from the objective function calculation (e.g. for odd/even year calibration, or calibration on high or low flows only, or any custom setup). - 3- Apply a transformation on the flows to modify the behaviour of the + 3- Apply a transformation on the flows to modify the behaviour of the objective function calculation (e.g taking the log, inverse or square root transform of the flows before computing the objective function). - + This function also contains some tools and inputs reserved for the calibration toolbox, such as the ability to take the negative of the objective function to maximize instead of minimize a metric according to the needs of the optimizing algorithm. """ +import sys + # Import packages import numpy as np -import sys -def get_objective_function(Qobs, - Qsim, - obj_func='rmse', - take_negative=False, - mask=None, - transform=None, - epsilon=None, - ): + +def get_objective_function( + Qobs, + Qsim, + obj_func="rmse", + take_negative=False, + mask=None, + transform=None, + epsilon=None, +): """ This function is the entrypoint for objective function calculation. More - can be added by adding the function to this file and adding the option in - this function. - + can be added by adding the function to this file and adding the option in + this function. + NOTE: All data corresponding to NaN values in the observation set are removed from the calculation. If a mask is passed, it must be the - same size as the Qsim and Qobs vectors. If any NaNs are present in - the Qobs dataset, all corresponding data in the Qobs, Qsim and mask + same size as the Qsim and Qobs vectors. If any NaNs are present in + the Qobs dataset, all corresponding data in the Qobs, Qsim and mask will be removed prior to passing to the processing function. - + Parameters ---------- Qobs: numpy array of size n Vector containing the Observed streamflow to be used in the objective function calculation. It is the target to attain. - + Qsim: numpy array of size n Vector containing the Simulated streamflow as generated by the hydro- - logical model. It is modified by changing parameters and resumulating + logical model. It is modified by changing parameters and resumulating the hydrological model. - + obj_func : string, optional - String representing the objective function to use in the calibration. + String representing the objective function to use in the calibration. Options must be one of the accepted objective functions: - + - "abs_bias" : Absolute value of the "bias" metric - "abs_pbias": Absolute value of the "pbias" metric - "abs_volume_error" : Absolute value of the volume_error metric @@ -77,45 +80,45 @@ def get_objective_function(Qobs, - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) - "rsr" : Ratio of RMSE to standard deviation. - "volume_error": Total volume error over the period. - + The default is 'rmse'. - + take_negative : Boolean used to force the objective function to be multiplied by minus one (-1) such that it is possible to maximize it - if the optimizer is a minimizer and vice-versa. Should always be set + if the optimizer is a minimizer and vice-versa. Should always be set to False unless required by an optimization setup, which is handled internally and transparently to the user. - + The default is False. - + mask : numpy array (vector) of size n, optional - array of 0 or 1 on which the objective function should be applied. + array of 0 or 1 on which the objective function should be applied. Values of 1 indicate that the value is included in the calculation, and values of 0 indicate that the value is excluded and will have no impact on the objective function calculation. This can be useful for specific - optimization strategies such as odd/even year calibration, seasonal - calibration or calibration based on high/low flows. - + optimization strategies such as odd/even year calibration, seasonal + calibration or calibration based on high/low flows. + The default is None, and all data are preserved. - - transform : string indicating the type of transformation required. Can be + + transform : string indicating the type of transformation required. Can be one of the following values: - + - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] - "inv" : Inverse transformation of the flows [1/Q] - + The default value is "None", by which no transformation is performed. - - epsilon: scalar float indicating the perturbation to add to the flow - time series during a transformation to avoid division by zero and + + epsilon: scalar float indicating the perturbation to add to the flow + time series during a transformation to avoid division by zero and logarithmic transformation. The perturbation is equal to: - + perturbation = epsilon * mean(Qobs) - + The default value is 0.01. - - + + NOTE: All data corresponding to NaN values in the observation set are removed from the calculation @@ -124,65 +127,67 @@ def get_objective_function(Qobs, obj_fun_val: scalar float representing the value of the selected objective function (obj_fun). """ - + # List of available objective functions - obj_func_list = ["abs_bias", - "abs_pbias", - "abs_volume_error", - "agreement_index", - "bias", - "correlation_coeff", - "kge", - "kge_mod", - "mae", - "mare", - "mse", - "nse", - "pbias", - "r2", - "rmse", - "rrmse", - "rsr", - "volume_error", - ] - + obj_func_list = [ + "abs_bias", + "abs_pbias", + "abs_volume_error", + "agreement_index", + "bias", + "correlation_coeff", + "kge", + "kge_mod", + "mae", + "mare", + "mse", + "nse", + "pbias", + "r2", + "rmse", + "rrmse", + "rsr", + "volume_error", + ] + # Basic error checking if Qobs.shape[0] != Qsim.shape[0]: sys.exit("Observed and Simulated flows are not of the same size.") - + # Check mask size and contents if mask is not None: - # Size - if Qobs.shape[0]!= mask.shape[0]: + if Qobs.shape[0] != mask.shape[0]: sys.exit("Mask is not of the same size as the streamflow data.") - + # All zero or one? - if not np.setdiff1d(np.unique(mask),np.array([0, 1])).size == 0: + if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0: sys.exit("Mask contains values other 0 or 1. Please modify.") # Check that the objective function is in the list of available methods if obj_func not in obj_func_list: - sys.exit("Selected objective function is currently unavailable. " + - "Consider contributing to our project at: " + - "github.com/hydrologie/xhydro") - + sys.exit( + "Selected objective function is currently unavailable. " + + "Consider contributing to our project at: " + + "github.com/hydrologie/xhydro" + ) + # Ensure there are no NaNs in the dataset if mask is not None: mask = mask[~np.isnan(Qobs)] Qsim = Qsim[~np.isnan(Qobs)] Qobs = Qobs[~np.isnan(Qobs)] - + # Apply mask before trasform if mask is not None: - Qsim=Qsim[mask==1] - Qobs=Qobs[mask==1] - mask=mask[mask==1] - + Qsim = Qsim[mask == 1] + Qobs = Qobs[mask == 1] + mask = mask[mask == 1] + # Transform data if needed if transform is not None: Qsim, Qobs = transform_flows(Qsim, Qobs, transform, epsilon) - + # Compute objective function by switching to the correct algorithm match obj_func: case "abs_bias": @@ -221,90 +226,94 @@ def get_objective_function(Qobs, obj_fun_val = rsr(Qsim, Qobs) case "volume_error": obj_fun_val = volume_error(Qsim, Qobs) - - # Take the negative value of the Objective Function to return to the + + # Take the negative value of the Objective Function to return to the # optimizer. if take_negative: obj_fun_val = obj_fun_val * -1 - + print(obj_fun_val) - + return obj_fun_val def get_objfun_minimize_or_maximize(obj_func): """ This function checks the name of the objective function and returns whether - it should be maximized or minimized. Returns a boolean value, where True + it should be maximized or minimized. Returns a boolean value, where True means it should be maximized, and Flase means that it should be minimized. Objective functions other than those programmed here will raise an error. - + Inputs: - obj_func: string containing the label for the desired objective + obj_func: string containing the label for the desired objective function. """ - + # Define metrics that need to be maximized: - if obj_func in ["agreement_index", - "correlation_coeff", - "kge", - "kge_mod", - "nse", - "r2" - ]: + if obj_func in [ + "agreement_index", + "correlation_coeff", + "kge", + "kge_mod", + "nse", + "r2", + ]: maximize = True - + # Define the metrics that need to be minimized: - elif obj_func in ["abs_bias", - "abs_pbias", - "abs_volume_error", - "mae", - "mare", - "mse", - "rmse", - "rrmse", - "rsr", - ]: + elif obj_func in [ + "abs_bias", + "abs_pbias", + "abs_volume_error", + "mae", + "mare", + "mse", + "rmse", + "rrmse", + "rsr", + ]: maximize = False - + # Check for the metrics that exist but cannot be used for optimization - elif obj_func in ["bias","pbias","volume_error"]: - sys.exit("The bias, pbias and volume_error metrics cannot be minimized or maximized. \ - Please use the abs_bias, abs_pbias and abs_volume_error instead.") + elif obj_func in ["bias", "pbias", "volume_error"]: + sys.exit( + "The bias, pbias and volume_error metrics cannot be minimized or maximized. \ + Please use the abs_bias, abs_pbias and abs_volume_error instead." + ) else: sys.exit("The objective function is unknown.") - + return maximize - - + + def get_optimizer_minimize_or_maximize(algorithm): """ This function finds the direction in which the optimizer searches. Some optimizers try to maximize the objective function value, and others try to minimize it. Since our objective functions include some that need to be - maximized and others minimized, it is imperative to ensure that the + maximized and others minimized, it is imperative to ensure that the optimizer/objective-function pair work in tandem. - + Inputs: algorithm: string containing the direction of the optimizer search """ - + # Define metrics that need to be maximized: - if algorithm in ["DDS", - ]: - + if algorithm in [ + "DDS", + ]: maximize = True - + # Define the metrics that need to be minimized: - elif algorithm in ["SCEUA", - ]: - + elif algorithm in [ + "SCEUA", + ]: maximize = False - + # Any other optimizer at this date else: sys.exit("The optimization algorithm is unknown.") - + return maximize @@ -323,35 +332,35 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): Qobs : Observed streamflow vector (numpy array) - transform : string indicating the type of transformation required. Can be + transform : string indicating the type of transformation required. Can be one of the following values: - + - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] - "inv" : Inverse transformation of the flows [1/Q] - + The default value is "None", by which no transformation is performed. - - epsilon: scalar float indicating the perturbation to add to the flow - time series during a transformation to avoid division by zero and + + epsilon: scalar float indicating the perturbation to add to the flow + time series during a transformation to avoid division by zero and logarithmic transformation. The perturbation is equal to: - + perturbation = epsilon * mean(Qobs) - + The default value is 0.01. - + Returns ------- Qsim, Qobs transformed according to the transformation function requested by the user in "transform". Qsim and Qobs are numpy arrays. - + """ # Quick check if transform is not None: - if transform not in ['log','inv','sqrt']: + if transform not in ["log", "inv", "sqrt"]: sys.exit("Flow transformation method not recognized.") - + # Transform the flow series if required if transform == "log": # log transformation epsilon = epsilon * np.nanmean(Qobs) @@ -372,6 +381,7 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): BEGIN OBJECTIVE FUNCTIONS DEFINITIONS """ + def abs_bias(Qsim, Qobs): """ Parameters @@ -381,12 +391,12 @@ def abs_bias(Qsim, Qobs): Returns ------- - abs_bias: positive scalar float representing the absolute value of the - "bias" metric. This metric is useful when calibrating on the bias, because - bias should aim to be 0 but can take large positive or negative values. - Taking the absolute value of the bias will let the optimizer minimize + abs_bias: positive scalar float representing the absolute value of the + "bias" metric. This metric is useful when calibrating on the bias, because + bias should aim to be 0 but can take large positive or negative values. + Taking the absolute value of the bias will let the optimizer minimize the value to zero. - + The abs_bias should be MINIMIZED. """ return np.abs(bias(Qsim, Qobs)) @@ -401,12 +411,12 @@ def abs_pbias(Qsim, Qobs): Returns ------- - abs_pbias: positive scalar float representing the absolute value of the - "pbias" metric. This metric is useful when calibrating on the pbias, - because pbias should aim to be 0 but can take large positive or negative - values. Taking the absolute value of the pbias will let the optimizer + abs_pbias: positive scalar float representing the absolute value of the + "pbias" metric. This metric is useful when calibrating on the pbias, + because pbias should aim to be 0 but can take large positive or negative + values. Taking the absolute value of the pbias will let the optimizer minimize the value to zero. - + The abs_pbias should be MINIMIZED. """ return np.abs(bias(Qsim, Qobs)) @@ -421,12 +431,12 @@ def abs_volume_error(Qsim, Qobs): Returns ------- - abs_volume_error: positive scalar float representing the absolute value of - the "volume_error" metric. This metric is useful when calibrating on the - volume_error, because volume_error should aim to be 0 but can take large + abs_volume_error: positive scalar float representing the absolute value of + the "volume_error" metric. This metric is useful when calibrating on the + volume_error, because volume_error should aim to be 0 but can take large positive or negative values. Taking the absolute value of the volume_error will let the optimizer minimize the value to zero. - + The abs_volume_error should be MINIMIZED. """ return np.abs(volume_error(Qsim, Qobs)) @@ -441,18 +451,18 @@ def agreement_index(Qsim, Qobs): Returns ------- - agreement_index: scalar float representing the agreement index of Willmott + agreement_index: scalar float representing the agreement index of Willmott (1981). Varies between 0 and 1. - + The Agreement index should be MAXIMIZED. """ - - #Decompose into clearer chunks - a = np.sum((Qobs-Qsim)**2) - b = np.abs(Qsim - np.mean(Qobs)) + np.abs(Qobs-np.mean(Qobs)) + + # Decompose into clearer chunks + a = np.sum((Qobs - Qsim) ** 2) + b = np.abs(Qsim - np.mean(Qobs)) + np.abs(Qobs - np.mean(Qobs)) c = np.sum(b**2) - - return (1 - (a/c)) + + return 1 - (a / c) def bias(Qsim, Qobs): @@ -469,12 +479,12 @@ def bias(Qsim, Qobs): flows. This interpretation uses the definition that a positive bias value means that the simulation overestimates the true value (as opposed to many other sources on bias calculations that use the contrary interpretation). - + BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE. """ return np.mean(Qsim - Qobs) - + def correlation_coeff(Qsim, Qobs): """ @@ -485,12 +495,12 @@ def correlation_coeff(Qsim, Qobs): Returns ------- - correlation_coeff: scalar float representing the correlation coefficient. - + correlation_coeff: scalar float representing the correlation coefficient. + The correlation_coeff should be MAXIMIZED. """ - return np.corrcoef(Qobs, Qsim)[0,1] - + return np.corrcoef(Qobs, Qsim)[0, 1] + def kge(Qsim, Qobs): """ @@ -501,9 +511,9 @@ def kge(Qsim, Qobs): Returns ------- - kge: scalar float representing the Kling-Gupta Efficiency (KGE) metric of + kge: scalar float representing the Kling-Gupta Efficiency (KGE) metric of 2009. It can take values from -inf to 1 (best case). - + The KGE should be MAXIMIZED. """ # This pops up a lot, precalculate. @@ -512,13 +522,11 @@ def kge(Qsim, Qobs): # Calculate the components of KGE r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) - r_den = np.sqrt( - np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2) - ) + r_den = np.sqrt(np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2)) r = r_num / r_den a = np.std(Qsim) / np.std(Qobs) b = np.sum(Qsim) / np.sum(Qobs) - + # Calculate the KGE kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2) @@ -534,25 +542,23 @@ def kge_mod(Qsim, Qobs): Returns ------- - kge_mod: scalar float representing the modified Kling-Gupta Efficiency + kge_mod: scalar float representing the modified Kling-Gupta Efficiency (KGE) metric of 2012. It can take values from -inf to 1 (best case). - + The kge_mod should be MAXIMIZED. """ - + # These pop up a lot, precalculate Qsim_mean = np.mean(Qsim) Qobs_mean = np.mean(Qobs) - + # Calc KGE components r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) - r_den = np.sqrt( - np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2) - ) + r_den = np.sqrt(np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2)) r = r_num / r_den - g = ((np.std(Qsim) / Qsim_mean) / (np.std(Qobs) / Qobs_mean)) - b = (np.mean(Qsim) / np.mean(Qobs)) - + g = (np.std(Qsim) / Qsim_mean) / (np.std(Qobs) / Qobs_mean) + b = np.mean(Qsim) / np.mean(Qobs) + # Calc the modified KGE metric kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2) @@ -569,14 +575,14 @@ def mae(Qsim, Qobs): Returns ------- mae: scalar float representing the Mean Absolute Error. It can be - interpreted as the average error (absolute) between observations and + interpreted as the average error (absolute) between observations and simulations for any time step. - + The mae should be MINIMIZED. """ - return np.mean(np.abs(Qsim-Qobs)) + return np.mean(np.abs(Qsim - Qobs)) + - def mare(Qsim, Qobs): """ Parameters @@ -586,14 +592,14 @@ def mare(Qsim, Qobs): Returns ------- - mare: scalar float representing the Mean Absolute Relative Error. For + mare: scalar float representing the Mean Absolute Relative Error. For streamflow, where Qobs is always zero or positive, the MARE is always positive. - + The mare should be MINIMIZED. """ - - return (np.sum(np.abs(Qobs - Qsim)) / np.sum(Qobs)) + + return np.sum(np.abs(Qobs - Qsim)) / np.sum(Qobs) def mse(Qsim, Qobs): @@ -608,12 +614,12 @@ def mse(Qsim, Qobs): mse: scalar float representing the Mean Square Error. It is the sum of squared errors for each day divided by the total number of days. Units are thus squared units, and the best possible value is 0. - + The mse should be MINIMIZED. """ - return np.mean((Qobs-Qsim)**2) - - + return np.mean((Qobs - Qsim) ** 2) + + def nse(Qsim, Qobs): """ Parameters @@ -625,15 +631,15 @@ def nse(Qsim, Qobs): ------- nse: scalar float representing the Nash-Sutcliffe Efficiency (NSE) metric. It can take values from -inf to 1, with 0 being as good as using the mean - observed flow as the estimator. - + observed flow as the estimator. + The nse should be MAXIMIZED. """ - + num = np.sum((Qobs - Qsim) ** 2) den = np.sum((Qobs - np.mean(Qobs)) ** 2) - - return (1 - (num / den)) + + return 1 - (num / den) def pbias(Qsim, Qobs): @@ -645,18 +651,18 @@ def pbias(Qsim, Qobs): Returns ------- - pbias: scalar float representing the Percent bias. Can be negative or - positive and gives the average relative error between the observed and - simulated flows. This interpretation uses the definition that a positive - bias value means that the simulation overestimates the true value (as - opposed to many other sources on bias calculations that use the contrary + pbias: scalar float representing the Percent bias. Can be negative or + positive and gives the average relative error between the observed and + simulated flows. This interpretation uses the definition that a positive + bias value means that the simulation overestimates the true value (as + opposed to many other sources on bias calculations that use the contrary interpretation). - + PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE. """ - - return (np.sum(Qsim - Qobs)/ np.sum(Qobs)) * 100 + + return (np.sum(Qsim - Qobs) / np.sum(Qobs)) * 100 def r2(Qsim, Qobs): @@ -669,12 +675,12 @@ def r2(Qsim, Qobs): Returns ------- r2: scalar float representing the r-squared (R2) metric equal to the square - of the correlation coefficient. - + of the correlation coefficient. + The r2 should be MAXIMIZED. """ return correlation_coeff(Qsim, Qobs) ** 2 - + def rmse(Qsim, Qobs): """ @@ -685,10 +691,10 @@ def rmse(Qsim, Qobs): Returns ------- - rmse: scalar float representing the Root Mean Square Error. Units are the - same as the timeseries data (ex. m3/s). It can take zero or positive + rmse: scalar float representing the Root Mean Square Error. Units are the + same as the timeseries data (ex. m3/s). It can take zero or positive values. - + The rmse should be MINIMIZED. """ @@ -704,11 +710,11 @@ def rrmse(Qsim, Qobs): Returns ------- - rrmse: scalar float representing the ratio of the RMSE to the mean of the - observations. It allows scaling RMSE values to compare results between + rrmse: scalar float representing the ratio of the RMSE to the mean of the + observations. It allows scaling RMSE values to compare results between time series of different magnitudes (ex. flows from small and large watersheds). Also known as the CVRMSE. - + The rrmse should be MINIMIZED. """ return rmse(Qsim, Qobs) / np.mean(Qobs) @@ -724,9 +730,9 @@ def rsr(Qsim, Qobs): Returns ------- rsr: scalar float representing the Root Mean Square Error (RMSE) divided by - the standard deviation of the observations. Also known as the "Ratio of the + the standard deviation of the observations. Also known as the "Ratio of the Root Mean Square Error to the Standard Deviation of Observations". - + The rsr should be MINIMIZED. """ return rmse(Qobs, Qsim) / np.std(Qobs) @@ -745,18 +751,12 @@ def volume_error(Qsim, Qobs): over the entire period. Expressed in terms of the same units as input data, so for flow rates it is important to multiply by the duration of the time- step to obtain actual volumes. - + The volume_error should be MINIMIZED. """ - return np.sum(Qsim-Qobs) / np.sum(Qobs) - + return np.sum(Qsim - Qobs) / np.sum(Qobs) """ ADD OBJECTIVE FUNCTIONS HERE """ - - - - - \ No newline at end of file From dd1693c94258a52f70cc40eb00fc4a5465fde409 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 21 Dec 2023 23:31:04 -0500 Subject: [PATCH 12/29] fixing linting/ruff, small adjustments --- environment-dev.yml | 1 + environment.yml | 1 + pyproject.toml | 1 + tests/test_calibration.py | 20 +- xhydro/modelling/calibration.py | 68 +++-- xhydro/modelling/obj_funcs.py | 511 +++++++++++++++++--------------- 6 files changed, 314 insertions(+), 288 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 17b32528..5bcb2509 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -6,6 +6,7 @@ dependencies: # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - numpy + - pydantic >=2.0, <2.5.3 - spotpy - statsmodels - xarray diff --git a/environment.yml b/environment.yml index fe309fe6..4ec98f41 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,7 @@ dependencies: # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - numpy + - pydantic >=2.0, <2.5.3 - spotpy - statsmodels - xarray diff --git a/pyproject.toml b/pyproject.toml index ce1b3ef0..b3536f52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dynamic = ["description", "version"] dependencies = [ # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! "numpy", + "pydantic >=2.0, <2.5.3", "spotpy", "statsmodels", "xarray", diff --git a/tests/test_calibration.py b/tests/test_calibration.py index eacbbb46..f77596b8 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -21,9 +21,9 @@ def test_spotpy_calibration(): "drainage_area": np.array([10]), "model_name": "Dummy", } - + mask = np.array([0, 0, 0, 0, 1, 1]) - + best_parameters, best_simulation, best_objfun = perform_calibration( model_config, "mae", @@ -38,16 +38,16 @@ def test_spotpy_calibration(): assert len(best_parameters) == len(bounds_high) # Test that the objective function is calculated correctly - objfun = get_objective_function( - model_config["Qobs"], - best_simulation, - obj_func="mae", - mask=mask, - ) - + objfun = get_objective_function( + model_config["Qobs"], + best_simulation, + obj_func="mae", + mask=mask, + ) + assert objfun == best_objfun # Test dummy model response model_config["parameters"] = [5, 5, 5] Qsim = dummy_model(model_config) - assert Qsim[3] == 3500.00 \ No newline at end of file + assert Qsim[3] == 3500.00 diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index 5c6feb9e..40bfd6bd 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -56,7 +56,6 @@ Any comments are welcome! """ -from typing import Optional # Import packages import numpy as np @@ -65,26 +64,27 @@ from spotpy.parameter import Uniform from xhydro.modelling.hydrological_modelling import hydrological_model_selector -from xhydro.modelling.obj_funcs import (get_objective_function, - get_objfun_minimize_or_maximize, - get_optimizer_minimize_or_maximize, - ) +from xhydro.modelling.obj_funcs import ( + get_objective_function, + get_objfun_minimize_or_maximize, + get_optimizer_minimize_or_maximize, +) class spot_setup: """Create the spotpy calibration system that is used for hydrological model calibration.""" - def __init__(self, - model_config, - bounds_high, - bounds_low, - obj_func=None, - take_negative=False, - mask=None, - transform=None, - epsilon=None, - ): - + def __init__( + self, + model_config, + bounds_high, + bounds_low, + obj_func=None, + take_negative=False, + mask=None, + transform=None, + epsilon=None, + ): """Initialize the spot_setup object. The initialization of the spot_setup object includes a generic @@ -115,7 +115,6 @@ def __init__(self, - "rsr" : Ratio of RMSE to standard deviation. - "volume_error": Total volume error over the period. """ - # Gather the model_config dictionary and obj_func string, and other # optional arguments. self.model_config = model_config @@ -176,16 +175,16 @@ def objectivefunction( - simulation, observation : vectors of streamflow used to compute the objective function """ - - obj_fun_val = get_objective_function(evaluation, - simulation, - obj_func=self.obj_func, - take_negative=self.take_negative, - mask=self.mask, - transform=self.transform, - epsilon=self.epsilon, - ) - + obj_fun_val = get_objective_function( + evaluation, + simulation, + obj_func=self.obj_func, + take_negative=self.take_negative, + mask=self.mask, + transform=self.transform, + epsilon=self.epsilon, + ) + return obj_fun_val @@ -215,7 +214,7 @@ def perform_calibration( It will be up to the user to provide the data that the model requires. obj_func : str The objective function used for calibrating. Can be any one of these: - + - "abs_bias" : Absolute value of the "bias" metric - "abs_pbias": Absolute value of the "pbias" metric - "abs_volume_error" : Absolute value of the volume_error metric @@ -231,7 +230,7 @@ def perform_calibration( - "rmse" : Root Mean Square Error - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) - "rsr" : Ratio of RMSE to standard deviation. - + bounds_high : np.array High bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from within these bounds. The size must be equal to the number of parameters to calibrate. @@ -252,20 +251,19 @@ def perform_calibration( caused by zero flow days (1/0 and log(0)). The added perturbation is equal to the mean observed streamflow times this value of epsilon. """ - # Get objective function and algo optimal convregence direction. Necessary # to ensure that the algorithm is optimizing in the correct direction # (maximizing or minimizing). This code determines the required direction # for the objective function and the working direction of the algorithm. of_maximize = get_objfun_minimize_or_maximize(obj_func) algo_maximize = get_optimizer_minimize_or_maximize(algorithm) - + # They are not working in the same direction. Take the negative of the OF. if of_maximize != algo_maximize: take_negative = True else: take_negative = False - + # Set up the spotpy object to prepare the calibration spotpy_setup = spot_setup( model_config, @@ -277,7 +275,7 @@ def perform_calibration( transform=transform, epsilon=epsilon, ) - + # Select an optimization algorithm and parameterize it, then run the # optimization process. if algorithm == "DDS": @@ -311,7 +309,7 @@ def perform_calibration( # Reconvert objective function if required. if take_negative: bestobjf = bestobjf * -1 - + # Update the parameter set to put the best parameters in model_config... model_config.update({"parameters": best_parameters}) @@ -319,4 +317,4 @@ def perform_calibration( Qsim = hydrological_model_selector(model_config) # Return the best parameters, Qsim and best objective function value. - return best_parameters, Qsim, bestobjf \ No newline at end of file + return best_parameters, Qsim, bestobjf diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index 766028ee..a7ad23b3 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -1,64 +1,67 @@ # Created on Tue Dec 12 21:55:25 2023 # @author: Richard Arsenault -""" Objective function package for xhydro, for calibration and model evaluation +"""Objective function package for xhydro, for calibration and model evaluation This package provides a flexible suite of popular objective function metrics in hydrological modelling and hydrological model calibration. The main function 'get_objective_function' returns the value of the desired objective function while allowing users to customize many aspects: - - 1- Select the objective function to run; - 2- Allow providing a mask to remove certain elements from the objective - function calculation (e.g. for odd/even year calibration, or calibration - on high or low flows only, or any custom setup). - 3- Apply a transformation on the flows to modify the behaviour of the - objective function calculation (e.g taking the log, inverse or square - root transform of the flows before computing the objective function). - + + 1- Select the objective function to run; + 2- Allow providing a mask to remove certain elements from the objective + function calculation (e.g. for odd/even year calibration, or calibration + on high or low flows only, or any custom setup). + 3- Apply a transformation on the flows to modify the behaviour of the + objective function calculation (e.g taking the log, inverse or square + root transform of the flows before computing the objective function). + This function also contains some tools and inputs reserved for the calibration toolbox, such as the ability to take the negative of the objective function to maximize instead of minimize a metric according to the needs of the optimizing algorithm. """ +import sys + # Import packages import numpy as np -import sys -def get_objective_function(Qobs, - Qsim, - obj_func='rmse', - take_negative=False, - mask=None, - transform=None, - epsilon=None, - ): - """ - This function is the entrypoint for objective function calculation. More - can be added by adding the function to this file and adding the option in - this function. - + +def get_objective_function( + Qobs, + Qsim, + obj_func="rmse", + take_negative=False, + mask=None, + transform=None, + epsilon=None, +): + """ + Entrypoint function for the objective function calculation. More can be + added by adding the function to this file and adding the option in this + function. + NOTE: All data corresponding to NaN values in the observation set are removed from the calculation. If a mask is passed, it must be the - same size as the Qsim and Qobs vectors. If any NaNs are present in - the Qobs dataset, all corresponding data in the Qobs, Qsim and mask + same size as the Qsim and Qobs vectors. If any NaNs are present in + the Qobs dataset, all corresponding data in the Qobs, Qsim and mask will be removed prior to passing to the processing function. - + Parameters ---------- Qobs: numpy array of size n Vector containing the Observed streamflow to be used in the objective function calculation. It is the target to attain. - + Qsim: numpy array of size n Vector containing the Simulated streamflow as generated by the hydro- - logical model. It is modified by changing parameters and resumulating + logical model. It is modified by changing parameters and resumulating the hydrological model. - + obj_func : string, optional - String representing the objective function to use in the calibration. + String representing the objective function to use in the calibration. Options must be one of the accepted objective functions: - + - "abs_bias" : Absolute value of the "bias" metric - "abs_pbias": Absolute value of the "pbias" metric - "abs_volume_error" : Absolute value of the volume_error metric @@ -77,45 +80,45 @@ def get_objective_function(Qobs, - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) - "rsr" : Ratio of RMSE to standard deviation. - "volume_error": Total volume error over the period. - + The default is 'rmse'. - + take_negative : Boolean used to force the objective function to be multiplied by minus one (-1) such that it is possible to maximize it - if the optimizer is a minimizer and vice-versa. Should always be set + if the optimizer is a minimizer and vice-versa. Should always be set to False unless required by an optimization setup, which is handled internally and transparently to the user. - + The default is False. - + mask : numpy array (vector) of size n, optional - array of 0 or 1 on which the objective function should be applied. + array of 0 or 1 on which the objective function should be applied. Values of 1 indicate that the value is included in the calculation, and values of 0 indicate that the value is excluded and will have no impact on the objective function calculation. This can be useful for specific - optimization strategies such as odd/even year calibration, seasonal - calibration or calibration based on high/low flows. - + optimization strategies such as odd/even year calibration, seasonal + calibration or calibration based on high/low flows. + The default is None, and all data are preserved. - - transform : string indicating the type of transformation required. Can be + + transform : string indicating the type of transformation required. Can be one of the following values: - + - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] - "inv" : Inverse transformation of the flows [1/Q] - + The default value is "None", by which no transformation is performed. - - epsilon: scalar float indicating the perturbation to add to the flow - time series during a transformation to avoid division by zero and + + epsilon: scalar float indicating the perturbation to add to the flow + time series during a transformation to avoid division by zero and logarithmic transformation. The perturbation is equal to: - + perturbation = epsilon * mean(Qobs) - + The default value is 0.01. - - + + NOTE: All data corresponding to NaN values in the observation set are removed from the calculation @@ -124,65 +127,66 @@ def get_objective_function(Qobs, obj_fun_val: scalar float representing the value of the selected objective function (obj_fun). """ - # List of available objective functions - obj_func_list = ["abs_bias", - "abs_pbias", - "abs_volume_error", - "agreement_index", - "bias", - "correlation_coeff", - "kge", - "kge_mod", - "mae", - "mare", - "mse", - "nse", - "pbias", - "r2", - "rmse", - "rrmse", - "rsr", - "volume_error", - ] - + obj_func_list = [ + "abs_bias", + "abs_pbias", + "abs_volume_error", + "agreement_index", + "bias", + "correlation_coeff", + "kge", + "kge_mod", + "mae", + "mare", + "mse", + "nse", + "pbias", + "r2", + "rmse", + "rrmse", + "rsr", + "volume_error", + ] + # Basic error checking if Qobs.shape[0] != Qsim.shape[0]: sys.exit("Observed and Simulated flows are not of the same size.") - + # Check mask size and contents if mask is not None: - # Size - if Qobs.shape[0]!= mask.shape[0]: + if Qobs.shape[0] != mask.shape[0]: sys.exit("Mask is not of the same size as the streamflow data.") - + # All zero or one? - if not np.setdiff1d(np.unique(mask),np.array([0, 1])).size == 0: + if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0: sys.exit("Mask contains values other 0 or 1. Please modify.") # Check that the objective function is in the list of available methods if obj_func not in obj_func_list: - sys.exit("Selected objective function is currently unavailable. " + - "Consider contributing to our project at: " + - "github.com/hydrologie/xhydro") - + sys.exit( + "Selected objective function is currently unavailable. " + + "Consider contributing to our project at: " + + "github.com/hydrologie/xhydro" + ) + # Ensure there are no NaNs in the dataset if mask is not None: mask = mask[~np.isnan(Qobs)] Qsim = Qsim[~np.isnan(Qobs)] Qobs = Qobs[~np.isnan(Qobs)] - + # Apply mask before trasform if mask is not None: - Qsim=Qsim[mask==1] - Qobs=Qobs[mask==1] - mask=mask[mask==1] - + Qsim = Qsim[mask == 1] + Qobs = Qobs[mask == 1] + mask = mask[mask == 1] + # Transform data if needed if transform is not None: Qsim, Qobs = transform_flows(Qsim, Qobs, transform, epsilon) - + # Compute objective function by switching to the correct algorithm match obj_func: case "abs_bias": @@ -221,90 +225,92 @@ def get_objective_function(Qobs, obj_fun_val = rsr(Qsim, Qobs) case "volume_error": obj_fun_val = volume_error(Qsim, Qobs) - - # Take the negative value of the Objective Function to return to the + + # Take the negative value of the Objective Function to return to the # optimizer. if take_negative: obj_fun_val = obj_fun_val * -1 - + print(obj_fun_val) - + return obj_fun_val def get_objfun_minimize_or_maximize(obj_func): """ - This function checks the name of the objective function and returns whether - it should be maximized or minimized. Returns a boolean value, where True - means it should be maximized, and Flase means that it should be minimized. - Objective functions other than those programmed here will raise an error. - + Checks the name of the objective function and returns whether it should be + maximized or minimized. Returns a boolean value, where True means it should + be maximized, and Flase means that it should be minimized. Objective + functions other than those programmed here will raise an error. + Inputs: - obj_func: string containing the label for the desired objective + obj_func: string containing the label for the desired objective function. """ - # Define metrics that need to be maximized: - if obj_func in ["agreement_index", - "correlation_coeff", - "kge", - "kge_mod", - "nse", - "r2" - ]: + if obj_func in [ + "agreement_index", + "correlation_coeff", + "kge", + "kge_mod", + "nse", + "r2", + ]: maximize = True - + # Define the metrics that need to be minimized: - elif obj_func in ["abs_bias", - "abs_pbias", - "abs_volume_error", - "mae", - "mare", - "mse", - "rmse", - "rrmse", - "rsr", - ]: + elif obj_func in [ + "abs_bias", + "abs_pbias", + "abs_volume_error", + "mae", + "mare", + "mse", + "rmse", + "rrmse", + "rsr", + ]: maximize = False - + # Check for the metrics that exist but cannot be used for optimization - elif obj_func in ["bias","pbias","volume_error"]: - sys.exit("The bias, pbias and volume_error metrics cannot be minimized or maximized. \ - Please use the abs_bias, abs_pbias and abs_volume_error instead.") + elif obj_func in ["bias", "pbias", "volume_error"]: + sys.exit( + "The bias, pbias and volume_error metrics cannot be minimized or maximized. \ + Please use the abs_bias, abs_pbias and abs_volume_error instead." + ) else: sys.exit("The objective function is unknown.") - + return maximize - - + + def get_optimizer_minimize_or_maximize(algorithm): """ - This function finds the direction in which the optimizer searches. Some - optimizers try to maximize the objective function value, and others try to - minimize it. Since our objective functions include some that need to be - maximized and others minimized, it is imperative to ensure that the - optimizer/objective-function pair work in tandem. - + Finds the direction in which the optimizer searches. Some optimizers try to + maximize the objective function value, and others try to minimize it. Since + our objective functions include some that need to be maximized and others + minimized, it is imperative to ensure that the optimizer/objective-function + pair work in tandem. + Inputs: algorithm: string containing the direction of the optimizer search """ - # Define metrics that need to be maximized: - if algorithm in ["DDS", - ]: - + if algorithm in [ + "DDS", + ]: maximize = True - + # Define the metrics that need to be minimized: - elif algorithm in ["SCEUA", - ]: - + elif algorithm in [ + "SCEUA", + ]: maximize = False - + # Any other optimizer at this date else: sys.exit("The optimization algorithm is unknown.") - + return maximize @@ -318,40 +324,38 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) Qobs : Observed streamflow vector (numpy array) - transform : string indicating the type of transformation required. Can be + transform : string indicating the type of transformation required. Can be one of the following values: - + - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] - "inv" : Inverse transformation of the flows [1/Q] - + The default value is "None", by which no transformation is performed. - - epsilon: scalar float indicating the perturbation to add to the flow - time series during a transformation to avoid division by zero and + + epsilon: scalar float indicating the perturbation to add to the flow + time series during a transformation to avoid division by zero and logarithmic transformation. The perturbation is equal to: - + perturbation = epsilon * mean(Qobs) - + The default value is 0.01. - + Returns ------- Qsim, Qobs transformed according to the transformation function requested by the user in "transform". Qsim and Qobs are numpy arrays. - - """ + """ # Quick check if transform is not None: - if transform not in ['log','inv','sqrt']: + if transform not in ["log", "inv", "sqrt"]: sys.exit("Flow transformation method not recognized.") - + # Transform the flow series if required if transform == "log": # log transformation epsilon = epsilon * np.nanmean(Qobs) @@ -372,8 +376,11 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): BEGIN OBJECTIVE FUNCTIONS DEFINITIONS """ + def abs_bias(Qsim, Qobs): """ + absolute bias metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -381,12 +388,12 @@ def abs_bias(Qsim, Qobs): Returns ------- - abs_bias: positive scalar float representing the absolute value of the - "bias" metric. This metric is useful when calibrating on the bias, because - bias should aim to be 0 but can take large positive or negative values. - Taking the absolute value of the bias will let the optimizer minimize + abs_bias: positive scalar float representing the absolute value of the + "bias" metric. This metric is useful when calibrating on the bias, because + bias should aim to be 0 but can take large positive or negative values. + Taking the absolute value of the bias will let the optimizer minimize the value to zero. - + The abs_bias should be MINIMIZED. """ return np.abs(bias(Qsim, Qobs)) @@ -394,6 +401,8 @@ def abs_bias(Qsim, Qobs): def abs_pbias(Qsim, Qobs): """ + absolute pbias metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -401,12 +410,12 @@ def abs_pbias(Qsim, Qobs): Returns ------- - abs_pbias: positive scalar float representing the absolute value of the - "pbias" metric. This metric is useful when calibrating on the pbias, - because pbias should aim to be 0 but can take large positive or negative - values. Taking the absolute value of the pbias will let the optimizer + abs_pbias: positive scalar float representing the absolute value of the + "pbias" metric. This metric is useful when calibrating on the pbias, + because pbias should aim to be 0 but can take large positive or negative + values. Taking the absolute value of the pbias will let the optimizer minimize the value to zero. - + The abs_pbias should be MINIMIZED. """ return np.abs(bias(Qsim, Qobs)) @@ -414,6 +423,8 @@ def abs_pbias(Qsim, Qobs): def abs_volume_error(Qsim, Qobs): """ + absolute value of the volume error metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -421,12 +432,12 @@ def abs_volume_error(Qsim, Qobs): Returns ------- - abs_volume_error: positive scalar float representing the absolute value of - the "volume_error" metric. This metric is useful when calibrating on the - volume_error, because volume_error should aim to be 0 but can take large + abs_volume_error: positive scalar float representing the absolute value of + the "volume_error" metric. This metric is useful when calibrating on the + volume_error, because volume_error should aim to be 0 but can take large positive or negative values. Taking the absolute value of the volume_error will let the optimizer minimize the value to zero. - + The abs_volume_error should be MINIMIZED. """ return np.abs(volume_error(Qsim, Qobs)) @@ -434,6 +445,8 @@ def abs_volume_error(Qsim, Qobs): def agreement_index(Qsim, Qobs): """ + index of agreement metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -441,22 +454,23 @@ def agreement_index(Qsim, Qobs): Returns ------- - agreement_index: scalar float representing the agreement index of Willmott + agreement_index: scalar float representing the agreement index of Willmott (1981). Varies between 0 and 1. - + The Agreement index should be MAXIMIZED. """ - - #Decompose into clearer chunks - a = np.sum((Qobs-Qsim)**2) - b = np.abs(Qsim - np.mean(Qobs)) + np.abs(Qobs-np.mean(Qobs)) + # Decompose into clearer chunks + a = np.sum((Qobs - Qsim) ** 2) + b = np.abs(Qsim - np.mean(Qobs)) + np.abs(Qobs - np.mean(Qobs)) c = np.sum(b**2) - - return (1 - (a/c)) + + return 1 - (a / c) def bias(Qsim, Qobs): """ + bias metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -469,15 +483,17 @@ def bias(Qsim, Qobs): flows. This interpretation uses the definition that a positive bias value means that the simulation overestimates the true value (as opposed to many other sources on bias calculations that use the contrary interpretation). - + BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE. """ return np.mean(Qsim - Qobs) - + def correlation_coeff(Qsim, Qobs): """ + correlation coefficient metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -485,15 +501,17 @@ def correlation_coeff(Qsim, Qobs): Returns ------- - correlation_coeff: scalar float representing the correlation coefficient. - + correlation_coeff: scalar float representing the correlation coefficient. + The correlation_coeff should be MAXIMIZED. """ - return np.corrcoef(Qobs, Qsim)[0,1] - + return np.corrcoef(Qobs, Qsim)[0, 1] + def kge(Qsim, Qobs): """ + Kling-Gupta efficiency metric (2009 version) + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -501,9 +519,9 @@ def kge(Qsim, Qobs): Returns ------- - kge: scalar float representing the Kling-Gupta Efficiency (KGE) metric of + kge: scalar float representing the Kling-Gupta Efficiency (KGE) metric of 2009. It can take values from -inf to 1 (best case). - + The KGE should be MAXIMIZED. """ # This pops up a lot, precalculate. @@ -512,13 +530,11 @@ def kge(Qsim, Qobs): # Calculate the components of KGE r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) - r_den = np.sqrt( - np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2) - ) + r_den = np.sqrt(np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2)) r = r_num / r_den a = np.std(Qsim) / np.std(Qobs) b = np.sum(Qsim) / np.sum(Qobs) - + # Calculate the KGE kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2) @@ -527,6 +543,8 @@ def kge(Qsim, Qobs): def kge_mod(Qsim, Qobs): """ + Kling-Gupta efficiency metric (2012 version) + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -534,25 +552,22 @@ def kge_mod(Qsim, Qobs): Returns ------- - kge_mod: scalar float representing the modified Kling-Gupta Efficiency + kge_mod: scalar float representing the modified Kling-Gupta Efficiency (KGE) metric of 2012. It can take values from -inf to 1 (best case). - + The kge_mod should be MAXIMIZED. """ - # These pop up a lot, precalculate Qsim_mean = np.mean(Qsim) Qobs_mean = np.mean(Qobs) - + # Calc KGE components r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) - r_den = np.sqrt( - np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2) - ) + r_den = np.sqrt(np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2)) r = r_num / r_den - g = ((np.std(Qsim) / Qsim_mean) / (np.std(Qobs) / Qobs_mean)) - b = (np.mean(Qsim) / np.mean(Qobs)) - + g = (np.std(Qsim) / Qsim_mean) / (np.std(Qobs) / Qobs_mean) + b = np.mean(Qsim) / np.mean(Qobs) + # Calc the modified KGE metric kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2) @@ -561,6 +576,8 @@ def kge_mod(Qsim, Qobs): def mae(Qsim, Qobs): """ + mean absolute error metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -569,16 +586,18 @@ def mae(Qsim, Qobs): Returns ------- mae: scalar float representing the Mean Absolute Error. It can be - interpreted as the average error (absolute) between observations and + interpreted as the average error (absolute) between observations and simulations for any time step. - + The mae should be MINIMIZED. """ - return np.mean(np.abs(Qsim-Qobs)) + return np.mean(np.abs(Qsim - Qobs)) + - def mare(Qsim, Qobs): """ + mean absolute relative error metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -586,18 +605,19 @@ def mare(Qsim, Qobs): Returns ------- - mare: scalar float representing the Mean Absolute Relative Error. For + mare: scalar float representing the Mean Absolute Relative Error. For streamflow, where Qobs is always zero or positive, the MARE is always positive. - + The mare should be MINIMIZED. """ - - return (np.sum(np.abs(Qobs - Qsim)) / np.sum(Qobs)) + return np.sum(np.abs(Qobs - Qsim)) / np.sum(Qobs) def mse(Qsim, Qobs): """ + mean square error metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -608,14 +628,16 @@ def mse(Qsim, Qobs): mse: scalar float representing the Mean Square Error. It is the sum of squared errors for each day divided by the total number of days. Units are thus squared units, and the best possible value is 0. - + The mse should be MINIMIZED. """ - return np.mean((Qobs-Qsim)**2) - - + return np.mean((Qobs - Qsim) ** 2) + + def nse(Qsim, Qobs): """ + Nash-Sutcliffe efficiency metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -625,19 +647,20 @@ def nse(Qsim, Qobs): ------- nse: scalar float representing the Nash-Sutcliffe Efficiency (NSE) metric. It can take values from -inf to 1, with 0 being as good as using the mean - observed flow as the estimator. - + observed flow as the estimator. + The nse should be MAXIMIZED. """ - num = np.sum((Qobs - Qsim) ** 2) den = np.sum((Qobs - np.mean(Qobs)) ** 2) - - return (1 - (num / den)) + + return 1 - (num / den) def pbias(Qsim, Qobs): """ + percent bias metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -645,22 +668,23 @@ def pbias(Qsim, Qobs): Returns ------- - pbias: scalar float representing the Percent bias. Can be negative or - positive and gives the average relative error between the observed and - simulated flows. This interpretation uses the definition that a positive - bias value means that the simulation overestimates the true value (as - opposed to many other sources on bias calculations that use the contrary + pbias: scalar float representing the Percent bias. Can be negative or + positive and gives the average relative error between the observed and + simulated flows. This interpretation uses the definition that a positive + bias value means that the simulation overestimates the true value (as + opposed to many other sources on bias calculations that use the contrary interpretation). - + PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE. """ - - return (np.sum(Qsim - Qobs)/ np.sum(Qobs)) * 100 + return (np.sum(Qsim - Qobs) / np.sum(Qobs)) * 100 def r2(Qsim, Qobs): """ + r-squred metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -669,15 +693,17 @@ def r2(Qsim, Qobs): Returns ------- r2: scalar float representing the r-squared (R2) metric equal to the square - of the correlation coefficient. - + of the correlation coefficient. + The r2 should be MAXIMIZED. """ return correlation_coeff(Qsim, Qobs) ** 2 - + def rmse(Qsim, Qobs): """ + root mean square error metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -685,18 +711,19 @@ def rmse(Qsim, Qobs): Returns ------- - rmse: scalar float representing the Root Mean Square Error. Units are the - same as the timeseries data (ex. m3/s). It can take zero or positive + rmse: scalar float representing the Root Mean Square Error. Units are the + same as the timeseries data (ex. m3/s). It can take zero or positive values. - + The rmse should be MINIMIZED. """ - return np.sqrt(np.mean((Qobs - Qsim) ** 2)) def rrmse(Qsim, Qobs): """ + relative root mean square error (ratio of rmse to mean) metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -704,11 +731,11 @@ def rrmse(Qsim, Qobs): Returns ------- - rrmse: scalar float representing the ratio of the RMSE to the mean of the - observations. It allows scaling RMSE values to compare results between + rrmse: scalar float representing the ratio of the RMSE to the mean of the + observations. It allows scaling RMSE values to compare results between time series of different magnitudes (ex. flows from small and large watersheds). Also known as the CVRMSE. - + The rrmse should be MINIMIZED. """ return rmse(Qsim, Qobs) / np.mean(Qobs) @@ -716,6 +743,8 @@ def rrmse(Qsim, Qobs): def rsr(Qsim, Qobs): """ + ratio of root mean square error to standard deviation metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -724,9 +753,9 @@ def rsr(Qsim, Qobs): Returns ------- rsr: scalar float representing the Root Mean Square Error (RMSE) divided by - the standard deviation of the observations. Also known as the "Ratio of the + the standard deviation of the observations. Also known as the "Ratio of the Root Mean Square Error to the Standard Deviation of Observations". - + The rsr should be MINIMIZED. """ return rmse(Qobs, Qsim) / np.std(Qobs) @@ -734,6 +763,8 @@ def rsr(Qsim, Qobs): def volume_error(Qsim, Qobs): """ + volume error metric + Parameters ---------- Qsim : Simulated streamflow vector (numpy array) @@ -745,18 +776,12 @@ def volume_error(Qsim, Qobs): over the entire period. Expressed in terms of the same units as input data, so for flow rates it is important to multiply by the duration of the time- step to obtain actual volumes. - + The volume_error should be MINIMIZED. """ - return np.sum(Qsim-Qobs) / np.sum(Qobs) - + return np.sum(Qsim - Qobs) / np.sum(Qobs) """ ADD OBJECTIVE FUNCTIONS HERE """ - - - - - \ No newline at end of file From 58a43718ec8358b6b9f120f683e12d992875d371 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Sat, 23 Dec 2023 09:49:20 -0500 Subject: [PATCH 13/29] fixed match case compatibility --- xhydro/modelling/obj_funcs.py | 42 ++++------------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index 74c1c3d3..1cf7114c 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -187,44 +187,10 @@ def get_objective_function( if transform is not None: Qsim, Qobs = transform_flows(Qsim, Qobs, transform, epsilon) - # Compute objective function by switching to the correct algorithm - match obj_func: - case "abs_bias": - obj_fun_val = abs_bias(Qsim, Qobs) - case "abs_pbias": - obj_fun_val = abs_pbias(Qsim, Qobs) - case "abs_volume_error": - obj_fun_val = abs_volume_error(Qsim, Qobs) - case "agreement_index": - obj_fun_val = agreement_index(Qsim, Qobs) - case "bias": - obj_fun_val = bias(Qsim, Qobs) - case "correlation_coeff": - obj_fun_val = correlation_coeff(Qsim, Qobs) - case "kge": - obj_fun_val = kge(Qsim, Qobs) - case "kge_mod": - obj_fun_val = kge_mod(Qsim, Qobs) - case "mae": - obj_fun_val = mae(Qsim, Qobs) - case "mare": - obj_fun_val = mare(Qsim, Qobs) - case "mse": - obj_fun_val = mse(Qsim, Qobs) - case "nse": - obj_fun_val = nse(Qsim, Qobs) - case "pbias": - obj_fun_val = pbias(Qsim, Qobs) - case "r2": - obj_fun_val = r2(Qsim, Qobs) - case "rmse": - obj_fun_val = rmse(Qsim, Qobs) - case "rrmse": - obj_fun_val = rrmse(Qsim, Qobs) - case "rsr": - obj_fun_val = rsr(Qsim, Qobs) - case "volume_error": - obj_fun_val = volume_error(Qsim, Qobs) + # Compute objective function by switching to the correct algorithm. Ensure + # that the function name is the same as the obj_func tag or this will fail. + function_call = globals()[obj_func] + obj_fun_val = function_call(Qsim, Qobs) # Take the negative value of the Objective Function to return to the # optimizer. From fbe6b81bc7169e7b59186a5f43d36a28925488f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Dec 2023 14:53:04 +0000 Subject: [PATCH 14/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 311bb6bf..5c9cdf33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,7 +123,6 @@ filename = ".cruft.json" search = "\"version\": \"{current_version}\"" replace = "\"version\": \"{new_version}\"" - [tool.coverage.run] relative_files = true include = ["xhydro/*"] From 6460a88e5cb2822356e8dce11574de9b68cd54d0 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 19:06:53 -0500 Subject: [PATCH 15/29] added tests for objfuncs and changed calling method --- tests/test_objective_functions.py | 68 +++++++++++++++++++++++++++++++ xhydro/modelling/obj_funcs.py | 48 +++++++++++----------- 2 files changed, 91 insertions(+), 25 deletions(-) create mode 100644 tests/test_objective_functions.py diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py new file mode 100644 index 00000000..7bbb9ee4 --- /dev/null +++ b/tests/test_objective_functions.py @@ -0,0 +1,68 @@ +"""Test suite for the objective functions in obj_funcs.py.""" + +import numpy as np + +from xhydro.modelling.obj_funcs import get_objective_function + + +def test_obj_funcs(): + """ + Series of tests to test all objective functions with fast test data + """ + Qobs = np.array([120, 130, 140, 150, 160, 170]) + Qsim = np.array([120, 125, 145, 140, 140, 180]) + + # Test that the objective function is calculated correctly + objfun = get_objective_function(Qobs, Qsim, obj_func="abs_bias") + np.testing.assert_array_almost_equal(objfun, 3.3333333333333335, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="abs_pbias") + np.testing.assert_array_almost_equal(objfun, 2.2988505747126435, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="abs_volume_error") + np.testing.assert_array_almost_equal(objfun, 0.022988505747126436, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="agreement_index") + np.testing.assert_array_almost_equal(objfun, 0.9171974522292994, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="bias") + np.testing.assert_array_almost_equal(objfun, -3.3333333333333335, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="correlation_coeff") + np.testing.assert_array_almost_equal(objfun, 0.8599102447336393, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="kge") + np.testing.assert_array_almost_equal(objfun, 0.8077187696552522, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="kge_mod") + np.testing.assert_array_almost_equal(objfun, 0.7888769531580001, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="mae") + np.testing.assert_array_almost_equal(objfun, 8.333333333333334, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="mare") + np.testing.assert_array_almost_equal(objfun, 0.05747126436781609, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="mse") + np.testing.assert_array_almost_equal(objfun, 108.33333333333333, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="nse") + np.testing.assert_array_almost_equal(objfun, 0.6285714285714286, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="pbias") + np.testing.assert_array_almost_equal(objfun, -2.2988505747126435, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="r2") + np.testing.assert_array_almost_equal(objfun, 0.7394456289978675, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="rmse") + np.testing.assert_array_almost_equal(objfun, 10.408329997330663, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="rrmse") + np.testing.assert_array_almost_equal(objfun, 0.07178158618848733, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="rsr") + np.testing.assert_array_almost_equal(objfun, 0.6094494002200439, 8) + + objfun = get_objective_function(Qobs, Qsim, obj_func="volume_error") + np.testing.assert_array_almost_equal(objfun, -0.022988505747126436, 8) diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index 1cf7114c..ca51a33a 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -128,26 +128,26 @@ def get_objective_function( function (obj_fun). """ # List of available objective functions - obj_func_list = [ - "abs_bias", - "abs_pbias", - "abs_volume_error", - "agreement_index", - "bias", - "correlation_coeff", - "kge", - "kge_mod", - "mae", - "mare", - "mse", - "nse", - "pbias", - "r2", - "rmse", - "rrmse", - "rsr", - "volume_error", - ] + obj_func_dict = { + "abs_bias": abs_bias, + "abs_pbias": abs_pbias, + "abs_volume_error": abs_volume_error, + "agreement_index": agreement_index, + "bias": bias, + "correlation_coeff": correlation_coeff, + "kge": kge, + "kge_mod": kge_mod, + "mae": mae, + "mare": mare, + "mse": mse, + "nse": nse, + "pbias": pbias, + "r2": r2, + "rmse": rmse, + "rrmse": rrmse, + "rsr": rsr, + "volume_error": volume_error, + } # Basic error checking if Qobs.shape[0] != Qsim.shape[0]: @@ -164,7 +164,7 @@ def get_objective_function( sys.exit("Mask contains values other 0 or 1. Please modify.") # Check that the objective function is in the list of available methods - if obj_func not in obj_func_list: + if obj_func not in obj_func_dict: sys.exit( "Selected objective function is currently unavailable. " + "Consider contributing to our project at: " @@ -189,7 +189,7 @@ def get_objective_function( # Compute objective function by switching to the correct algorithm. Ensure # that the function name is the same as the obj_func tag or this will fail. - function_call = globals()[obj_func] + function_call = obj_func_dict[obj_func] obj_fun_val = function_call(Qsim, Qobs) # Take the negative value of the Objective Function to return to the @@ -197,8 +197,6 @@ def get_objective_function( if take_negative: obj_fun_val = obj_fun_val * -1 - print(obj_fun_val) - return obj_fun_val @@ -384,7 +382,7 @@ def abs_pbias(Qsim, Qobs): The abs_pbias should be MINIMIZED. """ - return np.abs(bias(Qsim, Qobs)) + return np.abs(pbias(Qsim, Qobs)) def abs_volume_error(Qsim, Qobs): From 2229e2d5aa381bcc360cade4bc13bbf9514fc22a Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 19:10:44 -0500 Subject: [PATCH 16/29] fixed linting issue for CI --- tests/test_objective_functions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py index 7bbb9ee4..9e89176e 100644 --- a/tests/test_objective_functions.py +++ b/tests/test_objective_functions.py @@ -6,9 +6,7 @@ def test_obj_funcs(): - """ - Series of tests to test all objective functions with fast test data - """ + """Series of tests to test all objective functions with fast test data""" Qobs = np.array([120, 130, 140, 150, 160, 170]) Qsim = np.array([120, 125, 145, 140, 140, 180]) From 86119ff5f2589d7bac3263da894e4218b28cb982 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 21:08:43 -0500 Subject: [PATCH 17/29] added tests for coveralls --- tests/test_calibration.py | 143 ++++++++++++++++++++++++++- tests/test_hydrological_modelling.py | 35 +++++++ 2 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 tests/test_hydrological_modelling.py diff --git a/tests/test_calibration.py b/tests/test_calibration.py index f77596b8..79c996a5 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -1,16 +1,17 @@ """Test suite for the calibration algorithm in calibration.py.""" # Also tests the dummy model implementation. - import numpy as np +import pytest from xhydro.modelling.calibration import perform_calibration from xhydro.modelling.hydrological_modelling import dummy_model -from xhydro.modelling.obj_funcs import get_objective_function +from xhydro.modelling.obj_funcs import get_objective_function, transform_flows def test_spotpy_calibration(): - """Make sure the calibration works for a few test cases.""" + """Make sure the calibration works under possible test cases.""" + bounds_low = np.array([0, 0, 0]) bounds_high = np.array([10, 10, 10]) @@ -51,3 +52,139 @@ def test_spotpy_calibration(): model_config["parameters"] = [5, 5, 5] Qsim = dummy_model(model_config) assert Qsim[3] == 3500.00 + + # Also test to ensure SCEUA and take_minimize is required. + best_parameters_sceua, best_simulation, best_objfun = perform_calibration( + model_config, + "mae", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=10, + algorithm="SCEUA", + ) + + assert len(best_parameters_sceua) == len(bounds_high) + + # Also test to ensure SCEUA and take_minimize is required. + best_parameters_negative, best_simulation, best_objfun = perform_calibration( + model_config, + "nse", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=10, + algorithm="SCEUA", + ) + assert len(best_parameters_negative) == len(bounds_high) + + # Test to see if transform works + best_parameters_transform, best_simulation, best_objfun = perform_calibration( + model_config, + "nse", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=10, + algorithm="SCEUA", + transform="inv", + epsilon=0.01, + ) + assert len(best_parameters_transform) == len(bounds_high) + + +def test_calibration_failures(): + """Test for the calibration algorithm failure modes""" + bounds_low = np.array([0, 0, 0]) + bounds_high = np.array([10, 10, 10]) + model_config = { + "precip": np.array([10, 11, 12, 13, 14, 15]), + "temperature": np.array([10, 3, -5, 1, 15, 0]), + "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "drainage_area": np.array([10]), + "model_name": "Dummy", + } + + # Test Qobs different length than Qsim + with pytest.raises(SystemExit) as pytest_wrapped_e: + best_parameters_negative, best_simulation, best_objfun = perform_calibration( + model_config.update(Qobs=np.array([100, 100, 100])), + "nse", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=1000, + algorithm="OTHER", + ) + assert pytest_wrapped_e.type == SystemExit + + # Test mask not 1 or 0 + mask = np.array([0, 0, 0, 0.5, 1, 1]) + best_parameters_negative, best_simulation, best_objfun = perform_calibration( + model_config, + "nse", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=1000, + algorithm="DDS", + mask=mask, + ) + assert pytest_wrapped_e.type == SystemExit + + # test not same length in mask + mask = np.array([0, 0, 0, 1, 1]) + best_parameters_negative, best_simulation, best_objfun = perform_calibration( + model_config, + "nse", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=1000, + algorithm="DDS", + mask=mask, + ) + assert pytest_wrapped_e.type == SystemExit + + # Test objective function fail is caught + mask = np.array([0, 0, 0, 0, 1, 1]) + best_parameters_negative, best_simulation, best_objfun = perform_calibration( + model_config, + "nse_fake", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=1000, + algorithm="DDS", + mask=mask, + ) + assert pytest_wrapped_e.type == SystemExit + + # Test objective function that cannot be minimized + best_parameters_negative, best_simulation, best_objfun = perform_calibration( + model_config, + "bias", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=1000, + algorithm="DDS", + mask=mask, + ) + assert pytest_wrapped_e.type == SystemExit + + +def test_transform(): + """Test the transformer""" + + Qsim = np.array([10, 10, 10]) + Qobs = np.array([5, 5, 5]) + + Qsim_r, Qobs_r = transform_flows(Qsim, Qobs, transform="inv", epsilon=0.01) + np.testing.assert_array_almost_equal(Qsim_r[1], 0.0995024, 6) + np.testing.assert_array_almost_equal(Qobs_r[1], 0.1980198, 6) + + Qsim_r, Qobs_r = transform_flows(Qsim, Qobs, transform="sqrt") + np.testing.assert_array_almost_equal(Qsim_r[1], 3.1622776, 6) + np.testing.assert_array_almost_equal(Qobs_r[1], 2.2360679, 6) + + Qsim_r, Qobs_r = transform_flows(Qsim, Qobs, transform="log", epsilon=0.01) + np.testing.assert_array_almost_equal(Qsim_r[1], 2.3075726, 6) + np.testing.assert_array_almost_equal(Qobs_r[1], 1.6193882, 6) + + # Test Qobs different length than Qsim + with pytest.raises(SystemExit) as pytest_wrapped_e: + Qobs_r, Qobs_r = transform_flows(Qsim, Qobs, transform="a", epsilon=0.01) + assert pytest_wrapped_e.type == SystemExit diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py new file mode 100644 index 00000000..2b33214d --- /dev/null +++ b/tests/test_hydrological_modelling.py @@ -0,0 +1,35 @@ +"""Test suite for hydrological modelling in hydrological_modelling.py""" +import unittest + +import numpy as np +import pytest + +from xhydro.modelling.hydrological_modelling import hydrological_model_selector + + +def test_hydrological_modelling(): + """Test the hydrological models as they become online""" + + # Test the dummy model + model_config = { + "precip": np.array([10, 11, 12, 13, 14, 15]), + "temperature": np.array([10, 3, -5, 1, 15, 0]), + "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "drainage_area": np.array([10]), + "model_name": "Dummy", + "parameters": np.array([5, 5, 5]), + } + Qsim = hydrological_model_selector(model_config) + assert Qsim[3] == 3500.00 + + # Test the exceptions for new models + model_config.update(model_name="ADD_OTHER_HERE") + Qsim = hydrological_model_selector(model_config) + assert Qsim == 0 + + +@pytest.mark.xfail(raises=NotImplementedError) +def import_unknown_model(): + model_config = {"model_name": "fake_model"} + Qsim = hydrological_model_selector(model_config) + assert Qsim == None From da6b3f3362f4ff9e0e383fcf7740157f210f3f10 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 21:12:29 -0500 Subject: [PATCH 18/29] fixed linting not caught by pre-commit --- tests/test_calibration.py | 4 +--- tests/test_hydrological_modelling.py | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 79c996a5..e3aa2b6c 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -11,7 +11,6 @@ def test_spotpy_calibration(): """Make sure the calibration works under possible test cases.""" - bounds_low = np.array([0, 0, 0]) bounds_high = np.array([10, 10, 10]) @@ -167,8 +166,7 @@ def test_calibration_failures(): def test_transform(): - """Test the transformer""" - + """Test the flow transformer""" Qsim = np.array([10, 10, 10]) Qobs = np.array([5, 5, 5]) diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py index 2b33214d..9d819e64 100644 --- a/tests/test_hydrological_modelling.py +++ b/tests/test_hydrological_modelling.py @@ -1,6 +1,4 @@ """Test suite for hydrological modelling in hydrological_modelling.py""" -import unittest - import numpy as np import pytest @@ -9,7 +7,6 @@ def test_hydrological_modelling(): """Test the hydrological models as they become online""" - # Test the dummy model model_config = { "precip": np.array([10, 11, 12, 13, 14, 15]), @@ -32,4 +29,4 @@ def test_hydrological_modelling(): def import_unknown_model(): model_config = {"model_name": "fake_model"} Qsim = hydrological_model_selector(model_config) - assert Qsim == None + assert Qsim is None From 2860fbe4501ee1d382b3db60bcfa72f8a9fd33a9 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 21:34:40 -0500 Subject: [PATCH 19/29] added more coveralls coverage --- tests/test_hydrological_modelling.py | 9 ++-- tests/test_objective_functions.py | 76 +++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py index 9d819e64..f4928937 100644 --- a/tests/test_hydrological_modelling.py +++ b/tests/test_hydrological_modelling.py @@ -25,8 +25,9 @@ def test_hydrological_modelling(): assert Qsim == 0 -@pytest.mark.xfail(raises=NotImplementedError) def import_unknown_model(): - model_config = {"model_name": "fake_model"} - Qsim = hydrological_model_selector(model_config) - assert Qsim is None + """Test for unknown model""" + with pytest.raises(NotImplementedError): + model_config = {"model_name": "fake_model"} + Qsim = hydrological_model_selector(model_config) + assert Qsim is None diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py index 9e89176e..86252f02 100644 --- a/tests/test_objective_functions.py +++ b/tests/test_objective_functions.py @@ -1,8 +1,11 @@ """Test suite for the objective functions in obj_funcs.py.""" - import numpy as np +import pytest -from xhydro.modelling.obj_funcs import get_objective_function +from xhydro.modelling.obj_funcs import ( + get_objective_function, + get_objfun_minimize_or_maximize, +) def test_obj_funcs(): @@ -64,3 +67,72 @@ def test_obj_funcs(): objfun = get_objective_function(Qobs, Qsim, obj_func="volume_error") np.testing.assert_array_almost_equal(objfun, -0.022988505747126436, 8) + + +def test_objective_function_failure_modes(): + """Test for the objective function calculation failure modes""" + Qobs = np.array([100, 100, 100]) + Qsim = np.array([110, 110, 90]) + + mask = np.array([0, 1, 1]) + + # Test Qobs different length than Qsim + with pytest.raises(SystemExit) as pytest_wrapped_e: + objfun = get_objective_function( + np.array([100, 100]), + Qsim, + obj_func="mae", + take_negative=True, + mask=None, + transform=None, + epsilon=None, + ) + assert pytest_wrapped_e.type == SystemExit + + # Test for mask length + objfun = get_objective_function( + Qobs, + Qsim, + obj_func="mae", + take_negative=True, + mask=np.array([0, 1, 0, 0]), + transform=None, + epsilon=None, + ) + assert pytest_wrapped_e.type == SystemExit + + # Test for obj_func does not exist + objfun = get_objective_function( + Qobs, + Qsim, + obj_func="fake_mae", + take_negative=True, + mask=mask, + ) + assert pytest_wrapped_e.type == SystemExit + + # Test for mask is not 0 and 1 + objfun = get_objective_function( + Qobs, + Qsim, + obj_func="mae", + take_negative=True, + mask=np.array([0, 0.5, 1]), + ) + assert pytest_wrapped_e.type == SystemExit + assert objfun is None + + # Test for maximize_minimize objective func for unbounded metrics. + maximize = get_objfun_minimize_or_maximize(obj_func="bias") + assert pytest_wrapped_e.type == SystemExit + maximize = get_objfun_minimize_or_maximize(obj_func="pbias") + assert pytest_wrapped_e.type == SystemExit + maximize = get_objfun_minimize_or_maximize(obj_func="volume_error") + assert pytest_wrapped_e.type == SystemExit + + # Test for unknown objective func + maximize = get_objective_function(obj_func="bias_fake") + assert pytest_wrapped_e.type == SystemExit + + assert objfun is None + assert maximize is None From ccb9e287d480dceb100cd38bfb9a2b28a7757dc3 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 22:02:48 -0500 Subject: [PATCH 20/29] refactored tests to remove duplicates and split raise error tests --- tests/test_calibration.py | 76 ------------------- tests/test_hydrological_modelling.py | 8 +- tests/test_objective_functions.py | 109 ++++++++++++++++----------- 3 files changed, 69 insertions(+), 124 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index e3aa2b6c..0e0335b1 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -89,82 +89,6 @@ def test_spotpy_calibration(): assert len(best_parameters_transform) == len(bounds_high) -def test_calibration_failures(): - """Test for the calibration algorithm failure modes""" - bounds_low = np.array([0, 0, 0]) - bounds_high = np.array([10, 10, 10]) - model_config = { - "precip": np.array([10, 11, 12, 13, 14, 15]), - "temperature": np.array([10, 3, -5, 1, 15, 0]), - "Qobs": np.array([120, 130, 140, 150, 160, 170]), - "drainage_area": np.array([10]), - "model_name": "Dummy", - } - - # Test Qobs different length than Qsim - with pytest.raises(SystemExit) as pytest_wrapped_e: - best_parameters_negative, best_simulation, best_objfun = perform_calibration( - model_config.update(Qobs=np.array([100, 100, 100])), - "nse", - bounds_low=bounds_low, - bounds_high=bounds_high, - evaluations=1000, - algorithm="OTHER", - ) - assert pytest_wrapped_e.type == SystemExit - - # Test mask not 1 or 0 - mask = np.array([0, 0, 0, 0.5, 1, 1]) - best_parameters_negative, best_simulation, best_objfun = perform_calibration( - model_config, - "nse", - bounds_low=bounds_low, - bounds_high=bounds_high, - evaluations=1000, - algorithm="DDS", - mask=mask, - ) - assert pytest_wrapped_e.type == SystemExit - - # test not same length in mask - mask = np.array([0, 0, 0, 1, 1]) - best_parameters_negative, best_simulation, best_objfun = perform_calibration( - model_config, - "nse", - bounds_low=bounds_low, - bounds_high=bounds_high, - evaluations=1000, - algorithm="DDS", - mask=mask, - ) - assert pytest_wrapped_e.type == SystemExit - - # Test objective function fail is caught - mask = np.array([0, 0, 0, 0, 1, 1]) - best_parameters_negative, best_simulation, best_objfun = perform_calibration( - model_config, - "nse_fake", - bounds_low=bounds_low, - bounds_high=bounds_high, - evaluations=1000, - algorithm="DDS", - mask=mask, - ) - assert pytest_wrapped_e.type == SystemExit - - # Test objective function that cannot be minimized - best_parameters_negative, best_simulation, best_objfun = perform_calibration( - model_config, - "bias", - bounds_low=bounds_low, - bounds_high=bounds_high, - evaluations=1000, - algorithm="DDS", - mask=mask, - ) - assert pytest_wrapped_e.type == SystemExit - - def test_transform(): """Test the flow transformer""" Qsim = np.array([10, 10, 10]) diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py index f4928937..9f81751a 100644 --- a/tests/test_hydrological_modelling.py +++ b/tests/test_hydrological_modelling.py @@ -25,9 +25,9 @@ def test_hydrological_modelling(): assert Qsim == 0 -def import_unknown_model(): +def test_import_unknown_model(): """Test for unknown model""" - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError) as pytest_wrapped_e: model_config = {"model_name": "fake_model"} - Qsim = hydrological_model_selector(model_config) - assert Qsim is None + _ = hydrological_model_selector(model_config) + assert pytest_wrapped_e.type == NotImplementedError diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py index 86252f02..f1d470f0 100644 --- a/tests/test_objective_functions.py +++ b/tests/test_objective_functions.py @@ -69,70 +69,91 @@ def test_obj_funcs(): np.testing.assert_array_almost_equal(objfun, -0.022988505747126436, 8) -def test_objective_function_failure_modes(): - """Test for the objective function calculation failure modes""" - Qobs = np.array([100, 100, 100]) - Qsim = np.array([110, 110, 90]) - - mask = np.array([0, 1, 1]) - - # Test Qobs different length than Qsim +def test_objective_function_failure_data_length(): + """Test for the objective function calculation failure mode: + Qobs and Qsim length are different + """ with pytest.raises(SystemExit) as pytest_wrapped_e: - objfun = get_objective_function( - np.array([100, 100]), - Qsim, + _ = get_objective_function( + np.array([100, 110]), + np.array([100, 110, 120]), obj_func="mae", - take_negative=True, - mask=None, - transform=None, - epsilon=None, ) assert pytest_wrapped_e.type == SystemExit - # Test for mask length - objfun = get_objective_function( - Qobs, - Qsim, + +def test_objective_function_failure_mask_length(): + """Test for the objective function calculation failure mode: + Qobs and mask length are different + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objective_function( + np.array([100, 100, 100]), + np.array([100, 110, 120]), obj_func="mae", - take_negative=True, mask=np.array([0, 1, 0, 0]), - transform=None, - epsilon=None, ) assert pytest_wrapped_e.type == SystemExit - # Test for obj_func does not exist - objfun = get_objective_function( - Qobs, - Qsim, - obj_func="fake_mae", - take_negative=True, - mask=mask, + +def test_objective_function_failure_unknown_objfun(): + """Test for the objective function calculation failure mode: + Objective function is unknown + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objective_function( + np.array([100, 100, 100]), + np.array([100, 110, 120]), + obj_func="fake", ) assert pytest_wrapped_e.type == SystemExit - # Test for mask is not 0 and 1 - objfun = get_objective_function( - Qobs, - Qsim, + +def test_objective_function_failure_mask_contents(): + """Test for the objective function calculation failure mode: + Mask contains other than 0 and 1 + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objective_function( + np.array([100, 100, 100]), + np.array([100, 110, 120]), obj_func="mae", - take_negative=True, mask=np.array([0, 0.5, 1]), ) assert pytest_wrapped_e.type == SystemExit - assert objfun is None - # Test for maximize_minimize objective func for unbounded metrics. - maximize = get_objfun_minimize_or_maximize(obj_func="bias") - assert pytest_wrapped_e.type == SystemExit - maximize = get_objfun_minimize_or_maximize(obj_func="pbias") + +def test_maximizer_objfun_failure_modes_bias(): + """Test for maximize-minimize failure mode: + Use of bias objfun which is unbounded + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objfun_minimize_or_maximize(obj_func="bias") assert pytest_wrapped_e.type == SystemExit - maximize = get_objfun_minimize_or_maximize(obj_func="volume_error") + + +def test_maximizer_objfun_failure_modes_pbias(): + """Test for maximize-minimize failure mode: + Use of pbias objfun which is unbounded + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objfun_minimize_or_maximize(obj_func="pbias") assert pytest_wrapped_e.type == SystemExit - # Test for unknown objective func - maximize = get_objective_function(obj_func="bias_fake") + +def test_maximizer_objfun_failure_modes_volume_error(): + """Test for maximize-minimize failure mode: + Use of volume_error objfun which is unbounded + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objfun_minimize_or_maximize(obj_func="volume_error") assert pytest_wrapped_e.type == SystemExit - assert objfun is None - assert maximize is None + +def test_maximizer_objfun_failure_modes_unknown_metric(): + """Test for maximize-minimize failure mode: + Use of unknown objfun + """ + with pytest.raises(SystemExit) as pytest_wrapped_e: + _ = get_objfun_minimize_or_maximize(obj_func="unknown_of") + assert pytest_wrapped_e.type == SystemExit From b3b691eaa9cb490ed159e6c2b78bd3890c159e5f Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Thu, 28 Dec 2023 22:16:39 -0500 Subject: [PATCH 21/29] final test for coveralls --- tests/test_calibration.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 0e0335b1..3c81cb89 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -89,6 +89,31 @@ def test_spotpy_calibration(): assert len(best_parameters_transform) == len(bounds_high) +def test_calibration_failure_mode_unknown_optimizer(): + """Test for maximize-minimize failure mode: + use "OTHER" optimizer, i.e. an unknown optimizer. Should fail. + """ + bounds_low = np.array([0, 0, 0]) + bounds_high = np.array([10, 10, 10]) + model_config = { + "precip": np.array([10, 11, 12, 13, 14, 15]), + "temperature": np.array([10, 3, -5, 1, 15, 0]), + "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "drainage_area": np.array([10]), + "model_name": "Dummy", + } + with pytest.raises(SystemExit) as pytest_wrapped_e: + best_parameters_transform, best_simulation, best_objfun = perform_calibration( + model_config, + "nse", + bounds_low=bounds_low, + bounds_high=bounds_high, + evaluations=10, + algorithm="OTHER", + ) + assert pytest_wrapped_e.type == SystemExit + + def test_transform(): """Test the flow transformer""" Qsim = np.array([10, 10, 10]) From b32ec6fda4c4ff9213d5a77ed42ae88409afa07c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 18:27:32 +0000 Subject: [PATCH 22/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 41c17f14..00748e98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,7 +124,6 @@ filename = ".cruft.json" search = "\"version\": \"{current_version}\"" replace = "\"version\": \"{new_version}\"" - [tool.coverage.run] relative_files = true include = ["xhydro/*"] From 92f75c58e493b526df798b47458ffd893409eb97 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Mon, 15 Jan 2024 21:10:31 -0500 Subject: [PATCH 23/29] fix raises, best practices, docs --- tests/test_calibration.py | 42 +- tests/test_hydrological_modelling.py | 14 +- tests/test_objective_functions.py | 76 +-- xhydro/modelling/calibration.py | 197 +++++-- xhydro/modelling/hydrological_modelling.py | 66 ++- xhydro/modelling/obj_funcs.py | 637 ++++++++++++--------- 6 files changed, 649 insertions(+), 383 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 3c81cb89..e0e16ab7 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -17,7 +17,7 @@ def test_spotpy_calibration(): model_config = { "precip": np.array([10, 11, 12, 13, 14, 15]), "temperature": np.array([10, 3, -5, 1, 15, 0]), - "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "qobs": np.array([120, 130, 140, 150, 160, 170]), "drainage_area": np.array([10]), "model_name": "Dummy", } @@ -39,7 +39,7 @@ def test_spotpy_calibration(): # Test that the objective function is calculated correctly objfun = get_objective_function( - model_config["Qobs"], + model_config["qobs"], best_simulation, obj_func="mae", mask=mask, @@ -49,8 +49,8 @@ def test_spotpy_calibration(): # Test dummy model response model_config["parameters"] = [5, 5, 5] - Qsim = dummy_model(model_config) - assert Qsim[3] == 3500.00 + qsim = dummy_model(model_config) + assert qsim["qsim"].values[3] == 3500.00 # Also test to ensure SCEUA and take_minimize is required. best_parameters_sceua, best_simulation, best_objfun = perform_calibration( @@ -98,11 +98,11 @@ def test_calibration_failure_mode_unknown_optimizer(): model_config = { "precip": np.array([10, 11, 12, 13, 14, 15]), "temperature": np.array([10, 3, -5, 1, 15, 0]), - "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "qobs": np.array([120, 130, 140, 150, 160, 170]), "drainage_area": np.array([10]), "model_name": "Dummy", } - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(NotImplementedError) as pytest_wrapped_e: best_parameters_transform, best_simulation, best_objfun = perform_calibration( model_config, "nse", @@ -111,27 +111,27 @@ def test_calibration_failure_mode_unknown_optimizer(): evaluations=10, algorithm="OTHER", ) - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == NotImplementedError def test_transform(): """Test the flow transformer""" - Qsim = np.array([10, 10, 10]) - Qobs = np.array([5, 5, 5]) + qsim = np.array([10, 10, 10]) + qobs = np.array([5, 5, 5]) - Qsim_r, Qobs_r = transform_flows(Qsim, Qobs, transform="inv", epsilon=0.01) - np.testing.assert_array_almost_equal(Qsim_r[1], 0.0995024, 6) - np.testing.assert_array_almost_equal(Qobs_r[1], 0.1980198, 6) + qsim_r, qobs_r = transform_flows(qsim, qobs, transform="inv", epsilon=0.01) + np.testing.assert_array_almost_equal(qsim_r[1], 0.0995024, 6) + np.testing.assert_array_almost_equal(qobs_r[1], 0.1980198, 6) - Qsim_r, Qobs_r = transform_flows(Qsim, Qobs, transform="sqrt") - np.testing.assert_array_almost_equal(Qsim_r[1], 3.1622776, 6) - np.testing.assert_array_almost_equal(Qobs_r[1], 2.2360679, 6) + qsim_r, qobs_r = transform_flows(qsim, qobs, transform="sqrt") + np.testing.assert_array_almost_equal(qsim_r[1], 3.1622776, 6) + np.testing.assert_array_almost_equal(qobs_r[1], 2.2360679, 6) - Qsim_r, Qobs_r = transform_flows(Qsim, Qobs, transform="log", epsilon=0.01) - np.testing.assert_array_almost_equal(Qsim_r[1], 2.3075726, 6) - np.testing.assert_array_almost_equal(Qobs_r[1], 1.6193882, 6) + qsim_r, qobs_r = transform_flows(qsim, qobs, transform="log", epsilon=0.01) + np.testing.assert_array_almost_equal(qsim_r[1], 2.3075726, 6) + np.testing.assert_array_almost_equal(qobs_r[1], 1.6193882, 6) # Test Qobs different length than Qsim - with pytest.raises(SystemExit) as pytest_wrapped_e: - Qobs_r, Qobs_r = transform_flows(Qsim, Qobs, transform="a", epsilon=0.01) - assert pytest_wrapped_e.type == SystemExit + with pytest.raises(NotImplementedError) as pytest_wrapped_e: + qobs_r, qobs_r = transform_flows(qsim, qobs, transform="a", epsilon=0.01) + assert pytest_wrapped_e.type == NotImplementedError diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py index 9f81751a..1840d5f3 100644 --- a/tests/test_hydrological_modelling.py +++ b/tests/test_hydrological_modelling.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from xhydro.modelling.hydrological_modelling import hydrological_model_selector +from xhydro.modelling.hydrological_modelling import run_hydrological_model def test_hydrological_modelling(): @@ -11,23 +11,23 @@ def test_hydrological_modelling(): model_config = { "precip": np.array([10, 11, 12, 13, 14, 15]), "temperature": np.array([10, 3, -5, 1, 15, 0]), - "Qobs": np.array([120, 130, 140, 150, 160, 170]), + "qobs": np.array([120, 130, 140, 150, 160, 170]), "drainage_area": np.array([10]), "model_name": "Dummy", "parameters": np.array([5, 5, 5]), } - Qsim = hydrological_model_selector(model_config) - assert Qsim[3] == 3500.00 + qsim = run_hydrological_model(model_config) + assert qsim["qsim"].values[3] == 3500.00 # Test the exceptions for new models model_config.update(model_name="ADD_OTHER_HERE") - Qsim = hydrological_model_selector(model_config) - assert Qsim == 0 + qsim = run_hydrological_model(model_config) + assert qsim == 0 def test_import_unknown_model(): """Test for unknown model""" with pytest.raises(NotImplementedError) as pytest_wrapped_e: model_config = {"model_name": "fake_model"} - _ = hydrological_model_selector(model_config) + _ = run_hydrological_model(model_config) assert pytest_wrapped_e.type == NotImplementedError diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py index f1d470f0..56ffd1f4 100644 --- a/tests/test_objective_functions.py +++ b/tests/test_objective_functions.py @@ -10,150 +10,150 @@ def test_obj_funcs(): """Series of tests to test all objective functions with fast test data""" - Qobs = np.array([120, 130, 140, 150, 160, 170]) - Qsim = np.array([120, 125, 145, 140, 140, 180]) + qobs = np.array([120, 130, 140, 150, 160, 170]) + qsim = np.array([120, 125, 145, 140, 140, 180]) # Test that the objective function is calculated correctly - objfun = get_objective_function(Qobs, Qsim, obj_func="abs_bias") + objfun = get_objective_function(qobs, qsim, obj_func="abs_bias") np.testing.assert_array_almost_equal(objfun, 3.3333333333333335, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="abs_pbias") + objfun = get_objective_function(qobs, qsim, obj_func="abs_pbias") np.testing.assert_array_almost_equal(objfun, 2.2988505747126435, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="abs_volume_error") + objfun = get_objective_function(qobs, qsim, obj_func="abs_volume_error") np.testing.assert_array_almost_equal(objfun, 0.022988505747126436, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="agreement_index") + objfun = get_objective_function(qobs, qsim, obj_func="agreement_index") np.testing.assert_array_almost_equal(objfun, 0.9171974522292994, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="bias") + objfun = get_objective_function(qobs, qsim, obj_func="bias") np.testing.assert_array_almost_equal(objfun, -3.3333333333333335, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="correlation_coeff") + objfun = get_objective_function(qobs, qsim, obj_func="correlation_coeff") np.testing.assert_array_almost_equal(objfun, 0.8599102447336393, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="kge") + objfun = get_objective_function(qobs, qsim, obj_func="kge") np.testing.assert_array_almost_equal(objfun, 0.8077187696552522, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="kge_mod") + objfun = get_objective_function(qobs, qsim, obj_func="kge_mod") np.testing.assert_array_almost_equal(objfun, 0.7888769531580001, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="mae") + objfun = get_objective_function(qobs, qsim, obj_func="mae") np.testing.assert_array_almost_equal(objfun, 8.333333333333334, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="mare") + objfun = get_objective_function(qobs, qsim, obj_func="mare") np.testing.assert_array_almost_equal(objfun, 0.05747126436781609, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="mse") + objfun = get_objective_function(qobs, qsim, obj_func="mse") np.testing.assert_array_almost_equal(objfun, 108.33333333333333, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="nse") + objfun = get_objective_function(qobs, qsim, obj_func="nse") np.testing.assert_array_almost_equal(objfun, 0.6285714285714286, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="pbias") + objfun = get_objective_function(qobs, qsim, obj_func="pbias") np.testing.assert_array_almost_equal(objfun, -2.2988505747126435, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="r2") + objfun = get_objective_function(qobs, qsim, obj_func="r2") np.testing.assert_array_almost_equal(objfun, 0.7394456289978675, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="rmse") + objfun = get_objective_function(qobs, qsim, obj_func="rmse") np.testing.assert_array_almost_equal(objfun, 10.408329997330663, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="rrmse") + objfun = get_objective_function(qobs, qsim, obj_func="rrmse") np.testing.assert_array_almost_equal(objfun, 0.07178158618848733, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="rsr") + objfun = get_objective_function(qobs, qsim, obj_func="rsr") np.testing.assert_array_almost_equal(objfun, 0.6094494002200439, 8) - objfun = get_objective_function(Qobs, Qsim, obj_func="volume_error") + objfun = get_objective_function(qobs, qsim, obj_func="volume_error") np.testing.assert_array_almost_equal(objfun, -0.022988505747126436, 8) def test_objective_function_failure_data_length(): """Test for the objective function calculation failure mode: - Qobs and Qsim length are different + qobs and qsim length are different """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objective_function( np.array([100, 110]), np.array([100, 110, 120]), obj_func="mae", ) - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_objective_function_failure_mask_length(): """Test for the objective function calculation failure mode: - Qobs and mask length are different + qobs and mask length are different """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objective_function( np.array([100, 100, 100]), np.array([100, 110, 120]), obj_func="mae", mask=np.array([0, 1, 0, 0]), ) - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_objective_function_failure_unknown_objfun(): """Test for the objective function calculation failure mode: Objective function is unknown """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objective_function( np.array([100, 100, 100]), np.array([100, 110, 120]), obj_func="fake", ) - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_objective_function_failure_mask_contents(): """Test for the objective function calculation failure mode: Mask contains other than 0 and 1 """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objective_function( np.array([100, 100, 100]), np.array([100, 110, 120]), obj_func="mae", mask=np.array([0, 0.5, 1]), ) - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_maximizer_objfun_failure_modes_bias(): """Test for maximize-minimize failure mode: Use of bias objfun which is unbounded """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objfun_minimize_or_maximize(obj_func="bias") - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_maximizer_objfun_failure_modes_pbias(): """Test for maximize-minimize failure mode: Use of pbias objfun which is unbounded """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objfun_minimize_or_maximize(obj_func="pbias") - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_maximizer_objfun_failure_modes_volume_error(): """Test for maximize-minimize failure mode: Use of volume_error objfun which is unbounded """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: _ = get_objfun_minimize_or_maximize(obj_func="volume_error") - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == ValueError def test_maximizer_objfun_failure_modes_unknown_metric(): """Test for maximize-minimize failure mode: Use of unknown objfun """ - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(NotImplementedError) as pytest_wrapped_e: _ = get_objfun_minimize_or_maximize(obj_func="unknown_of") - assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.type == NotImplementedError diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index 4af21f6b..312aa8b3 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -60,48 +60,42 @@ # Import packages import numpy as np import spotpy +import xarray as xr from spotpy import analyser from spotpy.parameter import Uniform -from xhydro.modelling.hydrological_modelling import hydrological_model_selector +from xhydro.modelling.hydrological_modelling import run_hydrological_model from xhydro.modelling.obj_funcs import ( get_objective_function, get_objfun_minimize_or_maximize, get_optimizer_minimize_or_maximize, ) +__all__ = ["perform_calibration"] -class spot_setup: - """Create the spotpy calibration system that is used for hydrological model calibration.""" - def __init__( - self, - model_config, - bounds_high, - bounds_low, - obj_func=None, - take_negative=False, - mask=None, - transform=None, - epsilon=None, - ): - """ - Initialize the spot_setup object. +class SpotSetup: + """Create the spotpy calibration system that is used for hydrological model calibration. - The initialization of the spot_setup object includes a generic - "model_config" object containing hydrological modelling data required, - low and high parameter bounds, as well as an objective function. - Depending on the objective function, either spotpy or hydroeval will - compute the value, since some functions are found only in one package. + Parameters + ---------- + model_config : dict + The model configuration object that contains all info to run the model. + The model function called to run this model should always use this object and read-in data it requires. + It will be up to the user to provide the data that the model requires. + bounds_high : np.array + High bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters to calibrate. + bounds_low : np.array + Low bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters to calibrate. + obj_func : str + The objective function used for calibrating. Can be any one of these: - Notes - ----- - Accepted obj_func values: - "abs_bias" : Absolute value of the "bias" metric - "abs_pbias": Absolute value of the "pbias" metric - "abs_volume_error" : Absolute value of the volume_error metric - "agreement_index": Index of agreement - - "bias" : Bias metric - "correlation_coeff": Correlation coefficient - "kge" : Kling Gupta Efficiency metric (2009 version) - "kge_mod" : Kling Gupta Efficiency metric (2012 version) @@ -109,12 +103,99 @@ def __init__( - "mare": Mean Absolute Relative Error metric - "mse" : Mean Square Error metric - "nse": Nash-Sutcliffe Efficiency metric - - "pbias" : Percent bias (relative bias) - "r2" : r-squared, i.e. square of correlation_coeff. - "rmse" : Root Mean Square Error - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) - "rsr" : Ratio of RMSE to standard deviation. - - "volume_error": Total volume error over the period. + + take_negative : bool + Inidactor to take the negative of the objective function value in optimization to ensure convergence + in the right direction. + mask : np.array + A vector indicating which values to preserve/remove from the objective function computation. 0=remove, 1=preserve. + transform : str + The method to transform streamflow prior to computing the objective function. Can be one of: + Square root ('sqrt'), inverse ('inv'), or logarithmic ('log') transformation. + epsilon : float + Used to add a small delta to observations for log and inverse transforms, to eliminate errors + caused by zero flow days (1/0 and log(0)). The added perturbation is equal to the mean observed streamflow + times this value of epsilon. + + Returns + ------- + SpotSetup object for calibration + """ + + def __init__( + self, + model_config: dict, + bounds_high: np.array, + bounds_low: np.array, + obj_func: str = None, + take_negative: bool = False, + mask: np.array = None, + transform: str = None, + epsilon: float = None, + ): + """ + Initialize the SpotSetup object. + + The initialization of the SpotSetup object includes a generic + "model_config" object containing hydrological modelling data required, + low and high parameter bounds, as well as an objective function. + Depending on the objective function, either spotpy or hydroeval will + compute the value, since some functions are found only in one package. + + Parameters + ---------- + model_config : dict + The model configuration object that contains all info to run the model. + The model function called to run this model should always use this object and read-in data it requires. + It will be up to the user to provide the data that the model requires. + obj_func : str + The objective function used for calibrating. Can be any one of these: + + - "abs_bias" : Absolute value of the "bias" metric + - "abs_pbias": Absolute value of the "pbias" metric + - "abs_volume_error" : Absolute value of the volume_error metric + - "agreement_index": Index of agreement + - "correlation_coeff": Correlation coefficient + - "kge" : Kling Gupta Efficiency metric (2009 version) + - "kge_mod" : Kling Gupta Efficiency metric (2012 version) + - "mae": Mean Absolute Error metric + - "mare": Mean Absolute Relative Error metric + - "mse" : Mean Square Error metric + - "nse": Nash-Sutcliffe Efficiency metric + - "r2" : r-squared, i.e. square of correlation_coeff. + - "rmse" : Root Mean Square Error + - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) + - "rsr" : Ratio of RMSE to standard deviation. + bounds_high : np.array + High bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters to calibrate. + bounds_low : np.array + Low bounds for the model parameters to be calibrated. Spotpy will sample parameter sets from + within these bounds. The size must be equal to the number of parameters to calibrate. + evaluations : int + Maximum number of model evaluations (calibration budget) to perform before stopping the calibration process. + algorithm : str + The optimization algorithm to use. Currently, "DDS" and "SCEUA" are available, but more can be easily added. + take_negative : bool + Inidactor to take the negative of the objective function value in optimization to ensure convergence + in the right direction. + mask : np.array + A vector indicating which values to preserve/remove from the objective function computation. 0=remove, 1=preserve. + transform : str + The method to transform streamflow prior to computing the objective function. Can be one of: + Square root ('sqrt'), inverse ('inv'), or logarithmic ('log') transformation. + epsilon : float + Used to add a small delta to observations for log and inverse transforms, to eliminate errors + caused by zero flow days (1/0 and log(0)). The added perturbation is equal to the mean observed streamflow + times this value of epsilon. + + Returns + ------- + SpotSetup object for calibration. """ # Gather the model_config dictionary and obj_func string, and other # optional arguments. @@ -138,12 +219,25 @@ def simulation(self, x): given bounds and generates the simulation results. We add the parameter "x" that is generated by spotpy to the model_config object at the reserved keyword "parameters". + + Parameters + ---------- + x : array_like + Tested parameter set. + + Returns + ------- + array_like + Simulated streamflow. """ self.model_config.update({"parameters": x}) - # Run the model and return Qsim, with model_config containing the + # Run the model and return qsim, with model_config containing the # tested parameter set. - return hydrological_model_selector(self.model_config) + qsim = run_hydrological_model(self.model_config) + + # Return the array of values from qsim for the objective function eval. + return qsim["qsim"].values def evaluation(self): """Evaluation function for spotpy. @@ -157,8 +251,17 @@ def evaluation(self): package supposes that Qobs and Qsim have the same length, but this can be changed in the model_config parameterization and adding conditions here. + + Returns + ------- + array_like + Observed streamflow. """ - return self.model_config["Qobs"] + qobs = self.model_config["qobs"] + if isinstance(qobs, xr.Dataset): + qobs = qobs["qobs"] + + return qobs def objectivefunction( self, @@ -170,12 +273,17 @@ def objectivefunction( This function is where spotpy computes the objective function. - Notes - ----- - The inputs are: - - model_config object (dict type) with all required information, - - simulation, observation : vectors of streamflow used to compute - the objective function + Parameters + ---------- + simulation : array_like + Vector of simulated streamflow. + evaluation : array_like + Vector of observed streamflow to compute objective function. + + Returns + ------- + float + The value of the objective function. """ obj_fun_val = get_objective_function( evaluation, @@ -252,6 +360,15 @@ def perform_calibration( Used to add a small delta to observations for log and inverse transforms, to eliminate errors caused by zero flow days (1/0 and log(0)). The added perturbation is equal to the mean observed streamflow times this value of epsilon. + + Returns + ------- + best_parameters : array_like + The optimized parameter set. + qsim : xr.Dataset + Simulated streamflow using the optimized parameter set. + bestobjf : float + The best objective function value. """ # Get objective function and algo optimal convregence direction. Necessary # to ensure that the algorithm is optimizing in the correct direction @@ -267,7 +384,7 @@ def perform_calibration( take_negative = False # Set up the spotpy object to prepare the calibration - spotpy_setup = spot_setup( + spotpy_setup = SpotSetup( model_config, bounds_high=bounds_high, bounds_low=bounds_low, @@ -316,7 +433,7 @@ def perform_calibration( model_config.update({"parameters": best_parameters}) # ... which can be used to run the hydrological model and get the best Qsim. - Qsim = hydrological_model_selector(model_config) + qsim = run_hydrological_model(model_config) - # Return the best parameters, Qsim and best objective function value. - return best_parameters, Qsim, bestobjf + # Return the best parameters, qsim and best objective function value. + return best_parameters, qsim, bestobjf diff --git a/xhydro/modelling/hydrological_modelling.py b/xhydro/modelling/hydrological_modelling.py index 163cb178..4aefad5e 100644 --- a/xhydro/modelling/hydrological_modelling.py +++ b/xhydro/modelling/hydrological_modelling.py @@ -34,44 +34,88 @@ """ import numpy as np +import pandas as pd +import xarray as xr +__all__ = ["run_hydrological_model"] -def hydrological_model_selector(model_config): + +def run_hydrological_model(model_config: dict): """Hydrological model selector. This is the main hydrological model selector. This is the code that looks at the "model_config["model_name"]" keyword and calls the appropriate hydrological model function. + + Parameters + ---------- + model_config : dict + The model configuration object that contains all info to run the model. + The model function called to run this model shouls always use this object + and read-in data it requires. It will be up to the user to provide the + data that the model requires. + + Returns + ------- + xr.Dataset + Simulated streamflow from the model, in xarray Dataset format. """ if model_config["model_name"] == "Dummy": - Qsim = dummy_model(model_config) + qsim = dummy_model(model_config) elif model_config["model_name"] == "ADD_OTHER_HERE": # ADD OTHER MODELS HERE - Qsim = 0 + qsim = 0 else: - raise NotImplementedError() + raise NotImplementedError( + f"The model '{model_config['model_name']}' is not recognized." + ) - return Qsim + return qsim -def dummy_model(model_config): +def dummy_model(model_config: dict): """Dummy model. Dummy model to show the implementation we should be aiming for. Each model will have its own required data that users can pass. + + Parameters + ---------- + model_config : dict + The model configuration object that contains all info to run the model. + The model function called to run this model shouls always use this object + and read-in data it requires. It will be up to the user to provide the + data that the model requires. + + Returns + ------- + xr.Dataset + Simulated streamflow from the model, in xarray Dataset format. """ # Parse the model_config object to extract required information precip = model_config["precip"] temperature = model_config["temperature"] area = model_config["drainage_area"] - X = model_config["parameters"] + x = model_config["parameters"] # Run the dummy model using these data. Keeping it simple to calculate by # hand to ensure the calibration algorithm is working correctly and data # are handled correctly - Qsim = np.empty(len(precip)) + qsim = np.empty(len(precip)) for t in range(0, len(precip)): - Qsim[t] = (precip[t] * X[0] + abs(temperature[t]) * X[1]) * X[2] * area - - return Qsim + qsim[t] = (precip[t] * x[0] + abs(temperature[t]) * x[1]) * x[2] * area + + # For this model, we can convert to xr.dataset by supposing dates. Other + # models will require some dates in some form (to add to QC checks) in their + # inputs. + time = pd.date_range("2024-01-01", periods=len(precip)) + qsim = xr.Dataset( + data_vars=dict( + qsim=(["time"], qsim), + ), + coords=dict(time=time), + attrs=dict(description="streamflow simulated by the Dummy Model"), + ) + + return qsim diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index ca51a33a..989528fe 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -1,6 +1,6 @@ # Created on Tue Dec 12 21:55:25 2023 # @author: Richard Arsenault -"""Objective function package for xhydro, for calibration and model evaluation +"""Objective function package for xhydro, for calibration and model evaluation. This package provides a flexible suite of popular objective function metrics in hydrological modelling and hydrological model calibration. The main function @@ -21,44 +21,45 @@ algorithm. """ -import sys - # Import packages import numpy as np +import xarray as xr + +__all__ = ["get_objective_function"] def get_objective_function( - Qobs, - Qsim, - obj_func="rmse", - take_negative=False, - mask=None, - transform=None, - epsilon=None, + qobs: np.array, + qsim: np.array, + obj_func: str = "rmse", + take_negative: bool = False, + mask: np.array = None, + transform: str = None, + epsilon: float = None, ): - """ - Entrypoint function for the objective function calculation. More can be - added by adding the function to this file and adding the option in this - function. + """Entrypoint function for the objective function calculation. + + More can be added by adding the function to this file and adding the option + in this function. NOTE: All data corresponding to NaN values in the observation set are removed from the calculation. If a mask is passed, it must be the - same size as the Qsim and Qobs vectors. If any NaNs are present in - the Qobs dataset, all corresponding data in the Qobs, Qsim and mask + same size as the qsim and qobs vectors. If any NaNs are present in + the qobs dataset, all corresponding data in the qobs, qsim and mask will be removed prior to passing to the processing function. Parameters ---------- - Qobs: numpy array of size n + qobs : array_like Vector containing the Observed streamflow to be used in the objective function calculation. It is the target to attain. - Qsim: numpy array of size n + qsim : array_like Vector containing the Simulated streamflow as generated by the hydro- logical model. It is modified by changing parameters and resumulating the hydrological model. - obj_func : string, optional + obj_func : str String representing the objective function to use in the calibration. Options must be one of the accepted objective functions: @@ -83,26 +84,25 @@ def get_objective_function( The default is 'rmse'. - take_negative : Boolean used to force the objective function to be - multiplied by minus one (-1) such that it is possible to maximize it - if the optimizer is a minimizer and vice-versa. Should always be set - to False unless required by an optimization setup, which is handled - internally and transparently to the user. - - The default is False. + take_negative : bool + Used to force the objective function to be multiplied by minus one (-1) + such that it is possible to maximize it if the optimizer is a minimizer + and vice-versa. Should always be set to False unless required by an + optimization setup, which is handled internally and transparently to + the user. The default is False. - mask : numpy array (vector) of size n, optional - array of 0 or 1 on which the objective function should be applied. + mask : array_like + Array of 0 or 1 on which the objective function should be applied. Values of 1 indicate that the value is included in the calculation, and values of 0 indicate that the value is excluded and will have no impact on the objective function calculation. This can be useful for specific optimization strategies such as odd/even year calibration, seasonal - calibration or calibration based on high/low flows. + calibration or calibration based on high/low flows. The default is None + and all data are preserved. - The default is None, and all data are preserved. - - transform : string indicating the type of transformation required. Can be - one of the following values: + transform : str + Indicates the type of transformation required. Can be one of the + following values: - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] @@ -110,22 +110,19 @@ def get_objective_function( The default value is "None", by which no transformation is performed. - epsilon: scalar float indicating the perturbation to add to the flow - time series during a transformation to avoid division by zero and - logarithmic transformation. The perturbation is equal to: + epsilon : float + Indicates the perturbation to add to the flow time series during a + transformation to avoid division by zero and logarithmic transformation. + The perturbation is equal to: - perturbation = epsilon * mean(Qobs) + perturbation = epsilon * mean(qobs) The default value is 0.01. - - NOTE: All data corresponding to NaN values in the observation set are - removed from the calculation - Returns ------- - obj_fun_val: scalar float representing the value of the selected objective - function (obj_fun). + float + Value of the selected objective function (obj_fun). """ # List of available objective functions obj_func_dict = { @@ -149,23 +146,30 @@ def get_objective_function( "volume_error": volume_error, } + # If we got a dataset, change to np.array + if isinstance(qsim, xr.Dataset): + qsim = qsim["qsim"] + + if isinstance(qobs, xr.Dataset): + qobs = qobs["qobs"] + # Basic error checking - if Qobs.shape[0] != Qsim.shape[0]: - sys.exit("Observed and Simulated flows are not of the same size.") + if qobs.shape[0] != qsim.shape[0]: + raise ValueError("Observed and Simulated flows are not of the same size.") # Check mask size and contents if mask is not None: # Size - if Qobs.shape[0] != mask.shape[0]: - sys.exit("Mask is not of the same size as the streamflow data.") + if qobs.shape[0] != mask.shape[0]: + raise ValueError("Mask is not of the same size as the streamflow data.") # All zero or one? if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0: - sys.exit("Mask contains values other 0 or 1. Please modify.") + raise ValueError("Mask contains values other 0 or 1. Please modify.") # Check that the objective function is in the list of available methods if obj_func not in obj_func_dict: - sys.exit( + raise ValueError( "Selected objective function is currently unavailable. " + "Consider contributing to our project at: " + "github.com/hydrologie/xhydro" @@ -173,24 +177,24 @@ def get_objective_function( # Ensure there are no NaNs in the dataset if mask is not None: - mask = mask[~np.isnan(Qobs)] - Qsim = Qsim[~np.isnan(Qobs)] - Qobs = Qobs[~np.isnan(Qobs)] + mask = mask[~np.isnan(qobs)] + qsim = qsim[~np.isnan(qobs)] + qobs = qobs[~np.isnan(qobs)] # Apply mask before trasform if mask is not None: - Qsim = Qsim[mask == 1] - Qobs = Qobs[mask == 1] + qsim = qsim[mask == 1] + qobs = qobs[mask == 1] mask = mask[mask == 1] # Transform data if needed if transform is not None: - Qsim, Qobs = transform_flows(Qsim, Qobs, transform, epsilon) + qsim, qobs = transform_flows(qsim, qobs, transform, epsilon) # Compute objective function by switching to the correct algorithm. Ensure # that the function name is the same as the obj_func tag or this will fail. function_call = obj_func_dict[obj_func] - obj_fun_val = function_call(Qsim, Qobs) + obj_fun_val = function_call(qsim, qobs) # Take the negative value of the Objective Function to return to the # optimizer. @@ -200,16 +204,22 @@ def get_objective_function( return obj_fun_val -def get_objfun_minimize_or_maximize(obj_func): - """ - Checks the name of the objective function and returns whether it should be - maximized or minimized. Returns a boolean value, where True means it should - be maximized, and Flase means that it should be minimized. Objective - functions other than those programmed here will raise an error. +def get_objfun_minimize_or_maximize(obj_func: str): + """Check whether the objective function needs to be maximized or minimized. + + Returns a boolean value, where True means it should be maximized, and False + means that it should be minimized. Objective functions other than those + programmed here will raise an error. - Inputs: - obj_func: string containing the label for the desired objective - function. + Parameters + ---------- + obj_func : str + Label of the desired objective function. + + Returns + ------- + bool + Indicator of if the objective function needs to be maximized. """ # Define metrics that need to be maximized: if obj_func in [ @@ -238,27 +248,33 @@ def get_objfun_minimize_or_maximize(obj_func): # Check for the metrics that exist but cannot be used for optimization elif obj_func in ["bias", "pbias", "volume_error"]: - sys.exit( + raise ValueError( "The bias, pbias and volume_error metrics cannot be minimized or maximized. \ Please use the abs_bias, abs_pbias and abs_volume_error instead." ) else: - sys.exit("The objective function is unknown.") + raise NotImplementedError("The objective function is unknown.") return maximize -def get_optimizer_minimize_or_maximize(algorithm): - """ - Finds the direction in which the optimizer searches. Some optimizers try to - maximize the objective function value, and others try to minimize it. Since - our objective functions include some that need to be maximized and others - minimized, it is imperative to ensure that the optimizer/objective-function - pair work in tandem. +def get_optimizer_minimize_or_maximize(algorithm: str): + """Find the direction in which the optimizer searches. + Some optimizers try to maximize the objective function value, and others + try to minimize it. Since our objective functions include some that need to + be maximized and others minimized, it is imperative to ensure that the + optimizer/objective-function pair work in tandem. + + Parameters + ---------- + algorithm : str + Name of the optimizing algorithm. - Inputs: - algorithm: string containing the direction of the optimizer search + Returns + ------- + bool + Indicator of the direction of the optimizer search, True to maximize. """ # Define metrics that need to be maximized: if algorithm in [ @@ -274,12 +290,14 @@ def get_optimizer_minimize_or_maximize(algorithm): # Any other optimizer at this date else: - sys.exit("The optimization algorithm is unknown.") + raise NotImplementedError("The optimization algorithm is unknown.") return maximize -def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): +def transform_flows( + qsim: np.array, qobs: np.array, transform: str = None, epsilon: float = 0.01 +): """ Transform flows before computing the objective function. @@ -289,12 +307,14 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. - Qobs : Observed streamflow vector (numpy array) - - transform : string indicating the type of transformation required. Can be - one of the following values: + transform : str + Indicates the type of transformation required. Can be one of the + following values: - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] @@ -302,38 +322,41 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): The default value is "None", by which no transformation is performed. - epsilon: scalar float indicating the perturbation to add to the flow - time series during a transformation to avoid division by zero and - logarithmic transformation. The perturbation is equal to: + epsilon : float + Indicates the perturbation to add to the flow time series during a + transformation to avoid division by zero and logarithmic transformation. + The perturbation is equal to: - perturbation = epsilon * mean(Qobs) + perturbation = epsilon * mean(qobs) The default value is 0.01. Returns ------- - Qsim, Qobs transformed according to the transformation function requested - by the user in "transform". Qsim and Qobs are numpy arrays. + qsim : array_like + Transformed simulated flow according to user request. + qobs : array_like + Transformed observed flow according to user request. """ # Quick check if transform is not None: if transform not in ["log", "inv", "sqrt"]: - sys.exit("Flow transformation method not recognized.") + raise NotImplementedError("Flow transformation method not recognized.") # Transform the flow series if required if transform == "log": # log transformation - epsilon = epsilon * np.nanmean(Qobs) - Qobs, Qsim = np.log(Qobs + epsilon), np.log(Qsim + epsilon) + epsilon = epsilon * np.nanmean(qobs) + qobs, qsim = np.log(qobs + epsilon), np.log(qsim + epsilon) elif transform == "inv": # inverse transformation - epsilon = epsilon * np.nanmean(Qobs) - Qobs, Qsim = 1.0 / (Qobs + epsilon), 1.0 / (Qsim + epsilon) + epsilon = epsilon * np.nanmean(qobs) + qobs, qsim = 1.0 / (qobs + epsilon), 1.0 / (qsim + epsilon) elif transform == "sqrt": # square root transformation - Qobs, Qsim = np.sqrt(Qobs), np.sqrt(Qsim) + qobs, qsim = np.sqrt(qobs), np.sqrt(qsim) # Return the flows after transformation (or original if no transform) - return Qsim, Qobs + return qsim, qobs """ @@ -341,163 +364,195 @@ def transform_flows(Qsim, Qobs, transform=None, epsilon=0.01): """ -def abs_bias(Qsim, Qobs): +def abs_bias(qsim: np.array, qobs: np.array): """ - absolute bias metric + Absolute bias metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - abs_bias: positive scalar float representing the absolute value of the - "bias" metric. This metric is useful when calibrating on the bias, because - bias should aim to be 0 but can take large positive or negative values. - Taking the absolute value of the bias will let the optimizer minimize - the value to zero. - + float + Absolute value of the "bias" metric. This metric is useful when calibrating + on the bias, because bias should aim to be 0 but can take large positive + or negative values. Taking the absolute value of the bias will let the + optimizer minimize the value to zero. + + Notes + ----- The abs_bias should be MINIMIZED. """ - return np.abs(bias(Qsim, Qobs)) + return np.abs(bias(qsim, qobs)) -def abs_pbias(Qsim, Qobs): +def abs_pbias(qsim: np.array, qobs: np.array): """ - absolute pbias metric + Absolute pbias metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - abs_pbias: positive scalar float representing the absolute value of the - "pbias" metric. This metric is useful when calibrating on the pbias, - because pbias should aim to be 0 but can take large positive or negative - values. Taking the absolute value of the pbias will let the optimizer - minimize the value to zero. - + float + The absolute value of the "pbias" metric. This metric is useful when + calibrating on the pbias, because pbias should aim to be 0 but can take + large positive or negative values. Taking the absolute value of the + pbias will let the optimizer minimize the value to zero. + + Notes + ----- The abs_pbias should be MINIMIZED. """ - return np.abs(pbias(Qsim, Qobs)) + return np.abs(pbias(qsim, qobs)) -def abs_volume_error(Qsim, Qobs): +def abs_volume_error(qsim: np.array, qobs: np.array): """ - absolute value of the volume error metric + Absolute value of the volume error metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - abs_volume_error: positive scalar float representing the absolute value of - the "volume_error" metric. This metric is useful when calibrating on the - volume_error, because volume_error should aim to be 0 but can take large - positive or negative values. Taking the absolute value of the volume_error - will let the optimizer minimize the value to zero. - + float + The absolute value of the "volume_error" metric. This metric is useful + when calibrating on the volume_error, because volume_error should aim + to be 0 but can take large positive or negative values. Taking the + absolute value of the volume_error will let the optimizer minimize the + value to zero. + + Notes + ----- The abs_volume_error should be MINIMIZED. """ - return np.abs(volume_error(Qsim, Qobs)) + return np.abs(volume_error(qsim, qobs)) -def agreement_index(Qsim, Qobs): +def agreement_index(qsim: np.array, qobs: np.array): """ - index of agreement metric + Index of agreement metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - agreement_index: scalar float representing the agreement index of Willmott - (1981). Varies between 0 and 1. + float + The agreement index of Willmott (1981). Varies between 0 and 1. + Notes + ----- The Agreement index should be MAXIMIZED. """ # Decompose into clearer chunks - a = np.sum((Qobs - Qsim) ** 2) - b = np.abs(Qsim - np.mean(Qobs)) + np.abs(Qobs - np.mean(Qobs)) + a = np.sum((qobs - qsim) ** 2) + b = np.abs(qsim - np.mean(qobs)) + np.abs(qobs - np.mean(qobs)) c = np.sum(b**2) return 1 - (a / c) -def bias(Qsim, Qobs): +def bias(qsim: np.array, qobs: np.array): """ - bias metric + The bias metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - bias: scalar float representing the bias in the simulation. Can be negative - or positive and gives the average error between the observed and simulated - flows. This interpretation uses the definition that a positive bias value - means that the simulation overestimates the true value (as opposed to many - other sources on bias calculations that use the contrary interpretation). - + float + The bias in the simulation. Can be negative or positive and gives the + average error between the observed and simulated flows. This + interpretation uses the definition that a positive bias value means + that the simulation overestimates the true value (as opposed to many + other sources on bias calculations that use the contrary interpretation). + + Notes + ----- BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE. """ - return np.mean(Qsim - Qobs) + return np.mean(qsim - qobs) -def correlation_coeff(Qsim, Qobs): +def correlation_coeff(qsim: np.array, qobs: np.array): """ - correlation coefficient metric + Correlation coefficient metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - correlation_coeff: scalar float representing the correlation coefficient. + float + The correlation coefficient. + Notes + ----- The correlation_coeff should be MAXIMIZED. """ - return np.corrcoef(Qobs, Qsim)[0, 1] + return np.corrcoef(qobs, qsim)[0, 1] -def kge(Qsim, Qobs): +def kge(qsim: np.array, qobs: np.array): """ - Kling-Gupta efficiency metric (2009 version) + Kling-Gupta efficiency metric (2009 version). Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - kge: scalar float representing the Kling-Gupta Efficiency (KGE) metric of - 2009. It can take values from -inf to 1 (best case). + float + The Kling-Gupta Efficiency (KGE) metric of 2009. It can take values + from -inf to 1 (best case). + Notes + ----- The KGE should be MAXIMIZED. """ # This pops up a lot, precalculate. - Qsim_mean = np.mean(Qsim) - Qobs_mean = np.mean(Qobs) + Qsim_mean = np.mean(qsim) + qobs_mean = np.mean(qobs) # Calculate the components of KGE - r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) - r_den = np.sqrt(np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2)) + r_num = np.sum((qsim - Qsim_mean) * (qobs - qobs_mean)) + r_den = np.sqrt(np.sum((qsim - Qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2)) r = r_num / r_den - a = np.std(Qsim) / np.std(Qobs) - b = np.sum(Qsim) / np.sum(Qobs) + a = np.std(qsim) / np.std(qobs) + b = np.sum(qsim) / np.sum(qobs) # Calculate the KGE kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2) @@ -505,32 +560,37 @@ def kge(Qsim, Qobs): return kge -def kge_mod(Qsim, Qobs): +def kge_mod(qsim: np.array, qobs: np.array): """ - Kling-Gupta efficiency metric (2012 version) + Kling-Gupta efficiency metric (2012 version). Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - kge_mod: scalar float representing the modified Kling-Gupta Efficiency - (KGE) metric of 2012. It can take values from -inf to 1 (best case). + float + The modified Kling-Gupta Efficiency (KGE) metric of 2012. It can take + values from -inf to 1 (best case). + Notes + ----- The kge_mod should be MAXIMIZED. """ # These pop up a lot, precalculate - Qsim_mean = np.mean(Qsim) - Qobs_mean = np.mean(Qobs) + Qsim_mean = np.mean(qsim) + qobs_mean = np.mean(qobs) # Calc KGE components - r_num = np.sum((Qsim - Qsim_mean) * (Qobs - Qobs_mean)) - r_den = np.sqrt(np.sum((Qsim - Qsim_mean) ** 2) * np.sum((Qobs - Qobs_mean) ** 2)) + r_num = np.sum((qsim - Qsim_mean) * (qobs - qobs_mean)) + r_den = np.sqrt(np.sum((qsim - Qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2)) r = r_num / r_den - g = (np.std(Qsim) / Qsim_mean) / (np.std(Qobs) / Qobs_mean) - b = np.mean(Qsim) / np.mean(Qobs) + g = (np.std(qsim) / Qsim_mean) / (np.std(qobs) / qobs_mean) + b = np.mean(qsim) / np.mean(qobs) # Calc the modified KGE metric kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2) @@ -538,212 +598,257 @@ def kge_mod(Qsim, Qobs): return kge_mod -def mae(Qsim, Qobs): +def mae(qsim: np.array, qobs: np.array): """ - mean absolute error metric + Mean absolute error metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - mae: scalar float representing the Mean Absolute Error. It can be - interpreted as the average error (absolute) between observations and - simulations for any time step. + float + Mean Absolute Error. It can be interpreted as the average error + (absolute) between observations and simulations for any time step. + Notes + ----- The mae should be MINIMIZED. """ - return np.mean(np.abs(Qsim - Qobs)) + return np.mean(np.abs(qsim - qobs)) -def mare(Qsim, Qobs): +def mare(qsim: np.array, qobs: np.array): """ - mean absolute relative error metric + Mean absolute relative error metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - mare: scalar float representing the Mean Absolute Relative Error. For - streamflow, where Qobs is always zero or positive, the MARE is always - positive. + float + Mean Absolute Relative Error. For streamflow, where qobs is always zero + or positive, the MARE is always positive. + Notes + ----- The mare should be MINIMIZED. """ - return np.sum(np.abs(Qobs - Qsim)) / np.sum(Qobs) + return np.sum(np.abs(qobs - qsim)) / np.sum(qobs) -def mse(Qsim, Qobs): +def mse(qsim: np.array, qobs: np.array): """ - mean square error metric + Mean square error metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - mse: scalar float representing the Mean Square Error. It is the sum of - squared errors for each day divided by the total number of days. Units are - thus squared units, and the best possible value is 0. + float + Mean Square Error. It is the sum of squared errors for each day divided + by the total number of days. Units are thus squared units, and the best + possible value is 0. + Notes + ----- The mse should be MINIMIZED. """ - return np.mean((Qobs - Qsim) ** 2) + return np.mean((qobs - qsim) ** 2) -def nse(Qsim, Qobs): +def nse(qsim: np.array, qobs: np.array): """ - Nash-Sutcliffe efficiency metric + Nash-Sutcliffe efficiency metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - nse: scalar float representing the Nash-Sutcliffe Efficiency (NSE) metric. - It can take values from -inf to 1, with 0 being as good as using the mean - observed flow as the estimator. + float + Nash-Sutcliffe Efficiency (NSE) metric. It can take values from -inf to + 1, with 0 being as good as using the mean observed flow as the estimator. + Notes + ----- The nse should be MAXIMIZED. """ - num = np.sum((Qobs - Qsim) ** 2) - den = np.sum((Qobs - np.mean(Qobs)) ** 2) + num = np.sum((qobs - qsim) ** 2) + den = np.sum((qobs - np.mean(qobs)) ** 2) return 1 - (num / den) -def pbias(Qsim, Qobs): +def pbias(qsim: np.array, qobs: np.array): """ - percent bias metric + Percent bias metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - pbias: scalar float representing the Percent bias. Can be negative or - positive and gives the average relative error between the observed and - simulated flows. This interpretation uses the definition that a positive - bias value means that the simulation overestimates the true value (as - opposed to many other sources on bias calculations that use the contrary - interpretation). - + float + Percent bias. Can be negative or positive and gives the average + relative error between the observed and simulated flows. This + interpretation uses the definition that a positive bias value means + that the simulation overestimates the true value (as opposed to many + other sources on bias calculations that use the contrary interpretation). + + Notes + ----- PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE. """ - return (np.sum(Qsim - Qobs) / np.sum(Qobs)) * 100 + return (np.sum(qsim - qobs) / np.sum(qobs)) * 100 -def r2(Qsim, Qobs): +def r2(qsim: np.array, qobs: np.array): """ - r-squred metric + The r-squred metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - r2: scalar float representing the r-squared (R2) metric equal to the square - of the correlation coefficient. + float + The r-squared (R2) metric equal to the square of the correlation + coefficient. + Notes + ----- The r2 should be MAXIMIZED. """ - return correlation_coeff(Qsim, Qobs) ** 2 + return correlation_coeff(qsim, qobs) ** 2 -def rmse(Qsim, Qobs): +def rmse(qsim: np.array, qobs: np.array): """ - root mean square error metric + Root mean square error metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - rmse: scalar float representing the Root Mean Square Error. Units are the - same as the timeseries data (ex. m3/s). It can take zero or positive - values. + float + Root Mean Square Error. Units are the same as the timeseries data + (ex. m3/s). It can take zero or positive values. + Notes + ----- The rmse should be MINIMIZED. """ - return np.sqrt(np.mean((Qobs - Qsim) ** 2)) + return np.sqrt(np.mean((qobs - qsim) ** 2)) -def rrmse(Qsim, Qobs): +def rrmse(qsim: np.array, qobs: np.array): """ - relative root mean square error (ratio of rmse to mean) metric + Relative root mean square error (ratio of rmse to mean) metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - rrmse: scalar float representing the ratio of the RMSE to the mean of the - observations. It allows scaling RMSE values to compare results between - time series of different magnitudes (ex. flows from small and large - watersheds). Also known as the CVRMSE. - + float + Ratio of the RMSE to the mean of the observations. It allows scaling + RMSE values to compare results between time series of different + magnitudes (ex. flows from small and large watersheds). Also known as + the CVRMSE. + + Notes + ----- The rrmse should be MINIMIZED. """ - return rmse(Qsim, Qobs) / np.mean(Qobs) + return rmse(qsim, qobs) / np.mean(qobs) -def rsr(Qsim, Qobs): +def rsr(qsim: np.array, qobs: np.array): """ - ratio of root mean square error to standard deviation metric + Ratio of root mean square error to standard deviation metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - rsr: scalar float representing the Root Mean Square Error (RMSE) divided by - the standard deviation of the observations. Also known as the "Ratio of the - Root Mean Square Error to the Standard Deviation of Observations". + float + Root Mean Square Error (RMSE) divided by the standard deviation of the + observations. Also known as the "Ratio of the Root Mean Square Error to + the Standard Deviation of Observations". + Notes + ----- The rsr should be MINIMIZED. """ - return rmse(Qobs, Qsim) / np.std(Qobs) + return rmse(qobs, qsim) / np.std(qobs) -def volume_error(Qsim, Qobs): +def volume_error(qsim: np.array, qobs: np.array): """ - volume error metric + Volume error metric. Parameters ---------- - Qsim : Simulated streamflow vector (numpy array) - Qobs : Observed streamflow vector (numpy array) + qsim : array_like + Simulated streamflow vector. + qobs : array_like + Observed streamflow vector. Returns ------- - volume_error: scalar float representing the total error in terms of volume - over the entire period. Expressed in terms of the same units as input data, - so for flow rates it is important to multiply by the duration of the time- - step to obtain actual volumes. - + float + Total error in terms of volume over the entire period. Expressed in + terms of the same units as input data, so for flow rates it is + important to multiply by the duration of the time-step to obtain actual + volumes. + + Notes + ----- The volume_error should be MINIMIZED. """ - return np.sum(Qsim - Qobs) / np.sum(Qobs) + return np.sum(qsim - qobs) / np.sum(qobs) """ From aec3460ea6ea87ea0144478840b2b3c07299c31f Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Mon, 15 Jan 2024 21:22:32 -0500 Subject: [PATCH 24/29] Make functions local with underscore --- tests/test_calibration.py | 4 +- tests/test_objective_functions.py | 10 +-- xhydro/modelling/calibration.py | 8 +- xhydro/modelling/hydrological_modelling.py | 4 +- xhydro/modelling/obj_funcs.py | 88 +++++++++++----------- 5 files changed, 57 insertions(+), 57 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index e0e16ab7..6aa2a351 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -5,7 +5,7 @@ import pytest from xhydro.modelling.calibration import perform_calibration -from xhydro.modelling.hydrological_modelling import dummy_model +from xhydro.modelling.hydrological_modelling import _dummy_model from xhydro.modelling.obj_funcs import get_objective_function, transform_flows @@ -49,7 +49,7 @@ def test_spotpy_calibration(): # Test dummy model response model_config["parameters"] = [5, 5, 5] - qsim = dummy_model(model_config) + qsim = _dummy_model(model_config) assert qsim["qsim"].values[3] == 3500.00 # Also test to ensure SCEUA and take_minimize is required. diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py index 56ffd1f4..8cad8512 100644 --- a/tests/test_objective_functions.py +++ b/tests/test_objective_functions.py @@ -3,8 +3,8 @@ import pytest from xhydro.modelling.obj_funcs import ( + _get_objfun_minimize_or_maximize, get_objective_function, - get_objfun_minimize_or_maximize, ) @@ -128,7 +128,7 @@ def test_maximizer_objfun_failure_modes_bias(): Use of bias objfun which is unbounded """ with pytest.raises(ValueError) as pytest_wrapped_e: - _ = get_objfun_minimize_or_maximize(obj_func="bias") + _ = _get_objfun_minimize_or_maximize(obj_func="bias") assert pytest_wrapped_e.type == ValueError @@ -137,7 +137,7 @@ def test_maximizer_objfun_failure_modes_pbias(): Use of pbias objfun which is unbounded """ with pytest.raises(ValueError) as pytest_wrapped_e: - _ = get_objfun_minimize_or_maximize(obj_func="pbias") + _ = _get_objfun_minimize_or_maximize(obj_func="pbias") assert pytest_wrapped_e.type == ValueError @@ -146,7 +146,7 @@ def test_maximizer_objfun_failure_modes_volume_error(): Use of volume_error objfun which is unbounded """ with pytest.raises(ValueError) as pytest_wrapped_e: - _ = get_objfun_minimize_or_maximize(obj_func="volume_error") + _ = _get_objfun_minimize_or_maximize(obj_func="volume_error") assert pytest_wrapped_e.type == ValueError @@ -155,5 +155,5 @@ def test_maximizer_objfun_failure_modes_unknown_metric(): Use of unknown objfun """ with pytest.raises(NotImplementedError) as pytest_wrapped_e: - _ = get_objfun_minimize_or_maximize(obj_func="unknown_of") + _ = _get_objfun_minimize_or_maximize(obj_func="unknown_of") assert pytest_wrapped_e.type == NotImplementedError diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index 312aa8b3..f8d5a49b 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -66,9 +66,9 @@ from xhydro.modelling.hydrological_modelling import run_hydrological_model from xhydro.modelling.obj_funcs import ( + _get_objfun_minimize_or_maximize, + _get_optimizer_minimize_or_maximize, get_objective_function, - get_objfun_minimize_or_maximize, - get_optimizer_minimize_or_maximize, ) __all__ = ["perform_calibration"] @@ -374,8 +374,8 @@ def perform_calibration( # to ensure that the algorithm is optimizing in the correct direction # (maximizing or minimizing). This code determines the required direction # for the objective function and the working direction of the algorithm. - of_maximize = get_objfun_minimize_or_maximize(obj_func) - algo_maximize = get_optimizer_minimize_or_maximize(algorithm) + of_maximize = _get_objfun_minimize_or_maximize(obj_func) + algo_maximize = _get_optimizer_minimize_or_maximize(algorithm) # They are not working in the same direction. Take the negative of the OF. if of_maximize != algo_maximize: diff --git a/xhydro/modelling/hydrological_modelling.py b/xhydro/modelling/hydrological_modelling.py index 4aefad5e..be9af7d0 100644 --- a/xhydro/modelling/hydrological_modelling.py +++ b/xhydro/modelling/hydrological_modelling.py @@ -61,7 +61,7 @@ def run_hydrological_model(model_config: dict): Simulated streamflow from the model, in xarray Dataset format. """ if model_config["model_name"] == "Dummy": - qsim = dummy_model(model_config) + qsim = _dummy_model(model_config) elif model_config["model_name"] == "ADD_OTHER_HERE": # ADD OTHER MODELS HERE @@ -74,7 +74,7 @@ def run_hydrological_model(model_config: dict): return qsim -def dummy_model(model_config: dict): +def _dummy_model(model_config: dict): """Dummy model. Dummy model to show the implementation we should be aiming for. Each model diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index 989528fe..8f03f823 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -126,24 +126,24 @@ def get_objective_function( """ # List of available objective functions obj_func_dict = { - "abs_bias": abs_bias, - "abs_pbias": abs_pbias, - "abs_volume_error": abs_volume_error, - "agreement_index": agreement_index, - "bias": bias, - "correlation_coeff": correlation_coeff, - "kge": kge, - "kge_mod": kge_mod, - "mae": mae, - "mare": mare, - "mse": mse, - "nse": nse, - "pbias": pbias, - "r2": r2, - "rmse": rmse, - "rrmse": rrmse, - "rsr": rsr, - "volume_error": volume_error, + "abs_bias": _abs_bias, + "abs_pbias": _abs_pbias, + "abs_volume_error": _abs_volume_error, + "agreement_index": _agreement_index, + "bias": _bias, + "correlation_coeff": _correlation_coeff, + "kge": _kge, + "kge_mod": _kge_mod, + "mae": _mae, + "mare": _mare, + "mse": _mse, + "nse": _nse, + "pbias": _pbias, + "r2": _r2, + "rmse": _rmse, + "rrmse": _rrmse, + "rsr": _rsr, + "volume_error": _volume_error, } # If we got a dataset, change to np.array @@ -204,7 +204,7 @@ def get_objective_function( return obj_fun_val -def get_objfun_minimize_or_maximize(obj_func: str): +def _get_objfun_minimize_or_maximize(obj_func: str): """Check whether the objective function needs to be maximized or minimized. Returns a boolean value, where True means it should be maximized, and False @@ -258,7 +258,7 @@ def get_objfun_minimize_or_maximize(obj_func: str): return maximize -def get_optimizer_minimize_or_maximize(algorithm: str): +def _get_optimizer_minimize_or_maximize(algorithm: str): """Find the direction in which the optimizer searches. Some optimizers try to maximize the objective function value, and others @@ -364,7 +364,7 @@ def transform_flows( """ -def abs_bias(qsim: np.array, qobs: np.array): +def _abs_bias(qsim: np.array, qobs: np.array): """ Absolute bias metric. @@ -387,10 +387,10 @@ def abs_bias(qsim: np.array, qobs: np.array): ----- The abs_bias should be MINIMIZED. """ - return np.abs(bias(qsim, qobs)) + return np.abs(_bias(qsim, qobs)) -def abs_pbias(qsim: np.array, qobs: np.array): +def _abs_pbias(qsim: np.array, qobs: np.array): """ Absolute pbias metric. @@ -413,10 +413,10 @@ def abs_pbias(qsim: np.array, qobs: np.array): ----- The abs_pbias should be MINIMIZED. """ - return np.abs(pbias(qsim, qobs)) + return np.abs(_pbias(qsim, qobs)) -def abs_volume_error(qsim: np.array, qobs: np.array): +def _abs_volume_error(qsim: np.array, qobs: np.array): """ Absolute value of the volume error metric. @@ -440,10 +440,10 @@ def abs_volume_error(qsim: np.array, qobs: np.array): ----- The abs_volume_error should be MINIMIZED. """ - return np.abs(volume_error(qsim, qobs)) + return np.abs(_volume_error(qsim, qobs)) -def agreement_index(qsim: np.array, qobs: np.array): +def _agreement_index(qsim: np.array, qobs: np.array): """ Index of agreement metric. @@ -471,7 +471,7 @@ def agreement_index(qsim: np.array, qobs: np.array): return 1 - (a / c) -def bias(qsim: np.array, qobs: np.array): +def _bias(qsim: np.array, qobs: np.array): """ The bias metric. @@ -499,7 +499,7 @@ def bias(qsim: np.array, qobs: np.array): return np.mean(qsim - qobs) -def correlation_coeff(qsim: np.array, qobs: np.array): +def _correlation_coeff(qsim: np.array, qobs: np.array): """ Correlation coefficient metric. @@ -522,7 +522,7 @@ def correlation_coeff(qsim: np.array, qobs: np.array): return np.corrcoef(qobs, qsim)[0, 1] -def kge(qsim: np.array, qobs: np.array): +def _kge(qsim: np.array, qobs: np.array): """ Kling-Gupta efficiency metric (2009 version). @@ -560,7 +560,7 @@ def kge(qsim: np.array, qobs: np.array): return kge -def kge_mod(qsim: np.array, qobs: np.array): +def _kge_mod(qsim: np.array, qobs: np.array): """ Kling-Gupta efficiency metric (2012 version). @@ -598,7 +598,7 @@ def kge_mod(qsim: np.array, qobs: np.array): return kge_mod -def mae(qsim: np.array, qobs: np.array): +def _mae(qsim: np.array, qobs: np.array): """ Mean absolute error metric. @@ -622,7 +622,7 @@ def mae(qsim: np.array, qobs: np.array): return np.mean(np.abs(qsim - qobs)) -def mare(qsim: np.array, qobs: np.array): +def _mare(qsim: np.array, qobs: np.array): """ Mean absolute relative error metric. @@ -646,7 +646,7 @@ def mare(qsim: np.array, qobs: np.array): return np.sum(np.abs(qobs - qsim)) / np.sum(qobs) -def mse(qsim: np.array, qobs: np.array): +def _mse(qsim: np.array, qobs: np.array): """ Mean square error metric. @@ -671,7 +671,7 @@ def mse(qsim: np.array, qobs: np.array): return np.mean((qobs - qsim) ** 2) -def nse(qsim: np.array, qobs: np.array): +def _nse(qsim: np.array, qobs: np.array): """ Nash-Sutcliffe efficiency metric. @@ -698,7 +698,7 @@ def nse(qsim: np.array, qobs: np.array): return 1 - (num / den) -def pbias(qsim: np.array, qobs: np.array): +def _pbias(qsim: np.array, qobs: np.array): """ Percent bias metric. @@ -726,7 +726,7 @@ def pbias(qsim: np.array, qobs: np.array): return (np.sum(qsim - qobs) / np.sum(qobs)) * 100 -def r2(qsim: np.array, qobs: np.array): +def _r2(qsim: np.array, qobs: np.array): """ The r-squred metric. @@ -747,10 +747,10 @@ def r2(qsim: np.array, qobs: np.array): ----- The r2 should be MAXIMIZED. """ - return correlation_coeff(qsim, qobs) ** 2 + return _correlation_coeff(qsim, qobs) ** 2 -def rmse(qsim: np.array, qobs: np.array): +def _rmse(qsim: np.array, qobs: np.array): """ Root mean square error metric. @@ -774,7 +774,7 @@ def rmse(qsim: np.array, qobs: np.array): return np.sqrt(np.mean((qobs - qsim) ** 2)) -def rrmse(qsim: np.array, qobs: np.array): +def _rrmse(qsim: np.array, qobs: np.array): """ Relative root mean square error (ratio of rmse to mean) metric. @@ -797,10 +797,10 @@ def rrmse(qsim: np.array, qobs: np.array): ----- The rrmse should be MINIMIZED. """ - return rmse(qsim, qobs) / np.mean(qobs) + return _rmse(qsim, qobs) / np.mean(qobs) -def rsr(qsim: np.array, qobs: np.array): +def _rsr(qsim: np.array, qobs: np.array): """ Ratio of root mean square error to standard deviation metric. @@ -822,10 +822,10 @@ def rsr(qsim: np.array, qobs: np.array): ----- The rsr should be MINIMIZED. """ - return rmse(qobs, qsim) / np.std(qobs) + return _rmse(qobs, qsim) / np.std(qobs) -def volume_error(qsim: np.array, qobs: np.array): +def _volume_error(qsim: np.array, qobs: np.array): """ Volume error metric. From 81359796274031f6f6f2e53eae3f5e3a627a9e1c Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Tue, 23 Jan 2024 21:37:53 -0500 Subject: [PATCH 25/29] added model_config helper function and tests --- tests/test_hydrological_modelling.py | 21 +++++++++++++- xhydro/modelling/hydrological_modelling.py | 32 ++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py index 1840d5f3..2bccb4a5 100644 --- a/tests/test_hydrological_modelling.py +++ b/tests/test_hydrological_modelling.py @@ -2,7 +2,10 @@ import numpy as np import pytest -from xhydro.modelling.hydrological_modelling import run_hydrological_model +from xhydro.modelling.hydrological_modelling import ( + get_hydrological_model_inputs, + run_hydrological_model, +) def test_hydrological_modelling(): @@ -31,3 +34,19 @@ def test_import_unknown_model(): model_config = {"model_name": "fake_model"} _ = run_hydrological_model(model_config) assert pytest_wrapped_e.type == NotImplementedError + + +def test_get_unknown_model_requirements(): + """Test for required inputs for models with unknown name""" + with pytest.raises(NotImplementedError) as pytest_wrapped_e: + model_name = "fake_model" + _ = get_hydrological_model_inputs(model_name) + assert pytest_wrapped_e.type == NotImplementedError + + +def test_get_model_requirements(): + """Test for required inputs for models""" + model_name = "Dummy" + required_config = get_hydrological_model_inputs(model_name) + print(required_config.keys()) + assert len(required_config.keys()) == 4 diff --git a/xhydro/modelling/hydrological_modelling.py b/xhydro/modelling/hydrological_modelling.py index be9af7d0..a4baab0b 100644 --- a/xhydro/modelling/hydrological_modelling.py +++ b/xhydro/modelling/hydrological_modelling.py @@ -74,6 +74,38 @@ def run_hydrological_model(model_config: dict): return qsim +def get_hydrological_model_inputs(model_name: str): + """Required hydrological model inputs for model_config objects. + + Parameters + ---------- + model_name : str + Model name that must be one of the models in the list of possible + models. + + Returns + ------- + dict + Elements that must be found in the model_config object. + """ + if model_name == "Dummy": + required_config = dict( + precip="Daily precipitation in mm.", + temperature="Daily average air temperature in °C", + drainage_area="Drainage area of the catchment", + parameters="Model parameters, length 3", + ) + + elif model_name == "ADD_OTHER_HERE": + # ADD OTHER MODELS HERE + required_config = {} + + else: + raise NotImplementedError(f"The model '{model_name}' is not recognized.") + + return required_config + + def _dummy_model(model_config: dict): """Dummy model. From ad4446fe3c4747dfe114d7811548000cfdccf01d Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Sat, 27 Jan 2024 14:15:15 -0500 Subject: [PATCH 26/29] fix issues from review --- environment-dev.yml | 2 +- environment.yml | 2 +- pyproject.toml | 2 +- xhydro/modelling/calibration.py | 60 ++++++++++++++++++++++++++------- xhydro/modelling/obj_funcs.py | 3 +- 5 files changed, 53 insertions(+), 16 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 1284f1dc..961d3085 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -11,7 +11,7 @@ dependencies: - spotpy - statsmodels - xarray - - xclim >=0.45.0 + - xclim >=0.47.0 - xscen >=0.7.1 - pip - pip: diff --git a/environment.yml b/environment.yml index 3b115ddc..72a0ca87 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - spotpy - statsmodels - xarray - - xclim >=0.45.0 + - xclim >=0.47.0 - xscen >=0.7.1 - pip - pip: diff --git a/pyproject.toml b/pyproject.toml index 00748e98..963a7669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "spotpy", "statsmodels", "xarray", - "xclim>=0.45.0", + "xclim>=0.47.0", "xdatasets>=0.3.1", "xscen>=0.7.1" ] diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index f8d5a49b..6b3feec5 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -57,7 +57,11 @@ Any comments are welcome! """ +from copy import deepcopy + # Import packages +from typing import Optional + import numpy as np import spotpy import xarray as xr @@ -135,7 +139,7 @@ def __init__( take_negative: bool = False, mask: np.array = None, transform: str = None, - epsilon: float = None, + epsilon: float = 0.01, ): """ Initialize the SpotSetup object. @@ -181,11 +185,11 @@ def __init__( algorithm : str The optimization algorithm to use. Currently, "DDS" and "SCEUA" are available, but more can be easily added. take_negative : bool - Inidactor to take the negative of the objective function value in optimization to ensure convergence + Wether to take the negative of the objective function value in optimization to ensure convergence in the right direction. - mask : np.array + mask : np.array, optional A vector indicating which values to preserve/remove from the objective function computation. 0=remove, 1=preserve. - transform : str + transform : str, optional The method to transform streamflow prior to computing the objective function. Can be one of: Square root ('sqrt'), inverse ('inv'), or logarithmic ('log') transformation. epsilon : float @@ -199,7 +203,7 @@ def __init__( """ # Gather the model_config dictionary and obj_func string, and other # optional arguments. - self.model_config = model_config + self.model_config = deepcopy(model_config) self.obj_func = obj_func self.mask = mask self.transform = transform @@ -305,9 +309,10 @@ def perform_calibration( bounds_low: np.array, evaluations: int, algorithm: str = "DDS", - mask: np.array = None, - transform: str = None, + mask: Optional[np.array] = None, + transform: Optional[str] = None, epsilon: float = 0.01, + sampler_kwargs: Optional[dict] = None, ): """Perform calibration using spotpy. @@ -351,15 +356,21 @@ def perform_calibration( Maximum number of model evaluations (calibration budget) to perform before stopping the calibration process. algorithm : str The optimization algorithm to use. Currently, "DDS" and "SCEUA" are available, but more can be easily added. - mask : np.array + mask : np.array, optional A vector indicating which values to preserve/remove from the objective function computation. 0=remove, 1=preserve. - transform : str + transform : str, optional The method to transform streamflow prior to computing the objective function. Can be one of: Square root ('sqrt'), inverse ('inv'), or logarithmic ('log') transformation. epsilon : scalar float Used to add a small delta to observations for log and inverse transforms, to eliminate errors caused by zero flow days (1/0 and log(0)). The added perturbation is equal to the mean observed streamflow times this value of epsilon. + sampler_kwargs : dict + Contains the keywords and hyperparameter values for the optimization algorithm. + Keywords depend on the algorithm choice. Currently, SCEUA and DDS are supported with + the following default values: + - SCEUA: dict(ngs=7, kstop=3, peps=0.1, pcento=0.1) + - DDS: dict(trials=1) Returns ------- @@ -370,7 +381,7 @@ def perform_calibration( bestobjf : float The best objective function value. """ - # Get objective function and algo optimal convregence direction. Necessary + # Get objective function and algo optimal convergence direction. Necessary # to ensure that the algorithm is optimizing in the correct direction # (maximizing or minimizing). This code determines the required direction # for the objective function and the working direction of the algorithm. @@ -401,13 +412,38 @@ def perform_calibration( sampler = spotpy.algorithms.dds( spotpy_setup, dbname="DDS_optim", dbformat="ram", save_sim=False ) - sampler.sample(evaluations, trials=1) + + # If the user provided a custom sampler hyperparameter set. + if sampler_kwargs is not None: + if "trials" in sampler_kwargs: + sampler.sample(evaluations, *sampler_kwargs) + else: + raise ValueError( + 'DDS optimizer hyperparameter keyword "trials" not found in sampler_kwargs.' + ) + + # If not, use the default. + else: + sampler.sample(evaluations, trials=1) elif algorithm == "SCEUA": sampler = spotpy.algorithms.sceua( spotpy_setup, dbname="SCEUA_optim", dbformat="ram", save_sim=False ) - sampler.sample(evaluations, ngs=7, kstop=3, peps=0.1, pcento=0.1) + + # If the user provided a custom sampler hyperparameter set. + if sampler_kwargs is not None: + if all( + item in sampler_kwargs for item in ["ngs", "kstop", "peps", "pcento"] + ): + sampler.sample(evaluations, *sampler_kwargs) + else: + raise ValueError( + 'SCEUA optimizer hyperparameter keywords "ngs", "kstop", "peps" or " pcento" not found in sampler_kwargs.' + ) + + else: + sampler.sample(evaluations, ngs=7, kstop=3, peps=0.1, pcento=0.1) # Gather optimization results results = sampler.getdata() diff --git a/xhydro/modelling/obj_funcs.py b/xhydro/modelling/obj_funcs.py index 8f03f823..4777621e 100644 --- a/xhydro/modelling/obj_funcs.py +++ b/xhydro/modelling/obj_funcs.py @@ -147,6 +147,7 @@ def get_objective_function( } # If we got a dataset, change to np.array + # FIXME: Implement a more flexible method if isinstance(qsim, xr.Dataset): qsim = qsim["qsim"] @@ -165,7 +166,7 @@ def get_objective_function( # All zero or one? if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0: - raise ValueError("Mask contains values other 0 or 1. Please modify.") + raise ValueError("Mask contains values other than 0 or 1. Please modify.") # Check that the objective function is in the list of available methods if obj_func not in obj_func_dict: From 329753a3310f5ac7998a1ad902fa8b9bc382e58d Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Sun, 28 Jan 2024 19:38:55 -0500 Subject: [PATCH 27/29] fixed dict passing args to spotpy --- tests/test_calibration.py | 1 + xhydro/modelling/calibration.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 6aa2a351..5cf8ed46 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -32,6 +32,7 @@ def test_spotpy_calibration(): evaluations=1000, algorithm="DDS", mask=mask, + sampler_kwargs=dict(trials=1), ) # Test that the results have the same size as expected (number of parameters) diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index 6b3feec5..88e8addc 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -416,7 +416,7 @@ def perform_calibration( # If the user provided a custom sampler hyperparameter set. if sampler_kwargs is not None: if "trials" in sampler_kwargs: - sampler.sample(evaluations, *sampler_kwargs) + sampler.sample(evaluations, **sampler_kwargs) else: raise ValueError( 'DDS optimizer hyperparameter keyword "trials" not found in sampler_kwargs.' @@ -436,7 +436,7 @@ def perform_calibration( if all( item in sampler_kwargs for item in ["ngs", "kstop", "peps", "pcento"] ): - sampler.sample(evaluations, *sampler_kwargs) + sampler.sample(evaluations, **sampler_kwargs) else: raise ValueError( 'SCEUA optimizer hyperparameter keywords "ngs", "kstop", "peps" or " pcento" not found in sampler_kwargs.' From b5d2c673442e090793a3c81f97b23a77297dc8c2 Mon Sep 17 00:00:00 2001 From: richardarsenault Date: Sun, 28 Jan 2024 20:05:30 -0500 Subject: [PATCH 28/29] fixed and streamlined code relative to sampler_kwargs --- xhydro/modelling/calibration.py | 46 +++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/xhydro/modelling/calibration.py b/xhydro/modelling/calibration.py index 88e8addc..2462fb1d 100644 --- a/xhydro/modelling/calibration.py +++ b/xhydro/modelling/calibration.py @@ -408,42 +408,43 @@ def perform_calibration( # Select an optimization algorithm and parameterize it, then run the # optimization process. + sampler_kwargs = deepcopy(sampler_kwargs) or None + if sampler_kwargs is None: + sampler_kwargs = {} + if algorithm == "DDS": sampler = spotpy.algorithms.dds( spotpy_setup, dbname="DDS_optim", dbformat="ram", save_sim=False ) - # If the user provided a custom sampler hyperparameter set. - if sampler_kwargs is not None: - if "trials" in sampler_kwargs: - sampler.sample(evaluations, **sampler_kwargs) - else: - raise ValueError( - 'DDS optimizer hyperparameter keyword "trials" not found in sampler_kwargs.' - ) - - # If not, use the default. + # Get the sampler hyperparameters, either default or user-provided. + defaults = {"trials": 1} + sampler_kwargs = defaults | sampler_kwargs + + # Ensure there is only 1 hyperparameter passed by the user, if + # applicable + if len(sampler_kwargs) == 1: + sampler.sample(evaluations, **sampler_kwargs) else: - sampler.sample(evaluations, trials=1) + raise ValueError( + "sampler_kwargs should only contain the keyword 'trials' when using DDS." + ) elif algorithm == "SCEUA": sampler = spotpy.algorithms.sceua( spotpy_setup, dbname="SCEUA_optim", dbformat="ram", save_sim=False ) + # Get the sampler hyperparameters, either default or user-provided. + defaults = {"ngs": 7, "kstop": 3, "peps": 0.1, "pcento": 0.1} + sampler_kwargs = defaults | sampler_kwargs # If the user provided a custom sampler hyperparameter set. - if sampler_kwargs is not None: - if all( - item in sampler_kwargs for item in ["ngs", "kstop", "peps", "pcento"] - ): - sampler.sample(evaluations, **sampler_kwargs) - else: - raise ValueError( - 'SCEUA optimizer hyperparameter keywords "ngs", "kstop", "peps" or " pcento" not found in sampler_kwargs.' - ) - + if len(sampler_kwargs) == 4: + sampler.sample(evaluations, **sampler_kwargs) else: - sampler.sample(evaluations, ngs=7, kstop=3, peps=0.1, pcento=0.1) + raise ValueError( + "sampler_kwargs should only contain the keywords [ngs, kstop, peps, pcento] when using SCEUA." + ) # Gather optimization results results = sampler.getdata() @@ -466,6 +467,7 @@ def perform_calibration( bestobjf = bestobjf * -1 # Update the parameter set to put the best parameters in model_config... + model_config = deepcopy(model_config) model_config.update({"parameters": best_parameters}) # ... which can be used to run the hydrological model and get the best Qsim. From ed6632d219f9164070c56ffe5bbbe0c38e7935f1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 15:44:29 +0000 Subject: [PATCH 29/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_hydrological_modelling.py | 1 + tests/test_objective_functions.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/test_hydrological_modelling.py b/tests/test_hydrological_modelling.py index 2bccb4a5..d23a850c 100644 --- a/tests/test_hydrological_modelling.py +++ b/tests/test_hydrological_modelling.py @@ -1,4 +1,5 @@ """Test suite for hydrological modelling in hydrological_modelling.py""" + import numpy as np import pytest diff --git a/tests/test_objective_functions.py b/tests/test_objective_functions.py index 8cad8512..591dddcf 100644 --- a/tests/test_objective_functions.py +++ b/tests/test_objective_functions.py @@ -1,4 +1,5 @@ """Test suite for the objective functions in obj_funcs.py.""" + import numpy as np import pytest