# Altering the objective function and comparing calibrations


Workbook layout:
   1. Read historical benchmarking streamflow data from csv and get shapefile information
   2. Calibrate using the standard Nash-Sutcliffe Efficiency (NSE) objective function, then calibrate again using a newly added Kling-Gupta Efficiency (KGE) objective function

#   1. Read shapefile and historical data from csv

In [None]:
# get catchments shapes and IDs used for benchmarking from shapefile
import os

import pandas as pd

from awrams.benchmarking.utils import read_id_csv
from awrams.utils import config_manager
from awrams.utils.io.data_mapping import SplitFileManager
from awrams.utils.gis import ShapefileDB

# Resolve the base data directory from the active AWRAMS system profile.
system_profile = config_manager.get_system_profile()
system_settings = system_profile.get_settings()

base_data_path = system_settings['DATA_PATHS']['BASE_DATA']
catchment_shapefile = os.path.join(base_data_path, 'spatial/shapefiles/Final_list_all_attributes.shp')

# Open the catchment shapefile and fetch its records (presumably a pandas DataFrame - confirm).
catchments = ShapefileDB(catchment_shapefile)
records = catchments.get_records_df()
# A bare expression in the middle of a cell is never displayed, so print the preview explicitly.
print(records['StationID'].head())

# Build the remaining paths with os.path.join, consistent with the shapefile path above.
catchment_csv = os.path.join(base_data_path, 'training/benchmarking/catchment_ids.csv')
id_list = read_id_csv(catchment_csv)

# Observed runoff; the first csv column is used as the index.
obs_csv = os.path.join(base_data_path, 'observations/runoff/awrams_v5_cal_qobs.csv')
qobs = pd.read_csv(obs_csv, index_col=0)

# get site ids and start dates etc for comparison
ids = records['StationID']  # ids as used in awra-l system (often concatenated wrsc)
wrsids = records['WrscID']  # official water resource station catalogue station numbers
areas = records['AlbersArea']  # catchment areas in km^2
findate = records['DateQfinish']  # finish date
startdate = records['DateOpen']  # start date

# 2. Demonstrate use of altered objective function
## Calibrate AWRA-L to a few catchments using NSE and then the Kling-Gupta Efficiency


In [None]:
from os.path import join

import h5py
from matplotlib import pyplot as plt
import pandas as pd

from awrams.calibration import cluster, support
from awrams.calibration.launch_calibration import run_from_pickle
from awrams.calibration.objectives import test_objectives as tobj
from awrams.calibration.optimizers import sce
from awrams.models.awral.model import AWRALModel
from awrams.utils import extents, gis
from awrams.utils import datetools as dt
from awrams.utils.nodegraph import nodes, graph

# Fetch the 'v6_default' profile of the AWRA-L model and its settings
model_profile = config_manager.get_model_profile('awral', 'v6_default')
model_settings = model_profile.get_settings()

# Default extent; passed below as the parent extent of every catchment extent
def_extent = extents.get_default_extent()
# Station IDs of the catchments to calibrate against
# (original note: test data sites provided with the git copy)
cal_catchments = ['105001', '145003']

input_map = model_profile.get_input_mapping()

# Build a dict holding one extent per calibration catchment.
# A larger alternative set of catchments from the original notebook:
#cal_catchments=['113004','111101', '109001', '108003', '112102', '107002','105001', '116008']
# If only limited observations are available, the original suggests:
#observations = dict(qtot = '../../test_data/calibration/q_obs.csv' )
cal_dict = {}
observations = {'qtot': obs_csv}
for catchment in cal_catchments:
    # The shapefile lookup uses the ID left-padded with zeros to 6 characters
    cal_dict[catchment] = catchments.get_extent_by_field(
        'StationID',
        catchment.zfill(6),
        parent_extent=def_extent,
    )

# The model run period and the objective evaluation period are the same here
run_period = dt.dates('2009 - 2010')
eval_period = dt.dates('2009 - 2010')
print('Calibrating over the following set of sites for 2009-2010',cal_dict)

# Optimizer configuration: sce.ShuffledOptimizer driven by sce.CCEvolver
evolver_spec = support.EvolverSpec(
    sce.CCEvolver,
    evolver_run_args={'n_offspring': 1, 'n_evolutions': 5, 'elitism': 2.0},
)
optimizer_spec = support.OptimizerSpec(
    sce.ShuffledOptimizer,
    evolver_spec=evolver_spec,
    n_complexes=5,  # original note: n_complex 14
    max_nsni=500,
    min_complexes=1,
    max_eval=2000,
)

# Objective configuration: the local class is wrapped in an ObjectiveFunctionSpec
# (from awrams.calibration.support); the global class is passed as-is
local_objfspec = support.ObjectiveFunctionSpec(tobj.TestLocalSingle)
global_objfspec = tobj.TestGlobalSingle
objective_spec = support.ObjectiveSpec(
    global_objfspec,
    local_objfspec,
    observations,
    eval_period,
)

# Build the specification dictionary from the settings assembled above
awral = model_profile.get_model(model_settings)
node_mapping = input_map
model = awral
cal_spec = {
    'optimizer_spec': optimizer_spec,
    'objective_spec': objective_spec,
    'extent_map': cal_dict,
    'run_period': run_period,
    'model': model,
    'node_mapping': node_mapping,
    'logfile': './calres.h5',
}

# Pickle the spec for the given core/node counts; a later cell runs it
ncores, nnodes = 4, 1
cluster.build_pickle_from_spec(cal_spec, ncores, nnodes, 'test_cal.pkl')


### This step can be skipped, as it takes a little while: the saved outputs can be read in instead
#### Results are saved in './calres.h5'

In [None]:
# Run the calibration from the pickled spec ('./test_cal.pkl') and report how long it took.
# run_from_pickle is already imported from awrams.calibration.launch_calibration in the setup cell.
import time

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
print("Starting calibration........")
cal = run_from_pickle('./test_cal.pkl')
end = time.perf_counter()
print('Calibration complete.......')
print('Time elapsed for calibration',end - start)

In [None]:
# Open the calibration log at './calres.h5' (the 'logfile' path set in cal_spec)
cr = support.CalibrationResults('./calres.h5')
# Last expression in the cell, so the notebook displays whatever get_best_paramset() returns
cr.get_best_paramset()

### Alter the objective function

#### View objective function file [calibration/objectives/test_objectives.py]

[calibration/objectives/test_objectives.py]: ../../../edit/calibration/awrams/calibration/objectives/test_objectives.py                    



In [None]:
# View current objective function...
# Code below is from within test_objectives.py
from awrams.calibration.support import input_group

class TestLocalSingle:
    """Local objective that scores modelled 'qtot' against observations with NSE.

    NOTE(review): `np` and `NSE` are not imported in this notebook cell; they
    presumably come from the module scope of test_objectives.py, which this
    cell is copied from. The cell still defines the class because both names
    are only looked up when the methods run.
    """

    input_schema = input_group(['qtot'])
    output_schema = ['qtot_nse']

    def __init__(self,obs,eval_period,min_valid=15):
        """Precompute the valid-observation index and the NSE evaluator.

        obs         -- mapping indexed by variable name; only obs['qtot'] is read
        eval_period -- only its length is used, to build the no-NaN slice
        min_valid   -- accepted but not used anywhere in this class
        """

        self.valid_idx = {}
        self.nse = {}
        self.flow_variable = 'qtot'
        # Single-item loop: only the flow variable is processed
        for k in [self.flow_variable]:

            data = obs[k]

            if np.isnan(data).any():
                # Keep only the positions where an observation is present
                nan_mask = np.isnan(data)
                self.valid_idx[k] = np.where(nan_mask == False)
            else:
                # No gaps: use the first len(eval_period) entries
                self.valid_idx[k] = slice(0,len(eval_period))

            # NSE is constructed once with the valid observations and later
            # called with modelled values (see evaluate)
            self.nse[k] = NSE(data[self.valid_idx[k]])

    def evaluate(self,modelled):
        """Return the NSE evaluator's result for modelled 'qtot' as a numpy array.

        The modelled series is indexed with the same valid_idx that was applied
        to the observations in __init__.
        """
        qtot_nse = self.nse[self.flow_variable](modelled[self.flow_variable][self.valid_idx[self.flow_variable]])
        return np.array(qtot_nse)

class TestGlobalSingle:
    """Global objective: one minus the mean of the local 'qtot_nse' results."""

    output_schema = ['objf_val']
    objective_key = 'objf_val'

    def evaluate(self,l_results):
        """Return {'objf_val': 1.0 - mean(l_results['qtot_nse'])}.

        A mean NSE of 1 therefore yields 0, and the value grows as the mean
        NSE falls.
        """
        objf_val = 1.0 - np.mean(l_results['qtot_nse'])
        return dict(objf_val = objf_val)

## Define the Kling-Gupta efficiency

In [None]:
import os
# Read the AWRAMS base path from the environment; raises KeyError if AWRAMS_BASE_PATH is unset.
# The next cell interpolates this variable into its %%writefile target path.
awrams_base_path = os.environ['AWRAMS_BASE_PATH']
# Last expression in the cell, so the notebook displays the path
awrams_base_path

In [None]:
%%writefile $awrams_base_path/code/user/awrams_user/objectives/kge_objectives.py
import numpy as np

from awrams.calibration.support import input_group


class KGE:
    '''
    Precomputed Kling-Gupta Efficiency evaluator.

    Construct once with the observed series, then call the instance with a
    modelled series to get

        KGE = 1 - sqrt((r - 1)**2 + (alpha - 1)**2 + (beta - 1)**2)

    where r is the correlation between observed and modelled, alpha is the
    ratio of their standard deviations and beta is the ratio of their sums
    (equal to the ratio of means when both series have the same length).
    '''
    def __init__(self, obs):
        '''Store `obs` and cache its sum and standard deviation.'''
        self.obs = obs
        # Computing these values during initialisation means we don't have to do so at every iteration
        self.obs_sum = np.sum(self.obs)
        self.obs_std = np.std(self.obs)

    def __call__(self, modelled):
        '''
        Return the KGE of `modelled` against the stored observations.

        `modelled` must expose `.size`. For an empty series the correlation
        is set to NaN, so the result is NaN.
        '''
        if modelled.size == 0:
            # np.nan is the portable spelling: the np.NaN alias was removed in
            # NumPy 2.0 and raises AttributeError there.
            correl = np.nan
        else:
            correl = np.corrcoef(self.obs, modelled)[0, 1]
        alpha = np.std(modelled) / self.obs_std # this is equivalent to np.std(modelled) / np.std(self.obs)
        beta = np.sum(modelled) / self.obs_sum
        kge = 1 - np.sqrt((correl - 1)**2 + (alpha - 1)**2 + (beta - 1)**2)
        return kge

# Trial an alternate objective function
# Kling-Gupta efficiency
class TestLocalSingle_KGE:
    '''
    Local objective that scores modelled 'qtot' against observations with KGE.
    '''

    input_schema = input_group(['qtot'])
    output_schema = ['qtot_kge']

    def __init__(self, obs, eval_period):
        '''
        Work out which observations are usable and build the KGE evaluator.

        Only obs['qtot'] is read; eval_period contributes only its length.
        '''
        observed = obs['qtot']
        missing = np.isnan(observed)

        if missing.any():
            # Gaps present: keep only the positions that hold a value
            self.valid_idx = np.where(~missing)
        else:
            # No gaps: use the first len(eval_period) entries
            self.valid_idx = slice(0, len(eval_period))

        self.kge = KGE(observed[self.valid_idx])

    def evaluate(self, modelled):
        '''
        Return the KGE of modelled 'qtot' as a numpy array, using the same
        valid index that was applied to the observations.
        '''
        simulated = modelled['qtot'][self.valid_idx]
        return np.array(self.kge(simulated))

class TestGlobalSingle_KGE:
    '''
    Global objective: one minus the mean of the local 'qtot_kge' results.
    '''

    output_schema = ['objf_val']
    objective_key = 'objf_val'

    def evaluate(self, l_results):
        '''
        Return {'objf_val': 1.0 - mean(l_results['qtot_kge'])}.
        '''
        mean_kge = np.mean(l_results['qtot_kge'])
        return {'objf_val': 1.0 - mean_kge}

In [None]:
# import your newly created module
from awrams_user.objectives import kge_objectives as kobj

In [None]:
# Swap in the KGE objective classes from the user module; as before, the local
# class is wrapped in an ObjectiveFunctionSpec (from awrams.calibration.support)
local_objfspec = support.ObjectiveFunctionSpec(kobj.TestLocalSingle_KGE)
global_objfspec = kobj.TestGlobalSingle_KGE
objective_spec = support.ObjectiveSpec(
    global_objfspec,
    local_objfspec,
    observations,
    eval_period,
)

# Rebuild the specification dictionary: same optimizer, extents, run period,
# model and input mapping as the NSE run, but with the KGE objective and a
# separate log file
node_mapping = input_map
model = awral
cal_spec = {
    'optimizer_spec': optimizer_spec,
    'objective_spec': objective_spec,
    'extent_map': cal_dict,
    'run_period': run_period,
    'model': model,
    'node_mapping': node_mapping,
    'logfile': './calres_KGE.h5',
}

# Pickle the spec under its own name so the NSE pickle is left untouched
ncores, nnodes = 4, 1
cluster.build_pickle_from_spec(cal_spec, ncores, nnodes, 'test_cal_kge.pkl')

In [None]:
# Run the KGE calibration from the pickled spec ('./test_cal_kge.pkl') and report how long it took.
# run_from_pickle is already imported from awrams.calibration.launch_calibration in the setup cell.
import time

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
print("Starting calibration using KGE........")
cal = run_from_pickle('./test_cal_kge.pkl')
end = time.perf_counter()
print('Calibration complete.......')
print('Time elapsed for calibration', end - start)