# Calibrating the HBV model

## Setting up the Python kernel

In [2]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import ewatercycle
import ewatercycle.models
import ewatercycle.forcing

from rich import print
from tqdm import tqdm
from pathlib import Path
from ewatercycle_DA import DA
from datetime import datetime

## Loading in observed discharge data

In [5]:
# Loading in discharge data from Katima Mulilo: drop the time-of-day column and
# give the remaining columns descriptive names
katima = (pd.read_csv('Katima_Mulilo/1291100_Q_Day.txt', skiprows=35, delimiter=';', header=1)
    .drop(columns=['hh:mm'])
    .rename(columns={"YYYY-MM-DD": "Date", " Value": "Discharge [m^3/s]"}))

# Work on an explicit copy: assigning columns on a plain column selection of
# `katima` triggers pandas' SettingWithCopyWarning and leaves it ambiguous
# whether `katima` itself is modified.
km = katima[['Date', 'Discharge [m^3/s]']].copy()

# Removing invalid data (-999 entries become NaN)
km['Discharge [m^3/s]'] = km['Discharge [m^3/s]'].replace(-999.000, np.nan)

# Fixing the date column for accessibility (strings -> datetimes)
km['Date'] = pd.to_datetime(km['Date'])

# Defining the calibration period (both bounds inclusive)
start_calibration = '1986-01-01'
end_calibration = '2019-12-31'

# Reducing the dataframe to the calibration period
km = km[(km['Date'] >= start_calibration) & (km['Date'] <= end_calibration)]

# Defining the start and end of the dataset (in the format needed for the calibration)
data_start = km['Date'].iloc[0].strftime('%Y-%m-%dT%H:%M:%SZ')
data_end = km['Date'].iloc[-1].strftime('%Y-%m-%dT%H:%M:%SZ')

## Generating the ERA5 forcings 

In [7]:
# Shape file used for the forcing (passed as `shape=` when generating, see option 1)
km_shp = Path.home().joinpath(
    "BEP-Zoe/book/thesis_projects/BSc/2025_Q4_ZoeLucius_CEG", "km_ERA5", "km.shp"
)

# Forcing directory; created if it does not exist yet
ERA5_folder = Path.home().joinpath("ERA5")
ERA5_folder.mkdir(exist_ok=True)

# HBV model path
HBV_model_path = Path.home().joinpath("HBV_model")

# Forcing source shared by both options below
forcing_source = ewatercycle.forcing.sources["LumpedMakkinkForcing"]

# OPTION 1: generate the ERA5 forcing from scratch (disabled; enable when no
# forcing has been generated in ERA5_folder yet)
# ERA5_forcing = forcing_source.generate(
#    dataset="ERA5",
#    start_time= data_start,
#    end_time= data_end,
#    shape=km_shp,
#    directory=ERA5_folder)

# OPTION 2: load a forcing that was generated before and already exists in the directory
load_location = ERA5_folder.joinpath("work", "diagnostic", "script")
ERA5_forcing = forcing_source.load(directory=load_location)

## Defining functions to determine the model of best fit

In [8]:
def calibrationObjective(modelOutput, observation, start, end):
    """Return the root-mean-square error between modelled and observed values.

    The model output is aligned to the observation timestamps (forward-filled
    from the most recent model timestamp) and the error is evaluated over the
    period ``start``..``end``.

    Parameters
    ----------
    modelOutput : pandas.Series
        Modelled values; the index must be convertible with ``pd.to_datetime``
        and sorted in increasing order (required by the forward-fill reindex).
    observation : pandas.Series
        Observed values; the index must be convertible with ``pd.to_datetime``.
    start, end : str or datetime-like
        First and last timestamp of the evaluation period, both inclusive.

    Returns
    -------
    float
        The RMSE over the evaluation period. Timestamps where either value is
        NaN are skipped; NaN is returned when nothing is left to compare.

    Notes
    -----
    The inputs are not modified: the index conversion is done on copies.
    """
    # Convert the indices on copies so the caller's Series keep their own index
    model = modelOutput.copy()
    model.index = pd.to_datetime(modelOutput.index)
    observed = observation.copy()
    observed.index = pd.to_datetime(observation.index)

    # Making sure the given dates are in the correct format
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)

    # Combine the model output and the observation into one data frame,
    # sampled at the observation timestamps
    hydro_data = pd.concat(
        [model.reindex(observed.index, method='ffill'), observed],
        axis=1,
        keys=['model', 'observation'],
    )

    # Only keep the calibration period; the bounds are inclusive so that the
    # first and the last day of the period take part in the score
    in_period = (hydro_data.index >= start) & (hydro_data.index <= end)
    hydro_data = hydro_data[in_period]

    # Root of the mean squared difference (pandas' mean skips NaN entries)
    squared_error = (hydro_data['model'] - hydro_data['observation']) ** 2
    return np.sqrt(squared_error.mean())


## Start calibration

In [9]:
# Ensemble size: the number of randomly sampled HBV parameter sets (one model
# member each) that will be tested against the observed discharge
N = 100

In [12]:
# Define the initial state parameters (Si,  Su, Sf, Ss, Sp)
# NOTE(review): s_0 is only referenced by commented-out setup code in the cells
# visible here - confirm whether it should be passed to the ensemble setup.
s_0 = np.array([0,  100,  0,  5,  0])

# Define parameters and their corresponding boundary values
# (param_mins / param_maxs are ordered like param_names)
param_names = ["Imax", "Ce", "Sumax", "Beta", "Pmax", "Tlag", "Kf", "Ks", "FM"]

param_mins = np.array([0,   0.2,  40,    .5,   .001,   1,     .01,  .0001,   0.01])
param_maxs = np.array([8,    1,  800,   4,    .3,     10,    .1,   .01,   10.0])

# Seeded generator so that a fresh "Restart & Run All" samples the same
# parameter sets and the calibration result can be reproduced
SEED = 42
rng = np.random.default_rng(SEED)

# Fill the parameters array (one row per parameter, one column per ensemble
# member) with uniform random values between each minimum and maximum; the
# bounds are broadcast along the member axis
parameters = rng.uniform(
    low=param_mins[:, np.newaxis],
    high=param_maxs[:, np.newaxis],
    size=(len(param_names), N),
)

In [15]:
# Show the values drawn for the first ensemble member, rounded to three decimals,
# as a quick check that each parameter lies within its boundaries
first_parameter_set = np.round(parameters[:, 0], decimals=3)
print(list(zip(param_names, first_parameter_set)))

In [16]:
# One setup-kwargs dict per ensemble member, each holding that member's
# column of the parameters array
setup_kwargs_lst = [{'parameters': parameters[:, member]} for member in range(N)]

In [17]:
# Alternative without the DA package, kept for reference only (not executed):
# ensemble = []
#
# for c in range(N):
#     ensemble.append(ewatercycle.models.HBVLocal(forcing=ERA5_forcing))
#     config_file, _ = ensemble[c].setup(
#                             parameters = parameters[:, c],
#                             initial_storage=s_0,
#                             cfg_dir = HBV_model_path,
#                                )
#     ensemble[c].initialize(config_file)

# Create the ensemble container for N members
ensemble = DA.Ensemble(N=N)
ensemble.setup()

# Every member runs the same model type on the same forcing; only the
# parameter set handed over through setup_kwargs differs per member
model_names = ["HBVLocal"] * N
forcings = [ERA5_forcing] * N
ensemble.initialize(model_name=model_names,
                    forcing=forcings,
                    setup_kwargs=setup_kwargs_lst)

In [18]:
# Handle to the first ensemble member's model, used to read the simulation
# time information (start/end time, time step, current time)
ref_model = ensemble.ensemble_list[0].model

In [19]:
# Number of model steps between the reference model's start and end time
timesteps = int((ref_model.end_time - ref_model.start_time) /  ref_model.time_step)

# Per-step records: the model date and the ensemble's "Q" values
# (presumably the modelled discharge - confirm against the model documentation)
time = []
lst_Q = [] 

for t in tqdm(range(timesteps)):    
    # The date is read BEFORE the ensemble is advanced, "Q" is read AFTER it.
    # NOTE(review): confirm this pairing of timestamp and value is the intended alignment.
    time.append(pd.Timestamp(ref_model.time_as_datetime.date()))
    ensemble.update()
    # One flattened array of "Q" values per time step
    lst_Q.append(ensemble.get_value("Q").flatten())

100%|██████████| 12417/12417 [05:26<00:00, 37.99it/s]


In [20]:
# Shut the ensemble down now that all time steps have been run and the
# "Q" values have been collected in lst_Q
ensemble.finalize()

In [23]:
# Stack the per-step arrays and transpose: one row per ensemble member,
# one column per time step
Q_m_arr = np.array(lst_Q).T

# Data frame with one row per recorded time and one column per ensemble member
particle_columns = [f'particle {n}' for n in range(N)]
df_ensemble = pd.DataFrame(
    data=Q_m_arr[:, :len(time)].T,
    index=time,
    columns=particle_columns,
)

In [33]:
# Observed discharge indexed by date. `km` still carries the integer row labels
# from read_csv; passing km['Discharge [m^3/s]'] directly would make
# calibrationObjective interpret those integers as timestamps in 1970, so no
# observation would fall inside the calibration period and every objective
# value would be NaN.
observed_discharge = km.set_index('Date')['Discharge [m^3/s]']

# First and last date of the observation record, used as the evaluation period
data_start = pd.to_datetime(km['Date'].iloc[0])
data_end = pd.to_datetime(km['Date'].iloc[-1])

# One objective value per ensemble member (column of df_ensemble)
objective_values = [
    calibrationObjective(df_ensemble.iloc[:, i], observed_discharge, data_start, data_end)
    for i in tqdm(range(N))
]


100%|██████████| 100/100 [00:01<00:00, 76.00it/s]


In [34]:
# Index of the ensemble member with the lowest objective value. np.nanargmin
# ignores NaN scores (np.argmin would return the position of the first NaN and
# so report a failed run as the best one) and raises a ValueError when every
# score is NaN, instead of silently selecting member 0.
parameters_minimum_index = np.nanargmin(np.array(objective_values))

# Parameter set (column of the parameters array) of the best member
parameters_minimum = parameters[:, parameters_minimum_index]

print(list(zip(param_names, np.round(parameters_minimum, decimals=3))))