# PCR-GLOBWB reference soil moisture analysis

In [2]:
import xarray as xr
import numpy as np
import pandas as pd

from pathlib import Path
from glob import glob

Background on the RRMSE (relative root mean square error) metric: https://www.analyticsvidhya.com/blog/2021/10/evaluation-metric-for-regression-models/

## Set Paths

In [3]:
# Project base directory; every other location below is derived from it
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/')

# Sub-directories, joined with the pathlib `/` operator
AUXDIR = ROOT / 'aux_data'
OBSDIR = ROOT / 'observations'
MODELS = ROOT / 'model_parameters' / 'wflow_sbm'
RESULTS = ROOT / 'results'

## Config

In [4]:
# Evaluation period (the first year is dropped)
start_date, end_date = '2015-10-01', '2017-09-30'

# Number of cores available
cores_available = 1

# Basin IDs are taken from the index of the wflow_sbm calibration overview table
calibration_file = f"{RESULTS}/streamflow_evaluation/wflow_sbm_calibration/wflow_calibration_objective_function_overview.csv"
df = pd.read_csv(calibration_file, index_col='basin_id')
basin_ids = df.index.tolist()

## RRMSE function

In [5]:
def RRMSE(obs,sim):
    """Return the RMSE between `obs` and `sim`, divided by the standard deviation of `obs`.

    Parameters
    ----------
    obs : array-like
        Observed (reference) values.
    sim : array-like
        Simulated values; must be broadcastable against `obs`.

    Returns
    -------
    float
        sqrt(mean((obs - sim)**2)) / std(obs), where the mean is taken over
        all elements and the standard deviation is ``np.std`` with its defaults.
    """
    squared_residuals = np.square(np.subtract(obs, sim))
    root_mean_square_error = np.sqrt(squared_residuals.mean())

    # Scale the error by the spread of the observations
    return root_mean_square_error / np.std(obs)

## Calculate RRMSE soil moisture timeseries

In [11]:
def _load_soil_moisture(path, variable):
    """Read one variable from a NetCDF file and return it as a pandas Series.

    Parameters
    ----------
    path : str or Path
        NetCDF file to open.
    variable : str
        Name of the data variable to extract.

    Returns
    -------
    pandas.Series
        The variable's values, indexed as produced by ``DataArray.to_dataframe()``.
    """
    # The context manager closes the file once the values are in memory, so
    # file handles do not pile up while looping over all basins.
    with xr.open_dataset(path) as ds:
        frame = ds[variable].to_dataframe()

    # to_dataframe() also emits non-index coordinates as columns; keep only the
    # variable itself so such columns can never leak into the metric.
    # NOTE(review): whether these files carry extra coordinates is not visible here.
    return frame[variable]


# Start years of the water years (1 Oct - 30 Sep) inside the evaluation period
water_years = range(int(start_date[:4]), int(end_date[:4]))

# One averaged RRMSE per basin, in the order of basin_ids
objective_function_values = []

# Loop basin IDs
for i, basin_id in enumerate(basin_ids):
    print(i, end="\r")

    # Load reference soil moisture
    obs = _load_soil_moisture(
        f'{OBSDIR}/soil_moisture/regridded_HydroJULES/{basin_id}_soil_moisture_ref_2015_2017.nc',
        'sm',
    )

    # Load PCR-GLOBWB simulated soil moisture
    sim = _load_soil_moisture(
        f'{RESULTS}/soil_moisture_evaluation/regridded_pcr-globwb_sm/{basin_id}_pcr-globwb_sm_2015_2017.nc',
        'soil_moisture',
    )

    # Objective function per water year
    objectives = []
    for year in water_years:
        start_year = f'{year}-10-01'
        end_year = f'{year+1}-09-30'

        # Select the water year from both series
        obs_year = obs.loc[(obs.index >= start_year) & (obs.index <= end_year)]
        sim_year = sim.loc[(sim.index >= start_year) & (sim.index <= end_year)]

        objectives.append(RRMSE(obs_year.values, sim_year.values))

    # Average objective function over the water years
    objective_function_values.append(np.mean(objectives))

# Assemble the result table in one step and store it
df = pd.DataFrame({
    'basin_id': basin_ids,
    'RRMSE_sm_pcrglob': objective_function_values,
})
df.to_csv(f'{RESULTS}/soil_moisture_evaluation/pcr-globwb_RRMSE_sm_2015_2017.csv')

645

In [12]:
# Show the per-basin RRMSE table assembled in the previous cell
df

Unnamed: 0,basin_id,RRMSE_sm_pcrglob
0,10003,2.606374
1,1001,3.567523
2,101002,1.679547
3,101005,1.988774
4,102001,1.571342
...,...,...
641,95001,18.257874
642,96001,43.511574
643,96002,34.051403
644,96004,63.256916
