# Multi-model ensembles

This notebook combines the FDCs estimated by the individual models into multi-model ensembles.  The goal is to evaluate whether multi-model ensembles can exploit low rank correlation between the individual models to improve overall performance.

In this section:

1. We organize and align the outputs from all FDC-estimation methods to ensure consistent comparison across catchments and performance metrics.

2. We construct inter-model ensembles by combining estimates from parametric, kNN, and LSTM approaches, and define the rules used for averaging or aggregation.

3. We evaluate ensemble performance against that of the individual models, examining when complementary information improves accuracy and when it does not.

4. We summarize patterns in ensemble gains and identify where model disagreement limits the benefits of combination.

In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path

import json

# from utils.kde_estimator import KDEEstimator
from utils.fdc_estimator_context import FDCEstimationContext
from utils.fdc_data import StationData
from utils.evaluation_metrics import EvaluationMetrics

import utils.data_processing_functions as dpf

In [2]:
# Which family of baseline PMFs this notebook works with.
regularization_type = 'discrete'  # 'kde' or 'discrete'

# Resolution of the PMF support: number of bins = 2**bitrate.
# Both regularization types currently use the same value.
bitrate = 10

In [3]:
# Catchment attributes. Station IDs must be read as strings so that purely
# numeric IDs (e.g. '12392895') are not parsed as integers. The ID column read
# below is lower-case 'official_id', so the dtype map has to name that column;
# the capitalised key is kept for backward compatibility (dtype keys that do
# not match a column are ignored by read_csv).
attr_fpath = 'data/catchment_attributes_with_runoff_stats.csv'
attr_df = pd.read_csv(attr_fpath, dtype={'official_id': str, 'Official_ID': str})
station_ids = sorted(attr_df['official_id'].unique().tolist())

# streamflow folder from (updated) HYSETS
HYSETS_DIR = Path('/home/danbot/code/common_data/HYSETS')

# HYSETS watershed properties, restricted to the stations present in attr_df.
# 'Official_ID' is forced to str so the isin() match below compares like with like.
hs_df = pd.read_csv('data/HYSETS_watershed_properties.txt', sep=';', dtype={'Official_ID': str})
hs_df = hs_df[hs_df['Official_ID'].isin(station_ids)]

In [4]:
# Lookup tables built column-wise from the filtered HYSETS table.
# Watershed_ID -> Official_ID
watershed_id_dict = dict(zip(hs_df['Watershed_ID'], hs_df['Official_ID']))
# Official_ID -> Watershed_ID (the inverse mapping)
official_id_dict = dict(zip(hs_df['Official_ID'], hs_df['Watershed_ID']))
# Official_ID -> drainage area (column 'Drainage_Area_km2')
da_dict = dict(zip(hs_df['Official_ID'], hs_df['Drainage_Area_km2']))

In [5]:
# Locate and load the baseline PMFs written by the previous notebook.
baseline_distribution_folder = Path(os.getcwd()) / 'data' / 'baseline_distributions'

# 'kde' regularization reads pmf_kde.csv; anything else reads pmf_obs.csv.
pmf_filename = 'pmf_kde.csv' if regularization_type == 'kde' else 'pmf_obs.csv'
baseline_pmf_path = baseline_distribution_folder / f'{bitrate:02d}_bits' / pmf_filename

# The first CSV column becomes the index; the remaining columns are selected by station ID later on.
pmf_df = pd.read_csv(baseline_pmf_path, index_col=0)

In [6]:
# Folder holding the LSTM ensemble predictions. The trailing '_YYYYMMDD' token
# of the folder name is reused elsewhere in the notebook as the revision date.
LSTM_ensemble_result_folder = '/home/danbot/code/neuralhydrology/data/ensemble_results_20250514'# uses NSE mean as loss function
# LSTM_ensemble_result_folder = '/home/danbot/code/neuralhydrology/data/ensemble_results_20250627'# uses NSE 95% as loss function

# Station ID = file name up to the first underscore.
lstm_result_files = os.listdir(LSTM_ensemble_result_folder)
lstm_result_stns = [fname.partition('_')[0] for fname in lstm_result_files]

# Keep stations that appear in the attribute table, have LSTM results,
# and have a baseline PMF column (intersect1d returns a sorted, unique array).
stations_with_lstm = np.intersect1d(station_ids, lstm_result_stns)
daymet_concurrent_stations = np.intersect1d(stations_with_lstm, pmf_df.columns)
# assert '012414900' in daymet_concurrent_stations
print(f'There are {len(daymet_concurrent_stations)} monitored basins concurrent with LSTM ensemble results.')

# Spot checks; only the last expression is shown as the cell output.
'08JE027' in daymet_concurrent_stations
'12392895' in daymet_concurrent_stations


There are 712 monitored basins concurrent with LSTM ensemble results.


False

Load the global mean PMF and resample to the higher resolution evaluation grid (12 bits)

In [7]:
# Pre-computed dictionary of complete years of record, keyed by station.
# NOTE: allow_pickle=True unpickles arbitrary Python objects, so this file
# must only ever come from a trusted source.
complete_year_stats_fpath = os.path.join('data', 'complete_year_stats.npy')
complete_year_stats = np.load(complete_year_stats_fpath, allow_pickle=True).item()

# Keep stations with at least 5 complete hydrological years; report the rest.
meet_min_hyd_years = []
for stn, stn_stats in complete_year_stats.items():
    n_hyd_years = len(stn_stats['hyd_years'])
    if n_hyd_years < 5:
        print(f'Station {stn} has {n_hyd_years} complete hydrological years of data.')
        continue
    meet_min_hyd_years.append(stn)


Station 05BG002 has 4 complete hydrological years of data.
Station 08JE005 has 4 complete hydrological years of data.
Station 08ME015 has 4 complete hydrological years of data.
Station 08NG004 has 4 complete hydrological years of data.
Station 08PA001 has 4 complete hydrological years of data.
Station 12073000 has 4 complete hydrological years of data.
Station 12164000 has 4 complete hydrological years of data.
Station 12392895 has 4 complete hydrological years of data.
Station 12444490 has 4 complete hydrological years of data.
Station 15081614 has 4 complete hydrological years of data.


In [8]:
# Stations excluded from the analysis (see Notebook 1 for details on these exclusions).
exclude_stations = [
    '08FA009', '08GA037', '08NC003', '12052500', '12090480', '12107950',
    '12108450', '12119300', '12119450', '12200684', '12200762', '12203000',
    '12409500', '15056070', '15081510', '12323760', '12143700', '12143900',
    '12398000', '12058800', '12137800', '12100000',
]

# Candidates must meet the minimum record length AND have LSTM results and a baseline PMF.
pmf_stations = np.intersect1d(meet_min_hyd_years, daymet_concurrent_stations)

# Drop the exclusions, keeping the sorted order produced by intersect1d.
excluded_lookup = set(exclude_stations)
official_ids_to_include = [stn for stn in pmf_stations if stn not in excluded_lookup]
print(f'After exclusions, {len(official_ids_to_include)} stations remain for FDC estimation.')

After exclusions, 712 stations remain for FDC estimation.


In [9]:
# Out-of-sample parameter predictions, indexed by station ID (read as str).
parameter_prediction_results_folder = os.path.join('data', 'results', 'parameter_prediction_results')
predicted_params_fpath = os.path.join(
    parameter_prediction_results_folder,
    'OOS_parameter_predictions.csv',
)
rdf = pd.read_csv(
    predicted_params_fpath,
    index_col=['official_id'],
    dtype={'official_id': str},
)

# {station_id: {column: value}} -- passed to the FDC estimation context below.
predicted_param_dict = rdf.to_dict(orient='index')

In [10]:
# --- Inputs for the shared FDC estimation context -------------------------
LSTM_forcings_folder = '/home/danbot/neuralhydrology/data/BCUB_catchment_mean_met_forcings_20250320'
attr_df_fpath = os.path.join('data', 'catchment_attributes_with_runoff_stats.csv')

methods = ('parametric', 'lstm', 'knn',)

# True: the start date is moved back to 1950-01-01.
# False: the Daymet default start date (1980-01-01) is kept.
include_pre_1980_data = True
daymet_start_date = '1950-01-01' if include_pre_1980_data else '1980-01-01'

# Columns of the predicted-parameter table used by the parametric method (Notebook 3).
target_cols = [
    'uar_mean_predicted', 'uar_std_predicted', 'uar_median_predicted', 'uar_mad_predicted',
    'log_uar_mean_predicted', 'log_uar_std_predicted', 'log_uar_median_predicted', 'log_uar_mad_predicted',
]

# Global bounds on unit area runoff (see Notebook 1: data).
global_min_uar = 5e-5
global_max_uar = 1e4

input_data = dict(
    attr_df_fpath=attr_df_fpath,
    LSTM_forcings_folder=LSTM_forcings_folder,
    LSTM_ensemble_result_folder=LSTM_ensemble_result_folder,
    include_pre_1980_data=include_pre_1980_data,
    predicted_param_dict=predicted_param_dict,
    eps=1e-12,
    min_record_length=5,  # minimum record length (years)
    minimum_days_per_month=20,  # minimum number of days with valid data per month
    parametric_target_cols=target_cols,
    all_station_ids=daymet_concurrent_stations,
    baseline_distribution_folder=baseline_distribution_folder,
    delta=0.001,  # maximum uncertainty (by KL divergence) added to the predicted PMF by the uniform mixture ratio
    regularization_type=regularization_type,  # 'kde' or 'discrete'; if discrete, bitrate must be specified
    bitrate=bitrate,
    complete_year_stats=complete_year_stats,
    year_type='hydrological',  # 'calendar' or 'hydrological'
    zero_flow_threshold=1e-4,  # threshold below which flow is indistinguishable from zero
    global_min_uar=global_min_uar,
    global_max_uar=global_max_uar,
)

fdc_context = FDCEstimationContext(**input_data)


    Using all stations in the catchment data with a baseline PMF (validated): 1007


In [11]:
def _load_station_json(fpath):
    """Read one station's FDC-results JSON file and return the parsed object."""
    with open(fpath, 'rb') as file:
        return json.load(file)


def _require_unit_sum(pmf, message):
    """Raise ValueError(message) unless `pmf` sums to 1 (np.isclose tolerance)."""
    if not np.isclose(np.sum(pmf), 1.0):
        raise ValueError(message)


def compute_multi_model_ensemble_pmf(stn, rev_date, which_models, result_folder=None):
    """Build equally weighted multi-model ensemble PMFs for one station.

    One ensemble is produced per kNN model whose key contains
    '_NN_attribute_dist_ID2_freqEnsemble'. Each ensemble is the element-wise
    mean of that kNN PMF and the other requested members, renormalised to
    sum to 1.

    Parameters
    ----------
    stn : str
        Station ID; results are read from '<stn>_fdc_results.json'.
    rev_date : str
        Suffix of the LSTM results sub-folder ('lstm_<rev_date>').
    which_models : str
        One of 'knn-lstm', 'knn-lstm-parametric', 'knn-parametric'.
    result_folder : str or pathlib.Path
        Folder containing the 'knn', 'lstm_<rev_date>' and 'parametric'
        sub-folders. Required, despite the None default kept for
        signature compatibility.

    Returns
    -------
    dict
        {kNN model key: ensemble PMF (np.ndarray)}, in sorted key order.

    Raises
    ------
    ValueError
        If `which_models` is not recognised, `result_folder` is missing,
        no matching kNN/parametric model is found, or a member PMF does
        not sum to 1.
    """
    members_by_ensemble = {
        'knn-lstm': ('lstm',),
        'knn-lstm-parametric': ('lstm', 'parametric'),
        'knn-parametric': ('parametric',),
    }
    # Validate the request before touching the file system.
    if which_models not in members_by_ensemble:
        raise ValueError(f'which_models {which_models} not recognized, must be one of knn-lstm, knn-lstm-parametric, knn-parametric')
    if result_folder is None:
        raise ValueError('result_folder must be provided')
    result_folder = Path(result_folder)
    extra_members = members_by_ensemble[which_models]

    # kNN: keep every model variant matching the label pattern, in sorted order.
    knn_dict = _load_station_json(result_folder / 'knn' / f'{stn}_fdc_results.json')
    knn_models = sorted(k for k in knn_dict if '_NN_attribute_dist_ID2_freqEnsemble' in k)
    if not knn_models:
        raise ValueError(f'No knn model found for {stn}')
    knn_pmfs = {m: knn_dict[m]['pmf'] for m in knn_models}
    for m, pmf in knn_pmfs.items():
        _require_unit_sum(pmf, f'KNN PMF does not sum to 1 for {stn} model {m}')

    # Only the members that the requested ensemble needs are loaded, so e.g.
    # 'knn-parametric' does not depend on the LSTM results being present.
    other_pmfs = []
    if 'lstm' in extra_members:
        lstm_dict = _load_station_json(result_folder / f'lstm_{rev_date}' / f'{stn}_fdc_results.json')
        lstm_pmf = lstm_dict['frequency']['pmf']
        _require_unit_sum(lstm_pmf, f'LSTM PMF does not sum to 1 for {stn}')
        other_pmfs.append(lstm_pmf)
    if 'parametric' in extra_members:
        param_dict = _load_station_json(result_folder / 'parametric' / f'{stn}_fdc_results.json')
        # use the first model whose key contains 'PredictedLog'
        param_model = [k for k in param_dict if 'PredictedLog' in k]
        if not param_model:
            raise ValueError(f'No parametric model found for {stn}')
        param_pmf = np.asarray(param_dict[param_model[0]]['pmf'], dtype=float)
        # the stored parametric PMF is renormalised before use
        param_pmf = param_pmf / np.sum(param_pmf)
        _require_unit_sum(param_pmf, f'Parametric PMF does not sum to 1 for {stn}')
        other_pmfs.append(param_pmf)

    # Equal-weight mean along the shared support grid, then renormalise.
    ensemble_pmfs = {}
    for m in knn_models:
        ensemble_pmf = np.mean([knn_pmfs[m], *other_pmfs], axis=0)
        ensemble_pmf /= np.sum(ensemble_pmf)
        ensemble_pmfs[m] = ensemble_pmf
    return ensemble_pmfs

In [12]:
def compute_ensemble_divergence(stn, rev_date, pmf_obs_df, which_models, result_folder=None):
    """Score every multi-model ensemble PMF of one station against its observed PMF.

    The ensemble PMFs come from compute_multi_model_ensemble_pmf; each is
    evaluated with EvaluationMetrics._evaluate_fdc_metrics_from_pmf against
    the values of column `stn` of `pmf_obs_df`.

    Returns a tuple (results, ensemble_pmfs), both keyed by kNN model label.
    """
    evaluator = EvaluationMetrics(data=StationData(fdc_context, stn), bitrate=bitrate)
    ensemble_pmfs = compute_multi_model_ensemble_pmf(
        stn, rev_date, which_models=which_models, result_folder=result_folder
    )
    results = {
        model_key: evaluator._evaluate_fdc_metrics_from_pmf(pmf, pmf_obs_df[stn].values)
        for model_key, pmf in ensemble_pmfs.items()
    }
    return (results, ensemble_pmfs)

In [13]:
# Compute and cache, per station, the evaluation metrics of each multi-model
# ensemble. One CSV is written per station and ensemble type; existing files
# are left untouched so the cell can be re-run cheaply.
ensembles = {}
ensemble_folder = f'data/results/ensemble_results/{bitrate:02d}_bits'
results_folder = Path(f'data/results/fdc_estimation_results_{bitrate:02d}_bits')
if regularization_type == 'kde':
    ensemble_folder = 'data/results/ensemble_results/kde'
    results_folder = Path('data/results/fdc_estimation_results_kde')
assert os.path.exists(results_folder), f'Results folder {results_folder} does not exist'
process_ensembles = True

# output sub-folder -> ensemble identifier understood by compute_multi_model_ensemble_pmf
ensemble_by_subfolder = {
    'knn_lstm/': 'knn-lstm',
    'knn_lstm_lognorm/': 'knn-lstm-parametric',
    'knn_lognorm/': 'knn-parametric',
}

# revision date = trailing token of the LSTM results folder name (loop invariant)
rev_date = LSTM_ensemble_result_folder.split('_')[-1]
n_stations = len(official_ids_to_include)

for ep, model_ensemble in ensemble_by_subfolder.items():
    folder = Path(ensemble_folder) / ep
    folder.mkdir(parents=True, exist_ok=True)
    if not process_ensembles:
        continue

    for n, stn in enumerate(official_ids_to_include, start=1):
        ensemble_output_fpath = folder / f'{stn}-{model_ensemble}.csv'
        if ensemble_output_fpath.exists():
            continue  # already computed on a previous run

        try:
            results, ensemble_pmfs = compute_ensemble_divergence(
                stn, rev_date, pmf_df, which_models=model_ensemble, result_folder=results_folder
            )
        except Exception as err:
            # add the station to the message while keeping the original traceback chained
            raise RuntimeError(f'Error processing station {stn}: {err}') from err

        results_df = pd.DataFrame(results)
        # column label = second '_'-separated token of the kNN model key
        results_df.columns = [label.split('_')[1] for label in results_df.columns]
        results_df.index.name = 'metric'
        results_df.to_csv(ensemble_output_fpath, index=True)

        # progress is reported against the list actually being iterated
        if n % 50 == 0:
            print(f'{n}/{n_stations} processed')

In [14]:
def _load_station_metric_frames(folder):
    """Read every per-station ensemble CSV in `folder` once.

    Returns {station_id: DataFrame indexed by 'metric'}, in sorted file order.
    The per-station files are named '<stn>-<ensemble>.csv' and every ensemble
    name starts with 'knn', so the station ID is the part of the file name
    before '-knn' (a file named '<stn>.csv' yields '<stn>' unchanged).
    """
    frames = {}
    for fname in sorted(os.listdir(folder)):
        if not fname.endswith('.csv'):
            continue  # ignore anything that is not a per-station result file
        stn = fname[:-len('.csv')].partition('-knn')[0]
        df = pd.read_csv(os.path.join(folder, fname))
        if 'Unnamed: 0' in df.columns:
            df = df.rename(columns={'Unnamed: 0': 'metric'})
        frames[stn] = df.set_index('metric')
    return frames


# Aggregate the per-station ensemble metrics into one table per ensemble type
# and per column label 1..10 of the per-station files (rows = stations,
# columns = metrics). Existing output tables are not regenerated.
if process_ensembles:
    for ep in ['knn_lstm/', 'knn_lstm_lognorm/', 'knn_lognorm/']:
        folder = Path(ensemble_folder) / ep
        which_ensemble = '-'.join(ep.split('/')[0].split('_'))
        station_frames = None  # loaded lazily, at most once per ensemble type

        for n in range(1, 11):
            fname = f'{which_ensemble}_{n}NN.csv'
            ensemble_path = os.path.join(ensemble_folder, fname)
            if os.path.exists(ensemble_path):
                print(f'     {ensemble_path} already exists, skipping')
                continue

            if station_frames is None:
                station_frames = _load_station_metric_frames(folder)
            if not station_frames:
                print(f'     no station results found in {folder}, nothing to save for {fname}')
                continue

            nn_results = []
            for stn, df in station_frames.items():
                row = df[str(n)].to_dict()  # {metric: value} for column label n
                row['stn_id'] = stn
                nn_results.append(row)

            nn_df = pd.DataFrame(nn_results).set_index('stn_id')
            nn_df.to_csv(ensemble_path, index=True)
            print(f'    ...saved {len(nn_df)} results to {fname}')

     data/results/ensemble_results/10_bits/knn-lstm_1NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_2NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_3NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_4NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_5NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_6NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_7NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_8NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_9NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm_10NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm-lognorm_1NN.csv already exists, skipping
     data/results/ensemble_results/10_bits/knn-lstm-logn

### Concatenate all results into a single data structure for easier plotting and comparison

Compute baseline values to represent two "null" models: using the global mean PMF, or a uniform distribution, at every location.  These benchmarks help show how much value is added by using different models to predict FDCs.

In [15]:
from multiprocessing import Pool

# Load the per-method evaluation results, one DataFrame per method. A merged
# CSV is used when it exists; otherwise the individual station files are read
# in parallel, validated, and the merged CSV is written for next time.
results_dfs = {}
lstm_rev_date = LSTM_ensemble_result_folder.split('_')[-1]
sub_folder = f'lstm_{lstm_rev_date}'
method_results_folder = Path(os.path.join('data', 'results', 'additional_results'))

# Completed stations are taken from the kNN results folder (file name up to
# the first underscore); de-duplicated and sorted for a reproducible row order.
knn_results_folder = os.path.join(results_folder, 'knn')
completed_stns = sorted({c.split('_')[0] for c in os.listdir(knn_results_folder)})
print(f'Found {len(completed_stns)} completed stations in {knn_results_folder} results folder.')

for method in ['parametric', 'lstm', 'knn']:
    print(f'   Loading {method} results')
    if method == 'lstm':
        # NOTE(review): the LSTM cache name is always bitrate-tagged, even when
        # regularization_type == 'kde' -- confirm whether a kde variant is intended.
        method_results_fpath = method_results_folder / f'{method}_all_results_{lstm_rev_date}_{bitrate:02d}_bits.csv'
        res_folder = os.path.join(results_folder, sub_folder)
    else:
        cache_tag = 'kde' if regularization_type == 'kde' else f'{bitrate:02d}_bits'
        method_results_fpath = method_results_folder / f'{method}_all_results_{cache_tag}.csv'
        res_folder = os.path.join(results_folder, method)

    if os.path.exists(method_results_fpath):
        results_dfs[method] = pd.read_csv(method_results_fpath, dtype={'Official_ID': str}).dropna(axis=1, how='all')
        continue

    print(f'   {method} results not found in {method_results_fpath}, loading from individual station files...')
    args = [(stn, res_folder, method) for stn in completed_stns]
    with Pool() as pool:
        results_list = pool.map(dpf.load_results, args)

    method_results = pd.concat(results_list, ignore_index=True)

    # KL divergence must be defined and non-negative for every row.
    bad_dkl = method_results[method_results['KLD'].isna() | (method_results['KLD'] < 0)]
    if not bad_dkl.empty:
        bad_stns = bad_dkl['Official_ID'].values
        raise ValueError(f'{method} results have {len(bad_stns)} NaN or negative DKL values: {bad_stns}')

    # Write the cache, then drop all-NaN columns so a fresh load matches a cached load.
    method_results_folder.mkdir(parents=True, exist_ok=True)
    method_results.to_csv(method_results_fpath, index=False)
    results_dfs[method] = method_results.dropna(axis=1, how='all')
    print(f'   Loaded {len(completed_stns)} station results for {method} results')


Found 712 completed stations in lstm_20250514 results folder.
   Loading parametric results
   Loading lstm results
   Loading knn results


In [16]:
# Re-express the metrics so that zero is the best score for each of them.
# NOTE: this rewrites the columns in place, so the cell is not idempotent --
# re-running it without reloading results_dfs applies the transforms twice.
for method_df in results_dfs.values():
    # exponential of RMSE, expressed as a percentage deviation
    method_df['RMSE'] = 100 * (np.exp(method_df['RMSE']) - 1)
    # NAE as a percentage derived from VE; 0 is perfect
    method_df['NAE'] = 100 * (1 - method_df['VE'])
    # flip NSE and KGE so that 0 is perfect
    method_df['NSE'] = 1 - method_df['NSE']
    method_df['KGE'] = 1 - method_df['KGE']

In [17]:
def split_knn_label_col(df):
    """Split the '_'-separated kNN 'Label' column into one column per field.

    Every label must have exactly seven fields, in this order:
    Official_ID, k, NN, tree_type, dist, ensemble_weight, ensemble_method.

    The input frame is left untouched. The returned frame has a fresh
    0..n-1 index and holds the input columns (minus 'MDB') followed by the
    new label fields. The helper columns 'NN', 'dist', 'n_parts', 'minYears'
    and 'minOverlapPct' are dropped, and where a column name occurs twice
    (e.g. 'Official_ID') the first occurrence is kept.

    Raises
    ------
    ValueError
        If the labels do not all have the same number of fields, or that
        number is not the expected seven.
    """
    format_cols = ['Official_ID', 'k', 'NN', 'tree_type', 'dist', 'ensemble_weight', 'ensemble_method']

    # drop() and reset_index() return new frames, so the caller's df is not mutated
    base = df.drop(columns=['MDB'], errors='ignore').reset_index(drop=True)

    n_parts = base['Label'].str.split('_').str.len()
    if n_parts.nunique(dropna=False) != 1:
        raise ValueError('Not all labels have the same number of parts')
    if n_parts.iloc[0] != len(format_cols):
        raise ValueError(
            f'Labels have {n_parts.iloc[0]} parts, expected {len(format_cols)}: {format_cols}'
        )

    label_parts = base['Label'].str.split('_', expand=True)
    label_parts.columns = format_cols

    merged = pd.concat([base, label_parts], axis=1)
    merged = merged.drop(columns=['NN', 'dist', 'n_parts', 'minYears', 'minOverlapPct'], errors='ignore')
    # keep the first occurrence of any duplicated column name
    return merged.loc[:, ~merged.columns.duplicated()].copy()

In [18]:
# Distinct labels present in the parametric results.
parametric_targets = list(set(results_dfs['parametric']['Label'].values))

# Expand the kNN label into separate columns and keep a formatted copy.
results_dfs['knn'] = split_knn_label_col(results_dfs['knn'])
knn_formatted_results = results_dfs['knn'].copy()

# The output name is tagged with the bitrate, or with 'kde' for KDE regularization.
knn_output_tag = 'kde' if regularization_type == 'kde' else f'{bitrate:02d}_bits'
knn_formatted_fpath = os.path.join(
    'data', 'results', 'additional_results',
    f'knn_all_results_formatted_{knn_output_tag}.csv',
)

knn_formatted_results.to_csv(knn_formatted_fpath, index=False)


### Load the total sample mean PMF

Here we want to pre-compute benchmark performance measures based on the "global" mean PMF and the uniform distribution.  These represent null models to provide context for comparing the value added by using different models to predict FDCs.

In [19]:
# Load the global mean PMF at the working bitrate and discard the unnamed
# leading column.
mean_pmf_df_bits = pd.read_csv(
    f'data/results/sample_distribution_mixture/mean_distribution_{bitrate}bits.csv'
).drop(columns=['Unnamed: 0'])

# NOTE: despite its name, num_bins holds log2 of the number of rows.
num_bins = np.log2(len(mean_pmf_df_bits))

# Normalise so that the PMF sums to 1.
mean_pmf_df_bits['pmf'] = mean_pmf_df_bits['pmf'] / mean_pmf_df_bits['pmf'].sum()

In [20]:
# Combine the parametric and LSTM results, then (when no cached file exists)
# append two null-model rows per station: the global mean PMF ('Mean_PMF')
# and the uniform distribution ('Uniform').
fdc_df = pd.concat(
    [
        results_dfs['parametric'].dropna(axis=1, how='all'),
        results_dfs['lstm'].dropna(axis=1, how='all'),
    ],
    axis=0,
)

# load the reference PMFs
pmf_obs_df = pd.read_csv(baseline_pmf_path)
# support grid of the mean PMF (column 'log_x_uar'; np.exp() of it is treated as UAR)
log_x_uar = mean_pmf_df_bits['log_x_uar'].values

formatted_fdc_results_fpath = f'data/results/additional_results/formatted_results_by_performance_measure_{bitrate:02d}_bits.csv'
if regularization_type == 'kde':
    formatted_fdc_results_fpath = 'data/results/additional_results/formatted_results_by_performance_measure_kde.csv'

if not os.path.exists(formatted_fdc_results_fpath):
    # evaluator output key -> results column label
    result_keys = ['kld', 'emd', 'rmse', 'pct_vol_bias', 'mean_abs_pct_error', 'nse', 'kge', 've', 'norm_abs_error']
    df_labels = ['KLD', 'EMD', 'RMSE', 'PB', 'MAPE', 'NSE', 'KGE', 'VE', 'NAE']
    # alternative column names in the model results that carry the same quantity
    fdc_mapper = {'RB': 'PB', 'MARE': 'MAPE'}
    model_columns = list(fdc_df.columns)

    mean_pmf = mean_pmf_df_bits['pmf'].values
    assert np.isclose(mean_pmf.sum(), 1.0), 'Mean PMF does not sum to 1'
    uniform_pmf = np.ones_like(mean_pmf) / len(mean_pmf)
    assert np.isclose(uniform_pmf.sum(), 1.0), f'Uniform PMF does not sum to 1: {uniform_pmf.sum():.6f}'

    included_ids = set(official_ids_to_include)
    baseline_rows = []

    for stn in fdc_df['Official_ID'].unique():
        if stn not in included_ids:
            continue

        # Station-specific zero-flow threshold as unit area runoff (L/s/km2).
        # The support grid is in log space, so the threshold is logged before
        # it is located on the grid. The index is clamped at 0 for thresholds
        # that fall below the first grid point.
        zero_flow_uar = 1000.0 * fdc_context.zero_flow_threshold / da_dict[stn]
        log_zf = np.log(zero_flow_uar)
        zero_bin_index = max(int(np.searchsorted(log_x_uar, float(log_zf), side='right')) - 1, 0)
        min_measurable_log_uar = log_x_uar[zero_bin_index]

        eval_obj = EvaluationMetrics(bitrate=bitrate, log_x=log_x_uar, min_measurable_log_uar=min_measurable_log_uar)

        # Move all probability mass BELOW the zero-flow bin into the first bin;
        # mass at and above the zero-flow bin is kept where it is.
        adjusted_mean_pmf = np.zeros_like(mean_pmf)
        adjusted_mean_pmf[0] = mean_pmf[:zero_bin_index].sum()
        adjusted_mean_pmf[zero_bin_index:] = mean_pmf[zero_bin_index:]

        stn_data = StationData(fdc_context, stn)
        prior_adjusted_pmf = stn_data._compute_adjusted_distribution_with_mixed_uniform(adjusted_mean_pmf)
        assert np.isclose(prior_adjusted_pmf.sum(), 1.0), f'Prior adjusted PMF does not sum to 1 for station {stn}: {prior_adjusted_pmf.sum():.6f}'

        observed_pmf = pmf_obs_df[stn].values
        # a fresh copy of the uniform PMF is passed in case the evaluator modifies its input
        for q_est, label in ((prior_adjusted_pmf, 'Mean_PMF'), (uniform_pmf.copy(), 'Uniform')):
            new_eval = eval_obj._evaluate_fdc_metrics_from_pmf(q_est, observed_pmf)
            assert new_eval['kld'] >= 0, f'Negative KLD for station {stn} with method {label}: {new_eval["kld"]:.3f}'

            # one result row for this station and null model
            new_row = {dl: new_eval[rk] for rk, dl in zip(result_keys, df_labels)}
            new_row['Official_ID'] = stn
            new_row['Label'] = label

            # fill the columns used by the model results: mapped aliases get
            # the equivalent metric, everything else is NaN
            for col in model_columns:
                if col not in new_row:
                    new_row[col] = new_row[fdc_mapper[col]] if col in fdc_mapper else np.nan

            # same "zero is best" re-expression as applied to the model results
            # (RMSE, NAE, NSE and KGE)
            new_row['RMSE'] = 100 * (np.exp(new_row['RMSE']) - 1)
            new_row['NSE'] = 1 - new_row['NSE']
            new_row['KGE'] = 1 - new_row['KGE']
            new_row['NAE'] = 100 * (1 - new_row['VE'])
            baseline_rows.append(new_row)

    # append all null-model rows in one go instead of growing the frame per row
    if baseline_rows:
        fdc_df = pd.concat([fdc_df, pd.DataFrame(baseline_rows)], ignore_index=True)

    fdc_df = fdc_df.sort_values(by=['Official_ID'], kind='stable').reset_index(drop=True)
    fdc_df.to_csv(formatted_fdc_results_fpath, index=False)