In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from loguru import logger
from source.utils.file_read import process_and_concat_files 
from source.utils.file_read import filter_df
from source.utils.collect_results import collect_pb_result, collect_rmse_result, create_df_forecaster_first_stage, create_df_forecaster_second_stage
from source.utils.generate_timestamp import generate_timestamps
from source.simulation.submission_module import submission_forecasters
from source.simulation.buyer_module import prepare_buyer_data
from source.ensemble.stack_generalization.utils.display_results import display_forecasting_metrics
from source.ensemble.combination_scheme.equal_weights import calculate_equal_weights
from source.ensemble.combination_scheme.avg_weights import calculate_weighted_avg
from source.ensemble.combination_scheme.model_selection import run_model_selection
from source.plots.plot_forecasts import plot_forecasts, plot_var_forecasts
from source.plots.display_hypothesis_testing import run_statistical_comparison_analysis
from source.plots.display_metrics import display_table_metrics
from source.ml_engine import create_ensemble_forecasts
from sklearn.utils.fixes import parse_version, sp_version
# Linear-programming solver name picked from the installed SciPy version
# (sp_version, as exposed by scikit-learn's compatibility helpers):
# "highs" from SciPy 1.6.0 onwards, "interior-point" on older releases.
# NOTE(review): `solver` is not referenced in the visible part of this notebook — confirm it is still needed.
solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"
from collections import defaultdict
import seaborn as sns
from IPython.display import clear_output

In [2]:
from config.simulation_setting import Simulation, WeightedAvg, Stack
from source.simulation.helpers_simulation import process_combination_scheme
from source.utils.session_ml_info import delete_previous_day_pickle
# Settings of the testing period; read by key in the simulation cell below
# (random seed, input file names, window size, enabled baselines, display flags, ...).
sim_params = Simulation.testing_period
# Parameters of the weighted-average combination scheme (e.g. 'window_size_valid').
weight_avg_params = WeightedAvg.params
# Parameters passed to the ensemble engine (e.g. 'day_calibration', 'conformalized_qr', plot flags).
ens_params = Stack.params

In [3]:
def calculate_coverage(df_pred_ensemble, col_90, col_10, col_targets):
    """
    Empirical coverage of the band spanned by two quantile columns.

    Parameters
    ----------
    df_pred_ensemble : DataFrame containing the quantile columns and the targets.
    col_90 : name of the column used as upper bound of the band.
    col_10 : name of the column used as lower bound of the band.
    col_targets : name of the column with the observed values.

    Returns
    -------
    Fraction (0 to 1, not a percentage) of rows whose target lies inside the
    band, both bounds included. Rows where a comparison involves NaN count
    as not covered.
    """
    lower_bound = df_pred_ensemble[col_10]
    observed = df_pred_ensemble[col_targets]
    upper_bound = df_pred_ensemble[col_90]

    # Row-wise boolean mask: True when lower <= observed <= upper.
    inside_band = (observed >= lower_bound) & (observed <= upper_bound)
    return np.mean(inside_band)

def average_interval_width(df_pred_ensemble, col_90, col_10):
    """
    Mean distance between the upper and lower quantile columns.

    Parameters
    ----------
    df_pred_ensemble : DataFrame containing both quantile columns.
    col_90 : name of the upper-bound column (90th percentile).
    col_10 : name of the lower-bound column (10th percentile).

    Returns
    -------
    Average of (upper - lower) over all rows, i.e. the mean width of the
    interval delimited by the two columns.
    """
    band_width = df_pred_ensemble[col_90] - df_pred_ensemble[col_10]
    return np.mean(band_width)

In [None]:
# Seed NumPy's global random generator with the configured value.
np.random.seed(sim_params['random_seed'])

# Collect the input files configured under the keys 'file_1' ... 'file_12'.
files = [sim_params[f'file_{file_idx}'] for file_idx in range(1, 13)]

logger.info(' ')
logger.info(f'Load Files: {files}')

# Read every file and concatenate them into a single frame.
df = process_and_concat_files(files)

# Filter the data using the forecast and measured column settings.
df_filtered = filter_df(df, sim_params['forecasts_col'], sim_params['measured_col'])

# Optionally replace NaN values with zeros.
if sim_params['replace_nan']:
    logger.info(' ')
    logger.warning("Replacing NaN values with 0s")
    # Show the per-column NaN counts before they are overwritten.
    print(df_filtered.isna().sum())
    # Rebind rather than fill in place: an in-place fill on a frame produced by
    # a filtering helper can raise SettingWithCopyWarning and may or may not
    # propagate to the parent frame, depending on how the helper sliced it.
    df_filtered = df_filtered.fillna(0)

# Name of the buyer resource used in this simulation.
buyer_resource_name = 'b1r1'

# ---------------------------------------------------------------------------
# Metric accumulators, filled inside the test-day loop below.
# Grouped per combination scheme / baseline. For each one:
#   lst_rmse_*       RMSE of the wind power forecasts
#   lst_rmse_var_*   RMSE of the wind power variability forecasts
#   lst_pb_*_q10/90  pinball loss of the 10% / 90% quantiles
#   lst_coverage_*   coverage of the [q10, q90] band
#   lst_avg_width_*  average width of the [q10, q90] band
# Every name is bound to its own, independent empty list.
# ---------------------------------------------------------------------------

# quantile ensemble regressor (coverage and width are also tracked for variability)
lst_rmse_ensemble, lst_rmse_var_ensemble = [], []
lst_pb_ensemble_q10, lst_pb_ensemble_q90 = [], []
lst_coverage_ensemble, lst_coverage_var_ensemble = [], []
lst_avg_width_ensemble, lst_avg_width_ensemble_var = [], []

# best model selection
lst_rmse_best_model, lst_rmse_var_best_model = [], []
lst_pb_best_model_q10, lst_pb_best_model_q90 = [], []
lst_coverage_best_model, lst_avg_width_best_model = [], []

# equal weights scheme
lst_rmse_equal_weights, lst_rmse_var_equal_weights = [], []
lst_pb_equal_weights_q10, lst_pb_equal_weights_q90 = [], []
lst_coverage_equal_weights, lst_avg_width_equal_weights = [], []

# weighted average scheme
lst_rmse_weighted_avg, lst_rmse_var_weighted_avg = [], []
lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90 = [], []
lst_coverage_weighted_avg, lst_avg_width_weighted_avg = [], []

# weighted average scheme (softmax weights)
lst_rmse_weighted_avg_soft, lst_rmse_var_weighted_avg_soft = [], []
lst_pb_weighted_avg_soft_q10, lst_pb_weighted_avg_soft_q90 = [], []
lst_coverage_weighted_avg_soft, lst_avg_width_weighted_avg_soft = [], []

# baseline day ahead
lst_rmse_baseline_dayahead, lst_rmse_var_baseline_dayahead = [], []
lst_pb_dayahead_q10, lst_pb_dayahead_q90 = [], []
lst_coverage_baseline_dayahead, lst_avg_width_baseline_dayahead = [], []

# baseline day ahead 11h
lst_rmse_baseline_dayahead11h, lst_rmse_var_baseline_dayahead11h = [], []
lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90 = [], []
lst_coverage_baseline_dayahead11h, lst_avg_width_baseline_dayahead11h = [], []

# baseline week ahead
lst_rmse_baseline_week_ahead, lst_rmse_var_baseline_week_ahead = [], []
lst_pb_week_ahead_q10, lst_pb_week_ahead_q90 = [], []
lst_coverage_baseline_week_ahead, lst_avg_width_baseline_week_ahead = [], []

# Optional baselines: their accumulators exist only when the matching flag is
# set in the simulation settings.

# baseline most recent
if sim_params['most_recent']:
    lst_rmse_baseline_most_recent, lst_rmse_var_baseline_most_recent = [], []
    lst_pb_most_recent_q10, lst_pb_most_recent_q90 = [], []
    lst_coverage_baseline_most_recent, lst_avg_width_baseline_most_recent = [], []

# baseline malicious
if sim_params['malicious']:
    lst_rmse_baseline_malicious, lst_rmse_var_baseline_malicious = [], []
    lst_pb_malicious_q10, lst_pb_malicious_q90 = [], []
    lst_coverage_baseline_malicious, lst_avg_width_baseline_malicious = [], []

# baseline noisy
if sim_params['noisy']:
    lst_rmse_baseline_noisy, lst_rmse_var_baseline_noisy = [], []
    lst_pb_noisy_q10, lst_pb_noisy_q90 = [], []
    lst_coverage_baseline_noisy, lst_avg_width_baseline_noisy = [], []


# Start from a clean state: drop the pickle file left by a previous day.
logger.info(' ')
delete_previous_day_pickle()
logger.opt(colors=True).warning('previous day pickle file removed')

# Containers for the final forecaster contributions, one per attribution
# method. Each is a separate defaultdict whose missing keys start as empty dicts.
(
    avg_permutation_contributions,
    avg_shapley_contributions,
    avg_coefficients_contributions,
    avg_weighted_avg_contributions,
    avg_weighted_soft_avg_contributions,
) = (defaultdict(dict) for _ in range(5))

# Collected ramp alarms.
list_ramp_alarm = list()

# loop over test days
for i in tqdm(range(sim_params['num_test_days']), desc='Testing Days'):

    # generate timestamps train and prediction
    start_training_timestamp, end_training_timestamp, start_prediction_timestamp, end_prediction_timestamp = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

    if i >= ens_params['day_calibration'] and ens_params['conformalized_qr']:
        day_calibration = ens_params['day_calibration']
        start_training_timestamp = start_training_timestamp - pd.Timedelta(f'{day_calibration}day')

    logger.info(' ')
    logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
    logger.opt(colors=True).info(f'<blue>Start training: {start_training_timestamp} - End training: {end_training_timestamp}</blue>')
    logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
    logger.opt(colors = True).info(f'<blue>Start prediction: {start_prediction_timestamp} - End prediction: {end_prediction_timestamp}</blue>')

    day_previous_start_prediction_timestamp = start_prediction_timestamp - pd.Timedelta('1day')
    df_train = df_filtered[df_filtered.index.to_series().between(start_training_timestamp, end_training_timestamp)].iloc[:-1,:]
    df_test = df_filtered[df_filtered.index.to_series().between(day_previous_start_prediction_timestamp, end_prediction_timestamp)].iloc[:-1,:]
                                                                                                                            
    logger.info(' ')
    logger.opt(colors = True).info(f'<blue> -----------------> Length of training data: {len(df_train)} </blue>')
    logger.opt(colors = True).info(f'<blue> -----------------> Length of test data: {len(df_test)} </blue>')

    logger.info(' ')
    logger.opt(colors = True).info('<blue> -----------------> Forecasters prediction submitted </blue>')

# # ----------------------------> FORECASTERS PREDICTION SUBMISSION <----------------------------

    df_market, df_train, df_test = submission_forecasters(sim_params, df_train, df_test)   

# # ----------------------------> BUYERS DATA <----------------------------

    df_buyer, forecast_range = prepare_buyer_data(df_train, df_test, start_prediction_timestamp, end_prediction_timestamp)

# # ----------------------------> PREDICO PLATFORM ML ENGINE <----------------------------

# # ----------------------------> ENSEMBLE FORECASTS <----------------------------

    results_ensemble_forecasts = create_ensemble_forecasts(ens_params=ens_params,
                                                            df_buyer=df_buyer, 
                                                            df_market=df_market,
                                                            end_training_timestamp=end_training_timestamp,
                                                            forecast_range = forecast_range,
                                                            challenge_usecase='simulation',
                                                            simulation=True)
    

    if sim_params['baselines_comparison']:

        # # # ----------------------------> COMBINATION SCHEME DATA <----------------------------

        df_train_norm, day_previous_df_test_norm, day_previous_df_test_norm_var = process_combination_scheme(df_train, df_test, end_training_timestamp, day_previous_start_prediction_timestamp)
        
        df_pred_ensemble = results_ensemble_forecasts['wind_power']['predictions']   
        df_pred_ensemble.rename(columns={'q50_' + 'b1r1': '50_predictions', 'q10_' + 'b1r1': '10_predictions', 'q90_' + 'b1r1': '90_predictions', 'norm_' + 'b1r1': 'targets'}, inplace=True)
        df_pred_ensemble['targets'] = day_previous_df_test_norm['norm_measured'].values[-96:]
        df_var_ensemble = results_ensemble_forecasts['wind_power_ramp']['predictions']
        df_var_ensemble.rename(columns={'q50_' + 'b1r1': '50_var_predictions', 'q10_' + 'b1r1': '10_var_predictions', 'q90_' + 'b1r1': '90_var_predictions', 'targets': 'targets'}, inplace=True)
        df_var_ensemble['targets'] = day_previous_df_test_norm_var['norm_measured'].values[-96:]
        
        df_test_ensemble = pd.DataFrame(df_pred_ensemble['targets']) 
        df_2stage_test = pd.DataFrame(df_var_ensemble['targets'])

    # # ----------------------------> PERFORMANCE METRICS <----------------------------

    ## ----------------------------> WIND POWER VARIABILITY - PERFORMANCE METRICS <----------------------------

        # performance variability ensemble
        lst_rmse_var_ensemble, rmse_var_ensemble = collect_rmse_result(df_var_ensemble, '50_var_predictions', lst_rmse_var_ensemble)
        # coverage ensemble variability
        coverage_var_ensemble = calculate_coverage(df_var_ensemble, '90_var_predictions', '10_var_predictions', 'targets')
        lst_coverage_var_ensemble.append(round(coverage_var_ensemble, 3))
        # average interval width
        avg_width_ensemble_var = average_interval_width(df_var_ensemble, '90_var_predictions', '10_var_predictions')
        lst_avg_width_ensemble_var.append(round(avg_width_ensemble_var, 3))

        # performance best model selection
        df_best_model_var = run_model_selection(sim_params, df_train_norm , day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp , window_size_valid = weight_avg_params['window_size_valid'], var=True)
        lst_rmse_var_best_model, rmse_var_best_model = collect_rmse_result(df_best_model_var, 'mean_prediction', lst_rmse_var_best_model)

        # performance weighted average
        df_weighted_avg_var, dict_weights_var = calculate_weighted_avg(sim_params, df_train_norm , day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp , window_size_valid=weight_avg_params['window_size_valid'], var=True)
        lst_rmse_var_weighted_avg, rmse_var_weighted_avg = collect_rmse_result(df_weighted_avg_var, 'mean_prediction', lst_rmse_var_weighted_avg)

        # performance weighted avg soft
        df_weighted_avg_soft_var, dict_weights_soft_var = calculate_weighted_avg(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'], var=True, norm='softmax')
        lst_rmse_var_weighted_avg_soft, rmse_var_weighted_avg_soft = collect_rmse_result(df_weighted_avg_soft_var, 'mean_prediction', lst_rmse_var_weighted_avg_soft)

        # performance equal weights
        df_equal_weights_var = calculate_equal_weights(day_previous_df_test_norm_var, start_prediction_timestamp)
        lst_rmse_var_equal_weights, rmse_var_equal_weights = collect_rmse_result(df_equal_weights_var, 'mean_prediction', lst_rmse_var_equal_weights)

        # performance day-ahead
        df_dayahead_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'dayahead', start_prediction_timestamp)
        lst_rmse_var_baseline_dayahead, rmse_var_dayahead = collect_rmse_result(df_dayahead_var, 'norm_dayaheadforecast', lst_rmse_var_baseline_dayahead)

        # performance day-ahead-11h
        df_dayahead_11h_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'dayahead11h', start_prediction_timestamp)
        lst_rmse_var_baseline_dayahead11h, rmse_var_dayahead_11h = collect_rmse_result(df_dayahead_11h_var, 'norm_dayahead11hforecast', lst_rmse_var_baseline_dayahead11h)

        # performance week ahead
        df_week_ahead_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'weekahead', start_prediction_timestamp)
        lst_rmse_var_baseline_week_ahead, rmse_var_week_ahead = collect_rmse_result(df_week_ahead_var, 'norm_weekaheadforecast', lst_rmse_var_baseline_week_ahead)

        # performance most recent
        if sim_params['most_recent']:
            # performance most recent
            df_most_recent_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'mostrecent', start_prediction_timestamp)
            lst_rmse_var_baseline_most_recent, rmse_var_most_recent = collect_rmse_result(df_most_recent_var, 'norm_mostrecentforecast', lst_rmse_var_baseline_most_recent)

        # performance malicious
        if sim_params['malicious']:
            # performance malicious
            df_malicious_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'malicious', start_prediction_timestamp)
            lst_rmse_var_baseline_malicious, rmse_var_malicious = collect_rmse_result(df_malicious_var, 'norm_maliciousforecast', lst_rmse_var_baseline_malicious)

        # performance noisy
        if sim_params['noisy']:
            # performance noisy
            df_noisy_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'noisy', start_prediction_timestamp)
            lst_rmse_var_baseline_noisy, rmse_var_noisy = collect_rmse_result(df_noisy_var, 'norm_noisyforecast', lst_rmse_var_baseline_noisy)

    ## ----------------------------> WIND POWER - PERFORMANCE METRICS <----------------------------

        # performance ensemble
        lst_rmse_ensemble, rmse_ensemble = collect_rmse_result(df_pred_ensemble, '50_predictions', lst_rmse_ensemble)
        lst_pb_ensemble_q10, lst_pb_ensemble_q90, pinball_ensemble_q10, pinball_ensemble_q90 = collect_pb_result(df_pred_ensemble, 
                                                                                                                '10_predictions', '90_predictions', 
                                                                                                                lst_pb_ensemble_q10, lst_pb_ensemble_q90)
        # coverage ensemble
        coverage_ensemble = calculate_coverage(df_pred_ensemble, '90_predictions', '10_predictions', 'targets')
        lst_coverage_ensemble.append(round(coverage_ensemble, 3))
        # average interval width
        avg_width_ensemble = average_interval_width(df_pred_ensemble, '90_predictions', '10_predictions')
        lst_avg_width_ensemble.append(round(avg_width_ensemble, 3))

        # performance best model selection
        df_best_model = run_model_selection(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'])
        
        lst_rmse_best_model, rmse_best_model = collect_rmse_result(df_best_model, 'mean_prediction', lst_rmse_best_model)
        lst_pb_best_model_q10, lst_pb_best_model_q90, pinball_best_model_q10, pinball_best_model_q90 = collect_pb_result(df_best_model,
                                                                                                                        'Q10', 'Q90',
                                                                                                                        lst_pb_best_model_q10, lst_pb_best_model_q90)
        # coverage best model selection
        coverage_best_model = calculate_coverage(df_best_model, 'Q90', 'Q10', 'targets')
        lst_coverage_best_model.append(round(coverage_best_model, 3))
        # average interval width
        avg_width_best_model = average_interval_width(df_best_model, 'Q90', 'Q10')
        lst_avg_width_best_model.append(round(avg_width_best_model, 3))

        # performance weighted average
        df_weighted_avg, dict_weights = calculate_weighted_avg(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'])
        lst_rmse_weighted_avg, rmse_weighted_avg = collect_rmse_result(df_weighted_avg, 'mean_prediction', lst_rmse_weighted_avg)
        lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90, pinball_weighted_avg_q10, pinball_weighted_avg_q90 = collect_pb_result(df_weighted_avg, 
                                                                                                                                    'Q10', 'Q90', 
                                                                                                                                    lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90)
        # coverage weighted average
        coverage_weighted_avg = calculate_coverage(df_weighted_avg, 'Q90', 'Q10', 'targets')
        lst_coverage_weighted_avg.append(round(coverage_weighted_avg, 3))
        # average interval width
        avg_width_weighted_avg = average_interval_width(df_weighted_avg, 'Q90', 'Q10')
        lst_avg_width_weighted_avg.append(round(avg_width_weighted_avg, 3))

        # performance weighted avg soft
        df_weighted_avg_soft, dict_weights_soft = calculate_weighted_avg(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'], norm='softmax')
        lst_rmse_weighted_avg_soft, rmse_weighted_avg_soft = collect_rmse_result(df_weighted_avg_soft, 'mean_prediction', lst_rmse_weighted_avg_soft)
        lst_pb_weighted_avg_soft_q10, lst_pb_weighted_avg_soft_q90, pinball_weighted_avg_soft_q10, pinball_weighted_avg_soft_q90 = collect_pb_result(df_weighted_avg_soft,
                                                                                                                                                    'Q10', 'Q90', 
                                                                                                                                                    lst_pb_weighted_avg_soft_q10, lst_pb_weighted_avg_soft_q90)
        # coverage weighted avg soft
        coverage_weighted_avg_soft = calculate_coverage(df_weighted_avg_soft, 'Q90', 'Q10', 'targets')
        lst_coverage_weighted_avg_soft.append(round(coverage_weighted_avg_soft, 3))
        # average interval width
        avg_width_weighted_avg_soft = average_interval_width(df_weighted_avg_soft, 'Q90', 'Q10')
        lst_avg_width_weighted_avg_soft.append(round(avg_width_weighted_avg_soft, 3))
        
        # performance equal weights
        df_equal_weights = calculate_equal_weights(day_previous_df_test_norm, start_prediction_timestamp)
        lst_rmse_equal_weights, rmse_equal_weights = collect_rmse_result(df_equal_weights, 'mean_prediction', lst_rmse_equal_weights)
        lst_pb_equal_weights_q10, lst_pb_equal_weights_q90, pinball_equal_weights_q10, pinball_equal_weights_q90 = collect_pb_result(df_equal_weights, 
                                                                                                                                            'Q10', 'Q90', 
                                                                                                                                            lst_pb_equal_weights_q10, lst_pb_equal_weights_q90)
        # coverage equal weights
        coverage_equal_weights = calculate_coverage(df_equal_weights, 'Q90', 'Q10', 'targets')
        lst_coverage_equal_weights.append(round(coverage_equal_weights, 3))
        # average interval width
        avg_width_equal_weights = average_interval_width(df_equal_weights, 'Q90', 'Q10')
        lst_avg_width_equal_weights.append(round(avg_width_equal_weights, 3))


        # performance day-ahead
        df_dayahead = create_df_forecaster_first_stage(day_previous_df_test_norm, 'dayahead', start_prediction_timestamp)
        lst_rmse_baseline_dayahead, rmse_dayahead = collect_rmse_result(df_dayahead, 'norm_dayaheadforecast', lst_rmse_baseline_dayahead)
        lst_pb_dayahead_q10, lst_pb_dayahead_q90, pinball_dayahead_q10, pinball_dayahead_q90 = collect_pb_result(df_dayahead, 
                                                                                                                    'norm_dayaheadconfidence10', 'norm_dayaheadconfidence90', 
                                                                                                                    lst_pb_dayahead_q10, lst_pb_dayahead_q90)
        # coverage day-ahead
        coverage_dayahead = calculate_coverage(df_dayahead, 'norm_dayaheadconfidence90', 'norm_dayaheadconfidence10', 'targets')
        lst_coverage_baseline_dayahead.append(round(coverage_dayahead, 3))
        # average interval width
        avg_width_dayahead = average_interval_width(df_dayahead, 'norm_dayaheadconfidence90', 'norm_dayaheadconfidence10')
        lst_avg_width_baseline_dayahead.append(round(avg_width_dayahead, 3))


        # performance day-ahead-11h
        df_dayahead_11h = create_df_forecaster_first_stage(day_previous_df_test_norm, 'dayahead11h', start_prediction_timestamp)
        lst_rmse_baseline_dayahead11h, rmse_dayahead_11h = collect_rmse_result(df_dayahead_11h, 'norm_dayahead11hforecast', lst_rmse_baseline_dayahead11h)
        lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90, pinball_dayahead_11h_q10, pinball_dayahead_11h_q90 = collect_pb_result(df_dayahead_11h, 
                                                                                                                                    'norm_dayahead11hconfidence10', 'norm_dayahead11hconfidence90', 
                                                                                                                                    lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90)
        # coverage day-ahead-11h
        coverage_dayahead_11h = calculate_coverage(df_dayahead_11h, 'norm_dayahead11hconfidence90', 'norm_dayahead11hconfidence10', 'targets')
        lst_coverage_baseline_dayahead11h.append(round(coverage_dayahead_11h, 3))
        # average interval width
        avg_width_dayahead_11h = average_interval_width(df_dayahead_11h, 'norm_dayahead11hconfidence90', 'norm_dayahead11hconfidence10')
        lst_avg_width_baseline_dayahead11h.append(round(avg_width_dayahead_11h, 3))

        # performance week ahead
        df_week_ahead = create_df_forecaster_first_stage(day_previous_df_test_norm, 'weekahead', start_prediction_timestamp)
        lst_rmse_baseline_week_ahead, rmse_week_ahead = collect_rmse_result(df_week_ahead, 'norm_weekaheadforecast', lst_rmse_baseline_week_ahead)
        lst_pb_week_ahead_q10, lst_pb_week_ahead_q90, pinball_week_ahead_q10, pinball_week_ahead_q90 = collect_pb_result(df_week_ahead, 
                                                                                                                            'norm_weekaheadconfidence10', 'norm_weekaheadconfidence90', 
                                                                                                                            lst_pb_week_ahead_q10, lst_pb_week_ahead_q90)
        # coverage week ahead
        coverage_week_ahead = calculate_coverage(df_week_ahead, 'norm_weekaheadconfidence90', 'norm_weekaheadconfidence10', 'targets')
        lst_coverage_baseline_week_ahead.append(round(coverage_week_ahead, 3))
        # average interval width
        avg_width_week_ahead = average_interval_width(df_week_ahead, 'norm_weekaheadconfidence90', 'norm_weekaheadconfidence10')
        lst_avg_width_baseline_week_ahead.append(round(avg_width_week_ahead, 3))

        # performance most recent
        if sim_params['most_recent']:
            df_most_recent = create_df_forecaster_first_stage(day_previous_df_test_norm, 'mostrecent', start_prediction_timestamp)
            lst_rmse_baseline_most_recent, rmse_most_recent = collect_rmse_result(df_most_recent, 'norm_mostrecentforecast', lst_rmse_baseline_most_recent)
            lst_pb_most_recent_q10, lst_pb_most_recent_q90, pinball_most_recent_q10, pinball_most_recent_q90 = collect_pb_result(df_most_recent, 
                                                                                                                                    'norm_mostrecentconfidence10', 'norm_mostrecentconfidence90', 
                                                                                                                                    lst_pb_most_recent_q10, lst_pb_most_recent_q90)
            # coverage most recent
            coverage_most_recent = calculate_coverage(df_most_recent, 'norm_mostrecentconfidence90', 'norm_mostrecentconfidence10', 'targets')
            lst_coverage_baseline_most_recent.append(round(coverage_most_recent, 3))
            # average interval width
            avg_width_most_recent = average_interval_width(df_most_recent, 'norm_mostrecentconfidence90', 'norm_mostrecentconfidence10')
            lst_avg_width_baseline_most_recent.append(round(avg_width_most_recent, 3))

        # performance malicious cheat
        if sim_params['malicious']:
            df_malicious = create_df_forecaster_first_stage(day_previous_df_test_norm, 'malicious', start_prediction_timestamp)
            lst_rmse_baseline_malicious, rmse_malicious = collect_rmse_result(df_malicious, 'norm_maliciousforecast', lst_rmse_baseline_malicious)
            lst_pb_malicious_q10, lst_pb_malicious_q90, pinball_malicious_q10, pinball_malicious_q90 = collect_pb_result(df_malicious, 
                                                                                                                            'norm_maliciousconfidence10', 'norm_maliciousconfidence90', 
                                                                                                                            lst_pb_malicious_q10, lst_pb_malicious_q90)
            # coverage malicious
            coverage_malicious = calculate_coverage(df_malicious, 'norm_maliciousconfidence90', 'norm_maliciousconfidence10', 'targets')
            lst_coverage_baseline_malicious.append(round(coverage_malicious, 3))
            # average interval width
            avg_width_malicious = average_interval_width(df_malicious, 'norm_maliciousconfidence90', 'norm_maliciousconfidence10')
            lst_avg_width_baseline_malicious.append(round(avg_width_malicious, 3))

        # performance noisy
        if sim_params['noisy']:
            df_noisy = create_df_forecaster_first_stage(day_previous_df_test_norm, 'noisy', start_prediction_timestamp)
            lst_rmse_baseline_noisy, rmse_noisy = collect_rmse_result(df_noisy, 'norm_noisyforecast', lst_rmse_baseline_noisy)
            lst_pb_noisy_q10, lst_pb_noisy_q90, pinball_noisy_q10, pinball_noisy_q90 = collect_pb_result(df_noisy, 
                                                                                                            'norm_noisyconfidence10', 'norm_noisyconfidence90', 
                                                                                                            lst_pb_noisy_q10, lst_pb_noisy_q90)
            # coverage noisy
            coverage_noisy = calculate_coverage(df_noisy, 'norm_noisyconfidence90', 'norm_noisyconfidence10', 'targets')
            lst_coverage_baseline_noisy.append(round(coverage_noisy, 3))
            # average interval width
            avg_width_noisy = average_interval_width(df_noisy, 'norm_noisyconfidence90', 'norm_noisyconfidence10')
            lst_avg_width_baseline_noisy.append(round(avg_width_noisy, 3))
            
        # plot forecasts
        if ens_params['plt_wind_power_ensemble']:
            plot_forecasts(df_pred_ensemble, df_test_ensemble, list_wind_ramps=[], title=f'Wind Power Forecasting - coverage {coverage_ensemble} - avg width {avg_width_ensemble}')

        # plot variability forecast results
        if ens_params['plt_wind_power_variability_ensemble']:
            plot_var_forecasts(df_var_ensemble, df_2stage_test, list_wind_ramps=[], title=f'Wind Power Variability Forecasting - coverage {coverage_var_ensemble} - avg width {avg_width_ensemble_var}')

        ## ----------------------------> DISPLAY METRICS <----------------------------
        if sim_params['display_metrics']:
            results_metrics = {'ensemble': {'rmse': rmse_ensemble, 
                                            'pb10': pinball_ensemble_q10, 
                                            'pb90': pinball_ensemble_q90, 
                                            'rmse_var': rmse_var_ensemble},
                                'best_model': {'rmse': rmse_best_model,
                                                'pb10': pinball_best_model_q10, 
                                                'pb90': pinball_best_model_q90, 
                                                'rmse_var': rmse_var_best_model},
                                'weighted_avg': {'rmse': rmse_weighted_avg, 
                                                'pb10': pinball_weighted_avg_q10, 
                                                'pb90': pinball_weighted_avg_q90, 
                                                'rmse_var': rmse_var_weighted_avg},
                                'weighted_avg_soft': {'rmse': rmse_weighted_avg_soft, 
                                                    'pb10': pinball_weighted_avg_soft_q10, 
                                                    'pb90': pinball_weighted_avg_soft_q90, 
                                                    'rmse_var': rmse_var_weighted_avg_soft},
                                'equal_weights': {'rmse': rmse_equal_weights, 
                                                'pb10': pinball_equal_weights_q10, 
                                                'pb90': pinball_equal_weights_q90, 
                                                'rmse_var': rmse_var_equal_weights},
                                'day_ahead': {'rmse': rmse_dayahead, 
                                            'pb10': pinball_dayahead_q10, 
                                            'pb90': pinball_dayahead_q90, 
                                            'rmse_var': rmse_var_dayahead},
                                'day_ahead_11h': {'rmse': rmse_dayahead_11h, 
                                                'pb10': pinball_dayahead_11h_q10, 
                                                'pb90': pinball_dayahead_11h_q90, 
                                                'rmse_var': rmse_var_dayahead_11h},
                                'week_ahead': {'rmse': rmse_week_ahead, 
                                            'pb10': pinball_week_ahead_q10, 
                                            'pb90': pinball_week_ahead_q90, 
                                            'rmse_var': rmse_var_week_ahead}
                                }
            if sim_params['most_recent']:
                results_metrics['most_recent'] = {'rmse': rmse_most_recent, 
                                                'pb10': pinball_most_recent_q10, 
                                                'pb90': pinball_most_recent_q90, 
                                                'rmse_var': rmse_var_most_recent}
            if sim_params['malicious']:
                results_metrics['malicious'] = {'rmse': rmse_malicious, 
                                                        'pb10': pinball_malicious_q10, 
                                                        'pb90': pinball_malicious_q90, 
                                                        'rmse_var': rmse_var_malicious}
            if sim_params['noisy']:
                results_metrics['noisy'] = {'rmse': rmse_noisy, 
                                                'pb10': pinball_noisy_q10, 
                                                'pb90': pinball_noisy_q90, 
                                                'rmse_var': rmse_var_noisy}
            
            display_forecasting_metrics(sim_params=sim_params, ens_params=ens_params, dict_metrics = results_metrics)


    #Clear output
    clear_output(wait=True)

    # import time
    # time.sleep(3)

In [None]:
# Optional baselines: when one of them is switched off in the simulation
# settings, its four score lists are replaced by None before being passed on.
# NOTE(review): presumably run_statistical_comparison_analysis skips None
# entries -- confirm against its implementation.
if not sim_params['most_recent']:
    lst_rmse_baseline_most_recent = lst_pb_most_recent_q10 = None
    lst_pb_most_recent_q90 = lst_rmse_var_baseline_most_recent = None

if not sim_params['malicious']:
    lst_rmse_baseline_malicious = lst_pb_malicious_q10 = None
    lst_pb_malicious_q90 = lst_rmse_var_baseline_malicious = None

if not sim_params['noisy']:
    lst_rmse_baseline_noisy = lst_pb_noisy_q10 = None
    lst_pb_noisy_q90 = lst_rmse_var_baseline_noisy = None

# Every comparison below passes its score lists in the same positional order:
# ensemble, best model, equal weights, weighted avg, weighted avg soft,
# day-ahead, day-ahead 11h, week-ahead, most recent, malicious, noisy.

# --- Q50: RMSE of the wind power forecasts ---
title1 = 'RMSE-based Statistical Significance'
title2 = 'RMSE-based Statistical Comparison: critical difference diagram of ranks'
scores_q50 = (
    lst_rmse_ensemble,
    lst_rmse_best_model,
    lst_rmse_equal_weights,
    lst_rmse_weighted_avg,
    lst_rmse_weighted_avg_soft,
    lst_rmse_baseline_dayahead,
    lst_rmse_baseline_dayahead11h,
    lst_rmse_baseline_week_ahead,
    lst_rmse_baseline_most_recent,
    lst_rmse_baseline_malicious,
    lst_rmse_baseline_noisy,
)
data_q50, avg_rank_q50 = run_statistical_comparison_analysis(
    ens_params['model_type'], *scores_q50, title1, title2)

# --- Q10: pinball loss ---
title1 = 'Q10 Pinball loss-based Statistical Significance'
title2 = 'Q10 Pinball loss-based Statistical Comparison: critical difference diagram of ranks'
scores_q10 = (
    lst_pb_ensemble_q10,
    lst_pb_best_model_q10,
    lst_pb_equal_weights_q10,
    lst_pb_weighted_avg_q10,
    lst_pb_weighted_avg_soft_q10,
    lst_pb_dayahead_q10,
    lst_pb_dayahead_11h_q10,
    lst_pb_week_ahead_q10,
    lst_pb_most_recent_q10,
    lst_pb_malicious_q10,
    lst_pb_noisy_q10,
)
data_q10, avg_rank_q10 = run_statistical_comparison_analysis(
    ens_params['model_type'], *scores_q10, title1, title2)

# --- Q90: pinball loss ---
title1 = 'Q90 Pinball loss-based Statistical Significance'
title2 = 'Q90 Pinball loss-based Statistical Comparison: critical difference diagram of ranks'
scores_q90 = (
    lst_pb_ensemble_q90,
    lst_pb_best_model_q90,
    lst_pb_equal_weights_q90,
    lst_pb_weighted_avg_q90,
    lst_pb_weighted_avg_soft_q90,
    lst_pb_dayahead_q90,
    lst_pb_dayahead_11h_q90,
    lst_pb_week_ahead_q90,
    lst_pb_most_recent_q90,
    lst_pb_malicious_q90,
    lst_pb_noisy_q90,
)
data_q90, avg_rank_q90 = run_statistical_comparison_analysis(
    ens_params['model_type'], *scores_q90, title1, title2)

# --- Q50 variability: RMSE of the variability forecasts ---
# Uses the variability model type; the titles are the same as for the Q50 run.
title1 = 'RMSE-based Statistical Significance'
title2 = 'RMSE-based Statistical Comparison: critical difference diagram of ranks'
scores_q50_var = (
    lst_rmse_var_ensemble,
    lst_rmse_var_best_model,
    lst_rmse_var_equal_weights,
    lst_rmse_var_weighted_avg,
    lst_rmse_var_weighted_avg_soft,
    lst_rmse_var_baseline_dayahead,
    lst_rmse_var_baseline_dayahead11h,
    lst_rmse_var_baseline_week_ahead,
    lst_rmse_var_baseline_most_recent,
    lst_rmse_var_baseline_malicious,
    lst_rmse_var_baseline_noisy,
)
data_q50_var, avg_rank_q50_var = run_statistical_comparison_analysis(
    ens_params['var_model_type'], *scores_q50_var, title1, title2)

In [None]:
# Pair every column prefix with its comparison table so the two stay in sync,
# then split the pairs into the parallel lists display_table_metrics receives.
labelled_tables = [
    ('Q10', data_q10),
    ('Q50', data_q50),
    ('Q90', data_q90),
    ('Q50_var', data_q50_var),
]
prefixes = [prefix for prefix, table in labelled_tables]
dfs = [table for prefix, table in labelled_tables]

result, styled_result = display_table_metrics(dfs, prefixes)
# Last expression of the cell: renders the styled table.
styled_result

In [None]:
# Relative gap (in %) of every entry to the smallest value in its column:
# the column-wise minimum maps to 0, everything else is how much larger it is.
best_per_column = np.min(result, axis=0)
relative_to_best = result / best_per_column
perc_improvement_df = (relative_to_best - 1) * 100
perc_improvement_df.T

In [None]:
from scipy.stats import sem
import matplotlib.pyplot as plt

# z-score of a two-sided 95% normal confidence interval
Z_95 = 1.96
# nominal coverage of the interval between the 10th and 90th percentile forecasts
NOMINAL_COVERAGE = 0.8


def _plot_mean_with_ci(scores_by_forecaster, value_label, title, reference_x=None, bar_label=None):
    """Plot the mean score of each forecaster as a horizontal bar with a 95% CI.

    Parameters
    ----------
    scores_by_forecaster : dict
        Maps the forecaster name shown on the y-axis to its sequence of scores.
        Bars are drawn in the dict's insertion order.
    value_label : str
        Name of the plotted quantity (e.g. 'RMSE'). The x-axis is labelled
        'Mean <value_label>'.
    title : str
        Figure title.
    reference_x : float, optional
        If given, a dashed red vertical line is drawn at this x position.
    bar_label : str, optional
        If given, legend label attached to the bars.

    Returns
    -------
    pandas.DataFrame
        One row per forecaster with the columns 'Forecaster',
        'Mean <value_label>' and 'CI95 <value_label>'. The last column is the
        half-width of the interval, i.e. 1.96 times the standard error of the mean.
    """
    mean_col = f'Mean {value_label}'
    ci_col = f'CI95 {value_label}'
    summary = pd.DataFrame({
        'Forecaster': list(scores_by_forecaster),
        mean_col: [np.mean(scores) for scores in scores_by_forecaster.values()],
        ci_col: [Z_95 * sem(scores) for scores in scores_by_forecaster.values()],
    })

    # Only forward `label` when one was requested, so that the bars stay out
    # of the legend otherwise.
    bar_kwargs = {} if bar_label is None else {'label': bar_label}

    fig, ax = plt.subplots()
    sns.barplot(x=mean_col, y='Forecaster', data=summary, alpha=0.5, ax=ax, **bar_kwargs)
    # error bars: mean +/- 95% CI half-width
    ax.errorbar(summary[mean_col], summary['Forecaster'], xerr=summary[ci_col], fmt='o', label='95% CI')
    if reference_x is not None:
        ax.axvline(x=reference_x, color='r', linestyle='--')
    ax.legend()
    ax.set_title(title)
    plt.show()
    return summary


sns.set_theme(style="whitegrid")

# mean RMSE of the wind power forecasts
rmse_summary = _plot_mean_with_ci(
    {'QR Ensemble': lst_rmse_ensemble,
     'Best Model': lst_rmse_best_model,
     'Equal Weights': lst_rmse_equal_weights,
     'Weighted Avg': lst_rmse_weighted_avg,
     'Weighted Avg Soft': lst_rmse_weighted_avg_soft,
     'Day-ahead': lst_rmse_baseline_dayahead,
     'Day-ahead 11h': lst_rmse_baseline_dayahead11h,
     'Week-ahead': lst_rmse_baseline_week_ahead},
    value_label='RMSE', title='RMSE')

# mean pinball loss, quantile 10
pb_q10_summary = _plot_mean_with_ci(
    {'QR Ensemble': lst_pb_ensemble_q10,
     'Best Model': lst_pb_best_model_q10,
     'Equal Weights': lst_pb_equal_weights_q10,
     'Weighted Avg': lst_pb_weighted_avg_q10,
     'Weighted Avg Soft': lst_pb_weighted_avg_soft_q10,
     'Day-ahead': lst_pb_dayahead_q10,
     'Day-ahead 11h': lst_pb_dayahead_11h_q10,
     'Week-ahead': lst_pb_week_ahead_q10},
    value_label='PB Q10', title='Pinball Loss Q10')

# mean pinball loss, quantile 90
pb_q90_summary = _plot_mean_with_ci(
    {'QR Ensemble': lst_pb_ensemble_q90,
     'Best Model': lst_pb_best_model_q90,
     'Equal Weights': lst_pb_equal_weights_q90,
     'Weighted Avg': lst_pb_weighted_avg_q90,
     'Weighted Avg Soft': lst_pb_weighted_avg_soft_q90,
     'Day-ahead': lst_pb_dayahead_q90,
     'Day-ahead 11h': lst_pb_dayahead_11h_q90,
     'Week-ahead': lst_pb_week_ahead_q90},
    value_label='PB Q90', title='Pinball Loss Q90')

# mean RMSE of the variability forecasts
rmse_var_summary = _plot_mean_with_ci(
    {'QR Ensemble': lst_rmse_var_ensemble,
     'Best Model': lst_rmse_var_best_model,
     'Equal Weights': lst_rmse_var_equal_weights,
     'Weighted Avg': lst_rmse_var_weighted_avg,
     'Weighted Avg Soft': lst_rmse_var_weighted_avg_soft,
     'Day-ahead': lst_rmse_var_baseline_dayahead,
     'Day-ahead 11h': lst_rmse_var_baseline_dayahead11h,
     'Week-ahead': lst_rmse_var_baseline_week_ahead},
    value_label='RMSE Var', title='RMSE Variability')

# mean coverage of the prediction intervals; the dashed line marks the nominal level
coverage_summary = _plot_mean_with_ci(
    {'QR Ensemble': lst_coverage_ensemble,
     'QR Var Ensemble': lst_coverage_var_ensemble,
     'Best Model': lst_coverage_best_model,
     'Equal Weights': lst_coverage_equal_weights,
     'Weighted Avg': lst_coverage_weighted_avg,
     'Weighted Avg Soft': lst_coverage_weighted_avg_soft,
     'Day-ahead': lst_coverage_baseline_dayahead,
     'Day-ahead 11h': lst_coverage_baseline_dayahead11h,
     'Week-ahead': lst_coverage_baseline_week_ahead},
    value_label='Coverage', title='PI Mean Coverage Probability',
    reference_x=NOMINAL_COVERAGE)

# mean width of the prediction intervals
# No reference line here: the 0.8 marker is a coverage level, not a width.
# The variability ensemble width (lst_avg_width_ensemble_var) is not part of
# this figure; add it to the mapping if it should be shown.
width_summary = _plot_mean_with_ci(
    {'QR Ensemble': lst_avg_width_ensemble,
     'Best Model': lst_avg_width_best_model,
     'Equal Weights': lst_avg_width_equal_weights,
     'Weighted Avg': lst_avg_width_weighted_avg,
     'Weighted Avg Soft': lst_avg_width_weighted_avg_soft,
     'Day-ahead': lst_avg_width_baseline_dayahead,
     'Day-ahead 11h': lst_avg_width_baseline_dayahead11h,
     'Week-ahead': lst_avg_width_baseline_week_ahead},
    value_label='Width', title='PI Mean Width', bar_label='Mean Width')
