In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from loguru import logger
import pickle
import seaborn as sns
from matplotlib import pyplot as plt
from source.utils.file_read import process_and_concat_files 
from source.utils.file_read import filter_df
from source.utils.collect_results import collect_pb_result, collect_rmse_result, create_df_forecaster_first_stage, create_df_forecaster_second_stage
from source.utils.generate_timestamp import generate_timestamps
from source.forecasters.deterministic import create_day_ahead_predictions, create_day_ahead_11_predictions, create_week_ahead_predictions, create_most_recent_predictions, create_malicious_predictions, create_noisy_predictions
from source.forecasters.probabilistic import create_day_ahead_quantiles10, create_day_ahead_11_quantiles10, create_week_ahead_quantiles10, create_most_recent_quantiles10, create_malicious_quantiles10, create_noisy_quantiles10
from source.forecasters.probabilistic import create_day_ahead_quantiles90, create_day_ahead_11_quantiles90, create_week_ahead_quantiles90, create_most_recent_quantiles90, create_malicious_quantiles90, create_noisy_quantiles90
from source.ensemble.stack_generalization.utils.display_results import display_forecasting_metrics
from source.ensemble.combination_scheme.equal_weights import calculate_equal_weights
from source.ensemble.combination_scheme.avg_weights import calculate_weighted_avg
from source.ensemble.combination_scheme.model_selection import run_model_selection
from source.plots.plot_forecasts import plot_ensemble_forecasts, plot_var_ensemble_forecasts, plot_weighted_avg_forecasts
from source.plots.display_hypothesis_testing import run_statistical_comparison_analysis
from source.plots.display_metrics import display_table_metrics
from source.plots.display_contributions import weighted_avg_pivot_data, permutation_pivot_data, lasso_coefs_pivot_data
from source.ensemble.combination_scheme.weight_avg_plot_importance import plot_weight_avg_contributions
from source.assessment_contributions import compute_forecasters_contributions
from source.ml_engine import create_ensemble_forecasts
from sklearn.utils.fixes import parse_version, sp_version
# Choose the solver name from sp_version (imported from sklearn.utils.fixes):
# "highs" when it is at least 1.6.0, otherwise "interior-point".
if sp_version >= parse_version("1.6.0"):
    solver = "highs"
else:
    solver = "interior-point"
from collections import defaultdict
from IPython.display import clear_output

In [None]:
from config.simulation_setting import Simulation, WeightedAvg, Stack
from source.simulation.helpers_simulation import process_combination_scheme, update_dict_weights, compute_coefficients
from source.utils.session_ml_info import delete_previous_day_pickle
# Parameter sets read from the config classes and used throughout the notebook:
#   sim_params        <- Simulation.testing_period
#   weight_avg_params <- WeightedAvg.params
#   ens_params        <- Stack.params
sim_params, weight_avg_params, ens_params = (
    Simulation.testing_period,
    WeightedAvg.params,
    Stack.params,
)

In [None]:
# Seed NumPy's global random state with the configured value.
np.random.seed(sim_params['random_seed'])

# Collect the five configured input files ('file_0' ... 'file_4') and hand them
# to process_and_concat_files to obtain a single frame.
files = [sim_params[f'file_{file_idx}'] for file_idx in range(5)]
df = process_and_concat_files(files)

# Filter that frame using the configured forecast and measurement column settings.
df_filtered = filter_df(df, sim_params['forecasts_col'], sim_params['measured_col'])

# Name of the buyer resource.
buyer_resource_name = 'b1r1'

# Per-test-day result accumulators. Every name gets its own, independent empty list.

# ---- RMSE, wind power ----
# ensemble regressor / best model selection / equal weights
lst_rmse_ensemble, lst_rmse_best_model, lst_rmse_equal_weights = [], [], []
# weighted average / weighted average (soft)
lst_rmse_weighted_avg, lst_rmse_weighted_avg_soft = [], []
# baselines: day ahead / day ahead 11h / week ahead
lst_rmse_baseline_dayahead, lst_rmse_baseline_dayahead11h, lst_rmse_baseline_week_ahead = [], [], []

# ---- RMSE, wind power variability ("var") ----
# ensemble regressor / best model selection / equal weights
lst_rmse_var_ensemble, lst_rmse_var_best_model, lst_rmse_var_equal_weights = [], [], []
# weighted average / weighted average (soft)
lst_rmse_var_weighted_avg, lst_rmse_var_weighted_avg_soft = [], []
# baselines: day ahead / day ahead 11h / week ahead
lst_rmse_var_baseline_dayahead, lst_rmse_var_baseline_dayahead11h, lst_rmse_var_baseline_week_ahead = [], [], []

# ---- pinball loss, quantile 10 and quantile 90 ----
# ensemble regressor
lst_pb_ensemble_q10, lst_pb_ensemble_q90 = [], []
# best model selection
lst_pb_best_model_q10, lst_pb_best_model_q90 = [], []
# weighted average
lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90 = [], []
# weighted average (soft)
lst_pb_weighted_avg_soft_q10, lst_pb_weighted_avg_soft_q90 = [], []
# equal weights
lst_pb_equal_weights_q10, lst_pb_equal_weights_q90 = [], []
# baseline day ahead
lst_pb_dayahead_q10, lst_pb_dayahead_q90 = [], []
# baseline day ahead 11h
lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90 = [], []
# baseline week ahead
lst_pb_week_ahead_q10, lst_pb_week_ahead_q90 = [], []

# ---- optional forecasters: lists exist only when the matching flag is set ----
# baseline most recent
if sim_params['most_recent']:
    lst_rmse_baseline_most_recent, lst_rmse_var_baseline_most_recent = [], []
    lst_pb_most_recent_q10, lst_pb_most_recent_q90 = [], []
# baseline malicious
if sim_params['malicious']:
    lst_rmse_baseline_malicious, lst_rmse_var_baseline_malicious = [], []
    lst_pb_malicious_q10, lst_pb_malicious_q90 = [], []
# baseline noisy
if sim_params['noisy']:
    lst_rmse_baseline_noisy, lst_rmse_var_baseline_noisy = [], []
    lst_pb_noisy_q10, lst_pb_noisy_q90 = [], []

# Log a blank spacer line, call the helper that removes the previous day's
# pickle file, then report the removal at warning level.
logger.info(' ')
delete_previous_day_pickle()
logger.opt(colors=True).warning('previous day pickle file removed')

# Containers for the forecasters' final contributions. Each one is a separate
# defaultdict whose missing keys default to an empty dict.
(
    avg_permutation_contributions,
    avg_coefficients_contributions,
    avg_weighted_avg_contributions,
    avg_weighted_soft_avg_contributions,
) = (defaultdict(dict) for _ in range(4))

# loop over test days
for i in tqdm(range(sim_params['num_test_days']), desc='Testing Days'):

    # generate timestamps train and prediction
    start_training_timestamp, end_training_timestamp, start_prediction_timestamp, end_prediction_timestamp = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

    logger.info(' ')
    logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
    logger.opt(colors=True).info(f'<blue>Start training: {start_training_timestamp} - End training: {end_training_timestamp}</blue>')
    logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
    logger.opt(colors = True).info(f'<blue>Start prediction: {start_prediction_timestamp} - End prediction: {end_prediction_timestamp}</blue>')

    # The test window starts one day before the prediction window.
    day_previous_start_prediction_timestamp = start_prediction_timestamp - pd.Timedelta('1day')
    # Select the rows whose index lies inside each window (both bounds inclusive)
    # and drop the last selected row. Take an explicit copy: columns are assigned
    # onto df_train / df_test later in this loop, and doing that on an un-copied
    # slice of a selection raises pandas' SettingWithCopyWarning.
    df_train = df_filtered[df_filtered.index.to_series().between(start_training_timestamp, end_training_timestamp)].iloc[:-1,:].copy()
    df_test = df_filtered[df_filtered.index.to_series().between(day_previous_start_prediction_timestamp, end_prediction_timestamp)].iloc[:-1,:].copy()
                                                                                                                            
    logger.info(' ')
    logger.opt(colors = True).info(f'<blue> -----------------> Length of training data: {len(df_train)} </blue>')
    logger.opt(colors = True).info(f'<blue> -----------------> Length of test data: {len(df_test)} </blue>')

    logger.info(' ')
    logger.opt(colors = True).info('<blue> -----------------> Forecasters prediction submitted </blue>')

    # forecaster - day ahead forecast
    df_day_ahead_pred_train = create_day_ahead_predictions(df_train)
    df_day_ahead_pred_test = create_day_ahead_predictions(df_test)

    # forecaster - day ahead 11 forecast
    df_day_ahead11_pred_train = create_day_ahead_11_predictions(df_train)
    df_day_ahead11_pred_test = create_day_ahead_11_predictions(df_test)

    # forecaster - week ahead forecast
    df_week_ahead_pred_train = create_week_ahead_predictions(df_train)
    df_week_ahead_pred_test = create_week_ahead_predictions(df_test)

    # forecaster - day ahead quantile-10
    df_day_ahead_q10_train = create_day_ahead_quantiles10(df_train)
    df_day_ahead_q10_test = create_day_ahead_quantiles10(df_test)

    # forecaster - day ahead 11 quantile-10
    df_day_ahead11_q10_train = create_day_ahead_11_quantiles10(df_train)
    df_day_ahead11_q10_test = create_day_ahead_11_quantiles10(df_test)

    # forecaster - week ahead quantile-10
    df_week_ahead_q10_train = create_week_ahead_quantiles10(df_train)
    df_week_ahead_q10_test = create_week_ahead_quantiles10(df_test)

    # forecaster - day ahead quantile-90
    df_day_ahead_q90_train = create_day_ahead_quantiles90(df_train)
    df_day_ahead_q90_test = create_day_ahead_quantiles90(df_test)

    # forecaster - day ahead 11 quantile-90
    df_day_ahead11_q90_train = create_day_ahead_11_quantiles90(df_train)
    df_day_ahead11_q90_test = create_day_ahead_11_quantiles90(df_test)

    # forecaster - week ahead quantile-90
    df_week_ahead_q90_train = create_week_ahead_quantiles90(df_train)
    df_week_ahead_q90_test = create_week_ahead_quantiles90(df_test)

    # forecaster - most recent forecast (intra-day market)
    if sim_params['most_recent']:
        # mean forecasts
        df_most_recent_pred_train = create_most_recent_predictions(df_train)
        df_most_recent_pred_test = create_most_recent_predictions(df_test)
        # q10 forecasts
        df_most_recent_q10_train = create_most_recent_quantiles10(df_train)
        df_most_recent_q10_test = create_most_recent_quantiles10(df_test)
        # q90 forecasts
        df_most_recent_q90_train = create_most_recent_quantiles90(df_train)
        df_most_recent_q90_test = create_most_recent_quantiles90(df_test)
    
    # forecaster - malicious forecast
    if sim_params['malicious']:
        # mean forecasts
        df_malicious_pred_train = create_malicious_predictions(df_train, column= sim_params['malicious_name'])
        df_malicious_pred_test = create_malicious_predictions(df=df_test, column= sim_params['malicious_name'], cheat=True, df_train=df_train)
        # q10 forecasts
        df_malicious_q10_train = create_malicious_quantiles10(df_train, column= sim_params['malicious_name'])
        df_malicious_q10_test = create_malicious_quantiles10(df=df_test, column= sim_params['malicious_name'], cheat=True, df_train=df_train)
        # q90 forecasts
        df_malicious_q90_train = create_malicious_quantiles90(df_train, column= sim_params['malicious_name'])
        df_malicious_q90_test = create_malicious_quantiles90(df=df_test, column= sim_params['malicious_name'], cheat=True, df_train=df_train)

    # forecaster - noisy forecast
    if sim_params['noisy']:
        # mean forecasts
        df_noisy_pred_train = create_noisy_predictions(df_train, column= sim_params['noisy_name'])
        df_noisy_pred_test = create_noisy_predictions(df_test, column= sim_params['noisy_name'])
        # q10 forecasts
        df_noisy_q10_train = create_noisy_quantiles10(df_train, column= sim_params['noisy_name'])
        df_noisy_q10_test = create_noisy_quantiles10(df_test, column= sim_params['noisy_name'])
        # q90 forecasts
        df_noisy_q90_train = create_noisy_quantiles90(df_train, column= sim_params['noisy_name'])
        df_noisy_q90_test = create_noisy_quantiles90(df_test, column= sim_params['noisy_name'])

# # ----------------------------> SELLERS DATA <----------------------------
    # sellers data
    df_train_ensemble_quantile50 = pd.concat([df_day_ahead_pred_train, df_day_ahead11_pred_train, df_week_ahead_pred_train], axis=1)
    df_test_ensemble_quantile50 = pd.concat([df_day_ahead_pred_test, df_day_ahead11_pred_test, df_week_ahead_pred_test], axis=1)
    if sim_params['malicious']:
        df_train_ensemble_quantile50 = pd.concat([df_train_ensemble_quantile50, df_malicious_pred_train], axis=1)
        df_test_ensemble_quantile50 = pd.concat([df_test_ensemble_quantile50, df_malicious_pred_test], axis=1)
    if sim_params['most_recent']:
        df_train_ensemble_quantile50 = pd.concat([df_train_ensemble_quantile50, df_most_recent_pred_train], axis=1)
        df_test_ensemble_quantile50 = pd.concat([df_test_ensemble_quantile50, df_most_recent_pred_test], axis=1)
    if sim_params['noisy']:
        df_train_ensemble_quantile50 = pd.concat([df_train_ensemble_quantile50, df_noisy_pred_train], axis=1)
        df_test_ensemble_quantile50 = pd.concat([df_test_ensemble_quantile50, df_noisy_pred_test], axis=1)
    df_ensemble_quantile50 = pd.concat([df_train_ensemble_quantile50, df_test_ensemble_quantile50], axis=0)

    df_train_ensemble_quantile10 = pd.concat([df_day_ahead_q10_train, df_day_ahead11_q10_train, df_week_ahead_q10_train], axis=1)
    df_test_ensemble_quantile10 = pd.concat([df_day_ahead_q10_test, df_day_ahead11_q10_test, df_week_ahead_q10_test], axis=1)
    if sim_params['malicious']:
        df_train_ensemble_quantile10 = pd.concat([df_train_ensemble_quantile10, df_malicious_q10_train], axis=1)
        df_test_ensemble_quantile10 = pd.concat([df_test_ensemble_quantile10, df_malicious_q10_test], axis=1)
    if sim_params['most_recent']:
        df_train_ensemble_quantile10 = pd.concat([df_train_ensemble_quantile10, df_most_recent_q10_train], axis=1)
        df_test_ensemble_quantile10 = pd.concat([df_test_ensemble_quantile10, df_most_recent_q10_test], axis=1)
    if sim_params['noisy']:
        df_train_ensemble_quantile10 = pd.concat([df_train_ensemble_quantile10, df_noisy_q10_train], axis=1)
        df_test_ensemble_quantile10 = pd.concat([df_test_ensemble_quantile10, df_noisy_q10_test], axis=1)
    df_ensemble_quantile10 = pd.concat([df_train_ensemble_quantile10, df_test_ensemble_quantile10], axis=0)

    df_train_ensemble_quantile90 = pd.concat([df_day_ahead_q90_train, df_day_ahead11_q90_train, df_week_ahead_q90_train], axis=1)
    df_test_ensemble_quantile90 = pd.concat([df_day_ahead_q90_test, df_day_ahead11_q90_test, df_week_ahead_q90_test], axis=1)
    if sim_params['malicious']:
        df_train_ensemble_quantile90 = pd.concat([df_train_ensemble_quantile90, df_malicious_q90_train], axis=1)
        df_test_ensemble_quantile90 = pd.concat([df_test_ensemble_quantile90, df_malicious_q90_test], axis=1)
    if sim_params['most_recent']:
        df_train_ensemble_quantile90 = pd.concat([df_train_ensemble_quantile90, df_most_recent_q90_train], axis=1)
        df_test_ensemble_quantile90 = pd.concat([df_test_ensemble_quantile90, df_most_recent_q90_test], axis=1)
    if sim_params['noisy']:
        df_train_ensemble_quantile90 = pd.concat([df_train_ensemble_quantile90, df_noisy_q90_train], axis=1)
        df_test_ensemble_quantile90 = pd.concat([df_test_ensemble_quantile90, df_noisy_q90_test], axis=1)

    df_ensemble_quantile90 = pd.concat([df_train_ensemble_quantile90, df_test_ensemble_quantile90], axis=0)

    # Seller ids in the order their frames were concatenated: s1-s3 are always
    # present, then s5 (malicious), s4 (most recent) and s6 (noisy) when enabled.
    seller_ids = ['s1', 's2', 's3']
    for flag_name, optional_seller in (('malicious', 's5'), ('most_recent', 's4'), ('noisy', 's6')):
        if sim_params[flag_name]:
            seller_ids.append(optional_seller)

    # One label per seller and quantile, following '<seller>_<quantile>_b1r1'.
    lst_cols_name_q50 = [f'{sid}_q50_b1r1' for sid in seller_ids]
    lst_cols_name_q10 = [f'{sid}_q10_b1r1' for sid in seller_ids]
    lst_cols_name_q90 = [f'{sid}_q90_b1r1' for sid in seller_ids]

    # Relabel the stacked train+test frames with the seller column names.
    df_ensemble_quantile50.columns = lst_cols_name_q50
    df_ensemble_quantile10.columns = lst_cols_name_q10
    df_ensemble_quantile90.columns = lst_cols_name_q90

    df_market = pd.concat([df_ensemble_quantile50, df_ensemble_quantile10, df_ensemble_quantile90], axis=1)  

# # ----------------------------> BUYERS DATA <----------------------------
    # 15-minute timestamps spanning the prediction window.
    forecast_range = pd.date_range(start=start_prediction_timestamp, end=end_prediction_timestamp, freq='15min')

    # Buyer frames hold only the 'measured' column; the test part is blanked out
    # with None before it is stacked under the training part.
    df_train_buyer = pd.DataFrame(df_train['measured'])
    df_test_buyer = pd.DataFrame(df_test['measured'])
    df_test_buyer['measured'] = [None] * len(df_test_buyer)

    # Stack train over test and expose the single column under the name 'b1r1'.
    df_buyer = pd.concat([df_train_buyer, df_test_buyer], axis=0).rename(columns={'measured': 'b1r1'})

# # ----------------------------> PREDICO PLATFORM ML ENGINE <----------------------------
    results_ensemble_forecasts = create_ensemble_forecasts(ens_params=ens_params,
                                                            df_buyer=df_buyer, 
                                                            df_market=df_market,
                                                            end_training_timestamp=end_training_timestamp,
                                                            forecast_range = forecast_range,
                                                            challenge_usecase='simulation',
                                                            simulation=True)

# # ----------------------------> COMBINATION SCHEME DATA <----------------------------
    # Add the optional forecasters' mean / q10 / q90 values as extra columns on
    # df_train and df_test before both frames go to process_combination_scheme.
    if sim_params['malicious']:
        malicious_columns = (
            ('maliciousforecast', df_malicious_pred_train, df_malicious_pred_test),      # mean forecasts
            ('maliciousconfidence10', df_malicious_q10_train, df_malicious_q10_test),    # q10 forecasts
            ('maliciousconfidence90', df_malicious_q90_train, df_malicious_q90_test),    # q90 forecasts
        )
        for column_name, train_frame, test_frame in malicious_columns:
            df_train[column_name] = train_frame.values
            df_test[column_name] = test_frame.values

    if sim_params['noisy']:
        noisy_columns = (
            ('noisyforecast', df_noisy_pred_train, df_noisy_pred_test),      # mean forecasts
            ('noisyconfidence10', df_noisy_q10_train, df_noisy_q10_test),    # q10 forecasts
            ('noisyconfidence90', df_noisy_q90_train, df_noisy_q90_test),    # q90 forecasts
        )
        for column_name, train_frame, test_frame in noisy_columns:
            df_train[column_name] = train_frame.values
            df_test[column_name] = test_frame.values

    df_train_norm, day_previous_df_test_norm, day_previous_df_test_norm_var = process_combination_scheme(df_train, df_test, end_training_timestamp, day_previous_start_prediction_timestamp)
    df_pred_ensemble = results_ensemble_forecasts['wind_power']['predictions']   
    df_pred_ensemble.rename(columns={'q50_' + 'b1r1': '50_predictions', 'q10_' + 'b1r1': '10_predictions', 'q90_' + 'b1r1': '90_predictions', 'norm_' + 'b1r1': 'target'}, inplace=True)
    df_pred_ensemble['target'] = day_previous_df_test_norm['norm_measured'].values[-96:]
    df_var_ensemble = results_ensemble_forecasts['wind_power_ramp']['predictions']
    df_var_ensemble.rename(columns={'q50_' + 'b1r1': '50_var_predictions', 'q10_' + 'b1r1': '10_var_predictions', 'q90_' + 'b1r1': '90_var_predictions', 'targets': 'target'}, inplace=True)
    df_var_ensemble['target'] = day_previous_df_test_norm_var['norm_measured'].values[-96:]
    
    df_test_ensemble = pd.DataFrame(df_pred_ensemble['target']) 
    df_2stage_test = pd.DataFrame(df_var_ensemble['target'])

# # ----------------------------> PERFORMANCE METRICS <----------------------------
    # performance ensemble
    lst_rmse_ensemble, rmse_ensemble = collect_rmse_result(df_pred_ensemble, '50_predictions', lst_rmse_ensemble)

    lst_pb_ensemble_q10, lst_pb_ensemble_q90, pinball_ensemble_q10, pinball_ensemble_q90 = collect_pb_result(df_pred_ensemble, 
                                                                                                            '10_predictions', '90_predictions', 
                                                                                                            lst_pb_ensemble_q10, lst_pb_ensemble_q90)

    # performance variability ensemble
    lst_rmse_var_ensemble, rmse_var_ensemble = collect_rmse_result(df_var_ensemble, '50_var_predictions', lst_rmse_var_ensemble)

    # performance best model selection
    df_best_model_var = run_model_selection(sim_params, df_train_norm , day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp , window_size_valid = weight_avg_params['window_size_valid'], var=True)
    lst_rmse_var_best_model, rmse_var_best_model = collect_rmse_result(df_best_model_var, 'mean_prediction', lst_rmse_var_best_model)

    # performance weighted average
    df_weighted_avg_var, dict_weights_var = calculate_weighted_avg(sim_params, df_train_norm , day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp , window_size_valid=weight_avg_params['window_size_valid'], var=True)
    lst_rmse_var_weighted_avg, rmse_var_weighted_avg = collect_rmse_result(df_weighted_avg_var, 'mean_prediction', lst_rmse_var_weighted_avg)

    # performance weighted avg soft
    df_weighted_avg_soft_var, dict_weights_soft_var = calculate_weighted_avg(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'], var=True, norm='softmax')
    lst_rmse_var_weighted_avg_soft, rmse_var_weighted_avg_soft = collect_rmse_result(df_weighted_avg_soft_var, 'mean_prediction', lst_rmse_var_weighted_avg_soft)

    # plot contribution weighted average
    if ens_params['plot_importance_weighted_avg']:
        plot_weight_avg_contributions(dict_weights_var, quantile=0.5, stage='Wind Power Variability', days= weight_avg_params['window_size_valid'])

    # performance equal weights
    df_equal_weights_var = calculate_equal_weights(day_previous_df_test_norm_var, start_prediction_timestamp)
    lst_rmse_var_equal_weights, rmse_var_equal_weights = collect_rmse_result(df_equal_weights_var, 'mean_prediction', lst_rmse_var_equal_weights)

    # performance day-ahead
    df_dayahead_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'dayahead', start_prediction_timestamp)
    lst_rmse_var_baseline_dayahead, rmse_var_dayahead = collect_rmse_result(df_dayahead_var, 'norm_dayaheadforecast', lst_rmse_var_baseline_dayahead)

    # performance day-ahead-11h
    df_dayahead_11h_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'dayahead11h', start_prediction_timestamp)
    lst_rmse_var_baseline_dayahead11h, rmse_var_dayahead_11h = collect_rmse_result(df_dayahead_11h_var, 'norm_dayahead11hforecast', lst_rmse_var_baseline_dayahead11h)

    # performance week ahead
    df_week_ahead_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'weekahead', start_prediction_timestamp)
    lst_rmse_var_baseline_week_ahead, rmse_var_week_ahead = collect_rmse_result(df_week_ahead_var, 'norm_weekaheadforecast', lst_rmse_var_baseline_week_ahead)

    # performance most recent
    if sim_params['most_recent']:
        df_most_recent_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'mostrecent', start_prediction_timestamp)
        lst_rmse_var_baseline_most_recent, rmse_var_most_recent = collect_rmse_result(df_most_recent_var, 'norm_mostrecentforecast', lst_rmse_var_baseline_most_recent)

    # performance malicious
    if sim_params['malicious']:
        df_malicious_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'malicious', start_prediction_timestamp)
        lst_rmse_var_baseline_malicious, rmse_var_malicious = collect_rmse_result(df_malicious_var, 'norm_maliciousforecast', lst_rmse_var_baseline_malicious)

    # performance noisy
    if sim_params['noisy']:
        df_noisy_var = create_df_forecaster_second_stage(day_previous_df_test_norm_var, 'noisy', start_prediction_timestamp)
        lst_rmse_var_baseline_noisy, rmse_var_noisy = collect_rmse_result(df_noisy_var, 'norm_noisyforecast', lst_rmse_var_baseline_noisy)

    # performance best model selection
    df_best_model = run_model_selection(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'])
    lst_rmse_best_model, rmse_best_model = collect_rmse_result(df_best_model, 'mean_prediction', lst_rmse_best_model)
    lst_pb_best_model_q10, lst_pb_best_model_q90, pinball_best_model_q10, pinball_best_model_q90 = collect_pb_result(df_best_model,
                                                                                                                    'Q10', 'Q90',
                                                                                                                    lst_pb_best_model_q10, lst_pb_best_model_q90)

    # performance weighted average
    df_weighted_avg, dict_weights = calculate_weighted_avg(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'])
    lst_rmse_weighted_avg, rmse_weighted_avg = collect_rmse_result(df_weighted_avg, 'mean_prediction', lst_rmse_weighted_avg)
    lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90, pinball_weighted_avg_q10, pinball_weighted_avg_q90 = collect_pb_result(df_weighted_avg, 
                                                                                                                                'Q10', 'Q90', 
                                                                                                                                lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90)
    # performance weighted avg soft
    df_weighted_avg_soft, dict_weights_soft = calculate_weighted_avg(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'], norm='softmax')
    lst_rmse_weighted_avg_soft, rmse_weighted_avg_soft = collect_rmse_result(df_weighted_avg_soft, 'mean_prediction', lst_rmse_weighted_avg_soft)
    lst_pb_weighted_avg_soft_q10, lst_pb_weighted_avg_soft_q90, pinball_weighted_avg_soft_q10, pinball_weighted_avg_soft_q90 = collect_pb_result(df_weighted_avg_soft,
                                                                                                                                                'Q10', 'Q90', 
                                                                                                                                                lst_pb_weighted_avg_soft_q10, lst_pb_weighted_avg_soft_q90)
    # plot forecasts weighted avg
    if ens_params['plot_weighted_avg']:
        plot_weighted_avg_forecasts(df_weighted_avg)

    # plot contribution weighted average
    if ens_params['plot_importance_weighted_avg']:
        for quantile in ens_params['quantiles']:
            plot_weight_avg_contributions(dict_weights, quantile, stage='Wind Power', days = weight_avg_params['window_size_valid'])

    # performance equal weights
    df_equal_weights = calculate_equal_weights(day_previous_df_test_norm, start_prediction_timestamp)
    lst_rmse_equal_weights, rmse_equal_weights = collect_rmse_result(df_equal_weights, 'mean_prediction', lst_rmse_equal_weights)
    lst_pb_equal_weights_q10, lst_pb_equal_weights_q90, pinball_equal_weights_q10, pinball_equal_weights_q90 = collect_pb_result(df_equal_weights, 
                                                                                                                                        'Q10', 'Q90', 
                                                                                                                                        lst_pb_equal_weights_q10, lst_pb_equal_weights_q90)
    # performance day-ahead
    df_dayahead = create_df_forecaster_first_stage(day_previous_df_test_norm, 'dayahead', start_prediction_timestamp)
    lst_rmse_baseline_dayahead, rmse_dayahead = collect_rmse_result(df_dayahead, 'norm_dayaheadforecast', lst_rmse_baseline_dayahead)
    lst_pb_dayahead_q10, lst_pb_dayahead_q90, pinball_dayahead_q10, pinball_dayahead_q90 = collect_pb_result(df_dayahead, 
                                                                                                                'norm_dayaheadconfidence10', 'norm_dayaheadconfidence90', 
                                                                                                                lst_pb_dayahead_q10, lst_pb_dayahead_q90)
    # performance day-ahead-11h
    df_dayahead_11h = create_df_forecaster_first_stage(day_previous_df_test_norm, 'dayahead11h', start_prediction_timestamp)
    lst_rmse_baseline_dayahead11h, rmse_dayahead_11h = collect_rmse_result(df_dayahead_11h, 'norm_dayahead11hforecast', lst_rmse_baseline_dayahead11h)
    lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90, pinball_dayahead_11h_q10, pinball_dayahead_11h_q90 = collect_pb_result(df_dayahead_11h, 
                                                                                                                                'norm_dayahead11hconfidence10', 'norm_dayahead11hconfidence90', 
                                                                                                                                lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90)
    # performance week ahead
    df_week_ahead = create_df_forecaster_first_stage(day_previous_df_test_norm, 'weekahead', start_prediction_timestamp)
    lst_rmse_baseline_week_ahead, rmse_week_ahead = collect_rmse_result(df_week_ahead, 'norm_weekaheadforecast', lst_rmse_baseline_week_ahead)
    lst_pb_week_ahead_q10, lst_pb_week_ahead_q90, pinball_week_ahead_q10, pinball_week_ahead_q90 = collect_pb_result(df_week_ahead, 
                                                                                                                        'norm_weekaheadconfidence10', 'norm_weekaheadconfidence90', 
                                                                                                                        lst_pb_week_ahead_q10, lst_pb_week_ahead_q90) 
    # performance most recent
    if sim_params['most_recent']:
        df_most_recent = create_df_forecaster_first_stage(day_previous_df_test_norm, 'mostrecent', start_prediction_timestamp)
        lst_rmse_baseline_most_recent, rmse_most_recent = collect_rmse_result(df_most_recent, 'norm_mostrecentforecast', lst_rmse_baseline_most_recent)
        lst_pb_most_recent_q10, lst_pb_most_recent_q90, pinball_most_recent_q10, pinball_most_recent_q90 = collect_pb_result(df_most_recent, 
                                                                                                                                'norm_mostrecentconfidence10', 'norm_mostrecentconfidence90', 
                                                                                                                                lst_pb_most_recent_q10, lst_pb_most_recent_q90) 
    # performance malicious cheat
    if sim_params['malicious']:
        df_malicious = create_df_forecaster_first_stage(day_previous_df_test_norm, 'malicious', start_prediction_timestamp)
        lst_rmse_baseline_malicious, rmse_malicious = collect_rmse_result(df_malicious, 'norm_maliciousforecast', lst_rmse_baseline_malicious)
        lst_pb_malicious_q10, lst_pb_malicious_q90, pinball_malicious_q10, pinball_malicious_q90 = collect_pb_result(df_malicious, 
                                                                                                                        'norm_maliciousconfidence10', 'norm_maliciousconfidence90', 
                                                                                                                        lst_pb_malicious_q10, lst_pb_malicious_q90)
    # performance noisy
    if sim_params['noisy']:
        df_noisy = create_df_forecaster_first_stage(day_previous_df_test_norm, 'noisy', start_prediction_timestamp)
        lst_rmse_baseline_noisy, rmse_noisy = collect_rmse_result(df_noisy, 'norm_noisyforecast', lst_rmse_baseline_noisy)
        lst_pb_noisy_q10, lst_pb_noisy_q90, pinball_noisy_q10, pinball_noisy_q90 = collect_pb_result(df_noisy, 
                                                                                                        'norm_noisyconfidence10', 'norm_noisyconfidence90', 
                                                                                                        lst_pb_noisy_q10, lst_pb_noisy_q90)
    # plot forecasts
    if ens_params['plt_wind_power_ensemble']:
        plot_ensemble_forecasts(df_pred_ensemble, df_test_ensemble)
        nr_previous_days = len(pd.date_range(start=start_training_timestamp, end=end_training_timestamp, freq='1D')) - 1
        plt.title(f'Ensemble Forecasts - Quantile {ens_params["model_type"]}')
        #plot_ramp_events(df_test_norm_diff, ens_params['compute_abs_difference'])
        if not ens_params['zoom_in_variability']:  # zoom in the variability forecasts
            plt.ylim(-0.01, 1)
        plt.show()
        
    # plot variability forecast results
    if ens_params['plt_wind_power_variability_ensemble']:
        plot_var_ensemble_forecasts(df_var_ensemble, df_2stage_test)
        nr_previous_days = len(pd.date_range(start=start_training_timestamp, end=end_training_timestamp, freq='1D')) - 1
        plt.title(f'Ensemble Variability Forecasts - Quantile {ens_params["var_model_type"]}')
        #plot_ramp_events(df_test_norm_diff, ens_params['compute_abs_difference'])
        if not ens_params['zoom_in_variability']:  # zoom in the variability forecasts
            plt.ylim(-0.6, 0.6)
        plt.show()

    ## ----------------------------> DISPLAY METRICS <----------------------------
    if sim_params['display_metrics']:
        results_metrics = {'ensemble': {'rmse': rmse_ensemble, 
                                        'pb10': pinball_ensemble_q10, 
                                        'pb90': pinball_ensemble_q90, 
                                        'rmse_var': rmse_var_ensemble},
                            'best_model': {'rmse': rmse_best_model,
                                            'pb10': pinball_best_model_q10, 
                                            'pb90': pinball_best_model_q90, 
                                            'rmse_var': rmse_var_best_model},
                            'weighted_avg': {'rmse': rmse_weighted_avg, 
                                            'pb10': pinball_weighted_avg_q10, 
                                            'pb90': pinball_weighted_avg_q90, 
                                            'rmse_var': rmse_var_weighted_avg},
                            'weighted_avg_soft': {'rmse': rmse_weighted_avg_soft, 
                                                'pb10': pinball_weighted_avg_soft_q10, 
                                                'pb90': pinball_weighted_avg_soft_q90, 
                                                'rmse_var': rmse_var_weighted_avg_soft},
                            'equal_weights': {'rmse': rmse_equal_weights, 
                                            'pb10': pinball_equal_weights_q10, 
                                            'pb90': pinball_equal_weights_q90, 
                                            'rmse_var': rmse_var_equal_weights},
                            'day_ahead': {'rmse': rmse_dayahead, 
                                        'pb10': pinball_dayahead_q10, 
                                        'pb90': pinball_dayahead_q90, 
                                        'rmse_var': rmse_var_dayahead},
                            'day_ahead_11h': {'rmse': rmse_dayahead_11h, 
                                            'pb10': pinball_dayahead_11h_q10, 
                                            'pb90': pinball_dayahead_11h_q90, 
                                            'rmse_var': rmse_var_dayahead_11h},
                            'week_ahead': {'rmse': rmse_week_ahead, 
                                        'pb10': pinball_week_ahead_q10, 
                                        'pb90': pinball_week_ahead_q90, 
                                        'rmse_var': rmse_var_week_ahead}
                            }
        if sim_params['most_recent']:
            results_metrics['most_recent'] = {'rmse': rmse_most_recent, 
                                            'pb10': pinball_most_recent_q10, 
                                            'pb90': pinball_most_recent_q90, 
                                            'rmse_var': rmse_var_most_recent}
        if sim_params['malicious']:
            results_metrics['malicious'] = {'rmse': rmse_malicious, 
                                                    'pb10': pinball_malicious_q10, 
                                                    'pb90': pinball_malicious_q90, 
                                                    'rmse_var': rmse_var_malicious}
        if sim_params['noisy']:
            results_metrics['noisy'] = {'rmse': rmse_noisy, 
                                            'pb10': pinball_noisy_q10, 
                                            'pb90': pinball_noisy_q90, 
                                            'rmse_var': rmse_var_noisy}
        
        display_forecasting_metrics(sim_params=sim_params, ens_params=ens_params, dict_metrics = results_metrics)
    
    # # ----------------------------> FORECASTERS PERMUTATION CONTRIBUTIONS <----------------------------
    y_test = df_test['measured'].values[-96:]
    iter_permutation_contributions = compute_forecasters_contributions(buyer_resource_name, ens_params, y_test, forecast_range)
    logger.info(' ')
    logger.opt(colors = True).info('<blue> -----------------> Forecasters permutation contributions computed </blue>')
    avg_permutation_contributions = update_dict_weights(avg_permutation_contributions, iter_permutation_contributions, iteration=i)

    # # ----------------------------> FORECASTERS COEFFICIENTS CONTRIBUTIONS <----------------------------
    if ens_params['model_type'] == 'LR':
        logger.info(' ')
        logger.opt(colors = True).info('<blue> -----------------> Forecasters coefficients contributions computed </blue>')
        with open('/Users/gio/Desktop/Elia-RES-Forecasting/info_model/b1r1_previous_day.pickle', 'rb') as handle:
            previous_day = pickle.load(handle)
        iter_coefficients_contributions = compute_coefficients(previous_day)
        avg_coefficients_contributions = update_dict_weights(avg_coefficients_contributions, 
                                                            iter_coefficients_contributions, 
                                                            iteration=i)

    # # ----------------------------> FORECASTERS WEIGHTED AVERAGE CONTRIBUTIONS (SUM NORMALIZATION) <---------------------------- 
    logger.info(' ')
    logger.opt(colors = True).info('<blue> -----------------> Forecasters weighted average contributions computed </blue>')
    iter_weighted_avg_contributions = defaultdict(dict)
    iter_weighted_avg_contributions['wind_power'] = dict_weights
    iter_weighted_avg_contributions['wind_power_ramp'] = dict_weights_var
    avg_weighted_avg_contributions = update_dict_weights(avg_weighted_avg_contributions, iter_weighted_avg_contributions, iteration=i)

    ## ----------------------------> FORECASTERS WEIGHTED AVERAGE CONTRIBUTIONS (SOFTMAX NORMALIZATION) <----------------------------
    logger.info(' ')
    logger.opt(colors = True).info('<blue> -----------------> Forecasters weighted average contributions (softmax) computed </blue>')
    iter_weighted_avg_soft_contributions = defaultdict(dict)
    iter_weighted_avg_soft_contributions['wind_power'] = dict_weights_soft
    iter_weighted_avg_soft_contributions['wind_power_ramp'] = dict_weights_soft_var
    avg_weighted_soft_avg_contributions = update_dict_weights(avg_weighted_soft_avg_contributions, iter_weighted_avg_soft_contributions, iteration=i)

    # Clear output
    #clear_output(wait=True)


In [None]:
# Forecasters that are switched off in `sim_params` are handed to the
# comparison calls below as None instead of a score list.
# most recent
if not sim_params['most_recent']:
    lst_rmse_baseline_most_recent = None
    lst_pb_most_recent_q10 = None
    lst_pb_most_recent_q90 = None
    lst_rmse_var_baseline_most_recent = None

# malicious
if not sim_params['malicious']:
    lst_rmse_baseline_malicious = None
    lst_pb_malicious_q10 = None
    lst_pb_malicious_q90 = None
    lst_rmse_var_baseline_malicious = None

# noisy
if not sim_params['noisy']:
    lst_rmse_baseline_noisy = None
    lst_pb_noisy_q10 = None
    lst_pb_noisy_q90 = None
    lst_rmse_var_baseline_noisy = None

# Every call below passes the score lists in the same order:
# ensemble, best model, equal weights, weighted avg, weighted avg (soft),
# day-ahead, day-ahead 11h, week-ahead, most recent, malicious, noisy.

# plot statistical comparison q50
title1 = 'RMSE-based Statistical Significance'
title2 = 'RMSE-based Statistical Comparison: critical difference diagram of ranks'
data_q50, avg_rank_q50 = run_statistical_comparison_analysis(
    ens_params['model_type'],
    lst_rmse_ensemble,
    lst_rmse_best_model,
    lst_rmse_equal_weights,
    lst_rmse_weighted_avg,
    lst_rmse_weighted_avg_soft,
    lst_rmse_baseline_dayahead,
    lst_rmse_baseline_dayahead11h,
    lst_rmse_baseline_week_ahead,
    lst_rmse_baseline_most_recent,
    lst_rmse_baseline_malicious,
    lst_rmse_baseline_noisy,
    title1, title2)

# plot statistical comparison q10
title1 = 'Q10 Pinball loss-based Statistical Significance'
title2 = 'Q10 Pinball loss-based Statistical Comparison: critical difference diagram of ranks'
data_q10, avg_rank_q10 = run_statistical_comparison_analysis(
    ens_params['model_type'],
    lst_pb_ensemble_q10,
    lst_pb_best_model_q10,
    lst_pb_equal_weights_q10,
    lst_pb_weighted_avg_q10,
    lst_pb_weighted_avg_soft_q10,
    lst_pb_dayahead_q10,
    lst_pb_dayahead_11h_q10,
    lst_pb_week_ahead_q10,
    lst_pb_most_recent_q10,
    lst_pb_malicious_q10,
    lst_pb_noisy_q10,
    title1, title2)

# plot statistical comparison q90
title1 = 'Q90 Pinball loss-based Statistical Significance'
title2 = 'Q90 Pinball loss-based Statistical Comparison: critical difference diagram of ranks'
data_q90, avg_rank_q90 = run_statistical_comparison_analysis(
    ens_params['model_type'],
    lst_pb_ensemble_q90,
    lst_pb_best_model_q90,
    lst_pb_equal_weights_q90,
    lst_pb_weighted_avg_q90,
    lst_pb_weighted_avg_soft_q90,
    lst_pb_dayahead_q90,
    lst_pb_dayahead_11h_q90,
    lst_pb_week_ahead_q90,
    lst_pb_most_recent_q90,
    lst_pb_malicious_q90,
    lst_pb_noisy_q90,
    title1, title2)

# plot statistical comparison variability
# The titles carry a "Variability" prefix so these figures are not mistaken
# for the q50 wind-power comparison above, which used identical titles before.
title1 = 'Variability RMSE-based Statistical Significance'
title2 = 'Variability RMSE-based Statistical Comparison: critical difference diagram of ranks'
data_q50_var, avg_rank_q50_var = run_statistical_comparison_analysis(
    ens_params['var_model_type'],
    lst_rmse_var_ensemble,
    lst_rmse_var_best_model,
    lst_rmse_var_equal_weights,
    lst_rmse_var_weighted_avg,
    lst_rmse_var_weighted_avg_soft,
    lst_rmse_var_baseline_dayahead,
    lst_rmse_var_baseline_dayahead11h,
    lst_rmse_var_baseline_week_ahead,
    lst_rmse_var_baseline_most_recent,
    lst_rmse_var_baseline_malicious,
    lst_rmse_var_baseline_noisy,
    title1, title2)

In [None]:
# Compare the RMSE distributions of the ensemble against two reference
# forecasters. `np` and `plt` are already imported in the notebook's first cell.
ensemble = np.array(lst_rmse_ensemble)
equal_weights = np.array(lst_rmse_equal_weights)
# The statistical-comparison cell sets this list to None when the most-recent
# forecaster is disabled; skip it in that case because None cannot be binned.
most_recent = (np.array(lst_rmse_baseline_most_recent)
               if lst_rmse_baseline_most_recent is not None else None)

fig, ax = plt.subplots()
hist_series = [('most recent', most_recent, 'skyblue'),
               ('ensemble', ensemble, 'orange'),
               ('equal weights', equal_weights, 'green')]
for hist_label, hist_values, hist_color in hist_series:
    if hist_values is None:
        continue
    # Semi-transparent bars keep overlapping distributions visible.
    ax.hist(hist_values, bins=15, color=hist_color, alpha=0.6, label=hist_label)

# Adding labels, title and a legend so the figure stands on its own
ax.set_xlabel('RMSE')
ax.set_ylabel('Frequency')
ax.set_title('RMSE distribution: most recent vs. ensemble vs. equal weights')
ax.legend()
# Display the plot
plt.show()

In [None]:
# Pair each comparison table with the prefix handed to display_table_metrics
# alongside it; the styled result is the cell output.
metric_tables = {
    'Q10': data_q10,
    'Q50': data_q50,
    'Q90': data_q90,
    'Q50_var': data_q50_var,
}
prefixes = list(metric_tables)
dfs = [metric_tables[prefix] for prefix in prefixes]
result, styled_result = display_table_metrics(dfs, prefixes)
styled_result

In [None]:
# Percentage gap of every entry to the smallest value along axis 0
# (0 for the best, i.e. lowest, entry); shown transposed.
best_along_axis0 = np.min(result, axis=0)
perc_improvement_df = (result / best_along_axis0 - 1) * 100
perc_improvement_df.T

In [None]:
# Pivot the averaged weighted-average-scheme contributions for plotting.
df_pivot = weighted_avg_pivot_data(sim_params, avg_weighted_avg_contributions)

# Stacked bar chart of the pivot table
ax_bar = df_pivot.plot(kind='bar', stacked=True, figsize=(15, 6))
ax_bar.set_xlabel('Key and Quantile')
ax_bar.set_ylabel('Value')
ax_bar.set_title('Stacked Bar Chart of Average Weighted Avg Scheme Contributions')
plt.show()

# Heatmap of the same pivot table
fig_heat, ax_heat = plt.subplots(figsize=(15, 6))
sns.heatmap(df_pivot, annot=True, cmap='viridis', ax=ax_heat)
ax_heat.set_xlabel('Series')
ax_heat.set_ylabel('Key and Quantile')
ax_heat.set_title('Heatmap of Average Weighted Avg Scheme Contributions')
plt.show()

# Persist the pivot table only when the scenario asks for it.
if sim_params['save_scenario_contributions']:
    scenario = sim_params['scenario']
    type_score = 'avg_scheme'
    df_pivot.to_csv(f'/Users/gio/Desktop/Elia-RES-Forecasting/info_model/df_pivot_{scenario}_{type_score}.csv')

In [None]:
# Pivot the averaged softmax-normalised weighted-average contributions for plotting.
df_pivot = weighted_avg_pivot_data(sim_params, avg_weighted_soft_avg_contributions)

# Plot the stacked bar chart
df_pivot.plot(kind='bar', stacked=True, figsize=(15, 6))
plt.xlabel('Key and Quantile')
plt.ylabel('Value')
plt.title('Stacked Bar Chart of Average Weighted Soft Avg Scheme Contributions')
plt.show()

# Plot the heatmap
plt.figure(figsize=(15, 6))
sns.heatmap(df_pivot, annot=True, cmap='viridis')
plt.xlabel('Series')
plt.ylabel('Key and Quantile')
plt.title('Heatmap of Average Weighted Soft Avg Scheme Contributions')
plt.show()

if sim_params['save_scenario_contributions']:
    # Use a suffix distinct from the sum-normalised scheme ('avg_scheme'):
    # with the same suffix this CSV would overwrite that scheme's file.
    type_score = 'soft_avg_scheme'
    scenario = sim_params['scenario']
    df_pivot.to_csv(f'/Users/gio/Desktop/Elia-RES-Forecasting/info_model/df_pivot_{scenario}_{type_score}.csv')

In [None]:
# Pivot the averaged permutation contributions for plotting.
df_pivot = permutation_pivot_data(sim_params, avg_permutation_contributions)

# Stacked bar chart of the pivot table
ax_bar = df_pivot.plot(kind='bar', stacked=True, figsize=(15, 6))
ax_bar.set(xlabel='Key and Quantile',
           ylabel='Value',
           title='Stacked Bar Chart of Average Permutation Contributions')
plt.show()

# Heatmap of the same pivot table
fig_heat, ax_heat = plt.subplots(figsize=(15, 6))
sns.heatmap(df_pivot, annot=True, cmap='viridis', ax=ax_heat)
ax_heat.set(xlabel='Series',
            ylabel='Key and Quantile',
            title='Heatmap of Average Permutation Contributions')
plt.show()

# Persist the pivot table only when the scenario asks for it.
if sim_params['save_scenario_contributions']:
    scenario = sim_params['scenario']
    type_score = 'permutation'
    df_pivot.to_csv(f'/Users/gio/Desktop/Elia-RES-Forecasting/info_model/df_pivot_{scenario}_{type_score}.csv')

In [None]:
# Pivot the averaged coefficient contributions for plotting.
# NOTE(review): the visible simulation loop updates avg_coefficients_contributions
# only when ens_params['model_type'] == 'LR' - confirm this cell is meaningful otherwise.
df_pivot = lasso_coefs_pivot_data(sim_params, avg_coefficients_contributions)

# Stacked bar chart of the pivot table
ax_bar = df_pivot.plot(kind='bar', stacked=True, figsize=(15, 6))
ax_bar.set_xlabel('Key and Quantile')
ax_bar.set_ylabel('Value')
ax_bar.set_title("Stacked Bar Chart of Average Lasso's Coefficients Contributions")
plt.show()

# Heatmap of the same pivot table
fig_heat, ax_heat = plt.subplots(figsize=(15, 6))
sns.heatmap(df_pivot, annot=True, cmap='viridis', ax=ax_heat)
ax_heat.set_xlabel('Series')
ax_heat.set_ylabel('Key and Quantile')
ax_heat.set_title("Heatmap of Average Lasso's Coefficients Contributions")
plt.show()

# Persist the pivot table only when the scenario asks for it.
if sim_params['save_scenario_contributions']:
    scenario = sim_params['scenario']
    type_score = 'coefs'
    df_pivot.to_csv(f'/Users/gio/Desktop/Elia-RES-Forecasting/info_model/df_pivot_{scenario}_{type_score}.csv')

In [None]:
# Inspect the coefficient contributions returned by compute_coefficients in the simulation loop.
# NOTE(review): in the visible loop this name is assigned only when
# ens_params['model_type'] == 'LR' - confirm it is defined for other model types.
iter_coefficients_contributions

In [None]:
# Inspect the coefficient contributions accumulated through update_dict_weights.
# NOTE(review): the visible loop updates this only when ens_params['model_type'] == 'LR'.
avg_coefficients_contributions

In [None]:
# Pairwise correlation between four forecast columns of df_train,
# rendered with a colour gradient.
forecast_columns = [
    'mostrecentforecast',
    'dayaheadforecast',
    'dayahead11hforecast',
    'weekaheadforecast',
]
correlation_matrix = df_train[forecast_columns].corr()
correlation_matrix.style.background_gradient(cmap='coolwarm')

In [None]:
# Sanity check: the incremental (online) mean update reproduces the batch mean.
all_num = np.array([13, 5, 9, 34, 2])
print('offline', np.mean(all_num))
print('-----------------')
for i, a in enumerate(all_num):
    n = i + 1
    # The first sample seeds the estimate; afterwards apply mu += (a - mu) / n.
    mu = a if n == 1 else mu + (a - mu) / n
    print('a:', a, 'n:', n, 'mu:', mu)
print('online', mu)