In [1]:
import sys
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()
# Make the project packages (`source`, `config`) importable from the directory
# named by PATH_CURRENT. The entry is only added when the variable is set
# (os.getenv returns None otherwise, and None is not a usable sys.path entry)
# and when it is not already present, so re-running this cell does not grow
# sys.path with duplicates.
project_root = os.getenv("PATH_CURRENT")
if project_root is not None and project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from loguru import logger
from sklearn.metrics import mean_pinball_loss
from source.utils.file_read import process_and_concat_files 
from source.utils.file_read import filter_df
from source.simulation.submission_module import submission_forecasters
from source.simulation.buyer_module import prepare_buyer_data
from source.utils.generate_timestamp import generate_timestamps
from source.ml_engine import create_ensemble_forecasts
from source.simulation.helpers_simulation import process_combination_scheme
from source.ensemble.stack_generalization.ramp_detection.boxplot_detector import detect_wind_ramp_boxplot
from source.ensemble.stack_generalization.ramp_detection.lof_detector import detect_wind_ramp_lof
from source.ensemble.stack_generalization.ramp_detection.kde_detector import detect_wind_ramp_kde
from source.ensemble.stack_generalization.ramp_detection.eq_detector import detect_wind_ramp_eq
from source.ensemble.stack_generalization.ramp_detection.utils import process_ramp_events

from source.plots.plot_forecasts import plot_forecasts, plot_var_forecasts, plot_ramp_detection
from sklearn.utils.fixes import parse_version, sp_version
# Choose the solver name from the SciPy version reported by scikit-learn:
# "highs" from SciPy 1.6.0 onwards, "interior-point" for older releases.
if sp_version >= parse_version("1.6.0"):
    solver = "highs"
else:
    solver = "interior-point"
from IPython.display import clear_output

# Clear whatever the imports printed; wait=True defers the clear until new output arrives.
clear_output(wait=True)

In [3]:
from config.ramp_calib_setting import Simulation, WeightedAvg, Stack
from source.utils.session_ml_info import delete_previous_day_pickle
# Short aliases for the three parameter sets defined in config.ramp_calib_setting:
# testing-period simulation settings, weighted-average settings, stacking settings.
sim_params, weight_avg_params, ens_params = (
    Simulation.testing_period,
    WeightedAvg.params,
    Stack.params,
)

In [4]:
def check_wind_ramp_events_day(df, list_ramp_alarm, i):
    """
    Return the timestamps of the wind ramp events observed during one day.

    The day is the 24-hour window starting at the timestamp stored at
    position 0 of ``list_ramp_alarm[i]``. The window is half-open: the start
    is included and ``start + 1 day`` is excluded, so the first sample of the
    following day is not attributed to this day. (A label slice with ``.loc``
    would include both endpoints.)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame handed to ``process_ramp_events``; its result must carry a
        ``ramp_events`` column and an index comparable with the alarm timestamp.
    list_ramp_alarm : sequence
        Ramp alarm records; entry ``i`` must hold the day's start timestamp
        at position 0.
    i : int
        Position of the day of interest in ``list_ramp_alarm``.

    Returns
    -------
    list
        Index labels of the rows inside the window whose ``ramp_events``
        value equals 1; an empty list when there are none.

    Raises
    ------
    IndexError
        If ``list_ramp_alarm`` has no entry at position ``i``.
    """
    # Start of the day of interest (named day_start to avoid shadowing the
    # stdlib `datetime` module name).
    day_start = list_ramp_alarm[i][0]
    day_end = day_start + pd.Timedelta(days=1)
    # Label the ramp events; the second return value is not needed here.
    df_events, _ = process_ramp_events(df)
    # Half-open window [day_start, day_end).
    in_day = (df_events.index >= day_start) & (df_events.index < day_end)
    day_flags = df_events.loc[in_day, 'ramp_events']
    # NOTE(review): assumes `ramp_events` is a 0/1 flag, so selecting the rows
    # equal to 1 is sufficient and no separate "any event" check is needed — confirm.
    return day_flags[day_flags == 1].index.tolist()

In [5]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_stacked_bar_chart(df_pivot, title):
    """
    Render a pivot table as a stacked bar chart and display it.

    df_pivot: pivoted frame to draw (plotted through its pandas ``plot`` accessor).
    title: text inserted into the figure title.
    """
    # pandas creates the figure and hands back its Axes; label that Axes directly.
    axes = df_pivot.plot.bar(stacked=True, figsize=(15, 6))
    axes.set_xlabel('Key and Quantile')
    axes.set_ylabel('Value')
    axes.set_title(f"Stacked Bar Chart of {title} Contributions")
    plt.show()

def plot_heatmap(df_pivot, title):
    """
    Render a pivot table as an annotated heatmap and display it.

    df_pivot: pivoted frame to draw; every cell is annotated with its value.
    title: text inserted into the figure title.
    """
    # Create the figure and its single Axes explicitly, then draw onto that Axes.
    _, axes = plt.subplots(figsize=(15, 6))
    sns.heatmap(df_pivot, annot=True, cmap='viridis', ax=axes)
    axes.set_xlabel('Series')
    axes.set_ylabel('Key and Quantile')
    axes.set_title(f"Heatmap of {title} Contributions")
    plt.show()

In [None]:
from collections import defaultdict
from source.simulation.helpers_simulation import update_dict_weights

# Seed NumPy's global random generator from the simulation settings.
np.random.seed(sim_params['random_seed'])

# The input files are configured under the keys 'file_1' ... 'file_12'.
files = [sim_params[f'file_{file_number}'] for file_number in range(1, 13)]
logger.info(' ')
logger.info(f'Load Files: {files}')

# Read all files into a single frame.
df = process_and_concat_files(files)

# Keep the forecaster columns and the measured column named in the settings.
df_filtered = filter_df(df, sim_params['forecasts_col'], sim_params['measured_col'])

# Optionally zero-fill missing values, printing the per-column NaN counts first.
if sim_params['replace_nan']:
    logger.info(' ')
    logger.warning("Replacing NaN values with 0s")
    print(df_filtered.isna().sum())
    df_filtered.fillna(0, inplace=True)

# Identifier of the buyer resource used throughout the simulation.
buyer_resource_name = 'b1r1'

# Start from a clean state: remove the pickle left by a previous run.
logger.info(' ')
delete_previous_day_pickle()
logger.opt(colors = True).warning('previous day pickle file removed')

# Accumulators filled by the ramp detectors (day-ahead and intraday alarms).
list_ramp_alarm, list_ramp_alarm_intraday = [], []

# Running averages of the forecasters' contributions across test days.
avg_permutation_contributions = defaultdict(dict)
avg_shapley_contributions = defaultdict(dict)


# These imports are needed on every iteration, so they are resolved once here
# instead of being re-executed inside the loop body.
from source.assessment_contributions import compute_forecasters_contributions
from source.plots.display_contributions import permutation_pivot_data
from matplotlib import pyplot as plt
import seaborn as sns


def _select_forecaster_quantiles(df_test, like, drop_columns=None, n_steps=96):
    """
    Extract one forecaster's submissions from the test frame.

    Columns whose name contains ``like`` are kept, ``drop_columns`` (if given)
    are removed, the remaining columns are relabelled positionally as
    '50_predictions', '10_predictions', '90_predictions', and only the last
    ``n_steps`` rows are returned.

    NOTE(review): the positional relabelling assumes exactly three columns
    remain, ordered forecast / lower quantile / upper quantile — confirm
    against the input files.
    """
    df_forecaster = df_test.filter(like=like, axis=1)
    if drop_columns:
        # Non in-place drop: avoids mutating a frame derived from df_test
        # (the in-place variant can trigger SettingWithCopyWarning).
        df_forecaster = df_forecaster.drop(columns=drop_columns)
    df_forecaster.columns = ['50_predictions', '10_predictions', '90_predictions']
    return df_forecaster.iloc[-n_steps:]


def _forecast_losses(df_forecast, targets):
    """
    Score a forecast frame against the targets.

    Returns a tuple (RMSE of '50_predictions', pinball loss of
    '10_predictions' at alpha=0.1, pinball loss of '90_predictions' at alpha=0.9).
    """
    rmse = np.sqrt(np.mean((df_forecast['50_predictions'].values - targets)**2))
    loss_q10 = mean_pinball_loss(targets, df_forecast['10_predictions'].values, alpha=0.1)
    loss_q90 = mean_pinball_loss(targets, df_forecast['90_predictions'].values, alpha=0.9)
    return rmse, loss_q10, loss_q90


def _log_forecast_losses(header, rmse, loss_q10, loss_q90):
    """Log a blank line, the given header and the three losses of one forecaster."""
    logger.info(' ')
    logger.opt(colors = True).info(header)
    logger.opt(colors = True).info(f'<blue> -----------------> RMSE: {rmse} </blue>')
    logger.opt(colors = True).info(f'<blue> -----------------> Pinball Loss Q10: {loss_q10} </blue>')
    logger.opt(colors = True).info(f'<blue> -----------------> Pinball Loss Q90: {loss_q90} </blue>')


# loop over test days
for i in tqdm(range(sim_params['num_test_days']), desc='Testing Days'):

    # generate timestamps train and prediction
    start_training_timestamp, end_training_timestamp, start_prediction_timestamp, end_prediction_timestamp = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

    # From the sixth test day onwards the training window is extended backwards
    # by `day_calibration` days.
    if i >= 5:
        day_calibration = 5
        start_training_timestamp = start_training_timestamp - pd.Timedelta(days=day_calibration)

    logger.info(' ')
    logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
    logger.opt(colors=True).info(f'<blue>Start training: {start_training_timestamp} - End training: {end_training_timestamp}</blue>')
    logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
    logger.opt(colors = True).info(f'<blue>Start prediction: {start_prediction_timestamp} - End prediction: {end_prediction_timestamp}</blue>')

    day_previous_start_prediction_timestamp = start_prediction_timestamp #- pd.Timedelta('1day')
    # `between` is inclusive on both ends; `.iloc[:-1]` drops the last row so the
    # windows are effectively half-open.
    df_train = df_filtered[df_filtered.index.to_series().between(start_training_timestamp, end_training_timestamp)].iloc[:-1,:]
    df_test = df_filtered[df_filtered.index.to_series().between(day_previous_start_prediction_timestamp, end_prediction_timestamp)].iloc[:-1,:]

    logger.info(' ')
    logger.opt(colors = True).info(f'<blue> -----------------> Length of training data: {len(df_train)} </blue>')
    logger.opt(colors = True).info(f'<blue> -----------------> Length of test data: {len(df_test)} </blue>')

    logger.info(' ')
    logger.opt(colors = True).info('<blue> -----------------> Forecasters prediction submitted </blue>')

    # ----------------------------> FORECASTERS PREDICTION SUBMISSION <----------------------------

    df_market, df_train, df_test = submission_forecasters(sim_params, df_train, df_test)

    # ----------------------------> TARGET DATA <----------------------------

    df_buyer, forecast_range = prepare_buyer_data(df_train, df_test, start_prediction_timestamp, end_prediction_timestamp)

    # ----------------------------> PREDICO PLATFORM ML ENGINE: ENSEMBLE FORECASTS <----------------------------

    # Optional experiment variants, disabled by default.

    # #------------------------------------ case 1)

    # df_market.drop(df_market.filter(like='q10').columns, axis=1, inplace=True)
    # df_market.drop(df_market.filter(like='q90').columns, axis=1, inplace=True)

    # #------------------------------------ case 2)

    # # randomly replace float values with nans in df_market
    # df_market = df_market.mask(np.random.random(df_market.shape) < 0.1)

    # #------------------------------------ case 3)
    # import pickle
    # with open('forecast_data.pickle', 'rb') as f:
    #     data = pickle.load(f)
    # launch_time = data['launch_time']
    # import pytz
    # utc = pytz.UTC
    # forecast_range = data['forecast_range']
    # end_training_timestamp = pd.to_datetime(min(launch_time.replace(tzinfo=utc), forecast_range[0].replace(tzinfo=utc)), utc=True)
    # df_market_train = data['df_market'].bfill().iloc[:-192]
    # df_market_test = data['df_market'].iloc[-96:]
    # # concatenate
    # df_market = pd.concat([df_market_train, df_market_test])
    # df_buyer_train = data['df_buyer'].iloc[:-192]
    # df_buyer_test = data['df_buyer'].iloc[-96:]
    # # concatenate
    # df_buyer = pd.concat([df_buyer_train, df_buyer_test])
    # challenge_usecase = data['challenge_usecase']
    # challenge_id = data['challenge_id']

    results_predico_forecasts = create_ensemble_forecasts(ens_params=ens_params,
                                                            df_buyer=df_buyer,
                                                            df_market=df_market,
                                                            end_training_timestamp=end_training_timestamp,
                                                            forecast_range = forecast_range,
                                                            challenge_usecase='simulation',
                                                            simulation=True)

    # ----------------------------> TARGET VARIABILITY <----------------------------
    df_train_norm, day_previous_df_test_norm, day_previous_df_test_norm_var = process_combination_scheme(df_train, df_test, end_training_timestamp, day_previous_start_prediction_timestamp)

    # NOTE: this rename is in place, i.e. it also relabels the frame stored in
    # results_predico_forecasts (kept as in the original cell).
    df_pred_plot = results_predico_forecasts['wind_power']['predictions']
    df_pred_plot.rename(columns={'q50_b1r1': '50_predictions', 'q10_b1r1': '10_predictions', 'q90_b1r1': '90_predictions'}, inplace=True)
    df_test_plot = pd.DataFrame(day_previous_df_test_norm['norm_measured'].iloc[-96:])
    df_test_plot.columns = ['targets']
    # Targets shared by every loss computation below.
    targets = df_test_plot['targets'].values

    # drop 'norm_measured' column
    df_train_norm = df_train_norm.drop(columns=['norm_measured'])
    day_previous_df_test_norm = day_previous_df_test_norm.drop(columns=['norm_measured'])

    # get last 96 values of the day_previous_df_test_norm
    df_test_norm = day_previous_df_test_norm.iloc[-96:]
    target_variability = day_previous_df_test_norm_var['norm_measured'].values[-96:]

    # Predictions Insample and Outsample retrieved from the ensemble forecasts
    pred_var_insample = results_predico_forecasts['wind_power_variability']['predictions_insample']
    pred_var_outsample = results_predico_forecasts['wind_power_variability']['predictions_outsample']

    # set params for ramp detection
    preprocess_ramps = ens_params['preprocess_ramps']
    max_consecutive_points = ens_params['max_consecutive_points']

    # ----------------------------> WIND RAMP DETECTION <----------------------------
    # Exactly one detector runs. An unknown name fails here with a clear message
    # instead of surfacing later as an undefined/stale `df_ramp_clusters`.
    detector = ens_params['detector']
    if detector == 'box':
        # Boxplot technique
        list_ramp_alarm, alarm_status, upper_box_bound, df_ramp_clusters = detect_wind_ramp_boxplot(pred_insample = pred_var_insample,
                                                                                                    pred_outsample = pred_var_outsample,
                                                                                                    forecast_range=forecast_range,
                                                                                                    list_ramp_alarm = list_ramp_alarm,
                                                                                                    df_train = df_train,
                                                                                                    q1 = ens_params['q1_box'],
                                                                                                    q3 = ens_params['q3_box'],
                                                                                                    k = ens_params['k_box'],
                                                                                                    preprocess_ramps = preprocess_ramps,
                                                                                                    max_consecutive_points = max_consecutive_points)
    elif detector == 'kde':
        # KDE technique
        list_ramp_alarm, alarm_status, df_ramp_clusters = detect_wind_ramp_kde(df_train = df_train,
                                                                                df_insample = pred_var_insample,
                                                                                df_outsample = pred_var_outsample,
                                                                                forecast_range=forecast_range,
                                                                                list_ramp_alarm = list_ramp_alarm,
                                                                                threshold_quantile = ens_params['threshold_quantile_kde'],
                                                                                preprocess_ramps = preprocess_ramps,
                                                                                cv_folds = ens_params['cv_folds_kde'],
                                                                                max_consecutive_points = max_consecutive_points)
    elif detector == 'eq':
        # EQ technique (also maintains the intraday alarm list)
        list_ramp_alarm, list_ramp_alarm_intraday, alarm_status, df_ramp_clusters = detect_wind_ramp_eq(df_train = df_train,
                                                                                                        df_insample = pred_var_insample,
                                                                                                        df_outsample = pred_var_outsample,
                                                                                                        list_ramp_alarm = list_ramp_alarm,
                                                                                                        threshold_quantile = ens_params['threshold_quantile_eq'],
                                                                                                        list_ramp_alarm_intraday = list_ramp_alarm_intraday,
                                                                                                        preprocess_ramps = preprocess_ramps,
                                                                                                        max_consecutive_points = max_consecutive_points)
    elif detector == 'lof':
        # LOF technique
        list_ramp_alarm, alarm_status, df_ramp_clusters = detect_wind_ramp_lof(pred_insample = pred_var_insample,
                                                                                    pred_outsample = pred_var_outsample,
                                                                                    df_train_norm = df_train_norm,
                                                                                    df_test_norm = df_test_norm,
                                                                                    forecast_range=forecast_range,
                                                                                    list_ramp_alarm = list_ramp_alarm,
                                                                                    df_train = df_train,
                                                                                    n_neighbors = ens_params['n_neighbors_lof'],
                                                                                    contamination = ens_params['contamination_lof'],
                                                                                    preprocess_ramps = preprocess_ramps,
                                                                                    max_consecutive_points = max_consecutive_points)
    else:
        raise ValueError(f"Unsupported ramp detector {detector!r}; expected one of 'box', 'kde', 'eq', 'lof'.")

    # ----------------------------> FORECAST SCORES <----------------------------

    # ensemble: rmse and pinball losses (q10, q90)
    rmse, pinball_loss_q10, pinball_loss_q90 = _forecast_losses(df_pred_plot, targets)

    # Variability predictions; a non in-place rename returns a new frame and
    # avoids SettingWithCopyWarning on the column selection.
    df_pred_var_plot = results_predico_forecasts['wind_power_variability']['predictions'][['q50_b1r1', 'q10_b1r1', 'q90_b1r1']].rename(
        columns={'q50_b1r1': '50_var_predictions', 'q10_b1r1': '10_var_predictions', 'q90_b1r1': '90_var_predictions'})
    df_test_var_plot = pd.DataFrame(day_previous_df_test_norm_var['norm_measured'].iloc[-96:])
    df_test_var_plot.columns = ['targets']

    # ----------------------------> WIND RAMP EVENTS <----------------------------
    list_wind_ramps = check_wind_ramp_events_day(df, list_ramp_alarm, i)

    # day-ahead submissions: `like='dayahead'` also matches the 'dayahead11h'
    # columns, so those are dropped explicitly.
    df_dayahead = _select_forecaster_quantiles(
        df_test, 'dayahead',
        drop_columns=['dayahead11hforecast', 'dayahead11hconfidence10', 'dayahead11hconfidence90'])
    rmse_dayahead, pinball_loss_q10_dayahead, pinball_loss_q90_dayahead = _forecast_losses(df_dayahead, targets)

    _log_forecast_losses('<blue> -----------------> Forecasters: wind power ensemble</blue>',
                         rmse, pinball_loss_q10, pinball_loss_q90)
    _log_forecast_losses('<blue> -----------------> Forecasters: dayahead </blue>',
                         rmse_dayahead, pinball_loss_q10_dayahead, pinball_loss_q90_dayahead)

    # submissions whose column name contains "dayahead11h"
    df_dayahead11 = _select_forecaster_quantiles(df_test, 'dayahead11h')
    rmse_dayahead11, pinball_loss_q10_dayahead11, pinball_loss_q90_dayahead11 = _forecast_losses(df_dayahead11, targets)
    _log_forecast_losses('<blue> -----------------> Forecasters: dayahead11h </blue>',
                         rmse_dayahead11, pinball_loss_q10_dayahead11, pinball_loss_q90_dayahead11)

    # submissions whose column name contains "weekahead"
    df_weekahead = _select_forecaster_quantiles(df_test, 'weekahead')
    rmse_weekahead, pinball_loss_q10_weekahead, pinball_loss_q90_weekahead = _forecast_losses(df_weekahead, targets)
    _log_forecast_losses('<blue> -----------------> Forecasters: weekahead </blue>',
                         rmse_weekahead, pinball_loss_q10_weekahead, pinball_loss_q90_weekahead)

    include_most_recent = sim_params['most_recent']
    if include_most_recent:
        # submissions whose column name contains "mostrecent"
        df_most_recent = _select_forecaster_quantiles(df_test, 'mostrecent')
        rmse_most_recent, pinball_loss_q10_most_recent, pinball_loss_q90_most_recent = _forecast_losses(df_most_recent, targets)
        _log_forecast_losses('<blue> -----------------> Forecasters: mostrecent </blue>',
                             rmse_most_recent, pinball_loss_q10_most_recent, pinball_loss_q90_most_recent)

    # collect the per-forecaster losses (the ensemble itself is not part of this comparison)
    list_names = ['dayahead', 'dayahead11h', 'weekahead']
    list_rmse = [rmse_dayahead, rmse_dayahead11, rmse_weekahead]
    list_pinball_loss_q10 = [pinball_loss_q10_dayahead, pinball_loss_q10_dayahead11, pinball_loss_q10_weekahead]
    list_pinball_loss_q90 = [pinball_loss_q90_dayahead, pinball_loss_q90_dayahead11, pinball_loss_q90_weekahead]
    if include_most_recent:
        list_names.append('mostrecent')
        list_rmse.append(rmse_most_recent)
        list_pinball_loss_q10.append(pinball_loss_q10_most_recent)
        list_pinball_loss_q90.append(pinball_loss_q90_most_recent)

    # The columns hold raw losses here; the labels describe the values after
    # the inversion below.
    df_results = pd.DataFrame({'1/(Pinball Loss Q10)': list_pinball_loss_q10, '1/RMSE': list_rmse, '1/(Pinball Loss Q90)': list_pinball_loss_q90})
    df_results.index = list_names
    # compute 1/loss for each metric
    df_accuracy = 1/df_results
    # normalize each metric so the forecasters' shares sum to 1, then put metrics on the rows
    df_accuracy = df_accuracy.div(df_accuracy.sum(axis=0), axis=1).T

    # stacked bar chart: x-axis = metric, y-axis = accuracy share, colour = forecaster
    df_accuracy.plot(kind='bar', stacked=True, figsize=(15, 6))
    plt.xlabel('Metrics')
    plt.ylabel('Accuracy')
    plt.title('Stacked Bar Chart of Forecasters Accuracy')
    plt.show()

    # heatmap: rows = forecasters, columns = metrics
    plt.figure(figsize=(15, 6))
    sns.heatmap(df_accuracy.T, annot=True, cmap='viridis')
    plt.xlabel('Metrics')
    plt.ylabel('Forecasters')
    plt.title('Heatmap of Forecasters Accuracy')
    plt.show()

    # ----------------------------> PLOT FORECASTS <----------------------------

    # plot day-ahead forecasts
    plot_forecasts(df_dayahead, df_test_plot, list_wind_ramps, title = 'Wind Power - Day-Ahead Forecasts', color='blue')

    # plot day-ahead-11h forecasts
    plot_forecasts(df_dayahead11, df_test_plot, list_wind_ramps, title = 'Wind Power - Day-Ahead-11h Forecasts', color='orange')

    # plot week-ahead forecasts
    plot_forecasts(df_weekahead, df_test_plot, list_wind_ramps, title = 'Wind Power - Week-Ahead Forecasts', color='green')

    # plot most recent forecasts
    if include_most_recent:
        plot_forecasts(df_most_recent, df_test_plot, list_wind_ramps, title = 'Wind Power - Most-Recent Forecasts', color='gray')

    # plot wind power forecast results
    if ens_params['plt_wind_power_ensemble']:
        plot_forecasts(df_pred_plot, df_test_plot, list_wind_ramps, title = 'Wind Power - QR Forecasts')

    # plot variability forecast results
    if ens_params['plt_wind_power_variability_ensemble']:
        plot_var_forecasts(df_pred_var_plot, df_test_var_plot, list_wind_ramps, title = 'Wind Power Variability - QR Forecasts')

    if not df_ramp_clusters.empty:
        num_ramp_cluster_events = len(df_ramp_clusters.cluster_id.unique())
        logger.info(' ')
        logger.opt(colors = True).info(f'<blue> -----------------> Number of Ramp Cluster Events: {num_ramp_cluster_events} </blue>')
        plot_ramp_detection(df_test_var_plot, df_pred_var_plot, df_ramp_clusters, list_wind_ramps)

    # ----------------------------> FORECASTERS CONTRIBUTIONS <----------------------------

    if ens_params['model_type'] == 'LR' and ens_params['var_model_type'] == 'LR':
        logger.info(' ')
        logger.opt(colors = True).info('<blue> -----------------> Forecasters LASSO coefficients contributions computed </blue>')
        import pickle
        from source.simulation.helpers_simulation import compute_coefficients
        from source.plots.display_contributions import lasso_coefs_pivot_data
        # NOTE(review): absolute, machine-specific path — this branch only works on
        # the original author's machine; make the location configurable.
        # NOTE(review): pickle.load executes arbitrary code from the file; only load
        # pickles written by this project.
        with open('/Users/gio/Desktop/Elia-RES-Forecasting/info_model/b1r1_previous_day.pickle', 'rb') as handle:
            previous_day = pickle.load(handle)
        iter_coefficients_contributions = compute_coefficients(ens_params, previous_day, p_values=True)
        df_pivot = lasso_coefs_pivot_data(sim_params, iter_coefficients_contributions)
        title = 'Lasso Coefficients'
        plot_stacked_bar_chart(df_pivot, title)
        plot_heatmap(df_pivot.T, title)

    # Measured values of the test window, shared by both contribution methods.
    y_test = df_test['measured'].values

    # --- permutation contributions ---
    logger.info(' -----------------> Forecasters  Permutation Contributions')
    logger.info(' ')
    # The method is selected by mutating ens_params before each call.
    ens_params['contribution_method'] = 'permutation'
    contr_mthd = ens_params['contribution_method']
    logger.info(' ')
    logger.opt(colors = True).info(f'<blue> -----------------> Forecasters {contr_mthd} contributions computed </blue>')
    iter_permutation_contributions = compute_forecasters_contributions(buyer_resource_name, ens_params, y_test, forecast_range)
    avg_permutation_contributions = update_dict_weights(avg_permutation_contributions, iter_permutation_contributions, iteration=i)

    df_pivot = permutation_pivot_data(sim_params, iter_permutation_contributions)
    # Same figures as before, drawn through the notebook's shared plotting helpers.
    plot_stacked_bar_chart(df_pivot, 'Daily Permutation')
    plot_heatmap(df_pivot.T, 'Daily Permutation')

    # --- shapley contributions ---
    ens_params['contribution_method'] = 'shapley'
    contr_mthd = ens_params['contribution_method']
    logger.info(' ')
    logger.opt(colors = True).info(f'<blue> -----------------> Forecasters {contr_mthd} contributions computed </blue>')
    iter_shapley_contributions = compute_forecasters_contributions(buyer_resource_name, ens_params, y_test, forecast_range)
    avg_shapley_contributions = update_dict_weights(avg_shapley_contributions, iter_shapley_contributions, iteration=i)

    logger.info(' ')
    logger.opt(colors = True).info(f'{iter_shapley_contributions}')
    df_pivot = permutation_pivot_data(sim_params, iter_shapley_contributions)
    plot_stacked_bar_chart(df_pivot, 'Daily Shapley')
    plot_heatmap(df_pivot.T, 'Daily Shapley')

    # clear_output(wait=True)
    # NOTE(review): this unconditional break stops the simulation after the first
    # test day, so the "average" contributions only reflect that day. Remove it to
    # run all `num_test_days`.
    break


In [None]:
def extract_quantile_columns(df, quantile):
    """
    Extract the columns whose label contains the given quantile tag.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to select from.
    quantile : str
        Substring to look for in the column labels (e.g. 'q10').

    Returns
    -------
    pandas.DataFrame
        The matching columns of ``df`` in their original order, or a new empty
        DataFrame (after printing a notice) when no label matches.
    """
    # Compare against str(label) so that non-string labels (e.g. integer
    # columns) are simply skipped instead of raising TypeError on `in`.
    columns = [name for name in df.columns if quantile in str(name)]
    if columns:
        return df[columns]
    print(f"No columns found for {quantile}")
    return pd.DataFrame()
    
# Display the columns of df_market (assigned inside the simulation loop) whose name contains 'q10'.
extract_quantile_columns(df_market, 'q10')

In [None]:
# Display df_market as left by the last executed iteration of the simulation loop.
df_market

In [None]:
df_market
# TODO: drop columns containing either q10 or q90 (not implemented; this cell only displays df_market)


In [None]:
# Pivot the permutation contributions accumulated over the executed test days.
df_pivot = permutation_pivot_data(sim_params, avg_permutation_contributions)

# Draw both views through the notebook's plotting helpers; they produce the same
# titles ("... of Average Permutation Contributions") and axis labels as the
# hand-written plotting code they replace.
plot_stacked_bar_chart(df_pivot, 'Average Permutation')
plot_heatmap(df_pivot.T, 'Average Permutation')

In [None]:
# Pivot the Shapley contributions accumulated over the executed test days.
df_pivot = permutation_pivot_data(sim_params, avg_shapley_contributions)

# Draw both views through the notebook's plotting helpers; they produce the same
# titles ("... of Average Shapley Contributions") and axis labels as the
# hand-written plotting code they replace.
plot_stacked_bar_chart(df_pivot, 'Average Shapley')
plot_heatmap(df_pivot.T, 'Average Shapley')