In [1]:
import os
import pickle
import sys

from dotenv import load_dotenv

# Load .env and put PATH_CURRENT on sys.path first: the config/source imports
# below are presumably resolved from that directory.
load_dotenv()
sys.path.append(os.getenv("PATH_CURRENT"))

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from loguru import logger

from config.ramp_calib_setting import Simulation
from source.ensemble.stack_generalization.ramp_detection.utils_plot import plot_ramp_events
from source.ensemble.stack_generalization.ramp_detection.utils_plot import plot_ramp_confusion_matrix
from source.utils.file_read import process_and_concat_files

# --- Analysis parameters ---
# Used in the result file names and forwarded to plot_ramp_events.
max_consecutive_points = 3

# Display options forwarded to plot_ramp_events.
plot_results = 'TP'
plot_prediction = True
plot_iqw = False
cluster_color = False

# Default operating point: k appears in the result file names, q3_q1 selects the
# entry inside a result file. Later cells overwrite both.
q3_q1 = (0.75, 0.25)
k = 1.8

def load_and_append_results(file_paths, key_names):
    """
    Read each pickle in ``file_paths`` and keep only the ``key_names`` fields of every stored entry.

    Parameters
    ----------
    file_paths : iterable of path-like
        Pickle files, read in the given order. Each must unpickle to an iterable of
        mappings.
    key_names : sequence
        Keys copied from every entry into the output dictionaries.

    Returns
    -------
    list of dict
        One dictionary per entry, in file order and then entry order.

    Raises
    ------
    KeyError
        If an entry lacks one of ``key_names``.
    """
    collected = []
    for path in file_paths:
        # NOTE: unpickling can execute arbitrary code; only read files you trust.
        with open(path, 'rb') as handle:
            entries = pickle.load(handle)
        for entry in entries:
            collected.append({name: entry[name] for name in key_names})
    return collected

def plot_heatmap(data_list, value_col, title, cmap=sns.diverging_palette(10, 133, as_cmap=True)):
    """
    Pivot a list of result dictionaries into a q3_q1 x k table and draw it as an annotated heatmap.

    Parameters
    ----------
    data_list : list of dict
        Records containing at least the keys 'q3_q1', 'k' and ``value_col``.
    value_col : str
        Key whose values fill the heatmap cells.
    title : str
        Figure title.
    cmap : matplotlib colormap, optional
        Colormap handed to ``sns.heatmap``. The default palette is built once, when
        the function is defined.

    Raises
    ------
    ValueError
        If ``value_col`` is not a column of the records, or (raised by
        ``DataFrame.pivot``) if a (q3_q1, k) pair occurs more than once.
    KeyError
        If 'q3_q1' or 'k' is missing from the records (raised by ``DataFrame.pivot``).
    """
    df_results = pd.DataFrame(data_list)
    # Fail early with a readable message when the metric column is absent.
    if value_col not in df_results.columns:
        raise ValueError(
            f"column {value_col!r} not found in results; available columns: {list(df_results.columns)}"
        )
    # Rows: q3_q1 settings, columns: k values, cells: the requested metric.
    df_results_pivot = df_results.pivot(index='q3_q1', columns='k', values=value_col)
    # Plot heatmap
    plt.figure(figsize=(10, 7))
    sns.heatmap(df_results_pivot, annot=True, fmt=".2f", cmap=cmap)
    plt.xticks(rotation=45)
    plt.yticks(rotation=45)
    plt.title(title)
    plt.show()

In [2]:
# k values for which a result pickle is read (one file per k, metric and variant).
list_k_values = [1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8]
# Results sub-folder name, e.g. '3_consecutive' when max_consecutive_points is 3.
str_most_recent = str(max_consecutive_points) + '_consecutive'

In [None]:
# Folder holding the pickled result files for the current consecutive-points setting.
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable project root so the notebook runs on other machines.
results_dir = f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}'

# (result key, which is also the file-name token; title label; colormap override).
# None keeps plot_heatmap's default palette; the bias score swaps the two hue endpoints.
day_metrics = [
    ('f1', 'F1 Score', None),
    ('roc_auc', 'ROC AUC', None),
    ('csi', 'CSI', None),
    ('bs', 'Bias Score', sns.diverging_palette(133, 10, as_cmap=True)),
]
variants = ('symmetric', 'asymmetric')

# Load everything first so a missing file fails before any figure is drawn.
results_day = {}
for metric, _label, _cmap in day_metrics:
    for variant in variants:
        file_paths = [
            f'{results_dir}/{max_consecutive_points}_{k_value}_{metric}_{variant}.pkl'
            for k_value in list_k_values
        ]
        results_day[(metric, variant)] = load_and_append_results(file_paths, ['q3_q1', 'k', metric])

# One heatmap per metric and variant: symmetric first, then asymmetric.
for metric, label, cmap in day_metrics:
    for variant in variants:
        heatmap_kwargs = {} if cmap is None else {'cmap': cmap}
        plot_heatmap(
            results_day[(metric, variant)],
            metric,
            f'{label} - {variant.capitalize()} - DAY',
            **heatmap_kwargs,
        )

In [None]:
# Folder holding the pickled result files for the current consecutive-points setting.
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable project root so the notebook runs on other machines.
results_dir = f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}'

# (file-name token; title label; colormap override). The intraday value of each metric
# is stored under '<token>_intraday' inside the file.
# None keeps plot_heatmap's default palette; the bias score swaps the two hue endpoints.
intraday_metrics = [
    ('f1', 'F1 Score', None),
    ('roc_auc', 'ROC AUC', None),
    ('csi', 'CSI', None),
    ('bs', 'Bias Score', sns.diverging_palette(133, 10, as_cmap=True)),
]
variants = ('symmetric', 'asymmetric')

# Load everything first so a missing file fails before any figure is drawn.
results_intraday = {}
for metric, _label, _cmap in intraday_metrics:
    for variant in variants:
        file_paths = [
            f'{results_dir}/{max_consecutive_points}_{k_value}_{metric}_{variant}.pkl'
            for k_value in list_k_values
        ]
        results_intraday[(metric, variant)] = load_and_append_results(
            file_paths, ['q3_q1', 'k', f'{metric}_intraday']
        )

# One heatmap per metric and variant: symmetric first, then asymmetric.
for metric, label, cmap in intraday_metrics:
    for variant in variants:
        heatmap_kwargs = {} if cmap is None else {'cmap': cmap}
        plot_heatmap(
            results_intraday[(metric, variant)],
            f'{metric}_intraday',
            f'{label} - {variant.capitalize()} - INTRADAY',
            **heatmap_kwargs,
        )

In [None]:
# Operating point inspected from here on: k picks the result file, q3_q1 the entry in it.
q3_q1 = (0.775, 0.5)
k = 1.4

# Read the asymmetric F1 results for this k.
f1_asymmetric_path = f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}/{max_consecutive_points}_{k}_f1_asymmetric.pkl'
with open(f1_asymmetric_path, 'rb') as f:
    data = pickle.load(f)

# First entry whose q3_q1 matches (IndexError if none does).
# NOTE(review): despite its name, this variable holds the 'ramp_events_by_intraday' field.
ramp_events_by_date = [
    entry['ramp_events_by_intraday']
    for entry in data
    if entry['q3_q1'] == q3_q1
][0]

# Draw the confusion matrix and keep the values the helper returns.
f1, roc_auc, cis, bs, fpr, tpr = plot_ramp_confusion_matrix(ramp_events_by_date)


In [None]:
# Operating point analysed in this cell: k picks the result file, q3_q1 the entry in it.
q3_q1 = (0.775, 0.5)
k = 1.4

# Load the asymmetric F1 results for this k.
# NOTE: pickle.load can execute arbitrary code; only open result files you trust.
with open(f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}/{max_consecutive_points}_{k}_f1_asymmetric.pkl', 'rb') as f:
    data = pickle.load(f)

# Intraday ramp events of the first entry whose q3_q1 matches (IndexError if none does).
ramp_events_by_intraday = [entry['ramp_events_by_intraday'] for entry in data if entry['q3_q1'] == q3_q1][0]

sim_params = Simulation.testing_period

# Process and concatenate the twelve input files file_1 ... file_12.
files = [sim_params[f'file_{i}'] for i in range(1, 13)]

df = process_and_concat_files(files)
# Absolute step-to-step change of the measured series.
df_variability = df['measured'].diff().abs().to_frame()

# Flag every period with its confusion-matrix outcome
# (predicted_ramps = prediction, ramp_events = observation).
predicted_ramps = ramp_events_by_intraday['predicted_ramps']
observed_ramps = ramp_events_by_intraday['ramp_events']
ramp_events_by_intraday['tp'] = (predicted_ramps == 1) & (observed_ramps == 1)
ramp_events_by_intraday['fp'] = (predicted_ramps == 1) & (observed_ramps == 0)
ramp_events_by_intraday['tn'] = (predicted_ramps == 0) & (observed_ramps == 0)
ramp_events_by_intraday['fn'] = (predicted_ramps == 0) & (observed_ramps == 1)

# Mean variability per 8-hour window.
df_variability_intraday = df_variability.groupby(pd.Grouper(freq='8h')).mean()

# Align the outcome flags with the 8-hour windows; the inner join keeps only
# windows present in both frames.
ramp_events_by_intraday = ramp_events_by_intraday.set_index('datetime')
df_variability_intraday = df_variability_intraday.join(ramp_events_by_intraday, how='inner')

# One frame per outcome holding the variability and an outcome label. .assign returns
# a new frame, so no column is written into a slice of another frame.
df_variability_tp, df_variability_fp, df_variability_tn, df_variability_fn = (
    df_variability_intraday.loc[df_variability_intraday[flag], ['measured']].assign(type=label)
    for flag, label in (('tp', 'TP'), ('fp', 'FP'), ('tn', 'TN'), ('fn', 'FN'))
)

df_variability = pd.concat([df_variability_tp, df_variability_fp, df_variability_tn, df_variability_fn])

# Box plot of the 8-hour mean variability, one box per confusion-matrix outcome.
plt.figure(figsize=(15, 7))
sns.boxplot(x='type', y='measured', data=df_variability, whis=1.5)
plt.ylabel('Measured Variability')
plt.xlabel('Confusion Matrix Outcome')
plt.title('Measured Variability by Confusion Matrix Outcome')
plt.show()


In [None]:
# Operating point analysed in this cell: k picks the result file, q3_q1 the entry in it.
q3_q1 = (0.775, 0.5)
k = 1.4

# Load the asymmetric F1 results for this k.
# NOTE: pickle.load can execute arbitrary code; only open result files you trust.
with open(f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}/{max_consecutive_points}_{k}_f1_asymmetric.pkl', 'rb') as f:
    data = pickle.load(f)

# Fields of the first entry whose q3_q1 matches (IndexError if none does).
ramp_events_by_intraday = [entry['ramp_events_by_intraday'] for entry in data if entry['q3_q1'] == q3_q1][0]
list_ramp_alarm_days = [entry['alarms_days'] for entry in data if entry['q3_q1'] == q3_q1][0]

sim_params = Simulation.testing_period

# Process and concatenate the twelve input files file_1 ... file_12.
files = [sim_params[f'file_{i}'] for i in range(1, 13)]

df = process_and_concat_files(files)
# Signed step-to-step change of the measured series (the sign is needed for the direction).
df_variability = df['measured'].diff().to_frame()

# Flag every period with its confusion-matrix outcome
# (predicted_ramps = prediction, ramp_events = observation).
predicted_ramps = ramp_events_by_intraday['predicted_ramps']
observed_ramps = ramp_events_by_intraday['ramp_events']
ramp_events_by_intraday['tp'] = (predicted_ramps == 1) & (observed_ramps == 1)
ramp_events_by_intraday['fp'] = (predicted_ramps == 1) & (observed_ramps == 0)
ramp_events_by_intraday['tn'] = (predicted_ramps == 0) & (observed_ramps == 0)
ramp_events_by_intraday['fn'] = (predicted_ramps == 0) & (observed_ramps == 1)

# Largest and smallest signed change per 8-hour window.
df_variability_intraday_max = df_variability.groupby(pd.Grouper(freq='8h')).agg({'measured': 'max'})
df_variability_intraday_min = df_variability.groupby(pd.Grouper(freq='8h')).agg({'measured': 'min'})
df_variability_intraday_max_min = df_variability_intraday_max.join(df_variability_intraday_min, lsuffix='_max', rsuffix='_min')

# Align the outcome flags with the 8-hour windows; the inner join keeps only
# windows present in both frames.
ramp_events_by_intraday = ramp_events_by_intraday.set_index('datetime')
df_variability_intraday = df_variability_intraday_max_min.join(ramp_events_by_intraday, how='inner')

# Keep, per window, the change with the larger magnitude; ties and NaN comparisons
# fall back to the minimum, as the row-wise version did.
max_dominates = df_variability_intraday['measured_max'].abs() > df_variability_intraday['measured_min'].abs()
df_variability_intraday['measured'] = df_variability_intraday['measured_max'].where(
    max_dominates, df_variability_intraday['measured_min']
)

# Only the true positives are checked for direction.
df_variability_tp = df_variability_intraday.loc[df_variability_intraday['tp']]

# Stack the first element of every alarm-day record and keep the anomalous rows.
list_ramp = [list_ramp_alarm_days[i][0] for i in range(len(list_ramp_alarm_days))]
df_is_anomalous = pd.concat(list_ramp)
df_is_anomalous = df_is_anomalous[df_is_anomalous['is_anomalous'] == True]
# Max Q90 and min Q10 per 8-hour window; windows without anomalous rows are dropped.
df_all = df_is_anomalous.groupby(pd.Grouper(freq='8h')).agg({'Q90': 'max', 'Q10': 'min'})
df_dropna = df_all.dropna()
# Join the quantiles onto the true positives by window start.
df_variability_tp = df_variability_tp.join(df_dropna, how='inner')
# Predicted direction: "up" when |Q90| exceeds |Q10|, otherwise "down".
q90_dominates = df_variability_tp['Q90'].abs() > df_variability_tp['Q10'].abs()
df_variability_tp['direction'] = q90_dominates.map({True: 'up', False: 'down'})
# Correct when the sign of the measured change agrees with the predicted direction.
df_variability_tp['correct'] = (
    ((df_variability_tp['measured'] > 0) & (df_variability_tp['direction'] == 'up'))
    | ((df_variability_tp['measured'] < 0) & (df_variability_tp['direction'] == 'down'))
)
# Share of correct / incorrect directions, in percent.
df_variability_tp['correct'].value_counts(normalize=True) * 100

In [None]:
# Load the asymmetric F1 results explicitly instead of reusing whatever `data` an
# earlier cell left behind. This cell rebinds `data` to the ROC AUC results further
# down, so a second run would otherwise take the ramp events from the wrong file.
# q3_q1 and k keep the values set by the most recently executed cell that assigned them.
# NOTE: pickle.load can execute arbitrary code; only open result files you trust.
with open(f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}/{max_consecutive_points}_{k}_f1_asymmetric.pkl', 'rb') as f:
    data_f1 = pickle.load(f)

# First F1 entry whose q3_q1 matches (IndexError if none does).
entry_f1 = [entry for entry in data_f1 if entry['q3_q1'] == q3_q1][0]
# Retrieve ramp events by date
ramp_events_by_date = entry_f1['ramp_events_by_date']
# Retrieve ramp alarm days
list_ramp_alarm_days = entry_f1['alarms_days']
# Retrieve ramp threshold
ramp_threshold = entry_f1['ramp_threshold']

# Load results for ROC AUC scores
with open(f'/Users/gio/Desktop/Elia-RES-Forecasting/results/box/{str_most_recent}/{max_consecutive_points}_{k}_roc_auc_asymmetric.pkl', 'rb') as f:
    data = pickle.load(f)

# Retrieve ROC AUC score of the first matching entry (IndexError if none matches).
roc_auc = [entry['roc_auc'] for entry in data if entry['q3_q1'] == q3_q1][0]

df_ramp_events_detected, list_mean_distance = plot_ramp_events(ramp_events_by_date, 
                                                            list_ramp_alarm_days, 
                                                            ramp_threshold, 
                                                            plot_results=plot_results, 
                                                            max_consecutive_points=max_consecutive_points, 
                                                            plot_prediction=plot_prediction, 
                                                            plot_iqw=plot_iqw, 
                                                            cluster_color = cluster_color,
                                                            intraday=False)

In [None]:
# Summarise the distances returned by plot_ramp_events; nothing is printed when the list is empty.
if len(list_mean_distance) > 0:
    df_distance = pd.DataFrame(list_mean_distance)
    df_distance.columns = ['mean_distance_per_day', 'num_ramp', 'num_clusters']
    print(' ')
    print(df_distance)
    print('')
    # NOTE(review): the heading says "Intraday", but the visible plot_ramp_events call
    # passes intraday=False; confirm which label is intended.
    print('Intraday Analysis: 8-hours time window')
    summary_rows = (
        ('Error Phase', df_distance['mean_distance_per_day'].mean()),
        ('Tot Number of Ramps Detected', df_distance['num_clusters'].sum()),
        ('Tot Number of Ramps Observed', df_distance['num_ramp'].sum()),
        # one row of df_distance per day
        ('Number of days with at least one ramp event', len(df_distance)),
    )
    for summary_label, summary_value in summary_rows:
        print(summary_label, summary_value)


In [None]:
# First element of the first item returned by plot_ramp_events.
# Presumably a DataFrame with 'Q90' and 'Q10' columns; confirm against the helper.
df = df_ramp_events_detected[0][0]

# Add 'direction': "up" where |Q90| > |Q10|, "down" otherwise (including NaN comparisons).
# Vectorised, so it also works when the frame has no rows.
q90_dominates = df['Q90'].abs() > df['Q10'].abs()
df['direction'] = q90_dominates.map({True: 'up', False: 'down'})
df
