In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from loguru import logger
from sklearn.utils.fixes import parse_version, sp_version
# Choose the solver name by installed SciPy version: "highs" from 1.6.0 onwards,
# "interior-point" for anything older.
solver = "interior-point" if sp_version < parse_version("1.6.0") else "highs"
from IPython.display import clear_output

from source.utils.file_read import read_csv_file, filter_data, join_dataframes, replace_nan_values
from source.utils.collect_results import create_df_forecaster_first_stage
from source.utils.generate_timestamp import generate_timestamps
from source.simulation.submission_module import submission_forecasters
from source.simulation.buyer_module import prepare_buyer_data
from source.ensemble.combination_scheme.equal_weights import calculate_equal_weights
from source.ensemble.combination_scheme.avg_weights import calculate_weighted_avg
from source.ensemble.combination_scheme.model_selection import run_model_selection
from source.plots.plot_forecasts import plot_forecasts
from source.ml_engine import create_ensemble_forecasts
from source.simulation.helpers_simulation import process_combination_scheme
from source.utils.session_ml_info import delete_previous_day_pickle

In [2]:
# Configuration settings
from config.simulation_setting import Simulation, WeightedAvg, Stack

# Bind the three parameter sets used throughout the notebook in one step.
sim_params, weight_avg_params, ens_params = (
    Simulation.testing_period,  # Simulation parameters
    WeightedAvg.params,         # Weighted Average parameters
    Stack.params,               # QRA Ensemble parameters
)

In [3]:
# Seed NumPy's legacy global random generator from the simulation settings.
np.random.seed(sim_params['random_seed'])

# Load the input CSV; file name, column list and period bounds all come from sim_params
# and are passed positionally in that order.
df_processed = read_csv_file(
    *(sim_params[key] for key in ('csv_filename', 'list_columns', 'starting_period', 'ending_period'))
)

# Replace NaN values only when the 'replace_nan' flag is set; otherwise keep the frame as loaded.
df_processed = replace_nan_values(sim_params, df_processed) if sim_params['replace_nan'] else df_processed

# Emit a blank log line, then remove the previous day's pickle file.
logger.info(' ')
delete_previous_day_pickle()

# Accumulator for the per-day result frames produced by the simulation loop.
list_csv_to_save = list()

# loop over test days
# Day-by-day simulation loop: one iteration per test day. Each iteration builds the
# training/prediction windows for day `i`, collects forecaster submissions, runs the
# ensemble engine, optionally computes the baseline combination schemes, and appends
# that day's joined results frame to `list_csv_to_save`.
# NOTE(review): the output saved with this cell shows a run stopping at 1089/2000 with
# "AssertionError: Test dataframe must have 96 rows". The call that raises it is not
# visible in this notebook; when it happens, `list_csv_to_save` only holds the days
# completed before the error.
for i in tqdm(range(sim_params['num_test_days']), desc='Testing Days'):

    # generate timestamps train and prediction for test day `i`
    # (how `i` and 'window_size' shift the windows is defined inside generate_timestamps)
    start_training_timestamp, end_training_timestamp, start_prediction_timestamp, end_prediction_timestamp = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

    # With conformalized QR enabled, and once at least 'day_calibration' days have elapsed,
    # move the training start back by 'day_calibration' days.
    # NOTE(review): presumably this reserves extra days for calibration - confirm in the ML engine.
    if i >= ens_params['day_calibration'] and ens_params['conformalized_qr']:
        day_calibration = ens_params['day_calibration']
        start_training_timestamp = start_training_timestamp - pd.Timedelta(f'{day_calibration}day')

    # trimming data for training and testing
    df_train = filter_data(df_processed, start_training_timestamp, end_training_timestamp, string = 'training')
    df_test = filter_data(df_processed, start_prediction_timestamp, end_prediction_timestamp, string = 'testing')

    # ----------------------------> FORECASTERS SUBMISSION <----------------------------

    # submission_forecasters returns the market frame and rebinds df_train / df_test
    logger.debug("Forecasters submission ...")
    df_market, df_train, df_test = submission_forecasters(sim_params, df_train, df_test)   

    # ----------------------------> MARKET OPERATOR DATA <----------------------------

    logger.debug("Market operator data ...")
    df_buyer, forecast_range = prepare_buyer_data(df_train, df_test, start_prediction_timestamp, end_prediction_timestamp)

    # ----------------------------> PREDICO PLATFORM ML ENGINE <----------------------------

    # ----------------------------> ENSEMBLE FORECASTS <----------------------------

    logger.debug("Wind Ensemble forecasts ...")
    results_ensemble_forecasts = create_ensemble_forecasts(ens_params=ens_params,
                                                            df_buyer=df_buyer, 
                                                            df_market=df_market,
                                                            end_training_timestamp=end_training_timestamp,
                                                            forecast_range = forecast_range,
                                                            challenge_usecase='simulation',
                                                            simulation=True)

    ## ----------------------------> SAVE to CSV <----------------------------
    # wind power predictions: map the buyer-resource-specific column names to generic ones.
    # The rename is in place, so the frame held inside results_ensemble_forecasts is renamed too.
    df_pred_ensemble = results_ensemble_forecasts['wind_power']['predictions']
    df_pred_ensemble.rename(columns={'q50_' + sim_params['buyer_resource_name']: '50_predictions', 
                                        'q10_' + sim_params['buyer_resource_name']: '10_predictions',
                                        'q90_' + sim_params['buyer_resource_name']: '90_predictions', 
                                        'norm_' + sim_params['buyer_resource_name']: 'targets'}, inplace=True)
    # single-column frame of the test day's measured values, exposed as 'targets'
    # (only consumed by plot_forecasts below)
    df_test_ensemble = pd.DataFrame(df_test['measured']) 
    df_test_ensemble.rename(columns={'measured': 'targets'}, inplace=True)

    # drop targets column from the ensemble predictions before joining
    # (df_pred_ensemble itself keeps 'targets' for plotting)
    df_pred_ensemble_clean = df_pred_ensemble.drop(columns=['targets'], axis=1)

    # frames to be joined for this day: test data first, then ensemble predictions;
    # baseline results are appended below when enabled
    list_df_wind_power = [df_test, df_pred_ensemble_clean]

    if sim_params['baselines_comparison']:

        # ----------------------------> COMBINATION SCHEME DATA <----------------------------

        # process data for baselines combination schemes
        # (day_previous_df_test_norm_var is returned but not used in this cell)
        logger.debug("Combination scheme data ...")
        df_train_norm, day_previous_df_test_norm, day_previous_df_test_norm_var = process_combination_scheme(df_train, df_test, end_training_timestamp, start_prediction_timestamp)

        # ----------------------------> PERFORMANCE METRICS <----------------------------

        ## ----------------------------> WIND POWER <----------------------------

        # performance best model selection
        logger.debug("Best model selection ...")
        df_best_model = run_model_selection(sim_params, df_train_norm, day_previous_df_test_norm, end_training_timestamp, start_prediction_timestamp, window_size_valid=weight_avg_params['window_size_valid'])
        df_best_model_clean = df_best_model.drop(columns=['targets'], axis=1)
        list_df_wind_power.append(df_best_model_clean)

        # performance weighted average (default normalisation; dict_weights is not used further here)
        logger.debug("Weighted average ...")
        df_weighted_avg, dict_weights = calculate_weighted_avg(sim_params, 
                                                            df_train_norm, 
                                                            day_previous_df_test_norm, 
                                                            end_training_timestamp, 
                                                            start_prediction_timestamp, 
                                                            window_size_valid=weight_avg_params['window_size_valid'])
        df_weighted_avg_clean = df_weighted_avg.drop(columns=['targets'], axis=1)
        list_df_wind_power.append(df_weighted_avg_clean)

        # performance weighted avg soft: same call as above but with norm='softmax'
        logger.debug("Weighted average soft ...")
        df_weighted_avg_soft, dict_weights_soft = calculate_weighted_avg(sim_params, 
                                                                        df_train_norm, 
                                                                        day_previous_df_test_norm, 
                                                                        end_training_timestamp, 
                                                                        start_prediction_timestamp, 
                                                                        window_size_valid=weight_avg_params['window_size_valid'], 
                                                                        norm='softmax')
        df_weighted_avg_soft_clean = df_weighted_avg_soft.drop(columns=['targets'], axis=1)
        list_df_wind_power.append(df_weighted_avg_soft_clean)

        # performance equal weights
        logger.debug("Equal weights ...")
        df_equal_weights = calculate_equal_weights(day_previous_df_test_norm, start_prediction_timestamp)
        df_equal_weights_clean = df_equal_weights.drop(columns=['targets'], axis=1)
        list_df_wind_power.append(df_equal_weights_clean)

        # performance malicious cheat (appended as returned; no 'targets' drop is applied here)
        if sim_params['malicious']:
            logger.debug("Malicious forecaster ...")
            df_malicious = create_df_forecaster_first_stage(day_previous_df_test_norm, 'malicious', start_prediction_timestamp)
            list_df_wind_power.append(df_malicious)

        # performance noisy (appended as returned; no 'targets' drop is applied here)
        if sim_params['noisy']:
            logger.debug("Noisy forecaster ...")
            df_noisy = create_df_forecaster_first_stage(day_previous_df_test_norm, 'noisy', start_prediction_timestamp)
            list_df_wind_power.append(df_noisy)

        # plot forecasts
        # NOTE(review): this sits inside the baselines_comparison branch, so the ensemble plot
        # is skipped whenever baselines are disabled - confirm this is intended.
        if ens_params['plt_wind_power_ensemble']:
            plot_forecasts(df_pred_ensemble, df_test_ensemble, list_wind_ramps=[], title=f'Wind Power Forecasting')

    # join this day's frames (test data, ensemble predictions, any baselines) and store the result
    df_csv_wind_power = join_dataframes(*list_df_wind_power)
    list_csv_to_save.append(df_csv_wind_power)

    # Clear the cell output after each day so logs from earlier iterations do not accumulate
    clear_output(wait=True)

    # import time
    # time.sleep(30)

Testing Days:  54%|█████▍    | 1089/2000 [1:04:43<38:54,  2.56s/it][32m2024-12-15 14:17:50.445[0m | [1mINFO    [0m | [36msource.utils.generate_timestamp[0m:[36mgenerate_timestamps[0m:[36m12[0m - [1m [0m
[32m2024-12-15 14:17:50.446[0m | [1mINFO    [0m | [36msource.utils.generate_timestamp[0m:[36mgenerate_timestamps[0m:[36m13[0m - [1m[34m-------------------------------------------------------------------------------------------[0m[1m[0m
[32m2024-12-15 14:17:50.446[0m | [1mINFO    [0m | [36msource.utils.generate_timestamp[0m:[36mgenerate_timestamps[0m:[36m14[0m - [1m[34mStart training: 2023-12-26 00:00:00+00:00 - End training: 2024-01-25 00:00:00+00:00[0m[1m[0m
[32m2024-12-15 14:17:50.446[0m | [1mINFO    [0m | [36msource.utils.generate_timestamp[0m:[36mgenerate_timestamps[0m:[36m15[0m - [1m[34m-------------------------------------------------------------------------------------------[0m[1m[0m
[32m2024-12-15 14:17:50.446[0m | [1mI

AssertionError: Test dataframe must have 96 rows

In [4]:
# # ----------------------------> SAVE TO CSV <----------------------------
# Concatenate the per-day result frames collected by the simulation loop into one dataframe.
# The frame is bound to `df_csv` BEFORE writing: DataFrame.to_csv returns None when it is
# given a file path, so assigning its return value would leave `df_csv` as None and break
# any later cell that indexes or plots `df_csv`.
df_csv = pd.concat(list_csv_to_save)

# The output file name depends on the 'most_recent' simulation flag.
if sim_params['most_recent']:
    csv_results_path = 'wp_forecasters_comparison_results.csv'
else:
    csv_results_path = 'wp_forecasters_comparison_results_no_mostrecent.csv'

df_csv.to_csv(csv_results_path)

In [None]:
# # ----------------------------> PLOT FORECASTS <----------------------------
# Line plot of the measured values against the 10/50/90 prediction columns,
# limited to the first 2000 rows of `df_csv`.
df_csv.loc[:, ['measured', '10_predictions', '50_predictions', '90_predictions']].head(2000).plot(figsize=(20, 10))