In [None]:
import sys
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()

# Put the directory named by PATH_CURRENT on the import path (presumably the
# project root, so that the project-local imports in later cells resolve).
# os.getenv returns None when the variable is unset; a None entry in sys.path
# is ignored by the import system, so the problem would only surface later as
# an unrelated-looking ModuleNotFoundError. Report it here instead.
project_root = os.getenv("PATH_CURRENT")
if project_root is None:
    print("PATH_CURRENT is not set; sys.path left unchanged", file=sys.stderr)
elif project_root not in sys.path:
    # Guarded so that re-running this cell does not pile up duplicate entries.
    sys.path.append(project_root)

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from loguru import logger
from source.utils.file_read import process_and_concat_files 
from source.utils.file_read import filter_df
from source.simulation.submission_module import submission_forecasters
from source.simulation.buyer_module import prepare_buyer_data
from source.utils.generate_timestamp import generate_timestamps
from source.ml_engine import create_ensemble_forecasts
from source.simulation.helpers_simulation import process_combination_scheme
from sklearn.utils.fixes import parse_version, sp_version
# Choose the solver name from the version exposed by sklearn.utils.fixes:
# "highs" for 1.6.0 and newer, "interior-point" for anything older.
# NOTE(review): `solver` is not referenced in the cells visible here - confirm
# it is still needed.
if sp_version >= parse_version("1.6.0"):
    solver = "highs"
else:
    solver = "interior-point"
from IPython.display import clear_output

In [2]:
from config.ramp_calib_setting import Simulation, WeightedAvg, Stack
from source.utils.session_ml_info import delete_previous_day_pickle
# Parameter sets taken from config.ramp_calib_setting.
# Simulation settings for the testing period (seed, input files, window, flags).
sim_params = Simulation.testing_period

# Parameters handed to create_ensemble_forecasts as `ens_params`.
ens_params = Stack.params

# Weighted-average parameters.
# NOTE(review): not referenced in the cells visible here - confirm it is used.
weight_avg_params = WeightedAvg.params

In [3]:
# Seed NumPy's global RNG from the simulation config.
np.random.seed(sim_params['random_seed'])

# Input files are configured under the keys file_1 ... file_12.
NUM_INPUT_FILES = 12
files = [sim_params[f'file_{k}'] for k in range(1, NUM_INPUT_FILES + 1)]

logger.info(' ')
logger.info(f'Load Files: {files}')

# Process and concatenate the configured files.
df = process_and_concat_files(files)

# Filter the frame using the configured forecast and measured column names.
df_filtered = filter_df(df, sim_params['forecasts_col'], sim_params['measured_col'])

# Optionally replace NaN values with 0.
if sim_params['replace_nan']:
    logger.info(' ')
    logger.warning("Replacing NaN values with 0s")
    # Show the per-column NaN count before the values are overwritten.
    print(df_filtered.isna().sum())
    # Rebind rather than fill in place: filter_df is opaque here and may return
    # a view or the input frame itself, in which case an in-place fill would
    # either trigger SettingWithCopyWarning or silently modify `df` as well.
    df_filtered = df_filtered.fillna(0)

# Buyer resource name.
# NOTE(review): not referenced in the cells visible here - confirm it is used.
buyer_resource_name = 'b1r1'

# Remove the previous day's pickle file before the simulation starts.
logger.info(' ')
delete_previous_day_pickle()
logger.opt(colors = True).warning('previous day pickle file removed')

# Per-test-day results, filled by the simulation loop.
list_pred_var_results = []  # boxplot
list_lof_results = []  # local outlier factor

# Run the simulation once per test day.
for i in tqdm(range(sim_params['num_test_days']), desc='Testing Days'):

    # Training and prediction window boundaries for test day i.
    (
        start_training_timestamp,
        end_training_timestamp,
        start_prediction_timestamp,
        end_prediction_timestamp,
    ) = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

    separator_line = '<blue>-------------------------------------------------------------------------------------------</blue>'
    logger.info(' ')
    logger.opt(colors=True).info(separator_line)
    logger.opt(colors=True).info(f'<blue>Start training: {start_training_timestamp} - End training: {end_training_timestamp}</blue>')
    logger.opt(colors=True).info(separator_line)
    logger.opt(colors=True).info(f'<blue>Start prediction: {start_prediction_timestamp} - End prediction: {end_prediction_timestamp}</blue>')

    # The test slice starts one day before the prediction window.
    day_previous_start_prediction_timestamp = start_prediction_timestamp - pd.Timedelta('1day')

    # Series.between is inclusive on both ends by default; the trailing
    # .iloc[:-1, :] then drops the last selected row of each slice.
    index_as_series = df_filtered.index.to_series()
    in_training_window = index_as_series.between(start_training_timestamp, end_training_timestamp)
    in_test_window = index_as_series.between(day_previous_start_prediction_timestamp, end_prediction_timestamp)
    df_train = df_filtered[in_training_window].iloc[:-1, :]
    df_test = df_filtered[in_test_window].iloc[:-1, :]

    logger.info(' ')
    logger.opt(colors=True).info(f'<blue> -----------------> Length of training data: {len(df_train)} </blue>')
    logger.opt(colors=True).info(f'<blue> -----------------> Length of test data: {len(df_test)} </blue>')

    logger.info(' ')
    logger.opt(colors=True).info('<blue> -----------------> Forecasters prediction submitted </blue>')

    # ---- Forecasters' prediction submission ----
    # Note: df_train and df_test are replaced by the frames returned here.
    df_market, df_train, df_test = submission_forecasters(sim_params, df_train, df_test)

    # ---- Target (buyer) data ----
    df_buyer, forecast_range = prepare_buyer_data(
        df_train,
        df_test,
        start_prediction_timestamp,
        end_prediction_timestamp,
    )

    # ---- ML engine: ensemble forecasts ----
    results_ensemble_forecasts = create_ensemble_forecasts(
        ens_params=ens_params,
        df_buyer=df_buyer,
        df_market=df_market,
        end_training_timestamp=end_training_timestamp,
        forecast_range=forecast_range,
        challenge_usecase='simulation',
        simulation=True,
    )

    # ---- Target variability ----
    (
        df_train_norm,
        day_previous_df_test_norm,
        day_previous_df_test_norm_var,
    ) = process_combination_scheme(
        df_train,
        df_test,
        end_training_timestamp,
        day_previous_start_prediction_timestamp,
    )

    # The normalised frames are stored without the 'norm_measured' column.
    df_train_norm = df_train_norm.drop(columns=['norm_measured'])
    day_previous_df_test_norm = day_previous_df_test_norm.drop(columns=['norm_measured'])

    # Keep only the last 96 rows / values
    # (presumably one day of 15-minute samples - TODO confirm).
    df_test_norm = day_previous_df_test_norm.iloc[-96:]
    target_variability = day_previous_df_test_norm_var['norm_measured'].values[-96:]

    # In-sample and out-of-sample variability predictions from the ensemble.
    variability_results = results_ensemble_forecasts['wind_power_variability']
    pred_var_insample = variability_results['predictions_insample']
    pred_var_outsample = variability_results['predictions_outsample']

    # ---- Collect results ----

    # Boxplot / KDE results.
    if sim_params['boxplot'] or sim_params['kde']:
        list_pred_var_results.append({
            "pred_var_in": pred_var_insample,
            "pred_var_out": pred_var_outsample,
            "target_variability": target_variability,
            "forecast_range": forecast_range,
            "df_train": df_train,
        })

    # Local outlier factor results.
    if sim_params['lof']:
        list_lof_results.append({
            "pred_var_in": pred_var_insample,
            "pred_var_out": pred_var_outsample,
            "df_train_norm": df_train_norm,
            "df_test_norm": df_test_norm,
            "target_variability": target_variability,
            "forecast_range": forecast_range,
            "df_train": df_train,
        })

    # Wipe this iteration's cell output once the next output arrives.
    clear_output(wait=True)

Testing Days: 100%|██████████| 300/300 [16:44<00:00,  3.35s/it]


In [4]:
import pickle

logger.info(' ')
logger.opt(colors = True).info('<blue> -----------------> Saving results prediction variability as pickle file </blue>')

# Save the prediction-variability results as a pickle file.
# The simulation loop fills list_pred_var_results when either 'boxplot' or
# 'kde' is enabled, so the same condition is used here; checking 'boxplot'
# alone would discard the results of a kde-only run.
if sim_params['boxplot'] or sim_params['kde']:
    with open('results_pred_var_no_mostrecent.pkl', 'wb') as f:
        pickle.dump(list_pred_var_results, f)
    logger.opt(colors = True).info('<blue> -----------------> results saved </blue>')

# Save the local-outlier-factor results as a pickle file.
if sim_params['lof']:
    with open('results_lof_no_mostrecent.pkl', 'wb') as f:
        pickle.dump(list_lof_results, f)
    logger.opt(colors = True).info('<blue> -----------------> results saved </blue>')

[32m2024-09-26 08:27:27.208[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1m [0m
[32m2024-09-26 08:27:27.209[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1m[34m -----------------> Saving results prediction variability as pickle file [0m[1m[0m
[32m2024-09-26 08:27:27.329[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [1m[34m -----------------> results saved [0m[1m[0m
[32m2024-09-26 08:27:27.485[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1m[34m -----------------> results saved [0m[1m[0m
