In [1]:
import numpy as np
from loguru import logger
import pandas as pd
from source.utils.file_read import process_and_concat_files 
from source.utils.file_read import filter_df
from source.utils.generate_timestamp import generate_timestamps
from source.forecasters.deterministic import create_day_ahead_predictions, create_day_ahead_11_predictions, create_week_ahead_predictions
from source.forecasters.probabilistic import create_day_ahead_quantiles10, create_day_ahead_11_quantiles10, create_week_ahead_quantiles10
from source.forecasters.probabilistic import create_day_ahead_quantiles90, create_day_ahead_11_quantiles90, create_week_ahead_quantiles90

In [2]:
from config.PREDICO_setting import (
    Buyer,
    Simulation,
    Stack,
    WeightedAvg,
)

# Settings for the testing period (file names, column names, training start, window size).
sim_params = Simulation.testing_period
# Settings of the weighted-average scheme.
weight_avg_params = WeightedAvg.params
# Settings handed to the ensemble (stacking) engine.
ens_params = Stack.params
# Buyer-side settings (e.g. the challenge use case).
buyer_params = Buyer.params

# Read Files

In [3]:
# Fix NumPy's global random state so repeated runs give the same results.
np.random.seed(42)

# Collect the five input file entries from the simulation settings and load them into one frame.
files = [sim_params[file_key] for file_key in ('file_0', 'file_1', 'file_2', 'file_3', 'file_4')]
df = process_and_concat_files(files)

# Maximum capacity = largest value observed in the `measured` column.
maximum_capacity = df['measured'].max()

# Filter the frame using the configured forecast and measurement column names.
forecasts_col = sim_params['forecasts_col']
measured_col = sim_params['measured_col']
df_filtered = filter_df(df, forecasts_col, measured_col)

# Forecasters

In [4]:
# Accumulators for the per-period losses. Every name is bound to its own,
# independent empty list (the generator builds a fresh list per target).

# --- RMSE loss, one list per scheme ---
(
    lst_rmse_gbr_ensemble,          # gradient boosting regressor ensemble
    lst_rmse_equal_weights,         # equal weights scheme
    lst_rmse_weighted_avg,          # weighted average scheme
    lst_rmse_baseline_dayahead,     # baseline day ahead
    lst_rmse_baseline_dayahead11h,  # baseline day ahead 11
    lst_rmse_baseline_week_ahead,   # baseline week ahead
) = ([] for _ in range(6))

# --- RMSE "var" loss, one list per scheme ---
(
    lst_rmse_var_gbr_ensemble,          # gradient boosting regressor ensemble
    lst_rmse_var_equal_weights,         # equal weights scheme
    lst_rmse_var_weighted_avg,          # weighted average scheme
    lst_rmse_var_baseline_dayahead,     # baseline day ahead
    lst_rmse_var_baseline_dayahead11h,  # baseline day ahead 11
    lst_rmse_var_baseline_week_ahead,   # baseline week ahead
) = ([] for _ in range(6))

# --- quantile ("pb", presumably pinball) loss for q10 / q90, per scheme ---
# gradient boosting regressor ensemble
lst_pb_gbr_ensemble_q10, lst_pb_gbr_ensemble_q90 = [], []
# weighted average scheme
lst_pb_weighted_avg_q10, lst_pb_weighted_avg_q90 = [], []
# equal weights scheme
lst_pb_equal_weights_q10, lst_pb_equal_weights_q90 = [], []
# baseline day ahead
lst_pb_dayahead_q10, lst_pb_dayahead_q90 = [], []
# baseline day ahead 11
lst_pb_dayahead_11h_q10, lst_pb_dayahead_11h_q90 = [], []
# baseline week ahead
lst_pb_week_ahead_q10, lst_pb_week_ahead_q90 = [], []


i = 0  # index of the testing period

# Boundaries of the training and prediction windows for testing period `i`.
(
    start_training_timestamp,
    end_training_timestamp,
    start_prediction_timestamp,
    end_prediction_timestamp,
) = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

logger.info(' ')
logger.opt(colors=True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
logger.opt(colors=True).info(f'<blue>Start prediction: {start_prediction_timestamp} - End prediction: {end_prediction_timestamp}</blue>')

# Select each window by index timestamp. `between` is inclusive on both ends,
# and the final selected row is then dropped (presumably to exclude the end boundary).
_index_as_series = df_filtered.index.to_series()
_in_training_window = _index_as_series.between(start_training_timestamp, end_training_timestamp)
_in_prediction_window = _index_as_series.between(start_prediction_timestamp, end_prediction_timestamp)
df_train = df_filtered[_in_training_window].iloc[:-1, :]
df_test = df_filtered[_in_prediction_window].iloc[:-1, :]

logger.info(' ')
logger.opt(colors=True).info('<blue> -----------------> Forecasters prediction submitted </blue>')

# Every forecaster is applied to the training window first, then to the test
# window, in the same sequence as before (deterministic, then q10, then q90).

# deterministic forecasts: day ahead / day ahead 11 / week ahead
df_day_ahead_pred_train, df_day_ahead_pred_test = (
    create_day_ahead_predictions(df_train),
    create_day_ahead_predictions(df_test),
)
df_day_ahead11_pred_train, df_day_ahead11_pred_test = (
    create_day_ahead_11_predictions(df_train),
    create_day_ahead_11_predictions(df_test),
)
df_week_ahead_pred_train, df_week_ahead_pred_test = (
    create_week_ahead_predictions(df_train),
    create_week_ahead_predictions(df_test),
)

# quantile-10 forecasts: day ahead / day ahead 11 / week ahead
df_day_ahead_q10_train, df_day_ahead_q10_test = (
    create_day_ahead_quantiles10(df_train),
    create_day_ahead_quantiles10(df_test),
)
df_day_ahead11_q10_train, df_day_ahead11_q10_test = (
    create_day_ahead_11_quantiles10(df_train),
    create_day_ahead_11_quantiles10(df_test),
)
df_week_ahead_q10_train, df_week_ahead_q10_test = (
    create_week_ahead_quantiles10(df_train),
    create_week_ahead_quantiles10(df_test),
)

# quantile-90 forecasts: day ahead / day ahead 11 / week ahead
df_day_ahead_q90_train, df_day_ahead_q90_test = (
    create_day_ahead_quantiles90(df_train),
    create_day_ahead_quantiles90(df_test),
)
df_day_ahead11_q90_train, df_day_ahead11_q90_test = (
    create_day_ahead_11_quantiles90(df_train),
    create_day_ahead_11_quantiles90(df_test),
)
df_week_ahead_q90_train, df_week_ahead_q90_test = (
    create_week_ahead_quantiles90(df_train),
    create_week_ahead_quantiles90(df_test),
)

[32m2024-06-25 19:07:15.404[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1m [0m
[32m2024-06-25 19:07:15.405[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m53[0m - [1m[34m-------------------------------------------------------------------------------------------[0m[1m[0m
[32m2024-06-25 19:07:15.405[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m54[0m - [1m[34mStart prediction: 2023-02-20 00:00:00+00:00 - End prediction: 2023-02-21 00:00:00+00:00[0m[1m[0m
[32m2024-06-25 19:07:15.408[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m [0m
[32m2024-06-25 19:07:15.408[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m60[0m - [1m[34m -----------------> Forecasters prediction submitted [0m[1m[0m


# Create buyer target

In [5]:
# Keep only the buyer's measured series for the training and test windows.
df_train = pd.DataFrame(df_train['measured'])
df_test = pd.DataFrame(df_test['measured'])

# Mask the test window: its measurements are unknown at forecast time.
# np.nan (not None) keeps the column float, so the concat below does not rely on
# pandas' deprecated special-casing of all-NA object columns and the resulting
# target stays numeric for the later np.isnan check.
df_test_nan = df_test.copy()
df_test_nan['measured'] = np.nan

# Quarter-hourly timestamps covering the prediction window (both ends included).
forecast_range = pd.date_range(start_prediction_timestamp, end_prediction_timestamp, freq='15min')

# Buyer target: observed training values followed by the masked test window,
# exposed under the buyer resource column name 'b1r1'.
df_buyer = pd.concat([df_train, df_test_nan], axis=0).rename(columns={'measured': 'b1r1'})
df_buyer.index = pd.to_datetime(df_buyer.index, format='%Y-%m-%d %H:%M:%S', utc=True)
# Optional export of the buyer target:
#df_buyer.to_csv('measurements.csv')

  df_buyer = pd.concat([df_train, df_test_nan], axis=0)


# Create market features

In [6]:
# Training window: place the three forecasters side by side, per quantile.
df_train_ensemble_quantile50 = pd.concat(
    [df_day_ahead_pred_train, df_day_ahead11_pred_train, df_week_ahead_pred_train],
    axis=1,
)
df_train_ensemble_quantile10 = pd.concat(
    [df_day_ahead_q10_train, df_day_ahead11_q10_train, df_week_ahead_q10_train],
    axis=1,
)
df_train_ensemble_quantile90 = pd.concat(
    [df_day_ahead_q90_train, df_day_ahead11_q90_train, df_week_ahead_q90_train],
    axis=1,
)

# Test window: same layout.
df_test_ensemble_quantile50 = pd.concat(
    [df_day_ahead_pred_test, df_day_ahead11_pred_test, df_week_ahead_pred_test],
    axis=1,
)
df_test_ensemble_quantile10 = pd.concat(
    [df_day_ahead_q10_test, df_day_ahead11_q10_test, df_week_ahead_q10_test],
    axis=1,
)
df_test_ensemble_quantile90 = pd.concat(
    [df_day_ahead_q90_test, df_day_ahead11_q90_test, df_week_ahead_q90_test],
    axis=1,
)

# Stack training rows on top of test rows and name the columns
# <seller>_<quantile>_<buyer resource>.
df_ensemble_quantile50 = pd.concat([df_train_ensemble_quantile50, df_test_ensemble_quantile50], axis=0)
df_ensemble_quantile50.columns = ['s1_q50_b1r1', 's2_q50_b1r1', 's3_q50_b1r1']

df_ensemble_quantile10 = pd.concat([df_train_ensemble_quantile10, df_test_ensemble_quantile10], axis=0)
df_ensemble_quantile10.columns = ['s1_q10_b1r1', 's2_q10_b1r1', 's3_q10_b1r1']

df_ensemble_quantile90 = pd.concat([df_train_ensemble_quantile90, df_test_ensemble_quantile90], axis=0)
df_ensemble_quantile90.columns = ['s1_q90_b1r1', 's2_q90_b1r1', 's3_q90_b1r1']

# Market frame: q50, q10 and q90 column groups side by side on a UTC datetime index.
df_market = pd.concat(
    [df_ensemble_quantile50, df_ensemble_quantile10, df_ensemble_quantile90],
    axis=1,
)
df_market.index = pd.to_datetime(df_market.index, format='%Y-%m-%d %H:%M:%S', utc=True)
# Optional export of the market features:
#df_market.to_csv('forecasts.csv')

# PREDICO ML engine

In [7]:
from source.ml_engine import create_ensemble_forecasts

# Inputs for the ensemble engine: stacking settings, buyer target, sellers'
# forecasts, the timestamps to predict and the buyer's challenge use case.
_engine_kwargs = dict(
    ens_params=ens_params,
    df_buyer=df_buyer,
    df_market=df_market,
    forecast_range=forecast_range,
    challenge_usecase=buyer_params['challenge_usecase'],
    simulation=False,
)
forecasts = create_ensemble_forecasts(**_engine_kwargs)

# Sanity check on the amount of output.
# NOTE(review): 288 is presumably 96 quarter-hour steps x 3 quantiles — TODO confirm.
_n_forecasts = len(forecasts)
assert _n_forecasts == 288, 'The number of forecasts is not correct'

[32m2024-06-25 19:07:16.485[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m40[0m - [1m  [0m
[32m2024-06-25 19:07:16.486[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m41[0m - [1m[38;2;250;128;114m PREDICO Machine Learning Engine [0m[1m [0m
[32m2024-06-25 19:07:16.486[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m42[0m - [1m  [0m
[32m2024-06-25 19:07:16.486[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m43[0m - [1m[38;2;250;128;114m Predictions from 2023-02-20 00:00:00+00:00 to 2023-02-21 00:00:00+00:00 [0m[1m [0m
[32m2024-06-25 19:07:16.486[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m44[0m - [1m  [0m
[32m2024-06-25 19:07:16.487[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m46[0m - [1m[38;2;25

In [8]:
import pickle

buyer_resource_name = 'b1r1'

# Load the information stored for this buyer resource for the previous day.
# The path is derived from `buyer_resource_name` so the two cannot drift apart.
# NOTE(security): pickle.load can execute arbitrary code — only load files
# produced by a trusted run of this project.
with open(f'info_model/{buyer_resource_name}_previous_day.pickle', 'rb') as previous_day_file:
    results = pickle.load(previous_day_file)

# The stored 'previous_lt' timestamp is shifted back one day to get the start of
# the previous day; adding one day again gives its end.
previous_day_lt = pd.to_datetime(results['previous_lt']) - pd.DateOffset(days=1)
current_day_lt = previous_day_lt + pd.DateOffset(days=1)

# NOTE(review): as written, both sides are derived from `previous_day_lt`
# (current_day_lt - 1 day == previous_day_lt), so this assert compares a value
# with itself — confirm which two dates were meant to be checked.
previous_date = previous_day_lt.date().strftime('%Y-%m-%d')
current_day = (current_day_lt - pd.DateOffset(days=1)).strftime('%Y-%m-%d')
assert previous_date == current_day, 'The date is not correct'

# create the forecast range for the previous day:
# quarter-hourly labels from start to end, with the end boundary dropped ([:-1]).
# NOTE(review): strftime drops any timezone information and the labels are parsed
# back as UTC below — presumably 'previous_lt' is already UTC; verify.
previous_day_forecast_range = pd.date_range(start=previous_day_lt, end=current_day_lt, freq='15min').strftime('%Y-%m-%d %H:%M')[:-1]
assert len(previous_day_forecast_range) == 96, f'The number of timestamps is not correct {len(previous_day_forecast_range)}'

# get the buyer measurements for the previous day (all columns of df_buyer, as a 2-D array)
y_test = df_buyer[df_buyer.index.isin(pd.to_datetime(previous_day_forecast_range, utc=True))].values
# assert nans are not present
assert np.isnan(y_test).sum() == 0, 'There are nans in the buyer measurements'
assert y_test.shape[0] == 96, 'The number of buyer measurements is not correct'

In [9]:
from source.assessment_contributions import compute_forecasters_contributions

# Score each forecaster's contribution for the buyer resource over the previous day.
# `y_test` here is the previous-day slice taken from df_buyer, not df_test['measured'].
results_contributions = compute_forecasters_contributions(
    buyer_resource_name,
    ens_params,
    y_test,
    previous_day_forecast_range,
)


[32m2024-06-25 19:07:22.066[0m | [1mINFO    [0m | [36msource.assessment_contributions[0m:[36mcompute_forecasters_contributions[0m:[36m8[0m - [1mLoad model info from file: ./info_model/b1r1_previous_day.pickle[0m
[32m2024-06-25 19:07:22.075[0m | [1mINFO    [0m | [36msource.assessment_contributions[0m:[36mcompute_forecasters_contributions[0m:[36m10[0m - [1mGet the contributions for the buyer resource: b1r1[0m
[32m2024-06-25 19:07:22.076[0m | [1mINFO    [0m | [36msource.ensemble.stack_generalization.test_importance.first_stage_importance[0m:[36mwind_power_importance[0m:[36m55[0m - [1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[

df_2stage_test_permuted                            predictions  ...  predictions_t-6
datetime                                ...                 
2023-02-19 00:00:00+00:00     0.002640  ...         0.000000
2023-02-19 00:15:00+00:00     0.000000  ...         0.000000
2023-02-19 00:30:00+00:00     0.000000  ...        -0.007709
2023-02-19 00:45:00+00:00     0.000000  ...        -0.000665
2023-02-19 01:00:00+00:00     0.000000  ...        -0.000243
...                                ...  ...              ...
2023-02-19 22:45:00+00:00     0.000000  ...         0.027678
2023-02-19 23:00:00+00:00    -0.003232  ...         0.140722
2023-02-19 23:15:00+00:00    -0.000813  ...         0.047618
2023-02-19 23:30:00+00:00     0.000807  ...         0.045542
2023-02-19 23:45:00+00:00     0.000000  ...         0.011979

[96 rows x 8 columns]
permutation_score 0.00600780242294192
base_score 0.00600780242294192

df_2stage_test_permuted                            predictions  ...  predictions_t-6
datet

[32m2024-06-25 19:07:30.047[0m | [1mINFO    [0m | [36msource.ensemble.stack_generalization.test_importance.second_stage_importance[0m:[36mwind_power_ramp_importance[0m:[36m94[0m - [1m[34mQuantile: 0.9[0m[1m[0m


df_2stage_test_permuted                            predictions  ...  predictions_t-6
datetime                                ...                 
2023-02-19 00:00:00+00:00     0.002640  ...         0.000000
2023-02-19 00:15:00+00:00     0.000000  ...         0.000000
2023-02-19 00:30:00+00:00     0.000000  ...        -0.007709
2023-02-19 00:45:00+00:00     0.000000  ...        -0.000665
2023-02-19 01:00:00+00:00     0.000000  ...        -0.000243
...                                ...  ...              ...
2023-02-19 22:45:00+00:00     0.000000  ...         0.027678
2023-02-19 23:00:00+00:00    -0.003232  ...         0.140722
2023-02-19 23:15:00+00:00    -0.000813  ...         0.047618
2023-02-19 23:30:00+00:00     0.000807  ...         0.045542
2023-02-19 23:45:00+00:00     0.000000  ...         0.011979

[96 rows x 8 columns]
permutation_score 0.00600780242294192
base_score 0.00600780242294192

df_2stage_test_permuted                            predictions  ...  predictions_t-6
datet

[32m2024-06-25 19:07:31.256[0m | [1mINFO    [0m | [36msource.ensemble.stack_generalization.test_importance.second_stage_importance[0m:[36mwind_power_ramp_importance[0m:[36m94[0m - [1m[34mQuantile: 0.5[0m[1m[0m


df_2stage_test_permuted                            predictions  ...  predictions_t-6
datetime                                ...                 
2023-02-19 00:00:00+00:00     0.002640  ...         0.000000
2023-02-19 00:15:00+00:00     0.000000  ...         0.000000
2023-02-19 00:30:00+00:00     0.000000  ...        -0.007709
2023-02-19 00:45:00+00:00     0.000000  ...        -0.000665
2023-02-19 01:00:00+00:00     0.000000  ...        -0.000243
...                                ...  ...              ...
2023-02-19 22:45:00+00:00     0.000000  ...         0.027678
2023-02-19 23:00:00+00:00    -0.003232  ...         0.140722
2023-02-19 23:15:00+00:00    -0.000813  ...         0.047618
2023-02-19 23:30:00+00:00     0.000807  ...         0.045542
2023-02-19 23:45:00+00:00     0.000000  ...         0.011979

[96 rows x 8 columns]
permutation_score 0.0010267492719182433
base_score 0.0010267492719182433

df_2stage_test_permuted                            predictions  ...  predictions_t-6
d

In [10]:
# Display the contributions mapping returned by compute_forecasters_contributions.
results_contributions

defaultdict(dict,
            {'wind_power': {0.1: {'s1': 0.5591802888226757,
               's2': 0.44008723739131594,
               's3': 0.0007324737860083989},
              0.9: {'s1': 0.0, 's2': 0.0, 's3': 1.0},
              0.5: {'s1': 0.008912671038444484,
               's2': 0.9910873289615556,
               's3': 0.0}},
             'wind_power_ramp': {0.1: {'s1': 0.0, 's2': 0.0, 's3': 0.0},
              0.9: {'s1': 0.0, 's2': 0.0, 's3': 0.0},
              0.5: {'s1': 0.0, 's2': 0.0, 's3': 0.0}}})