In [10]:
import numpy as np
from loguru import logger
import pandas as pd
from source.utils.file_read import process_and_concat_files 
from source.utils.file_read import filter_df
from source.utils.generate_timestamp import generate_timestamps
from source.forecasters.deterministic import create_day_ahead_predictions, create_day_ahead_11_predictions, create_week_ahead_predictions
from source.forecasters.probabilistic import create_day_ahead_quantiles10, create_day_ahead_11_quantiles10, create_week_ahead_quantiles10
from source.forecasters.probabilistic import create_day_ahead_quantiles90, create_day_ahead_11_quantiles90, create_week_ahead_quantiles90

In [11]:
from config.PREDICO_setting import Simulation, WeightedAvg, Stack, Buyer

sim_params = Simulation.testing_period
weight_avg_params = WeightedAvg.params
ens_params = Stack.params
buyer_params = Buyer.params

# Read Files

In [12]:
# set random seed
np.random.seed(42)

files = [sim_params['file_0'], sim_params['file_1'], sim_params['file_2'], sim_params['file_3'], sim_params['file_4']]
df = process_and_concat_files(files)

# get the maximum capacity
maximum_capacity = df.measured.max()

df_filtered = filter_df(df, sim_params['forecasts_col'], sim_params['measured_col'])

# Forecasters

In [13]:
# loss quantile gradient boosting regressor
lst_rmse_gbr_ensemble = []
# loss equal weights scheme
lst_rmse_equal_weights = []
# loss weighted average scheme
lst_rmse_weighted_avg = []
# loss baseline day ahead
lst_rmse_baseline_dayahead = []
# loss baseline day ahead 11
lst_rmse_baseline_dayahead11h = []
# loss baseline week ahead
lst_rmse_baseline_week_ahead = []

# loss var gradient boosting regressor
lst_rmse_var_gbr_ensemble = []
# loss var equal weights scheme
lst_rmse_var_equal_weights = []
# loss var weighted average scheme
lst_rmse_var_weighted_avg = []
# loss var baseline day ahead
lst_rmse_var_baseline_dayahead = []
# loss var baseline day ahead 11
lst_rmse_var_baseline_dayahead11h = []
# loss var baseline week ahead
lst_rmse_var_baseline_week_ahead = []

# loss quantile gradient boosting regressor
lst_pb_gbr_ensemble_q10 = []
lst_pb_gbr_ensemble_q90 = []
# loss equal weights scheme
lst_pb_weighted_avg_q10 = []
lst_pb_weighted_avg_q90 = []
# loss weighted average scheme
lst_pb_equal_weights_q10 = []
lst_pb_equal_weights_q90 = []
# loss baseline day ahead
lst_pb_dayahead_q10 = []
lst_pb_dayahead_q90  = []
# loss baseline day ahead 11
lst_pb_dayahead_11h_q10 = []
lst_pb_dayahead_11h_q90 = []
# loss baseline week ahead
lst_pb_week_ahead_q10 = []
lst_pb_week_ahead_q90 = []


i = 0  # index of the testing period

# generate timestamps train and prediction
start_training_timestamp, end_training_timestamp, start_prediction_timestamp, end_prediction_timestamp = generate_timestamps(sim_params['start_training'], i, sim_params['window_size'])

logger.info(' ')
logger.opt(colors = True).info('<blue>-------------------------------------------------------------------------------------------</blue>')
logger.opt(colors = True).info(f'<blue>Start prediction: {start_prediction_timestamp} - End prediction: {end_prediction_timestamp}</blue>')

df_train = df_filtered[df_filtered.index.to_series().between(start_training_timestamp, end_training_timestamp)].iloc[:-1,:]
df_test = df_filtered[df_filtered.index.to_series().between(start_prediction_timestamp, end_prediction_timestamp)].iloc[:-1,:]

logger.info(' ')
logger.opt(colors = True).info('<blue> -----------------> Forecasters prediction submitted </blue>')

# forecaster - day ahead forecast
df_day_ahead_pred_train = create_day_ahead_predictions(df_train)
df_day_ahead_pred_test = create_day_ahead_predictions(df_test)

# forecaster - day ahead 11 forecast
df_day_ahead11_pred_train = create_day_ahead_11_predictions(df_train)
df_day_ahead11_pred_test = create_day_ahead_11_predictions(df_test)

# forecaster - week ahead forecast
df_week_ahead_pred_train = create_week_ahead_predictions(df_train)
df_week_ahead_pred_test = create_week_ahead_predictions(df_test)

# forecaster - day ahead quantile-10
df_day_ahead_q10_train = create_day_ahead_quantiles10(df_train)
df_day_ahead_q10_test = create_day_ahead_quantiles10(df_test)

# forecaster - day ahead 11 quantile-10
df_day_ahead11_q10_train = create_day_ahead_11_quantiles10(df_train)
df_day_ahead11_q10_test = create_day_ahead_11_quantiles10(df_test)

# forecaster - week ahead quantile-10
df_week_ahead_q10_train = create_week_ahead_quantiles10(df_train)
df_week_ahead_q10_test = create_week_ahead_quantiles10(df_test)

# forecaster - day ahead quantile-90
df_day_ahead_q90_train = create_day_ahead_quantiles90(df_train)
df_day_ahead_q90_test = create_day_ahead_quantiles90(df_test)

# forecaster - day ahead 11 quantile-90
df_day_ahead11_q90_train = create_day_ahead_11_quantiles90(df_train)
df_day_ahead11_q90_test = create_day_ahead_11_quantiles90(df_test)

# forecaster - week ahead quantile-90
df_week_ahead_q90_train = create_week_ahead_quantiles90(df_train)
df_week_ahead_q90_test = create_week_ahead_quantiles90(df_test)

[32m2024-06-21 11:44:38.621[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1m [0m
[32m2024-06-21 11:44:38.622[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m53[0m - [1m[34m-------------------------------------------------------------------------------------------[0m[1m[0m
[32m2024-06-21 11:44:38.622[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m54[0m - [1m[34mStart prediction: 2023-02-23 00:00:00+00:00 - End prediction: 2023-02-24 00:00:00+00:00[0m[1m[0m
[32m2024-06-21 11:44:38.625[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1m [0m
[32m2024-06-21 11:44:38.625[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m60[0m - [1m[34m -----------------> Forecasters prediction submitted [0m[1m[0m


# create buyer target

In [14]:
df_train = pd.DataFrame(df_train['measured'])
df_test = pd.DataFrame(df_test['measured'])
df_test['measured'] = [None for i in range(len(df_test))]
forecast_range = pd.date_range(start_prediction_timestamp, end_prediction_timestamp,freq='15min')
df_buyer = pd.concat([df_train, df_test], axis=0)
df_buyer['b1r1'] = df_buyer['measured']
df_buyer.drop(columns=['measured'], inplace=True)
df_buyer.index = pd.to_datetime(df_buyer.index, format = '%Y-%m-%d %H:%M:%S', utc=True)
#df_buyer.to_csv('measurements.csv')

  df_buyer = pd.concat([df_train, df_test], axis=0)


In [15]:
df_buyer.index < forecast_range[0]

array([ True,  True,  True, ..., False, False, False])

# create market features

In [16]:
df_train_ensemble_quantile50 = pd.concat([df_day_ahead_pred_train, df_day_ahead11_pred_train, df_week_ahead_pred_train], axis=1)
df_test_ensemble_quantile50 = pd.concat([df_day_ahead_pred_test, df_day_ahead11_pred_test, df_week_ahead_pred_test], axis=1)
df_ensemble_quantile50 = pd.concat([df_train_ensemble_quantile50, df_test_ensemble_quantile50], axis=0)

df_train_ensemble_quantile10 = pd.concat([df_day_ahead_q10_train, df_day_ahead11_q10_train, df_week_ahead_q10_train], axis=1)
df_test_ensemble_quantile10 = pd.concat([df_day_ahead_q10_test, df_day_ahead11_q10_test, df_week_ahead_q10_test], axis=1)
df_ensemble_quantile10 = pd.concat([df_train_ensemble_quantile10, df_test_ensemble_quantile10], axis=0)

df_train_ensemble_quantile90 = pd.concat([df_day_ahead_q90_train, df_day_ahead11_q90_train, df_week_ahead_q90_train], axis=1)
df_test_ensemble_quantile90 = pd.concat([df_day_ahead_q90_test, df_day_ahead11_q90_test, df_week_ahead_q90_test], axis=1)
df_ensemble_quantile90 = pd.concat([df_train_ensemble_quantile90, df_test_ensemble_quantile90], axis=0)

df_ensemble_quantile50.columns = ['s1_q50_b1r1', 's2_q50_b1r1', 's3_q50_b1r1']
df_ensemble_quantile10.columns = ['s1_q10_b1r1', 's2_q10_b1r1', 's3_q10_b1r1']
df_ensemble_quantile90.columns = ['s1_q90_b1r1', 's2_q90_b1r1', 's3_q90_b1r1']

df_market = pd.concat([df_ensemble_quantile50, df_ensemble_quantile10, df_ensemble_quantile90], axis=1)
df_market.index = pd.to_datetime(df_market.index, format = '%Y-%m-%d %H:%M:%S', utc=True)
#df_market.to_csv('forecasts.csv')

# PREDICO ML engine

In [17]:
from source.ml_engine import create_ensemble_forecasts


forecasts = create_ensemble_forecasts(ens_params=ens_params,
                                    df_buyer=df_buyer, 
                                    df_market=df_market,
                                    forecast_range=forecast_range,
                                    challenge_usecase=buyer_params['challenge_usecase'],
                                    simulation=False)

assert len(forecasts) == 288, 'The number of forecasts is not correct'

[32m2024-06-21 11:44:38.863[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m37[0m - [1m  [0m
[32m2024-06-21 11:44:38.864[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m38[0m - [1m[38;2;250;128;114m PREDICO Machine Learning Engine [0m[1m [0m
[32m2024-06-21 11:44:38.864[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m39[0m - [1m  [0m
[32m2024-06-21 11:44:38.864[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m40[0m - [1m[38;2;250;128;114m Predictions from 2023-02-23 00:00:00+00:00 to 2023-02-24 00:00:00+00:00 [0m[1m [0m
[32m2024-06-21 11:44:38.864[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m41[0m - [1m  [0m
[32m2024-06-21 11:44:38.865[0m | [1mINFO    [0m | [36msource.ml_engine[0m:[36mcreate_ensemble_forecasts[0m:[36m43[0m - [1m[38;2;25

In [23]:
forecasts

Unnamed: 0,datetime,variable,value
0,2023-02-23 00:00:00+00:00,q10_b1r1,0.000000
1,2023-02-23 00:15:00+00:00,q10_b1r1,0.000000
2,2023-02-23 00:30:00+00:00,q10_b1r1,0.000000
3,2023-02-23 00:45:00+00:00,q10_b1r1,0.001486
4,2023-02-23 01:00:00+00:00,q10_b1r1,0.003178
...,...,...,...
283,2023-02-23 22:45:00+00:00,q50_b1r1,0.054544
284,2023-02-23 23:00:00+00:00,q50_b1r1,0.033063
285,2023-02-23 23:15:00+00:00,q50_b1r1,0.022686
286,2023-02-23 23:30:00+00:00,q50_b1r1,0.003158


In [21]:
import pickle

buyer_resource_name = 'b1r1'

with open('info_model/b1r1_previous_day.pickle', 'rb') as previous_day_file:
    results = pickle.load(previous_day_file)

previous_day_lt = pd.to_datetime(results['previous_lt']) - pd.DateOffset(days=1)
previous_date = previous_day_lt.date().strftime('%Y-%m-%d')
current_day_lt = previous_day_lt + pd.DateOffset(days=1)
current_day = (current_day_lt - pd.DateOffset(days=1)).strftime('%Y-%m-%d')
assert previous_date == current_day, 'The date is not correct'

# create the forecast range for the previous day
previous_day_forecast_range = pd.date_range(start=previous_day_lt, end=current_day_lt, freq='15min').strftime('%Y-%m-%d %H:%M')[:-1]
assert len(previous_day_forecast_range) == 96, f'The number of timestamps is not correct {len(previous_day_forecast_range)}'

# get the buyer measurements for the previous day
y_test = df_buyer[df_buyer.index.isin(previous_day_forecast_range)].values
# assert nans are not present
assert np.isnan(y_test).sum() == 0, 'There are nans in the buyer measurements'

  y_test = df_buyer[df_buyer.index.isin(previous_day_forecast_range)].values


In [22]:
from source.assessment_contributions import compute_forecasters_contributions

results_contributions = compute_forecasters_contributions(buyer_resource_name, ens_params, y_test, previous_day_forecast_range)
results_contributions

[32m2024-06-21 11:44:40.036[0m | [1mINFO    [0m | [36msource.assessment_contributions[0m:[36mcompute_forecasters_contributions[0m:[36m8[0m - [1mLoad model info from file: ./info_model/b1r1_previous_day.pickle[0m
[32m2024-06-21 11:44:40.037[0m | [1mINFO    [0m | [36msource.assessment_contributions[0m:[36mcompute_forecasters_contributions[0m:[36m10[0m - [1mGet the contributions for the buyer resource: b1r1[0m
[32m2024-06-21 11:44:40.037[0m | [1mINFO    [0m | [36msource.ensemble.stack_generalization.test_importance.first_stage_importance[0m:[36mwind_power_importance[0m:[36m55[0m - [1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[34m--[0m[1m[

defaultdict(dict, {})