### Data Loading

In [75]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [76]:
%run C:\Users\CTLGNN00C\Documents\GitHub\epftoolbox\epftoolbox\data\_datasets.py
%run C:\Users\CTLGNN00C\Documents\GitHub\epftoolbox\epftoolbox\models\_lear.py

In [77]:
path = "C:/Users/CTLGNN00C/Documents/ENERGY/Progetto MGP"

In [78]:
train_df, test_df = read_data(path, dataset = "FR") # Grouped dataframes (by date)

## Pre-processing

In [79]:
# Check indice iniziale
if train_df.index[0].hour != 0 or test_df.index[0].hour != 0:
    print('Problem with the index')

In [80]:
# Numero di variabili esogene (tutte tranne il prezzo)
n_exogenous_inputs = len(train_df.columns) - 1

In [81]:
# Features: 96 prices + n_exogenous * (24 * 3 exogeneous) + 7 weekday dummies
# Price lags: D-1, D-2, D-3, D-7
# Exogeneous inputs lags: D, D-1, D-7

n_features = 96 + 7 + n_exogenous_inputs * 72

In [82]:
# Dato che compare il lag D-7 dobbiamo scartare almeno la prima settimana

indexTrain = train_df.loc[train_df.index[0] + pd.Timedelta(weeks=1):].index

In [83]:
date_test = None

if date_test is None:
    indexTest = test_df.loc[test_df.index[0] + pd.Timedelta(weeks=1):].index
else:
    indexTest = test_df.loc[date_test:date_test + pd.Timedelta(hours=23)].index


In [84]:
# Seleziono un timestamp ogni 24 -> un valore giornaliero
predDatesTrain = indexTrain.round('1h')[::24]
predDatesTest = indexTest.round('1h')[::24]

In [85]:
# Creiamo due datasets con tante colonne quante ore del giorno

indexTrain = pd.DataFrame(index=predDatesTrain, columns=['h' + str(hour) for hour in range(24)])
indexTest = pd.DataFrame(index=predDatesTest, columns=['h' + str(hour) for hour in range(24)])
for hour in range(24):
    indexTrain.loc[:, 'h' + str(hour)] = indexTrain.index + pd.Timedelta(hours=hour)
    indexTest.loc[:, 'h' + str(hour)] = indexTest.index + pd.Timedelta(hours=hour)

In [108]:
# Preallocamento della memoria
X_train = np.zeros([indexTrain.shape[0], n_features])
X_test = np.zeros([indexTest.shape[0], n_features])
Y_train = np.zeros([indexTrain.shape[0], 24])

In [109]:
feature_index = 0

In [110]:
# Aggiungere i prezzi storici nei giorni D-1, D-2, D-3 e D-7


for hour in range(24):
    # Lag
    for past_day in [1, 2, 3, 7]:

        # definiamo gli indici temporali utilizzando dataframes di appoggio
        pastIndexTrain = pd.to_datetime(indexTrain.loc[:, 'h' + str(hour)]) - pd.Timedelta(hours=24 * past_day)
        pastIndexTest = pd.to_datetime(indexTest.loc[:, 'h' + str(hour)].values) - pd.Timedelta(hours=24 * past_day)

        # Prezzi storici nei giorni passati all'ora h
        X_train[:, feature_index] = train_df.loc[pastIndexTrain, 'Price']
        X_test[:, feature_index] = test_df.loc[pastIndexTest, 'Price']
        feature_index += 1

In [111]:
X_train

array([[36.538, 25.206, 31.041, ...,  0.   ,  0.   ,  0.   ],
       [21.324, 36.538, 25.206, ...,  0.   ,  0.   ,  0.   ],
       [22.727, 21.324, 36.538, ...,  0.   ,  0.   ,  0.   ],
       ...,
       [48.76 , 43.36 , 49.95 , ...,  0.   ,  0.   ,  0.   ],
       [53.48 , 48.76 , 43.36 , ...,  0.   ,  0.   ,  0.   ],
       [38.8  , 53.48 , 48.76 , ...,  0.   ,  0.   ,  0.   ]])

In [112]:
# Variabili esogene nei giorni D, D-1,  D-7

for hour in range(24):
    for past_day in [1, 7]:
        
        for exog in range(1, n_exogenous_inputs + 1):

            # Definying the corresponding past time indexs using the auxiliary dataframses 
            pastIndexTrain = pd.to_datetime(indexTrain.loc[:, 'h' + str(hour)].values) - \
                pd.Timedelta(hours=24 * past_day)
            pastIndexTest = pd.to_datetime(indexTest.loc[:, 'h' + str(hour)].values) - \
                pd.Timedelta(hours=24 * past_day)

            # Including the exogenous input at day D-past_day and hour "h" 
            X_train[:, feature_index] = train_df.loc[pastIndexTrain, 'Exogenous ' + str(exog)]                    
            X_test[:, feature_index] = test_df.loc[pastIndexTest, 'Exogenous ' + str(exog)]
            feature_index += 1

    # For each of the exogenous inputs we include feature if feature selection indicates it
    for exog in range(1, n_exogenous_inputs + 1):
        
        # Definying the corresponding future time indexs using the auxiliary dataframses 
        futureIndexTrain = pd.to_datetime(indexTrain.loc[:, 'h' + str(hour)].values)
        futureIndexTest = pd.to_datetime(indexTest.loc[:, 'h' + str(hour)].values)

        # Including the exogenous input at day D and hour "h" 
        X_train[:, feature_index] = train_df.loc[futureIndexTrain, 'Exogenous ' + str(exog)]        
        X_test[:, feature_index] = test_df.loc[futureIndexTest, 'Exogenous ' + str(exog)] 
        feature_index += 1

In [124]:
# Aggiungiamo le variabily dummy

for dayofweek in range(7):
    X_train[indexTrain.index.dayofweek == dayofweek, feature_index] = 1
    X_test[indexTest.index.dayofweek == dayofweek, feature_index] = 1
    feature_index += 1

    # Estrazione dei valori predetti di y
    for hour in range(24):
        # Definizione degli indici di tempo all'ora h
        futureIndexTrain = pd.to_datetime(indexTrain.loc[:, 'h' + str(hour)].values)
        futureIndexTest = pd.to_datetime(indexTest.loc[:, 'h' + str(hour)].values)

        # Estrazione dei valori di Y basandoci sugli indici di tempo
        Y_train[:, hour] = train_df.loc[futureIndexTrain, 'Price']

In [134]:
model = LEAR(calibration_window=364*3)

model.recalibrate(X_train, Y_train)

In [135]:
model.predict(X_test)

ValueError: setting an array element with a sequence.