In [51]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from models.create_dataset import create_dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# lasso model
from epftoolbox.models import LEAR
from sklearn.linear_model import Lasso
import logging
import os


In [52]:

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


In [53]:
# load data
target_col = 'DK1_price'
data = create_dataset(target_col=target_col)
# split into X and y
X = data.drop(target_col, axis=1)
# apparently all variables need to be named Exogenous 1, 2, 3, etc.
X.columns = ['Exogenous ' + str(i) for i in range(1, X.shape[1] + 1)]

y = data[target_col]
# rename to 'Price' for some reason for this toolbox to work
y = y.rename('Price')

['NO_Solar', 'NO_Wind Offshore', 'NO_Solar_lag_1', 'NO_Solar_lag_2', 'NO_Solar_lag_3', 'NO_Solar_lag_7', 'NO_Wind Offshore_lag_1', 'NO_Wind Offshore_lag_2', 'NO_Wind Offshore_lag_3', 'NO_Wind Offshore_lag_7']


In [58]:
# split into train and test
# set start time as first time in index where hour is 0

start_time = X[X.index.hour == 0].index[0]
test_cutoff = pd.to_datetime('2021-01-01 00:00')
X = X[X.index >= start_time]
y = y[y.index >= start_time]

# subtract one hour
X_train, X_test = X[X.index < test_cutoff], X[X.index >= test_cutoff]
y_train, y_test = y[y.index < test_cutoff], y[y.index >= test_cutoff]

In [59]:
# split into train and validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, shuffle=False)

In [60]:
start_time
#pd.to_datetime('2021-01-01 00:00')

Timestamp('2015-03-04 00:00:00')

In [61]:
# fit Lear model
calibration_window = 364 * 4  # 4 years
day_range = pd.date_range(start=test_cutoff, end=y.index[-1], freq='D')
preds = pd.DataFrame(index=y_test.index, columns=['pred'])

model = LEAR(calibration_window=calibration_window)

forecast = pd.DataFrame(index=X_test.index[::24], columns=['h' + str(k) for k in range(24)])
forecast_dates = forecast.index
# rename to 'Price' for some reason for this toolbox to work
target_col = 'Price'




for i, day in enumerate(day_range):
    if day.day == 1:
        logger.info(f'Predicting day {day} ({i+1}/{len(day_range)})')

    # get train data
    X_train = X[X.index < day].values
    y_train = y[y.index < day].values
    # reshape
    y_train = y_train.reshape(-1, 24)
    X_train = X_train.reshape(-1, 24)
    # add newaxis
    #y_train = y_train[:, np.newaxis]
    X_test = X[(X.index >= day) & (X.index < day + pd.Timedelta(days=1))].values
    #X_test = X_test.reshape(-1, 24)
    # to numpy
    #X_train = X_train.values
    # calibrate and predict
    pred = model.recalibrate_predict(X_train, y_train, X_test)
    preds.loc[X_test.index, 'pred'] = pred


2023-05-01 15:29:21,448 - __main__ - INFO - Predicting day 2021-01-01 00:00:00 (1/730)


ValueError: cannot reshape array of size 5009760 into shape (1,24)

In [None]:
for date in forecast_dates:
    print(date)
    # For simulation purposes, we assume that the available data is
    # the data up to current date where the prices of current date are not known
    #data_available = X.loc[:date + pd.Timedelta(hours=23), :]
    #data_available = pd.concat([df_train, df_test.loc[:date + pd.Timedelta(hours=23), :]], axis=0)


    # We set the real prices for current date to NaN in the dataframe of available data
    # data_available.loc[date:date + pd.Timedelta(hours=23), target_col] = np.NaN
    X_train = X[X.index < date]
    y_train = y[y.index < date]
    X_test = X[(X.index >= date) & (X.index < date + pd.Timedelta(days=1))]
    # Recalibrating the model with the most up-to-date available data and making a prediction
    # for the next day
    # Yp = model.recalibrate_and_forecast_next_day(df=data_available, next_day_date=date,
    #                                              calibration_window=calibration_window)
    # Saving the current prediction
    Yp = model.recalibrate_predict(X_train, y_train, X_test)
    forecast.loc[date, :] = Yp

    # Computing metrics up-to-current-date
    #mae = np.mean(MAE(forecast.loc[:date].values.squeeze(), real_values.loc[:date].values))
    #smape = np.mean(sMAPE(forecast.loc[:date].values.squeeze(), real_values.loc[:date].values)) * 100

    # Pringint information
    #print('{} - sMAPE: {:.2f}%  |  MAE: {:.3f}'.format(str(date)[:10], smape, mae))

    # Saving forecast
    #forecast.to_csv(forecast_file_path)