In [1]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import sklearn
import sktime
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, MultiTaskLasso, Ridge
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error, root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.utils.plotting import plot_series

In [30]:
# Columns of the processed dataset that are left out of the model's input
# features (see get_windows, which keeps every column NOT listed here).
excluded_columns = [
    'Total Load', 'Difference with previous load', 'Datetime', 'Resolution code',
    'Total Load Persistence', 'Most recent forecast', 'Most recent P10',
    'Most recent P90', 'Day-ahead 6PM forecast', 'Day-ahead 6PM P10',
    'Day-ahead 6PM P90', 'Week-ahead forecast',
    'Total Load Interpolated Persistence',
    'Difference with previous load interpolated',
]


def get_windows(y, cv, shift):
    """Build flattened (input, target) window pairs from a splitter.

    Parameters
    ----------
    y : pandas.DataFrame
        Must contain a "Total Load Interpolated" column. Every column that is
        not listed in ``excluded_columns`` is used as an input feature.
    cv : splitter
        Object whose ``split(series)`` yields ``(train, test)`` pairs of
        positional (iloc) integer indices, as sktime splitters do.
    shift : int
        Only splits whose running number is a multiple of ``shift`` are kept
        (split 0, ``shift``, ``2 * shift``, ...).

    Returns
    -------
    train_windows : list of 1-D numpy.ndarray
        For each kept split, the feature rows of the training window
        flattened row by row.
    test_windows : list of 1-D numpy.ndarray
        For each kept split, the "Total Load Interpolated" values at the
        test positions.
    """
    target = y["Total Load Interpolated"]
    # Loop-invariant: select the feature columns once instead of per split.
    features = y.loc[:, ~y.columns.isin(excluded_columns)]

    train_windows = []
    test_windows = []
    for i, (train, test) in enumerate(cv.split(target)):
        if i % shift:
            continue
        # The splitter returns positions, not labels, so index with .iloc;
        # .loc only worked when the frame had a default 0..n-1 index.
        train_windows.append(features.iloc[train].to_numpy().flatten())
        test_windows.append(target.iloc[test].to_numpy())
    return train_windows, test_windows

In [31]:
# Keep only the last 24 * 4 * 365 * 1.5 rows of the file
# (presumably 1.5 years at four samples per hour -- confirm against the data resolution).
n_recent_rows = int(24 * 4 * 365 * 1.5)
recent_rows = pd.read_csv("Processed_data.csv").iloc[-n_recent_rows:].reset_index()
# reset_index() inserts the old row labels as a new first column; drop it together
# with the column that follows it (the first column read from the CSV).
data = recent_rows.drop(columns=[recent_rows.columns[0], recent_rows.columns[1]])

In [32]:
# Columns that remain once `excluded_columns` is removed; get_windows applies
# the same mask to pick the model's input features.
data.columns[~data.columns.isin(excluded_columns)]

Index(['Total Load Interpolated', 'Year', 'Month', 'Day', 'Hour', 'Minute',
       'Season', 'Lockdown'],
      dtype='object')

In [33]:
window_length = 12  # number of consecutive rows in each training window
horizon = 4  # number of steps ahead that are predicted per window
# Relative forecasting horizon: steps 1, 2, ..., horizon after each window.
fh = ForecastingHorizon(list(range(1, horizon+1)))
cv = SlidingWindowSplitter(window_length=window_length, fh=fh)
# Number of (train, test) splits the splitter produces for `data`.
n_splits = cv.get_n_splits(data)

In [34]:
# Passing window_length as `shift` keeps only every window_length-th split
# (get_windows skips splits whose running number is not a multiple of `shift`).
x, y = get_windows(data, cv, window_length)

In [35]:
# Length of one flattened input sample (window rows x feature columns).
x[0].shape

(96,)

In [36]:
# Target of the first sample: the "Total Load Interpolated" values at the forecast steps.
y[0]

array([10281.1 , 10283.35, 10316.97, 10106.46])

In [37]:
# shuffle=False: the split is taken in order, so the last 20% of the windows form the test set.
# Each part comes back as a list of 1-D arrays and is stacked into a 2-D array.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(x, y, test_size=0.2, shuffle=False)
)

In [38]:
def err(Y_test, y_hat, horizon):
    """Tabulate per-step forecast errors.

    Parameters
    ----------
    Y_test : 2-D array
        True values; column ``k`` holds forecast step ``k``.
    y_hat : 2-D array
        Predictions, laid out like ``Y_test``.
    horizon : int
        Number of leading columns (forecast steps) to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per metric (RMSE, MAPE, MAE, MSE, in that order) and one
        column per forecast step ``0 .. horizon - 1``.
    """
    metrics = (
        ("RMSE", root_mean_squared_error),
        ("MAPE", mean_absolute_percentage_error),
        ("MAE", mean_absolute_error),
        ("MSE", mean_squared_error),
    )
    errors = pd.DataFrame()
    for label, metric in metrics:
        # Score every forecast step separately by comparing matching columns.
        errors[label] = [
            metric(Y_test[:, step], y_hat[:, step]) for step in range(horizon)
        ]
    # Transpose so metrics are rows and forecast steps are columns.
    return errors.T

In [39]:
# Baseline: ordinary least squares fitted on all forecast steps at once.
# LinearRegression is imported in the notebook's imports cell.
# NOTE(review): the variable is called `tree` although this is a linear model;
# the name is kept because the notebook reuses it for every model.
tree = LinearRegression(n_jobs=-1).fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,69.004777,116.621411,166.681169,212.46818
MAPE,0.006466,0.011053,0.015525,0.019672
MAE,54.862614,93.387215,131.284057,166.378303
MSE,4761.659305,13600.553556,27782.612079,45142.727462


In [40]:
# Linear model with an L2 penalty. Ridge is imported in the notebook's imports cell.
# NOTE(review): alpha=10000 looks hand-picked; no tuning is visible here.
# NOTE(review): `tree` is a misnomer for a linear model; name kept for
# consistency with the rest of the notebook.
tree = Ridge(alpha=10000).fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,68.962177,115.427064,162.833779,207.887748
MAPE,0.006463,0.010947,0.015201,0.019324
MAE,54.853652,92.609016,128.780128,163.808427
MSE,4755.78182,13323.407016,26514.839693,43217.315672


In [41]:
# Random forests are stochastic (bootstrap samples, random feature choice).
# A fixed seed makes the error table repeatable on Restart & Run All.
tree = RandomForestRegressor(n_jobs=-1, random_state=42)
tree = tree.fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,73.992294,112.220308,146.610078,176.878433
MAPE,0.006888,0.010544,0.013726,0.016505
MAE,58.369891,88.927378,116.160038,140.199541
MSE,5474.859581,12593.397554,21494.515089,31285.979986


# L1/L2-regularised linear model fitted jointly on all forecast steps.
# MultiTaskLasso is imported in the notebook's imports cell.
# max_iter is raised to 10000 to give the solver more iterations to converge.
tree = MultiTaskLasso(max_iter=10000).fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Take the vector of errors and compute its mean and autocorrelation (-> 0 and higher possible).

# Multi-layer perceptron. MLPRegressor is imported in the notebook's imports cell.
# Weight initialisation and batch shuffling are random, so the seed is fixed
# to make the error table repeatable on Restart & Run All.
# NOTE(review): the inputs are passed unscaled; scikit-learn's MLP guide
# recommends standardising features -- consider adding a scaler.
tree = MLPRegressor(max_iter=10000, random_state=42)
tree = tree.fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)