# Basic Pipeline Example - VST Forecasting

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from nets import SimpleMLPRegressor, CNNRegressor
from lightgbm import LGBMRegressor
from utils import create_time_series_splits, read_df, mape

In [3]:
# Columns handed to create_time_series_splits(exclude_columns=...) below.
# NOTE(review): presumably these are dropped from the model inputs - confirm
# against utils.create_time_series_splits.
excluded_columns = [
    # Raw load column (the target used below is 'Total Load Interpolated').
    'Total Load',
    # Forecast / quantile columns.
    'Most recent forecast',
    'Most recent P10',
    'Most recent P90',
    'Day-ahead 6PM forecast',
    'Day-ahead 6PM P10',
    'Day-ahead 6PM P90',
    'Week-ahead forecast',
    # Calendar fields.
    'Year',
    'Month',
    'Day',
    'Hour',
    'Minute',
    'Season',
    # Remaining flag column.
    'Lockdown',
]

In [8]:
# Number of train/test splits to build; the evaluation loop further down
# iterates over range(num_splits).
num_splits = 5

# Build the splits from the preprocessed CSV. Sizes are written as products so
# the intent stays readable: 7*4*6 days = 24 weeks of training data and
# 7*4 days = 4 weeks of test data per split.
# NOTE(review): the progress bars printed by this cell count 16128 and 2688
# iterations, which would match 168 and 28 days at 96 steps per day, i.e.
# presumably 15-minute resolution - confirm against the data before relying
# on the step-based settings (window of 4*6 steps, horizon of 4 steps).
splits = create_time_series_splits(
    # Source data and column roles.
    data=read_df('./preprocessed_data.csv'),
    target_column='Total Load Interpolated',
    exclude_columns=excluded_columns,
    # Split geometry.
    num_splits=num_splits,
    train_size_days=7 * 4 * 6,
    test_size_days=7 * 4,
    # Windowing, expressed in time steps.
    window_size_steps=4 * 6,
    prediction_horizon_steps=4,
    shifting_steps=1,
    # Reference forecast returned alongside each split (read later as
    # 'ELIA_train' / 'ELIA_test') and the clock time(s) passed for alignment.
    elia_column_to_return='Day-ahead 6PM forecast',
    alignment_times=['18:00'],
)

100%|██████████| 16128/16128 [00:06<00:00, 2402.08it/s]
100%|██████████| 2688/2688 [00:00<00:00, 162852.65it/s]
100%|██████████| 16128/16128 [00:06<00:00, 2379.70it/s]
100%|██████████| 2688/2688 [00:00<00:00, 162299.39it/s]
100%|██████████| 16128/16128 [00:06<00:00, 2427.80it/s]
100%|██████████| 2688/2688 [00:00<00:00, 161965.97it/s]
100%|██████████| 16128/16128 [00:06<00:00, 2402.92it/s]
100%|██████████| 2688/2688 [00:00<00:00, 163713.43it/s]
100%|██████████| 16128/16128 [00:06<00:00, 2428.77it/s]
100%|██████████| 2688/2688 [00:00<00:00, 162878.53it/s]


In [9]:
# Constructor keyword arguments for each model class, keyed by the class's
# __name__; the evaluation loop instantiates each model as
# m(**params[m.__name__]).
# NOTE(review): 'mps' is hard-coded for the two network regressors; this
# presumably selects the Apple-GPU backend and may fail on other machines -
# confirm against nets.py.
params = {
    "LinearRegression": dict(),
    "DecisionTreeRegressor": dict(),
    # MultiOutputRegressor wraps a single-output LightGBM regressor.
    "MultiOutputRegressor": dict(estimator=LGBMRegressor(verbose=-1)),
    "SimpleMLPRegressor": dict(device='mps', verbose=True),
    "CNNRegressor": dict(device='mps', verbose=True),
}

# Model classes (not instances) to evaluate, in reporting order.
models = [
    LinearRegression,
    DecisionTreeRegressor,
    MultiOutputRegressor,
    SimpleMLPRegressor,
    CNNRegressor,
]

In [10]:
# Evaluate every model class on every time-series split and collect one MAPE
# value per (model, split). The ELIA reference forecast is scored against the
# same test targets so it can be compared with the models.
#
# Scaling: inputs and targets are min-max scaled with a single global min/max
# taken over the whole training array (not per column); the test inputs reuse
# the training statistics. Predictions are mapped back to the original target
# scale before scoring, so Y_test is always compared unscaled.

# Step 1 - per-split preparation. The scaled arrays and the ELIA score depend
# only on the split, never on the model, so they are computed once here
# instead of once per model. Defining mapes_ELIA up front also keeps the final
# assignment valid when `models` is empty.
prepared_splits = []
mapes_ELIA = []
for split_idx in range(num_splits):
    split = splits[split_idx]
    X_train = split['X_train'].to_numpy()
    Y_train = split['Y_train'].to_numpy()
    X_test = split['X_test'].to_numpy()
    Y_test = split['Y_test'].to_numpy()

    min_X_train, max_X_train = np.min(X_train), np.max(X_train)
    min_Y_train, max_Y_train = np.min(Y_train), np.max(Y_train)
    X_range = max_X_train - min_X_train
    Y_range = max_Y_train - min_Y_train

    prepared_splits.append({
        'X_train': (X_train - min_X_train) / X_range,
        'Y_train': (Y_train - min_Y_train) / Y_range,
        'X_test': (X_test - min_X_train) / X_range,
        'Y_test': Y_test,
        'Y_min': min_Y_train,
        'Y_range': Y_range,
    })

    # Argument order (forecast first, actual second) follows the model calls
    # below; mape itself is defined in utils.
    mapes_ELIA.append(mape(split['ELIA_test'].to_numpy(), Y_test))

# Step 2 - fit a fresh model instance per split and score it.
errors = {}
for m in models:
    model_errors = []
    for prepared in tqdm(prepared_splits):
        model = m(**params[m.__name__])
        model.fit(prepared['X_train'], prepared['Y_train'])
        # Undo the target scaling so the error is measured in original units.
        Y_pred = (model.predict(prepared['X_test']) * prepared['Y_range']) + prepared['Y_min']
        model_errors.append(mape(Y_pred, prepared['Y_test']))
    errors[m.__name__] = model_errors

# Added last so the ELIA baseline stays the final column of the results table.
errors["ELIA"] = mapes_ELIA

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/200 [00:00<?, ?it/s]

In [11]:
# One column per model (plus the ELIA baseline), one row per split.
results = pd.DataFrame.from_dict(errors)

# Show only the mean and standard deviation of the per-split errors.
summary_rows = ["mean", "std"]
results.describe().loc[summary_rows, :]

Unnamed: 0,LinearRegression,DecisionTreeRegressor,MultiOutputRegressor,SimpleMLPRegressor,CNNRegressor,ELIA
mean,1.252771,1.679625,1.250879,1.225581,1.447011,2.213328
std,0.175231,0.520987,0.139271,0.302114,0.329387,0.367589
