In [1]:
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchmetrics
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.11.4


# Imports

In [2]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from google.colab import files
from tqdm.auto import tqdm
from torchmetrics import MeanAbsolutePercentageError
from datetime import datetime  

# Classes + Helpers

## Data processing

In [19]:
def scale_data(load_df,
               start_train_date,
               end_val_date,
               start_test_date,
               end_test_date):
  """Min-max scale the data using statistics from the train+validation period only.

  Args:
    load_df: DataFrame whose index is compared against the date bounds.
    start_train_date: first index value (inclusive) of the fitting period.
    end_val_date: last index value (inclusive) of the fitting period.
    start_test_date: first index value (inclusive) of the test period.
    end_test_date: last index value (inclusive) of the test period.

  Returns:
    (load_df_scaled, scaler): the scaled fitting-period rows followed by the
    scaled test-period rows, and the fitted MinMaxScaler. Rows that fall in
    neither period are not part of the returned frame.
  """
  timestamps = load_df.index
  in_train_val = (timestamps >= start_train_date) & (timestamps <= end_val_date)
  in_test = (timestamps >= start_test_date) & (timestamps <= end_test_date)

  scaler = MinMaxScaler()
  # Order matters: the scaler is fitted on the first part and only applied to the second.
  stages = (
      (load_df[in_train_val], scaler.fit_transform),
      (load_df[in_test], scaler.transform),
  )
  scaled_parts = []
  for part_df, apply_scaling in stages:
    scaled_parts.append(pd.DataFrame(apply_scaling(part_df),
                                     columns=part_df.columns,
                                     index=part_df.index))

  load_df_scaled = pd.concat(scaled_parts, axis=0)
  return load_df_scaled, scaler

In [20]:
def reframing(X_df, Y_df, n_backwards=1, skip_steps_forward=0, n_forwards=1):
  """Reframe time series as lagged input features and shifted future targets.

  Args:
    X_df: DataFrame of input series; every column is lagged.
    Y_df: named Series to forecast (its ``name`` is used in the target column names).
    n_backwards: number of lagged copies per input column; lags 0 .. n_backwards-1
      are produced and named ``<col>_(t-<lag>)``.
    skip_steps_forward: number of steps skipped after t before the first target.
    n_forwards: exclusive upper bound of the forecast offsets, i.e. targets are built
      for offsets skip_steps_forward+1 .. n_forwards-1 and named ``<name>_(t+<f>)``.

  Returns:
    (X, X_index, Y, Y_index): the lagged feature frame and the target frame, each with
    rows containing NaN removed, together with their respective indexes. The two
    frames are NOT aligned to a common index; callers must intersect the indexes.

  Raises:
    ValueError: if the arguments yield no feature columns or no target columns
      (note that the default arguments produce an empty forecast range).
  """
  forward_steps = range(skip_steps_forward + 1, n_forwards)
  if n_backwards < 1 or len(X_df.columns) == 0:
    raise ValueError(
        "reframing needs at least one input column and n_backwards >= 1 "
        f"(got {len(X_df.columns)} columns, n_backwards={n_backwards})")
  if len(forward_steps) == 0:
    raise ValueError(
        "empty forecast range: n_forwards must be greater than skip_steps_forward + 1 "
        f"(got skip_steps_forward={skip_steps_forward}, n_forwards={n_forwards})")

  # input sequence (t, t-1, ..., t-(n_backwards-1)) for every column
  feat_cols, feat_names = [], []
  for col_name in X_df.columns:
    series = X_df[col_name]
    for b in range(n_backwards):
      feat_cols.append(series.shift(b))
      feat_names.append(f'{col_name}_(t-{b})')

  X = pd.concat(feat_cols, axis=1)
  X.columns = feat_names
  # the first rows have no history for the larger lags -> NaN -> dropped
  X = X.dropna()

  # forecast sequence (t + skip_steps_forward + 1, ..., t + n_forwards - 1)
  target_cols = [Y_df.shift(-f) for f in forward_steps]
  target_names = [
      f'{Y_df.name}_t' if f == 0 else f'{Y_df.name}_(t+{f})'
      for f in forward_steps
  ]

  Y = pd.concat(target_cols, axis=1)
  Y.columns = target_names
  # the last rows have no future values for the larger offsets -> NaN -> dropped
  Y = Y.dropna()

  return X, X.index, Y, Y.index


def reframe_data(load_df_scaled,
                 target,
                 days_back,
                 last_step_forward,
                 last_step_back,
                 skip_steps_forward):
  """Build the supervised-learning frame (lagged inputs + future targets) for the model.

  Every non-target column is replaced by its value ``last_step_forward`` steps ahead
  (named ``<col>_(t+<last_step_forward>)``); the target column and these shifted
  columns are then lagged ``last_step_back`` times and paired with the target values
  at offsets skip_steps_forward+1 .. last_step_forward.

  Args:
    load_df_scaled: scaled DataFrame containing the target and the other columns.
      It is not modified.
    target: name of the target column.
    days_back: unused; kept so existing calls keep working.
    last_step_forward: offset of the last forecast step, also used as the shift
      applied to the non-target columns.
    last_step_back: number of lagged steps per input column.
    skip_steps_forward: number of steps skipped before the first forecast step.

  Returns:
    DataFrame with the lagged feature columns followed by the target columns,
    restricted to the rows for which both are fully available.
  """
  time_weather_cols = load_df_scaled.drop(target, axis=1).columns

  # Assemble a new frame (target first, then the future-shifted columns) instead of
  # adding/dropping columns on the caller's DataFrame, so re-running the calling
  # cell starts from the same input every time.
  shifted_df = load_df_scaled[[target]].copy()
  for col in time_weather_cols:
    shifted_df[f'{col}_(t+{last_step_forward})'] = load_df_scaled[col].shift(-last_step_forward)

  X_orig, X_index, Y_orig, Y_index = reframing(shifted_df,
                                                shifted_df[target],
                                                n_backwards=last_step_back,
                                                skip_steps_forward=skip_steps_forward,
                                                n_forwards=last_step_forward + 1)
  # keep only timestamps that have both a complete history and a complete horizon
  common_index = X_index.intersection(Y_index)
  X_df = X_orig.loc[common_index]
  Y_df = Y_orig.loc[common_index]

  load_df_scaled_reframed = pd.concat([X_df, Y_df], axis=1)

  return load_df_scaled_reframed

In [21]:
def create3Dinput(df, last_step_back):
  """Reshape a 2D lagged-feature frame into a [rows, steps, features] float array.

  The columns of ``df`` are read as consecutive groups of ``last_step_back``
  columns, one group per feature; group ``i`` becomes ``result[:, :, i]``.
  Columns left over after the last complete group are ignored.
  The shape of the result is printed before it is returned.
  """
  n_rows, n_cols = df.shape
  n_features = int(n_cols / last_step_back)
  arr_3d = np.zeros((n_rows, last_step_back, n_features))

  # [rows, features * steps] -> [rows, features, steps] -> [rows, steps, features]
  grouped = df.iloc[:, :n_features * last_step_back].values
  grouped = grouped.reshape(n_rows, n_features, last_step_back)
  arr_3d[...] = grouped.transpose(0, 2, 1)

  print(arr_3d.shape)
  return arr_3d


def _select_period(df, start_date, end_date):
  """Return the rows of df whose index lies in [start_date, end_date] (both inclusive)."""
  return df[(df.index >= start_date) & (df.index <= end_date)]


def _split_features_targets(df, steps_forward):
  """Split df column-wise: the last steps_forward columns are targets, the rest features."""
  return df.iloc[:, :-steps_forward], df.iloc[:, -steps_forward:]


def split_data(load_df_scaled_reframed,
               start_train_date,
               end_train_date,
               start_val_date,
               end_val_date,
               start_test_date,
               end_test_date,
               steps_forward,
               last_step_back,
               random_state=None):
  """Split the reframed data into train/validation/test inputs and targets.

  Args:
    load_df_scaled_reframed: frame whose last ``steps_forward`` columns are the
      targets and whose remaining columns are lagged features.
    start_*_date / end_*_date: inclusive index bounds of each period.
    steps_forward: number of target columns (must be >= 1).
    last_step_back: number of lagged steps per feature, used to build the 3D inputs.
    random_state: optional seed forwarded to ``sklearn.utils.shuffle`` so the row
      order of the training set is reproducible. ``None`` keeps the previous,
      unseeded behaviour.

  Returns:
    (X_train_3D, X_val_3D, X_test_3D, y_train_df, y_val_df, y_test_df). Only the
    training rows are shuffled; validation and test rows keep their index order.

  Raises:
    ValueError: if steps_forward < 1 (a zero-width target slice would otherwise
      silently turn every column into a target).
  """
  if steps_forward < 1:
    raise ValueError(f"steps_forward must be >= 1, got {steps_forward}")

  load_train_df = _select_period(load_df_scaled_reframed, start_train_date, end_train_date)
  load_val_df = _select_period(load_df_scaled_reframed, start_val_date, end_val_date)
  load_test_df = _select_period(load_df_scaled_reframed, start_test_date, end_test_date)

  load_train_df = shuffle(load_train_df, random_state=random_state)

  X_train_df, y_train_df = _split_features_targets(load_train_df, steps_forward)
  X_val_df, y_val_df = _split_features_targets(load_val_df, steps_forward)
  X_test_df, y_test_df = _split_features_targets(load_test_df, steps_forward)

  X_train_3D = create3Dinput(X_train_df, last_step_back)
  X_val_3D = create3Dinput(X_val_df, last_step_back)
  X_test_3D = create3Dinput(X_test_df, last_step_back)

  return X_train_3D, X_val_3D, X_test_3D, y_train_df, y_val_df, y_test_df

In [22]:
class LoadDataset(Dataset):
  """Dataset pairing each input window with its row of target values.

  Both the inputs and the targets are converted to float32 tensors once,
  at construction time.
  """

  def __init__(self, X_3D, y_df):
    """Store X_3D and the values of y_df as float32 tensors (self.X, self.y)."""
    inputs = torch.tensor(X_3D, dtype=torch.float32)
    targets = torch.tensor(y_df.values, dtype=torch.float32)
    self.X, self.y = inputs, targets

  def __len__(self):
    """Number of samples, taken from the first dimension of the targets."""
    return self.y.shape[0]

  def __getitem__(self, idx):
    """Return the (input, target) pair at position idx."""
    sample = (self.X[idx], self.y[idx])
    return sample

## Transformer - Encoder

In [23]:
class MultiHeadAttention(nn.Module):
  """Scaled dot-product multi-head attention (d_v is assumed equal to d_k).

  Args:
    d_k: size of each head's query/key/value vectors.
    d_model: size of the input and output feature dimension.
    n_heads: number of attention heads.
  """

  def __init__(self, d_k, d_model, n_heads):
    super().__init__()

    # Assume d_v = d_k
    self.d_k = d_k
    self.n_heads = n_heads

    self.key = nn.Linear(d_model, d_k * n_heads)
    self.query = nn.Linear(d_model, d_k * n_heads)
    self.value = nn.Linear(d_model, d_k * n_heads)

    # final linear layer
    self.fc = nn.Linear(d_k * n_heads, d_model)

  def _split_heads(self, x):
    """[N, T, h*d_k] --> [N, h, T, d_k], using x's own sequence length T."""
    return x.view(x.shape[0], x.shape[1], self.n_heads, self.d_k).transpose(1, 2)

  def forward(self, q, k, v, mask=None):
    """Attend from q over k/v.

    Args:
      q: [N, T_q, d_model] queries.
      k: [N, T_k, d_model] keys.
      v: [N, T_k, d_model] values (same sequence length as k).
      mask: optional [N, T_k] tensor; key positions where it equals 0 are excluded
        from the softmax. A row that masks every key yields NaN weights.

    Returns:
      [N, T_q, d_model] tensor.
    """
    # h -> Number of attention heads.
    # Each tensor is reshaped with its own length, so T_q and T_k may differ
    # (for self-attention they are the same and nothing changes).
    q = self._split_heads(self.query(q))   # [N, h, T_q, d_k]
    k = self._split_heads(self.key(k))     # [N, h, T_k, d_k]
    v = self._split_heads(self.value(v))   # [N, h, T_k, d_k]

    # compute attention weights
    # [N, h, T_q, d_k] x [N, h, d_k, T_k] --> [N, h, T_q, T_k]
    attention_scores = q @ k.transpose(-2, -1) / math.sqrt(self.d_k)
    if mask is not None:
      attention_scores = attention_scores.masked_fill(
          mask[:, None, None, :] == 0, float('-inf')
      )
    attention_weights = F.softmax(attention_scores, dim=-1)

    # compute attention-weighted values
    # [N, h, T_q, T_k] x [N, h, T_k, d_k] --> [N, h, T_q, d_k]
    A = attention_weights @ v

    # merge the heads back before the final linear layer
    N, T_q = A.shape[0], A.shape[2]
    A = A.transpose(1, 2)   # [N, T_q, h, d_k]
    A = A.contiguous().view(N, T_q, self.d_k * self.n_heads)    # [N, T_q, h*d_k]

    # projection
    return self.fc(A)

In [24]:
class TransformerBlock(nn.Module):
  """Encoder block: self-attention and a feed-forward network, each wrapped in a
  residual connection followed by LayerNorm, with dropout applied to the block output.
  """

  def __init__(self, d_k, d_model, n_heads, dropout_prob=0.1):
    super().__init__()
    hidden_units = 4 * d_model

    self.ln1 = nn.LayerNorm(d_model)
    self.ln2 = nn.LayerNorm(d_model)
    self.mha = MultiHeadAttention(d_k, d_model, n_heads)
    # Position-wise feed-forward network; note that it ends with GELU + dropout.
    feed_forward_layers = [
        nn.Linear(d_model, hidden_units),
        nn.GELU(),
        nn.Linear(hidden_units, d_model),
        nn.GELU(),
        nn.Dropout(p=dropout_prob),
    ]
    self.ann = nn.Sequential(*feed_forward_layers)
    self.dropout = nn.Dropout(p=dropout_prob)

  def forward(self, x, mask=None):
    """Apply the block to x; mask is passed through to the attention layer."""
    attended = self.mha(x, x, x, mask)
    x = self.ln1(x + attended)
    transformed = self.ann(x)
    x = self.ln2(x + transformed)
    return self.dropout(x)

In [25]:
class PositionalEncoding(nn.Module):
  """Adds fixed sinusoidal positional encodings to the input, then applies dropout.

  Args:
    d_model: feature dimension of the input (even or odd).
    max_len: number of positions for which encodings are precomputed.
    dropout_prob: dropout probability applied after the encodings are added.
  """

  def __init__(self, d_model, max_len=1024, dropout_prob=0.1):
    super().__init__()
    self.dropout = nn.Dropout(p=dropout_prob)

    position = torch.arange(max_len).unsqueeze(1)
    exp_term = torch.arange(0, d_model, 2)
    div_term = torch.exp(exp_term * (-math.log(10000.0) / d_model))
    angles = position * div_term    # [max_len, ceil(d_model / 2)]

    pe = torch.zeros(1, max_len, d_model)
    pe[0, :, 0::2] = torch.sin(angles)
    # For odd d_model there is one fewer odd-indexed (cosine) column than
    # even-indexed (sine) column, so only the first d_model // 2 frequencies are used.
    pe[0, :, 1::2] = torch.cos(angles[:, :d_model // 2])
    # buffer: moves with the module across devices and is saved in the state dict,
    # but is not a trainable parameter
    self.register_buffer('pe', pe)

  def forward(self, x):
    """x --> [N, T, D] with T <= max_len; returns a tensor of the same shape."""
    x = x + self.pe[:, :x.size(1), :]
    return self.dropout(x)

In [26]:
class Encoder(nn.Module):
  """Transformer encoder that maps an input window to a vector of forecasts.

  Pipeline: linear projection D -> d_model with ReLU, positional encoding,
  n_layers TransformerBlocks, selection of sequence position 0, LayerNorm and a
  final linear layer producing ``output_units`` values.

  Args:
    D: number of input features per time step.
    max_len: maximum sequence length supported by the positional encoding.
    d_k: per-head query/key/value size.
    d_model: internal feature dimension.
    n_heads: number of attention heads.
    n_layers: number of stacked TransformerBlocks.
    output_units: size of the output vector.
    dropout_prob: dropout probability used throughout.
  """

  def __init__(self,
               D,
               max_len,
               d_k,
               d_model,
               n_heads,
               n_layers,
               output_units,
               dropout_prob):
    super().__init__()

    self.linear = nn.Linear(D, d_model)
    self.relu = nn.ReLU()
    self.pos_encoding = PositionalEncoding(d_model,
                                           max_len,
                                           dropout_prob)
    # ModuleList rather than Sequential: every block needs the extra `mask`
    # argument, so the stack is iterated explicitly in forward(). Sub-modules are
    # still registered under the index keys "transformer_blocks.<i>".
    self.transformer_blocks = nn.ModuleList(
        TransformerBlock(d_k, d_model, n_heads, dropout_prob)
        for _ in range(n_layers))
    self.ln = nn.LayerNorm(d_model)
    self.fc = nn.Linear(d_model, output_units)

  def forward(self, x, mask=None):
    """x --> [N, T, D]; returns [N, output_units]. mask is forwarded to every block."""
    x = self.relu(self.linear(x))
    x = self.pos_encoding(x)
    for block in self.transformer_blocks:
      x = block(x, mask)

    # many-to-one: keep only sequence position 0 (x --> [N, d_model])
    x = x[:, 0, :]

    x = self.ln(x)
    x = self.fc(x)
    return x

## Training and Evaluation

In [27]:
def loss_fn(y_preds, y_true, device):
  """Mean absolute percentage error, returned as a fraction (0.05 == 5 %).

  Args:
    y_preds: predicted values.
    y_true: reference values, same shape as y_preds.
    device: unused; kept so existing calls keep working.

  Returns:
    0-dim tensor: mean over all elements of |y_preds - y_true| / max(|y_true|, epsilon).
  """
  # lower bound on the denominator so zero targets do not divide by zero
  epsilon = 1.17e-06
  abs_diff = torch.abs(y_preds - y_true)
  abs_per_error = abs_diff / torch.clamp(torch.abs(y_true), min=epsilon)
  mape = torch.sum(abs_per_error) / y_true.numel()

  return mape

def train_step(model,
               dataloader,
               optimizer,
               device):
  """Run one training epoch and return the mean of the per-batch losses.

  Args:
    model: network to train; it is switched to training mode.
    dataloader: yields (X, y) batches.
    optimizer: optimizer stepped once per batch.
    device: device the batches are moved to.

  Returns:
    float: sum of the batch losses divided by the number of batches.
  """
  model.train()
  loss = 0
  for X, y in dataloader:
    X, y = X.to(device), y.to(device)
    # Give the predictions exactly the target's shape. A bare squeeze() would turn
    # [N, 1] outputs into [N], which then broadcasts against [N, 1] targets into an
    # [N, N] error matrix and silently yields a wrong loss.
    y_preds = model(X).reshape(y.shape)
    batch_loss = loss_fn(y_preds, y, device)
    loss += batch_loss.item()
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

  loss /= len(dataloader)
  return loss

def val_step(model, dataloader, device):
  """Evaluate the model and return the mean per-batch loss in original target units.

  Predictions and targets are mapped back from the min-max scaled range before
  the loss is computed, so the value is not on the same scale as the training
  loss returned by train_step.

  NOTE: relies on the notebook-level globals ``scaler`` (fitted MinMaxScaler) and
  ``TARGET_POS`` (column position of the target); both must exist before calling.

  Args:
    model: network to evaluate; it is switched to evaluation mode.
    dataloader: yields (X, y) batches.
    device: device the batches are moved to.

  Returns:
    float: sum of the batch losses divided by the number of batches.
  """
  # inverse min-max transform parameters of the target column (loop-invariant)
  target_min = scaler.data_min_[TARGET_POS]
  target_range = scaler.data_max_[TARGET_POS] - target_min

  model.eval()
  val_loss = 0
  with torch.inference_mode():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      # reshape instead of squeeze(): keeps [N, 1] outputs aligned with [N, 1]
      # targets rather than broadcasting them into an [N, N] error matrix
      val_preds = model(X).reshape(y.shape)

      y_unscaled = target_min + y * target_range
      y_preds_unscaled = target_min + val_preds * target_range

      batch_loss = loss_fn(y_preds_unscaled, y_unscaled, device)
      val_loss += batch_loss.item()

  val_loss /= len(dataloader)
  return val_loss

def train(model,
          train_dataloader,
          val_dataloader,
          optimizer,
          scheduler,
          epochs,
          patience,
          device,
          path):
  """Train with checkpointing on the best validation loss and early stopping.

  Args:
    model: network to train.
    train_dataloader: batches used by train_step.
    val_dataloader: batches used by val_step.
    optimizer: optimizer passed to train_step and saved in every checkpoint.
    scheduler: stepped once per epoch with the validation loss.
    epochs: maximum number of epochs.
    patience: training stops once more than ``patience`` epochs have passed since
      the best epoch without an improvement.
    device: device used for training and validation.
    path: file the best model/optimizer state is written to.

  Returns:
    dict with the per-epoch histories "loss" and "val_loss".
  """
  results = {
      "loss": [],
      "val_loss": []
  }
  best_val_loss = float("inf")
  best_epoch = 0

  for epoch in tqdm(range(epochs)):
    loss = train_step(model=model,
                      dataloader=train_dataloader,
                      optimizer=optimizer,
                      device=device)

    val_loss = val_step(model=model,
                        dataloader=val_dataloader,
                        device=device)
    scheduler.step(val_loss)

    results['loss'].append(loss)
    results['val_loss'].append(val_loss)

    epoch_summary = f"Epoch: {epoch+1}/{epochs} | Loss: {loss:.4f} | Val loss: {val_loss:.4f}"
    # The first epoch is always checkpointed (even if its loss is NaN) so that a
    # checkpoint file exists whenever at least one epoch has run.
    if epoch == 0 or val_loss < best_val_loss:
      best_val_loss = val_loss
      # Record the real epoch index (0 for the first epoch, not -1) so every best
      # epoch is granted the same number of patience epochs.
      best_epoch = epoch
      checkpoint(model, optimizer, path)
      print(f"{epoch_summary} - *Checkpoint*")
    elif epoch - best_epoch > patience:
      # reported 1-based, like the per-epoch lines
      print(f"\nEarly stopping applied at epoch {epoch+1}.")
      break
    else:
      print(epoch_summary)

  return results

def checkpoint(model, optimizer, filepath):
  """Write the optimizer and model state dicts to filepath (keys "optimizer", "model")."""
  state = {
    "optimizer": optimizer.state_dict(),
    "model": model.state_dict(),
  }
  torch.save(state, filepath)

# MAIN 1: Shape sanity checks (Conv1d vs. Linear input projection)

In [None]:
# Scratch check: how a Conv1d with kernel_size=1 changes the shape of a random
# [N, T, D] batch (N samples, T time steps, D features per step).
N = 8
T = 168
D = 11

# NOTE(review): this model is built but not used anywhere in this cell.
model = Encoder(max_len=1024, 
                D=11,
                d_k=16, 
                d_model=64, 
                n_heads=4, 
                n_layers=2, 
                output_units=24, 
                dropout_prob=0.1)

x = np.random.randn(N, T, D)
x_t = torch.tensor(x, dtype=torch.float32)
print(f"Initial tensor: Shape --> {x_t.size()}")

# swap the time and feature axes: [N, T, D] --> [N, D, T], so that D is the
# channel dimension handed to Conv1d (in_channels=D)
x_t_tr = x_t.transpose(1, 2)
print(f"Tensor transposed: Shape --> {x_t_tr.size()}")

conv1d = nn.Conv1d(in_channels=D,
                   out_channels=64,
                   kernel_size=1)

# [N, D, T] --> [N, 64, T] (see the printed shapes below)
x_t_tr = conv1d(x_t_tr)
print(f"Tensor after Conv1d: Shape --> {x_t_tr.size()}")

Initial tensor: Shape --> torch.Size([8, 168, 11])
Tensor transposed: Shape --> torch.Size([8, 11, 168])
Tensor after Conv1d: Shape --> torch.Size([8, 64, 168])


In [None]:
# Scratch check: project a random [N, T, D] batch with a Linear layer, which acts
# on the last (feature) axis and therefore needs no transpose.
N = 8
T = 168
D = 11
d_model = 64

# NOTE(review): this model is built but not used anywhere in this cell.
model = Encoder(max_len=1024, 
                D=11,
                d_k=16, 
                d_model=64, 
                n_heads=4, 
                n_layers=2, 
                output_units=24, 
                dropout_prob=0.1)

x = np.random.randn(N, T, D)
x_t = torch.tensor(x, dtype=torch.float32)
print(f"Initial tensor: Shape --> {x_t.size()}")

linear = nn.Linear(D, d_model)
# x_l is inspected in the next cell
x_l = linear(x_t)

Initial tensor: Shape --> torch.Size([8, 168, 11])


In [None]:
# shape of the Linear projection computed in the previous cell
x_l.size()

torch.Size([8, 168, 64])

# MAIN 2: Data preparation, training and evaluation

In [28]:
# Forecast geometry. Step counts are treated as hours: they are converted with
# pd.to_timedelta(..., 'h') further down.
DAYS_BACK = 3
DAYS_TO_SKIP = 10
STEPS_FORWARD = 24    # 1 day
# number of steps between the last observed step and the first forecast step
SKIP_STEPS_FORWARD = 24 * DAYS_TO_SKIP
# offset of the last forecast step
LAST_STEP_FORWARD = STEPS_FORWARD + SKIP_STEPS_FORWARD
# number of lagged steps fed to the model per feature
LAST_STEP_BACK = 24 * DAYS_BACK

# keep 1 year for testing
# The period bounds are shifted back by LAST_STEP_FORWARD hours relative to the
# calendar dates (see the printed ranges below).
START_TEST_DATE = pd.to_datetime('2018-01-01') - pd.to_timedelta(LAST_STEP_FORWARD, 'h')
END_TEST_DATE = START_TEST_DATE + pd.DateOffset(years=1)

# validation ends one hour before the test period starts
END_VAL_DATE = START_TEST_DATE - pd.to_timedelta(1, 'h')
START_VAL_DATE = pd.to_datetime('2017-01-01') - pd.to_timedelta(LAST_STEP_FORWARD, 'h')

# training ends one hour before the validation period starts
START_TRAIN_DATE = pd.to_datetime('2010-10-01')
END_TRAIN_DATE = START_VAL_DATE - pd.to_timedelta(1, 'h')

# name of the column to forecast
TARGET = "TOTAL_CONS"

# training hyper-parameters
BATCH_SIZE = 1024
EPOCHS = 1000      # upper bound; early stopping may end training sooner
PATIENCE = 22      # early-stopping patience passed to train()
PATH = "model.pth" # checkpoint file passed to train()

print(f"Train from {START_TRAIN_DATE} to {END_TRAIN_DATE}")
print(f"Validation from {START_VAL_DATE} to {END_VAL_DATE}")
print(f"Test from {START_TEST_DATE} to {END_TEST_DATE}")

Train from 2010-10-01 00:00:00 to 2016-12-20 23:00:00
Validation from 2016-12-21 00:00:00 to 2017-12-20 23:00:00
Test from 2017-12-21 00:00:00 to 2018-12-21 00:00:00


In [29]:
# Load the raw data and index it by the parsed "Timestamp" column.
load_df = pd.read_csv("/content/FINAL_DATASET_2.csv")
load_df = (
    load_df
    .set_index(pd.to_datetime(load_df["Timestamp"]))
    .drop("Timestamp", axis=1)
)

# Column position of the target; used to look up its min/max in the fitted scaler.
TARGET_POS = np.where(load_df.columns == TARGET)[0][0]

# Scale with statistics from the train+validation period only.
load_df_scaled, scaler = scale_data(load_df=load_df,
                                    start_train_date=START_TRAIN_DATE,
                                    end_val_date=END_VAL_DATE,
                                    start_test_date=START_TEST_DATE,
                                    end_test_date=END_TEST_DATE)

# Turn the scaled series into lagged inputs plus future targets.
load_df_scaled_reframed = reframe_data(load_df_scaled=load_df_scaled,
                                       target=TARGET,
                                       days_back=DAYS_BACK,
                                       last_step_forward=LAST_STEP_FORWARD,
                                       last_step_back=LAST_STEP_BACK,
                                       skip_steps_forward=SKIP_STEPS_FORWARD)



In [30]:
# Split into train / validation / test inputs ([N, T, D] arrays) and target frames.
(X_train_3D, X_val_3D, X_test_3D,
 y_train_df, y_val_df, y_test_df) = split_data(
    load_df_scaled_reframed=load_df_scaled_reframed,
    start_train_date=START_TRAIN_DATE,
    end_train_date=END_TRAIN_DATE,
    start_val_date=START_VAL_DATE,
    end_val_date=END_VAL_DATE,
    start_test_date=START_TEST_DATE,
    end_test_date=END_TEST_DATE,
    steps_forward=STEPS_FORWARD,
    last_step_back=LAST_STEP_BACK)

# The arrays are used as [N, T, D] without transposing: the Encoder's input
# projection is a Linear layer acting on the last (feature) axis.

# Training batches are reshuffled every epoch; validation batches keep their order.
train_dataset = LoadDataset(X_train_3D, y_train_df)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = LoadDataset(X_val_3D, y_val_df)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

(54481, 72, 8)
(8760, 72, 8)
(8497, 72, 8)


In [31]:
# The output width is tied to STEPS_FORWARD (the number of target columns produced by
# split_data) instead of a separate literal, so the two cannot drift apart.
model = Encoder(D=X_train_3D.shape[2],
                max_len=LAST_STEP_BACK,
                d_k=16,
                d_model=64,
                n_heads=16,
                n_layers=2,
                output_units=STEPS_FORWARD,
                dropout_prob=0).to(device)
optimizer = torch.optim.Adam(params=model.parameters())
# cut the learning rate to a third after 7 epochs without a better validation loss
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.33, patience=7, verbose=True)

In [32]:
!rm -rf "model.pth"
start_time = datetime.now()

model_results = train(model=model, 
                      train_dataloader=train_dataloader,
                      val_dataloader=val_dataloader,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      epochs=EPOCHS,
                      patience=PATIENCE,
                      device=device,
                      path=PATH)

total_time = datetime.now() - start_time
print(f"Total training time: {total_time.seconds}")

  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch: 1/1000 | Loss: 0.4562 | Val loss: 0.1446 - *Checkpoint*
Epoch: 2/1000 | Loss: 0.3021 | Val loss: 0.1147 - *Checkpoint*
Epoch: 3/1000 | Loss: 0.2197 | Val loss: 0.0847 - *Checkpoint*
Epoch: 4/1000 | Loss: 0.1762 | Val loss: 0.0771 - *Checkpoint*
Epoch: 5/1000 | Loss: 0.1609 | Val loss: 0.0733 - *Checkpoint*
Epoch: 6/1000 | Loss: 0.1540 | Val loss: 0.0772
Epoch: 7/1000 | Loss: 0.1489 | Val loss: 0.0783
Epoch: 8/1000 | Loss: 0.1465 | Val loss: 0.0840
Epoch: 9/1000 | Loss: 0.1461 | Val loss: 0.0817
Epoch: 10/1000 | Loss: 0.1413 | Val loss: 0.0841
Epoch: 11/1000 | Loss: 0.1371 | Val loss: 0.0925
Epoch: 12/1000 | Loss: 0.1365 | Val loss: 0.0661 - *Checkpoint*
Epoch: 13/1000 | Loss: 0.1331 | Val loss: 0.0944
Epoch: 14/1000 | Loss: 0.1341 | Val loss: 0.0658 - *Checkpoint*
Epoch: 15/1000 | Loss: 0.1284 | Val loss: 0.0706
Epoch: 16/1000 | Loss: 0.1298 | Val loss: 0.0644 - *Checkpoint*
Epoch: 17/1000 | Loss: 0.1268 | Val loss: 0.0632 - *Checkpoint*
Epoch: 18/1000 | Loss: 0.1248 | Val loss:

In [33]:
# Restore the best state written by train(). The loaded dict is deliberately not
# named `checkpoint`: that name is the checkpoint() helper that train() calls, and
# rebinding it here would make any later re-run of train() fail.
saved_state = torch.load(PATH)
model.load_state_dict(saved_state['model'])
optimizer.load_state_dict(saved_state['optimizer'])
model.to("cpu")
model.eval()
with torch.inference_mode():
  test_preds_scaled = model(torch.tensor(X_test_3D, dtype=torch.float32))
# No squeeze(): the output stays 2D ([rows, STEPS_FORWARD]) even for a single test row.
test_preds_scaled = test_preds_scaled.to('cpu').numpy()

# inverse min-max transform parameters of the target column
target_min = scaler.data_min_[TARGET_POS]
target_range = scaler.data_max_[TARGET_POS] - target_min
horizon_steps = np.arange(1, STEPS_FORWARD+1)

test_preds_df_scaled = pd.DataFrame(test_preds_scaled, columns=horizon_steps, index=y_test_df.index)
# predictions and true values in original units, one column per forecast step
test_preds_df = pd.DataFrame(target_min + test_preds_scaled * target_range,
                             columns=horizon_steps,
                             index=y_test_df.index)
real_df = pd.DataFrame(target_min + y_test_df.to_numpy() * target_range,
                       columns=horizon_steps,
                       index=y_test_df.index)

In [34]:
# Per-step MAPE (in percent) on the test set, plus the mean over all forecast steps.
mape_list = list()
step_results_dict = {}
for step in range(1, STEPS_FORWARD + 1):
  # timestamp each row with the time the forecast refers to
  step_index = real_df.index + pd.to_timedelta(SKIP_STEPS_FORWARD + step, 'h')
  step_results_df = pd.DataFrame(
      {
          "real": real_df.loc[:, step].to_numpy(),
          "predictions": test_preds_df.loc[:, step].to_numpy()
      },
      index=step_index
  )
  step_results_df['abs_error'] = abs(step_results_df['real'] - step_results_df['predictions'])
  # Percentage error relative to the magnitude of the real value; rows with a real
  # value of 0 are set to NaN and therefore skipped by mean().
  # (np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.)
  step_results_df['ape'] = np.where(step_results_df['real'] == 0,
                                    np.nan,
                                    100 * step_results_df['abs_error'] / step_results_df['real'].abs())
  step_mape = step_results_df['ape'].mean()
  mape_list.append(step_mape)
  print(f"Step {step} -> MAPE = {step_mape}")

  step_results_dict[step] = step_results_df
mape = np.array(mape_list).mean()
print(f"\nMAPE = {mape}")

Step 1 -> MAPE = 6.766741693219503
Step 2 -> MAPE = 6.513291396075378
Step 3 -> MAPE = 6.820129854769707
Step 4 -> MAPE = 7.387675011771584
Step 5 -> MAPE = 6.833579693935906
Step 6 -> MAPE = 6.82063152209729
Step 7 -> MAPE = 7.236267655494694
Step 8 -> MAPE = 6.552955851597608
Step 9 -> MAPE = 7.376416312203048
Step 10 -> MAPE = 8.119207270024487
Step 11 -> MAPE = 7.961007202671249
Step 12 -> MAPE = 7.270224421597464
Step 13 -> MAPE = 7.119189813083537
Step 14 -> MAPE = 7.332779853735236
Step 15 -> MAPE = 7.56923777872741
Step 16 -> MAPE = 7.115979621419861
Step 17 -> MAPE = 7.064788854556921
Step 18 -> MAPE = 6.447440553542124
Step 19 -> MAPE = 5.904684651089301
Step 20 -> MAPE = 6.404453900214577
Step 21 -> MAPE = 6.899925463324453
Step 22 -> MAPE = 6.57716059747167
Step 23 -> MAPE = 7.354279246190304
Step 24 -> MAPE = 7.629932135621643

MAPE = 7.044915848101456
