In [None]:
!pip install nbeats_forecast

In [None]:
# import packages

import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as se
import torch
from nbeats_forecast import NBeats
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torch import optim

from RevIn import RevIN

In [None]:
# split dataset into train/test datasets
def train_test_split(data, n_test):
    """Cut ``data`` into a leading and a trailing piece at index ``n_test``.

    Despite its name, ``n_test`` is the position of the cut: the first
    returned piece is ``data[:n_test]`` and the second is ``data[n_test:]``.

    Parameters
    ----------
    data : any object that supports slice indexing
    n_test : int
        Index at which the sequence is divided.

    Returns
    -------
    tuple
        ``(leading_piece, trailing_piece)``.
    """
    leading = slice(None, n_test)
    trailing = slice(n_test, None)
    return data[leading], data[trailing]

# calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` taken over the positions
        where ``y_true`` is non-zero, or ``nan`` when no such position exists.

    Raises
    ------
    ValueError
        If the two inputs cannot be broadcast against each other.
    """
    # Drop length-1 axes first so a 1-D vector and an (n, 1) column vector
    # are paired element-wise instead of broadcasting into an (n, n) matrix.
    y_true = np.squeeze(np.asarray(y_true, dtype=float))
    y_pred = np.squeeze(np.asarray(y_pred, dtype=float))
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    # A percentage error is undefined where the observed value is zero;
    # leave those positions out rather than returning inf/nan for the lot.
    usable = y_true != 0
    if not usable.any():
        return float("nan")

    observed = y_true[usable]
    predicted = y_pred[usable]
    return np.mean(np.abs((observed - predicted) / observed)) * 100

In [None]:
# Train and evaluate one N-BEATS model per numbered CSV file, save a forecast
# plot for each, and collect the error metrics into performance.csv.
DATA_DIR = Path("/content/dataset50")  # input files are named 0.csv, 1.csv, ...
PLOT_DIR = Path("/content/plots")      # one PNG per dataset is written here
N_DATASETS = 50
TRAIN_FRACTION = 0.75                  # leading share of each series used for training
ZERO_METRICS = {"MAE": 0, "MSE": 0, "RMSE": 0, "MAPE": 0}

# savefig does not create missing directories, so make sure the target exists.
PLOT_DIR.mkdir(parents=True, exist_ok=True)

performance = {}
for i in range(N_DATASETS):
    # dataset loading
    data = pd.read_csv(DATA_DIR / f"{i}.csv", index_col=0)

    # converting numeric (millisecond) timestamps to datetime
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')

    # a constant series is not normalized or modelled; it is recorded with
    # all-zero metrics and the loop moves on to the next file
    if data['value'].nunique() == 1:
        performance[f"{i}.csv"] = dict(ZERO_METRICS)
        continue

    # normalizing the data using RevIN
    # NOTE(review): the layer is applied to the whole series before the
    # train/validation split, so whatever statistics it derives presumably
    # include the evaluation window -- confirm this is intended.
    # NOTE(review): RevIN receives len(data) as its first argument; verify
    # that this is what the layer expects for a single 1-D series.
    layer = RevIN(len(data), affine=False)
    # explicit float64 so integer-valued columns are handled like float ones
    series = torch.tensor(data['value'].to_numpy(dtype='float64'))
    series_norm = layer(series, mode='norm')

    # Train-test split: first 75% for training, remainder for evaluation
    split_at = int(len(data) * TRAIN_FRACTION)
    train, val = train_test_split(series_norm, split_at)
    train_np = train.detach().numpy().reshape((-1, 1))
    val_np = val.detach().numpy().reshape((-1, 1))

    # model training
    model = NBeats(data=train_np, period_to_forecast=len(val), backcast_length=len(val))
    optimiser = optim.AdamW(
        model.parameters,
        lr=0.001,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0.01,
        amsgrad=False,
    )
    model.fit(epoch=10, optimiser=optimiser, plot=True)

    # the held-out 25% is passed to predict() as its input window
    prediction = model.predict(predict_data=val_np)

    # denormalizing the data to calculate evaluation metrics; both sides are
    # flattened to 1-D so the metric functions pair them element-wise
    # whatever shape predict() returned
    forecast = layer(torch.tensor(prediction).reshape(-1), mode='denorm').detach().numpy()
    # val is already a tensor: copy it with clone() rather than torch.tensor()
    actual = layer(val.detach().clone().reshape(-1), mode='denorm').detach().numpy()

    mae = mean_absolute_error(actual, forecast)
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(actual, forecast)

    # dictionary holding the forecast performance for each dataset
    performance[f"{i}.csv"] = {
        "MAE": round(mae, 2),
        "MSE": round(mse, 2),
        "RMSE": round(rmse, 2),
        "MAPE": round(mape, 2),
    }

    # Forecast plot, saved in PLOT_DIR
    timestamps = data['timestamp'].values
    fig, ax = plt.subplots(figsize=(20, 5), dpi=80)
    ax.plot(timestamps, data['value'].values, label="original")
    ax.plot(timestamps[len(train):], forecast, label="prediction")
    ax.set_xlabel("Timestamp")
    ax.set_ylabel("Value")
    ax.legend(loc="lower right")
    fig.savefig(PLOT_DIR / f"{i}.png")
    plt.show()
    # release the figure; otherwise one figure per dataset stays in memory
    plt.close(fig)

# saving the performance of each dataset as csv
pd.DataFrame(performance).T.to_csv('performance.csv')