# NBEATS

### Loading Libraries

In [1]:
# Move the working directory two levels up; relative paths used later in the
# notebook (e.g. "imgs/chapter_16") are resolved from that folder.
%cd ../..

/Users/joaquinromero/Desktop


In [3]:
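# NOTE: inactive (fully commented-out) duplicate of the import cell that follows; it performs no imports.
# # Numerical Computing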
# import numpy as np

# # Data Manipulation
# import pandas as pd

# # OS 
# import os
# import shutil
# import joblib

# # Data Visualization
# import plotly.io as pio
# import plotly.express as px
# import plotly.graph_objects as go

# # Path
# from pathlib import Path
# from tqdm.autonotebook import tqdm

# # IPython & Itertools
# from itertools import cycle
# from IPython.display import display, HTML

# # Stats Forecast
# from statsforecast import StatsForecast

# # NeuralForecast
# from neuralforecast import NeuralForecast
# from neuralforecast.models import NBEATS
# from neuralforecast.auto import AutoNBEATS
# from neuralforecast.losses.pytorch import MQLoss

In [2]:
# First, install the missing package
!pip install neuralforecast

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# OS 
import os
import shutil
import joblib

# Data Visualization
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go

# Path
from pathlib import Path
from tqdm.autonotebook import tqdm

# IPython & Itertools
from itertools import cycle
from IPython.display import display, HTML

# Stats Forecast
from statsforecast import StatsForecast

# NeuralForecast - will work after installation
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.auto import AutoNBEATS
from neuralforecast.losses.pytorch import MQLoss

Collecting neuralforecast
  Using cached neuralforecast-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting torch<=2.6.0,>=2.0.0 (from neuralforecast)
  Downloading torch-2.6.0-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting pytorch-lightning>=2.0.0 (from neuralforecast)
  Downloading pytorch_lightning-2.5.4-py3-none-any.whl.metadata (20 kB)
Collecting ray>=2.2.0 (from ray[tune]>=2.2.0->neuralforecast)
  Downloading ray-2.49.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (21 kB)
Collecting optuna (from neuralforecast)
  Using cached optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting torchmetrics>0.7.0 (from pytorch-lightning>=2.0.0->neuralforecast)
  Downloading torchmetrics-1.8.1-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning>=2.0.0->neuralforecast)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Collecting tensorboardX>=1.9 (from ray[tune]>=2.2.0->neuralforecast)
  Using cached tensorboardx

In [4]:
from utilsforecast.evaluation import evaluate

In [5]:
# %load_ext autoreload

# %autoreload 2

In [6]:
# Fix NumPy's global random seed so NumPy-based randomness is repeatable.
np.random.seed(42)

# Register tqdm's pandas integration.
tqdm.pandas()

# Use the "plotly_white" template for every Plotly figure.
pio.templates.default = "plotly_white"

In [7]:
# Create the imgs/chapter_16 folder (relative to the current working directory)
# if it is not there yet; an already existing folder is not an error.
Path("imgs/chapter_16").mkdir(parents=True, exist_ok=True)

# Folder the preprocessed parquet files are read from.
preprocessed = Path.home().joinpath("Desktop", "data", "london_smart_meters", "preprocessed")

# Sibling folder named "output" under the same dataset directory.
output = Path.home().joinpath("Desktop", "data", "london_smart_meters", "output")

In [8]:
# Speed switch: when True, only a subsample of IDs is trained on.
TRAIN_SUBSAMPLE = False

# Tuning switch: when False, pre-trained hyperparameters are used when
# generating the AUTO NeuralForecast.
RETUNE = True

In [9]:
# Load the missing-value-imputed, train/test-split data. A missing file only
# produces the HTML warning below; the exception is not re-raised.
try:
    # Training split.
    train_df = pd.read_parquet(
        preprocessed.joinpath("selected_blocks_train_missing_imputed_feature_engg.parquet")
    )
    # The validation split is loaded under the name test_df so that it is the
    # one predicted on. Swap in
    # "selected_blocks_test_missing_imputed_feature_engg.parquet" to use the
    # test split instead.
    test_df = pd.read_parquet(
        preprocessed.joinpath("selected_blocks_val_missing_imputed_feature_engg.parquet")
    )
except FileNotFoundError:
    display(HTML("""
    <div class="alert alert-block alert-warning">
    <b>Warning!</b> File not found. Please make sure you have run 01-Feature Engineering.ipynb in Chapter06
    </div>
    """))

In [None]:
# Show the first two rows of the training frame.
train_df.head(n=2)

In [None]:
# Number of distinct LCLid values before any subsampling is applied.
print(
    "Total # of IDs Pre-Sampling: ",
    len(train_df["LCLid"].unique()),
)

In [None]:
# Plot the energy_consumption series of the training frame, keyed by LCLid
# over timestamp, using the matplotlib engine.
StatsForecast.plot(
    train_df,
    id_col='LCLid',
    time_col='timestamp',
    target_col='energy_consumption',
    engine='matplotlib',
)

In [None]:
# Optionally run on a smaller set of IDs for faster iteration.
if TRAIN_SUBSAMPLE:
    print("sub sampling")
    SAMPLE = 10
    # Draw SAMPLE IDs (fixed random_state) from the categories actually present
    # in the training data; assumes LCLid is a categorical column, since
    # remove_unused_categories is called on its unique values.
    sampled_LCLids = (
        pd.Series(train_df.LCLid.unique().remove_unused_categories().categories)
        .sample(n=SAMPLE, random_state=99)
        .tolist()
    )
    # Keep only the rows of the sampled IDs in both frames.
    train_df = train_df[train_df["LCLid"].isin(sampled_LCLids)]
    test_df = test_df[test_df["LCLid"].isin(sampled_LCLids)]

In [None]:
# Number of distinct LCLid values left after the optional subsampling step.
print(
    "Total # of IDs Post Sampling: ",
    len(train_df["LCLid"].unique()),
)

### Train, Validation, Test Set

In [None]:
# Report the first and last timestamp of the training and testing frames.
print(
    "Training Min Date: ", train_df["timestamp"].min(),
    "\nTraining Max Date: ", train_df["timestamp"].max(),
    "\nTesting Min Date: ", test_df["timestamp"].min(),
    "\nTesting Max Date: ", test_df["timestamp"].max(),
)

In [None]:
# Set the last day of train_df aside as a validation set: everything after
# (latest timestamp - 1 day) is held out.
cutoff = train_df["timestamp"].max() - pd.Timedelta(days=1)

# Rows up to and including the cutoff: the training set used for validation.
training_df = train_df.loc[train_df["timestamp"] <= cutoff].reset_index(drop=True)
# Rows strictly after the cutoff: the validation prediction set.
validation_df = train_df.loc[train_df["timestamp"] > cutoff].reset_index(drop=True)

print(f"Train Max: {training_df.timestamp.max()} \nValidation Min: {validation_df.timestamp.min()} \nValidation Max: {validation_df.timestamp.max()}")
# Number of distinct timestamps in the held-out window.
print(f"Validation Horizon: {len(validation_df.timestamp.unique())}")

In [None]:
# Forecast horizon handed to the model as `h`.
h = 48

# Value handed to the model as `max_steps`.
max_steps = 100

### Training NBEATS Model

In [None]:
# Untuned NBEATS wrapped in a NeuralForecast object at 30-minute frequency.
# The input window is 48 * 7 steps, i.e. seven days of half-hourly data.
model_untuned = NeuralForecast(
    models=[NBEATS(h=h, input_size=48 * 7, max_steps=max_steps)],
    freq='30min',
)

# Fit on the id / time / target columns of the training split only.
model_untuned.fit(
    training_df[['LCLid', 'timestamp', 'energy_consumption']],
    target_col='energy_consumption',
    time_col='timestamp',
    id_col='LCLid',
)

In [None]:
# Predict for the validation window, then attach the observed
# energy_consumption values by left-joining on LCLid and timestamp.
# NOTE(review): if the installed neuralforecast already returns the id as a
# regular column, reset_index() merely adds an "index" column -- confirm
# whether it is still needed.
pred_df = (
    model_untuned
    .predict(futr_df=validation_df[['LCLid', 'timestamp', 'energy_consumption']])
    .reset_index()
    .merge(
        validation_df[['LCLid', 'timestamp', 'energy_consumption']],
        how='left',
        on=['LCLid', 'timestamp'],
    )
)
pred_df.head()

In [None]:
# Plot the validation series together with the NBEATS column of pred_df.
StatsForecast.plot(
    validation_df[['LCLid', 'timestamp', 'energy_consumption']],
    pred_df,
    models=['NBEATS'],
    id_col='LCLid',
    time_col='timestamp',
    target_col='energy_consumption',
    engine='matplotlib',
)

In [None]:
#### Evaluating NBEATS forecast