# Import Libraries

In [1]:
import sys, os

# Add path to import
sys.path.append(os.path.abspath(".."))

In [2]:
from src.forecasting.utils.libraries_data_handling import np, pd
from src.forecasting.utils.libraries_others import json, time
from src.forecasting.constants.enums import ColumnGroup, PeriodList
from src.forecasting.constants.columns import col_decode, col_encode
from src.forecasting.utils.data_split import  dataframe_train_test_split, timeseries_train_test_split
from src.forecasting.utils.libraries_modelling import torch, concatenate, TimeSeries, Scaler, NBEATSModel, Callback, EarlyStopping, ModelCheckpoint, GaussianLikelihood, MeanAbsolutePercentageError, mean_absolute_percentage_error
from src.forecasting.utils.extract_checkpoint_result import extract_best_model_checkpoint
from src.forecasting.models.nbeats_build_w_optuna import nbeats_build
from src.forecasting.models.evaluate_cv_timeseries import evaluate_cv_timeseries

The StatsForecast module could not be imported. To enable support for the AutoARIMA, AutoETS and Croston models, please consider installing it.
The `XGBoost` module could not be imported. To enable XGBoost support in Darts, follow the detailed instructions in the installation guide: https://github.com/unit8co/darts/blob/master/INSTALL.md
The `XGBoost` module could not be imported. To enable XGBoost support in Darts, follow the detailed instructions in the installation guide: https://github.com/unit8co/darts/blob/master/INSTALL.md
  from .autonotebook import tqdm as notebook_tqdm


# Load Processed Dataset

In [3]:
# Load data
df_past     = pd.read_csv('../data/processed/past_covariates_nonoutliers_with_pre_normalization.csv')
df_category = pd.read_csv('../data/processed/future_covariates_one_hot_encoding.csv')

# Data Proprocessing

In [4]:
# Initialize internal precision of matrix multiplication
if torch.cuda.is_available():
    torch.set_float32_matmul_precision('high')

# Make dir to store results
os.makedirs('../models/best_model/', exist_ok=True)

# Setting number after coma to max 5 digits
np.set_printoptions(suppress=True, precision=5)



## Data Cleaning

In [5]:
# Convert timestamp to datatime
df_past['t'] = pd.to_datetime(df_past['t'], format='%Y-%m-%d %H:%M:%S')

# Set index
df_past = df_past.set_index('t').asfreq('h')

# Convert timestamp to datatime
df_category['t'] = pd.to_datetime(df_category['t'], format='%Y-%m-%d %H:%M:%S')

# Set index
df_category = df_category.set_index('t').asfreq('h')

# Cut categorical data end time to match with df_past
df_category = df_category.iloc[:len(df_past)]

## Feature Selection

In [6]:
# Load correlation results
results_r = pd.read_csv('../data/processed/correlation_scores.csv')

# Preparing feature selection input
X_num = df_past[df_past.columns[ColumnGroup.TARGET:]]

# Take very low correlation level (0.00 - 0.199) to drop
X_num_drop = results_r[results_r['Correlation'] <= 0.2]['Feature'].to_list()

# Encode drop colomns name
X_num_drop = [col_encode[feature] for feature in X_num_drop]

# Drop columns
X_num = X_num.drop(columns=X_num_drop)

## Data Split

In [7]:
# Split dataset into Y and X
Y = df_past[df_past.columns[:ColumnGroup.TARGET]].astype('float32')
X = pd.concat([X_num, df_category], axis=1).astype('float32')

# Split to data train 80% and test 20%
Y_train, Y_test = dataframe_train_test_split(Y, test_size=0.1)
X_train, X_test = dataframe_train_test_split(X, test_size=0.1)

# Change to TimeSeries Dataset
Y_train = TimeSeries.from_dataframe(Y_train, value_cols=Y_train.columns.tolist(), freq='h').astype('float32')
X_train = TimeSeries.from_dataframe(X_train, value_cols=X_train.columns.tolist(), freq='h').astype('float32')
Y_test  = TimeSeries.from_dataframe(Y_test, value_cols=Y_test.columns.tolist(), freq='h').astype('float32')
X_test  = TimeSeries.from_dataframe(X_test, value_cols=X_test.columns.tolist(), freq='h').astype('float32')

# Change unsplitted feature for inference
Y_series = TimeSeries.from_dataframe(Y, value_cols=Y.columns.tolist(), freq='h').astype('float32')
X_series = TimeSeries.from_dataframe(X, value_cols=X.columns.tolist(), freq='h').astype('float32')

## Normalization

In [8]:
# Preparing the Scalers
Y_scaler = Scaler()
X_scaler = Scaler()

# Normalize data
Y_train_transformed  = Y_scaler.fit_transform(Y_train).astype('float32')
X_train_transformed  = X_scaler.fit_transform(X_train).astype('float32')

# Normalize data for inference
Y_series_transformed = Y_scaler.fit_transform(Y_series).astype('float32')
X_series_transformed = X_scaler.fit_transform(X_series).astype('float32')

# Load Best N-BEATS Model

In [9]:
# Load N-BEATS best params
nbeats_best_params = pd.read_excel('../reports/nbeats_params_results.xlsx')
nbeats_best_params = nbeats_best_params.sort_values('MAPE_sum', ascending=True).iloc[0]
nbeats_best_params

timestamp                                     2025-12-02 10:57:07.202000
MAPE_sum                                                          0.9067
MAPE_y1                                                         0.121049
MAPE_y2                                                         0.136292
MAPE_y3                                                         0.198467
MAPE_y4                                                         0.110936
MAPE_y5                                                         0.125953
MAPE_y6                                                         0.214003
val_MAPE                                                        195.4359
val_loss                                                          2.3256
status                                                           SUCCESS
model_name             optuna_nbeats_ic312_oc12_bs64_st11_bl3_ly4_wd5...
GPU                                                                 True
ram_usage_MB                                       

In [11]:
# Initialize model name and work directory
model_name = 'nbeats_best_model'
work_dir   = '../models/best_model'

start_time = time.time()
# Retrain model
nbeats_model = nbeats_build(
    Y                   = Y_train_transformed,
    X                   = X_train_transformed,
    input_chunk_length  = int(nbeats_best_params.input_chunk_length),
    output_chunk_length = int(nbeats_best_params.output_chunk_length),
    n_epochs            = int(nbeats_best_params.n_epochs),
    batch_size          = int(nbeats_best_params.batch_size),
    num_stacks          = int(nbeats_best_params.num_stacks),
    num_blocks          = int(nbeats_best_params.num_blocks),
    num_layers          = int(nbeats_best_params.num_layers),
    layer_widths        = int(nbeats_best_params.layer_widths),
    dropout             = float(nbeats_best_params.dropout),
    include_encoders    = False if pd.isna(nbeats_best_params.add_encoders) else True,
    validation_split    = float(nbeats_best_params.validation_split),
    model_name          = model_name,
    work_dir            = work_dir,
    include_stopper     = False,
    custom_checkpoint   = False,
    lr                  = float(nbeats_best_params.lr)
)
cost_time = time.time() - start_time
print(f'\n✅ N-BEATS Fit cost: {cost_time:.2f} seconds')

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 159 M  | train
-------------------------------------------------------------
159 M     Trainable params
49.4 K    Non-trainable params
159 M     Total params
638.665   Total estimated model params size (MB)
492       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 282/282 [05:17<00:00,  0.89it/s, v_num=logs, train_loss=-0.37]   
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/67 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/67 [00:00<?, ?it/s][A
Validation DataLoader 0:   1%|▏         | 1/67 [00:00<00:06, 10.50it/s][A
Validation DataLoader 0:   3%|▎         | 2/67 [00:00<00:06, 10.52it/s][A
Validation DataLoader 0:   4%|▍         | 3/67 [00:00<00:06, 10.56it/s][A
Validation DataLoader 0:   6%|▌         | 4/67 [00:00<00:05, 10.58it/s][A
Validation DataLoader 0:   7%|▋         | 5/67 [00:00<00:05, 10.60it/s][A
Validation DataLoader 0:   9%|▉         | 6/67 [00:00<00:05, 10.56it/s][A
Validation DataLoader 0:  10%|█         | 7/67 [00:00<00:05, 10.58it/s][A
Validation DataLoader 0:  12%|█▏        | 8/67 [00:00<00:05, 10.59it/s][A
Validation DataLoader 0:  13%|█▎        | 9/67 [00:00<00:05, 10.59it/s][A
Validation DataLoader 0:  15%|█▍        | 10/67 [00:00<00:05, 1

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 282/282 [05:36<00:00,  0.84it/s, v_num=logs, train_loss=-1.37, val_loss=1.790, val_MeanAbsolutePercentageError=224.0, train_MeanAbsolutePercentageError=189.0]

📂 Files in checkpoint dir: ['last-epoch=19.ckpt', 'best-epoch=1-val_loss=-0.33.ckpt']
✅ Model loaded from default checkpoint

✅ N-BEATS Fit cost: 6703.68 seconds


In [13]:
# Cross Validation with Rolling Forecast
cv_test = nbeats_model.historical_forecasts(
    series           = Y_train_transformed,
    past_covariates  = X_train_transformed,
    start            = Y_train_transformed.start_time(),
    forecast_horizon = nbeats_best_params.output_chunk_length,
    stride           = nbeats_best_params.output_chunk_length,
    retrain          = False,
    last_points_only = False,
)

# Evaluate
mape_cv = evaluate_cv_timeseries(
    forecasts  = cv_test,
    scaler     = Y_scaler,
    df_actual  = Y,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [14]:
# Save MAPE results
MAPE_sum     = sum(mape_cv.values())
mape_results = {**{f'MAPE_{k}': v for k, v in mape_cv.items()}}

print(f'\n💹 MAPE_sum : {MAPE_sum}')
print(f'🧠 MAPE CV: {mape_cv}\n')


💹 MAPE_sum : 2.3240526616573334
🧠 MAPE CV: {'y1': 0.31207942962646484, 'y2': 0.3415803909301758, 'y3': 0.5533257722854614, 'y4': 0.3243105709552765, 'y5': 0.2839317321777344, 'y6': 0.5088247656822205}



In [4]:
import torch
print(torch.cuda.is_available())

True


In [5]:
!nvidia-smi

Mon Dec  8 07:04:28 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...    Off |   00000000:01:00.0 Off |                  N/A |
|  0%   47C    P8              9W /  220W |       4MiB /  12282MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                