### Hourly forecasting of energy meter readings on BDG2 dataset

- historical data = 1 week (168 data points)
- forecast horizon = 1 day (24 data points)

**Loading TimesFM Model**

In [1]:
import os
import glob
import time
from datetime import datetime
import pandas as pd
import numpy as np
from collections import defaultdict

import timesfm

2024-10-23 11:45:18.010956: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.5/lib64


In [2]:
tfm = timesfm.TimesFm(
    context_len=512 ,
    horizon_len=24,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cpu'
)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

2024-10-23 11:45:23.713827: W external/xla/xla/service/gpu/nvptx_compiler.cc:718] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.5.82). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 2.37 seconds.
Restoring checkpoint from /home/user/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 0.93 seconds.
Jitting decoding.
Jitted decoding in 16.43 seconds.


In [3]:
# Data pipelining
def get_batched_data_fn(sub_df,
    batch_size: int = 128, 
    context_len: int = 168, 
    horizon_len: int = 24):
    
    examples = defaultdict(list)
    num_examples = 0
    for start in range(0, len(sub_df) - (context_len + horizon_len), horizon_len):
      num_examples += 1
      #examples["country"].append(country)
      examples["inputs"].append(sub_df["y"][start:(context_end := start + context_len)].tolist())
      #examples["gen_forecast"].append(sub_df["gen_forecast"][start:context_end + horizon_len].tolist())
      #examples["week_day"].append(sub_df["week_day"][start:context_end + horizon_len].tolist())
      examples["outputs"].append(sub_df["y"][context_end:(context_end + horizon_len)].tolist())
      examples['inputs_ts'].append(sub_df["ds"][start:(context_end := start + context_len)].tolist())
      examples["outputs_ts"].append(sub_df["ds"][context_end:(context_end + horizon_len)].tolist())

    #print(num_examples)
  
    def data_fn():
        for i in range(1 + (num_examples - 1) // batch_size):
            yield {k: v[(i * batch_size) : ((i + 1) * batch_size)] for k, v in examples.items()}
  
    return data_fn

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale

# Benchmark
batch_size = 32
context_len = 168
horizon_len = 24

def process_building(df):
   #  input_data = get_batched_data_fn(df, batch_size=32)
    input_data = get_batched_data_fn(df, batch_size=500)

    metrics = defaultdict(list)
    results_all = []
    for i, example in enumerate(input_data()):
        #print(datetime.now(), i)
        raw_forecast, _ = tfm.forecast(inputs=example["inputs"], freq=[0] * len(example["inputs"]))

        #print(f"Batch {i+1}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, Time: {end_time - start_time:.2f}s")
        for ts, y_true, y_pred in zip(example['outputs_ts'], example['outputs'], raw_forecast):
            res_df = pd.DataFrame({'ts': ts, 'y_true': y_true,'y_pred': y_pred})
            results_all.append(res_df)
        
    results_all_df = pd.concat(results_all)
    return results_all_df

def process_file(filename):
    df = pd.read_csv(filename)
    df = df.set_index(['timestamp'])

    if df.shape[1] < 2:
        return None
        
    print(datetime.now(), df.shape, flush=True)

    results_all = []
    i =0
    for building_name in df.columns:
        print(datetime.now(), building_name, flush=True)
        df1 = df[[building_name]]
        df1 = df1.reset_index()
        df1.columns = ['ds', 'y']

        df1['y'] = minmax_scale(df1['y'])

        res = process_building(df1)
        res['building'] = building_name
        results_all.append(res)
        i+=1
        # if i == 2:
        #    break
        #break
        
    results_all_df = pd.concat(results_all)
    return results_all_df

In [5]:
files_list = glob.glob('/home/user/New_Buildings_Datasets/Enernoc/csv-only/processed/*.csv')

dataset = 'Enernoc'
os.makedirs(f'./forecasts/{dataset}/', exist_ok = True)
os.makedirs(f'./results/{dataset}/', exist_ok = True)

for filename in files_list:
    print(datetime.now(), filename)
    results = process_file(filename)
    if results is not None:
        results.to_csv(f'./forecasts/{dataset}/{os.path.basename(filename)}')
    print('')

2024-10-23 11:45:44.553854 /home/user/New_Buildings_Datasets/Enernoc/csv-only/processed/enernoc.csv
2024-10-23 11:45:44.666521 (8785, 100)
2024-10-23 11:45:44.667583 767
2024-10-23 11:45:47.167285 304
2024-10-23 11:45:49.288766 399
2024-10-23 11:45:51.528211 21
2024-10-23 11:45:53.839712 805
2024-10-23 11:45:56.192247 14
2024-10-23 11:45:58.743033 404
2024-10-23 11:46:00.879432 78
2024-10-23 11:46:02.995921 731
2024-10-23 11:46:05.395198 218
2024-10-23 11:46:07.499130 366
2024-10-23 11:46:09.618496 766
2024-10-23 11:46:11.836444 197
2024-10-23 11:46:13.926797 30
2024-10-23 11:46:16.047038 742
2024-10-23 11:46:18.219180 32
2024-10-23 11:46:20.483827 137
2024-10-23 11:46:22.692523 36
2024-10-23 11:46:25.341102 9
2024-10-23 11:46:27.694469 808
2024-10-23 11:46:29.916217 391
2024-10-23 11:46:32.052220 213
2024-10-23 11:46:34.265287 236
2024-10-23 11:46:36.409738 6
2024-10-23 11:46:38.703607 224
2024-10-23 11:46:41.049299 45
2024-10-23 11:46:43.188265 771
2024-10-23 11:46:45.652714 492
2024

### Metrics

In [6]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import root_mean_squared_log_error
from permetrics.regression import RegressionMetric

dataset = 'Enernoc'
files_list = glob.glob(f'./forecasts/{dataset}/*.csv')

metrics_all_files = []

for filename in files_list:
    res = pd.read_csv(filename)
    metrics_all = []
    for (g, data) in res.groupby(['building']):
        data = data.dropna()
        data = data[data.y_pred >= 0]
        print(g)  
        rmse= root_mean_squared_error(data.y_true, data.y_pred)
        mae= mean_absolute_error(data.y_true, data.y_pred)
        mape = mean_absolute_percentage_error(data.y_true, data.y_pred)
        mse= mean_squared_error(data.y_true, data.y_pred)
        msle= mean_squared_log_error(data.y_true, data.y_pred)
        rmsle= root_mean_squared_log_error(data.y_true, data.y_pred)
        nrmse = rmse / (data.y_true.mean()) 

        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        nrmse_eve = evaluator.normalized_root_mean_square_error()
        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        smape= evaluator.symmetric_mean_absolute_percentage_error()
    
        metrics = pd.DataFrame({'building_name': [g[0]], 
                           'mae': [mae],
                            'mape': [mape],
                           'mse': [mse], 'rmse': [rmse], 'msle': [msle], 'rmsle': [rmsle], 'nrmse' : [nrmse],
                              'nrmse_eve':[nrmse_eve] , 'sMAPE' : [smape]})
        metrics_all.append(metrics)
    
    metrics_all_df = pd.concat(metrics_all)
    metrics_all_df.to_csv(f'./results/{dataset}/{os.path.basename(filename)}')

    metrics_all_df['filename'] = os.path.basename(filename)
    metrics_all_files.append(metrics_all_df)

metrics_all_files_df = pd.concat(metrics_all_files)

(6,)
(8,)
(9,)
(10,)
(12,)
(13,)
(14,)
(21,)
(22,)
(25,)
(29,)
(30,)
(31,)
(32,)
(36,)
(41,)
(42,)
(44,)
(45,)
(49,)
(51,)
(55,)
(56,)
(65,)
(78,)
(88,)
(92,)
(99,)
(100,)
(101,)
(103,)
(109,)
(111,)
(116,)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)


(136,)
(137,)
(144,)
(153,)
(186,)
(197,)
(213,)
(214,)
(217,)
(218,)
(224,)
(228,)
(236,)
(259,)
(270,)
(275,)
(281,)
(285,)
(304,)
(339,)
(341,)
(363,)
(366,)
(384,)
(386,)
(391,)
(399,)
(400,)
(401,)
(404,)
(427,)
(454,)
(455,)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)


(472,)
(474,)
(475,)
(478,)
(484,)
(492,)
(496,)
(512,)
(648,)
(654,)
(673,)
(674,)
(690,)
(697,)
(703,)
(716,)
(718,)
(731,)
(737,)
(742,)
(744,)
(745,)
(755,)
(761,)
(765,)
(766,)
(767,)
(771,)
(786,)
(805,)
(808,)
(832,)
(887,)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)


In [7]:
metrics_all_files_df.to_csv(f'./results/{dataset}/results_combined.csv')
metrics_all_files_df

Unnamed: 0,building_name,mae,mape,mse,rmse,msle,rmsle,nrmse,nrmse_eve,sMAPE,filename
0,6,0.041153,1.332196e+11,0.003910,0.062529,0.001678,0.040966,0.126796,0.354790,0.043328,enernoc.csv
0,8,0.141545,4.876642e-01,0.053731,0.231800,0.026560,0.162973,0.583610,0.928373,0.187860,enernoc.csv
0,9,0.050874,8.606750e+12,0.007958,0.089209,0.003863,0.062157,0.243028,0.654456,1.000000,enernoc.csv
0,10,0.051640,9.886653e-02,0.007141,0.084503,0.002978,0.054568,0.135288,0.729407,0.045198,enernoc.csv
0,12,0.033557,6.433885e-02,0.002832,0.053219,0.001102,0.033195,0.104401,0.303780,0.032407,enernoc.csv
...,...,...,...,...,...,...,...,...,...,...,...
0,786,0.069508,4.757946e+11,0.023026,0.151745,0.010325,0.101611,0.179444,1.564703,0.057457,enernoc.csv
0,805,0.116549,4.760429e-01,0.029830,0.172712,0.015401,0.124102,0.358872,0.823130,0.162555,enernoc.csv
0,808,0.048525,2.799771e-01,0.007668,0.087564,0.004496,0.067052,0.486683,0.722066,0.129612,enernoc.csv
0,832,0.141646,4.883799e-01,0.053731,0.231798,0.026559,0.162969,0.583607,0.928938,0.188108,enernoc.csv


In [8]:
metrics_all_files_df.describe()*100

Unnamed: 0,building_name,mae,mape,mse,rmse,msle,rmsle,nrmse,nrmse_eve,sMAPE
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,33888.0,5.960321,114489200000000.0,1.277212,9.673424,0.629709,6.709898,27.014571,59.280052,17.199729
std,27637.111925,3.721503,416480400000000.0,1.533531,5.8729,0.776169,4.257873,21.309223,26.795223,27.367449
min,600.0,1.855307,2.856651,0.064497,2.539622,0.02301,1.516902,3.844135,19.553485,1.413555
25%,8550.0,3.2559,9.886627,0.27088,5.200351,0.10767,3.280622,10.193189,35.559004,3.149388
50%,27800.0,5.116327,31.3455,0.753259,8.679049,0.331928,5.761324,23.438839,61.193146,7.567079
75%,54600.0,7.571796,21792760000000.0,1.495021,12.227104,0.701947,8.378229,36.333989,72.947114,14.412672
max,88700.0,17.924922,2791901000000000.0,6.742792,25.966886,3.447485,18.567403,106.484527,156.470308,100.0
