### Hourly forecasting of energy meter readings on BDG2 dataset

- historical data = 1 week (168 data points)
- forecast horizon = 1 day (24 data points)

**Loading TimesFM Model**

In [1]:
import os
import glob
import time
from datetime import datetime
import pandas as pd
import numpy as np
from collections import defaultdict

import timesfm

2024-11-13 11:37:11.925380: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.5/lib64


In [2]:
tfm = timesfm.TimesFm(
    context_len=512 ,
    horizon_len=24,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cpu'
)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

2024-10-23 15:51:19.607356: W external/xla/xla/service/gpu/nvptx_compiler.cc:718] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.5.82). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 2.49 seconds.
Restoring checkpoint from /home/user/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 0.95 seconds.
Jitting decoding.
Jitted decoding in 18.41 seconds.


In [3]:
# Data pipelining
def get_batched_data_fn(sub_df,
    batch_size: int = 128, 
    context_len: int = 168, 
    horizon_len: int = 24):
    
    examples = defaultdict(list)
    num_examples = 0
    for start in range(0, len(sub_df) - (context_len + horizon_len), horizon_len):
      num_examples += 1
      #examples["country"].append(country)
      examples["inputs"].append(sub_df["y"][start:(context_end := start + context_len)].tolist())
      #examples["gen_forecast"].append(sub_df["gen_forecast"][start:context_end + horizon_len].tolist())
      #examples["week_day"].append(sub_df["week_day"][start:context_end + horizon_len].tolist())
      examples["outputs"].append(sub_df["y"][context_end:(context_end + horizon_len)].tolist())
      examples['inputs_ts'].append(sub_df["ds"][start:(context_end := start + context_len)].tolist())
      examples["outputs_ts"].append(sub_df["ds"][context_end:(context_end + horizon_len)].tolist())

    #print(num_examples)
  
    def data_fn():
        for i in range(1 + (num_examples - 1) // batch_size):
            yield {k: v[(i * batch_size) : ((i + 1) * batch_size)] for k, v in examples.items()}
  
    return data_fn

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale

# Benchmark
batch_size = 32
context_len = 168
horizon_len = 24

def process_building(df):
   #  input_data = get_batched_data_fn(df, batch_size=32)
    input_data = get_batched_data_fn(df, batch_size=500)

    metrics = defaultdict(list)
    results_all = []
    for i, example in enumerate(input_data()):
        #print(datetime.now(), i)
        raw_forecast, _ = tfm.forecast(inputs=example["inputs"], freq=[0] * len(example["inputs"]))

        #print(f"Batch {i+1}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, Time: {end_time - start_time:.2f}s")
        for ts, y_true, y_pred in zip(example['outputs_ts'], example['outputs'], raw_forecast):
            res_df = pd.DataFrame({'ts': ts, 'y_true': y_true,'y_pred': y_pred})
            results_all.append(res_df)
        
    results_all_df = pd.concat(results_all)
    return results_all_df

def process_file(filename):
    df = pd.read_csv(filename)
    df = df.set_index(['timestamp'])

    if df.shape[1] < 2:
        return None
        
    print(datetime.now(), df.shape, flush=True)

    results_all = []
    i =0
    for building_name in df.columns:
        print(datetime.now(), building_name, flush=True)
        df1 = df[[building_name]]
        df1 = df1.reset_index()
        df1.columns = ['ds', 'y']

        df1['y'] = minmax_scale(df1['y'])

        res = process_building(df1)
        res['building'] = building_name
        results_all.append(res)
        i+=1
        # if i == 2:
        #    break
        #break
        
    results_all_df = pd.concat(results_all)
    return results_all_df

In [5]:
files_list = glob.glob('/home/user/New_Buildings_Datasets/Mathura_and_Bareilly/dataverse_files/processed/Bareilly/*csv')

dataset = 'Bareilly'
os.makedirs(f'./forecasts/{dataset}/', exist_ok = True)
os.makedirs(f'./results/{dataset}/', exist_ok = True)

for filename in files_list:
    print(datetime.now(), filename)
    results = process_file(filename)
    if results is not None:
        results.to_csv(f'./forecasts/{dataset}/{os.path.basename(filename)}')
    print('')

2024-10-23 15:51:42.555891 /home/user/New_Buildings_Datasets/Mathura_and_Bareilly/dataverse_files/processed/Bareilly/Bareilly_2021.csv
2024-10-23 15:51:42.582377 (7296, 38)
2024-10-23 15:51:42.583167 BR02
2024-10-23 15:51:44.815488 BR04
2024-10-23 15:51:46.912734 BR05
2024-10-23 15:51:49.055236 BR06
2024-10-23 15:51:51.196765 BR08
2024-10-23 15:51:53.298718 BR09
2024-10-23 15:51:55.380087 BR11
2024-10-23 15:51:57.804546 BR12
2024-10-23 15:52:00.810438 BR13
2024-10-23 15:52:02.963385 BR15
2024-10-23 15:52:05.094254 BR16
2024-10-23 15:52:07.257893 BR18
2024-10-23 15:52:09.349582 BR19
2024-10-23 15:52:11.422927 BR22
2024-10-23 15:52:13.567138 BR24
2024-10-23 15:52:15.678683 BR27
2024-10-23 15:52:17.759298 BR28
2024-10-23 15:52:19.849994 BR29
2024-10-23 15:52:23.329286 BR30
2024-10-23 15:52:25.401276 BR31
2024-10-23 15:52:27.432028 BR32
2024-10-23 15:52:29.812177 BR33
2024-10-23 15:52:31.953321 BR34
2024-10-23 15:52:34.123140 BR35
2024-10-23 15:52:36.286219 BR36
2024-10-23 15:52:38.427244 

### Metrics

In [3]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import root_mean_squared_log_error
from permetrics.regression import RegressionMetric

dataset = 'Bareilly'
files_list = glob.glob(f'./forecasts/{dataset}/*.csv')

metrics_all_files = []

for filename in files_list:
    res = pd.read_csv(filename)
    metrics_all = []
    for (g, data) in res.groupby(['building']):
        data = data.dropna()
        data = data[data.y_pred >= 0]
        print(g)  
        rmse= root_mean_squared_error(data.y_true, data.y_pred)
        mae= mean_absolute_error(data.y_true, data.y_pred)
        mape = mean_absolute_percentage_error(data.y_true, data.y_pred)
        mse= mean_squared_error(data.y_true, data.y_pred)
        msle= mean_squared_log_error(data.y_true, data.y_pred)
        rmsle= root_mean_squared_log_error(data.y_true, data.y_pred)
        nrmse = rmse / (data.y_true.mean()) 

        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        nrmse_eve = evaluator.normalized_root_mean_square_error()
        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        smape= evaluator.symmetric_mean_absolute_percentage_error()
    
        metrics = pd.DataFrame({'building_name': [g[0]], 
                           'mae': [mae],
                            'mape': [mape],
                           'mse': [mse], 'rmse': [rmse], 'msle': [msle], 'rmsle': [rmsle], 'nrmse' : [nrmse],
                              'nrmse_eve':[nrmse_eve] , 'sMAPE' : [smape]})
        metrics_all.append(metrics)
    
    metrics_all_df = pd.concat(metrics_all)
    metrics_all_df.to_csv(f'./results/{dataset}/{os.path.basename(filename)}')

    metrics_all_df['filename'] = os.path.basename(filename)
    metrics_all_files.append(metrics_all_df)

metrics_all_files_df = pd.concat(metrics_all_files)

('BR02',)
('BR04',)
('BR05',)
('BR06',)
('BR08',)
('BR09',)
('BR11',)
('BR12',)
('BR13',)
('BR15',)
('BR16',)
('BR18',)
('BR19',)
('BR22',)
('BR24',)
('BR27',)
('BR28',)
('BR29',)
('BR30',)
('BR31',)
('BR32',)
('BR33',)
('BR34',)
('BR35',)
('BR36',)
('BR37',)
('BR38',)
('BR39',)
('BR42',)
('BR43',)
('BR44',)
('BR45',)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred

('BR46',)
('BR48',)
('BR49',)
('BR50',)
('BR51',)
('BR52',)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)


('BR02',)
('BR03',)
('BR04',)
('BR05',)
('BR06',)
('BR07',)
('BR08',)
('BR09',)
('BR10',)
('BR11',)
('BR12',)
('BR13',)
('BR14',)
('BR15',)
('BR16',)
('BR17',)
('BR18',)
('BR19',)
('BR20',)
('BR22',)
('BR23',)
('BR24',)
('BR26',)
('BR27',)
('BR28',)
('BR29',)
('BR30',)
('BR31',)
('BR32',)
('BR33',)
('BR34',)
('BR35',)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred

('BR36',)
('BR37',)
('BR38',)
('BR39',)
('BR42',)
('BR43',)
('BR44',)
('BR45',)
('BR46',)
('BR48',)
('BR49',)
('BR50',)
('BR51',)
('BR52',)


  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)), axis=0)
  result = np.mean(np.abs(y_pred

In [7]:
metrics_all_files_df.to_csv(f'./results/{dataset}/results_combined.csv')
metrics_all_files_df

Unnamed: 0,building_name,mae,mape,mse,rmse,msle,rmsle,nrmse,nrmse_eve,sMAPE,filename
0,BR02,0.037335,1.256903e+14,0.003569,0.059738,0.002662,0.051594,4.432255,1.703798,1.000000,Bareilly_2021.csv
0,BR04,0.078633,5.039031e+13,0.013392,0.115722,0.008481,0.092093,0.613257,1.047258,1.000000,Bareilly_2021.csv
0,BR05,0.068097,1.478606e+14,0.011467,0.107082,0.008107,0.090039,1.633945,0.931142,1.000000,Bareilly_2021.csv
0,BR06,0.089252,2.122870e+13,0.016847,0.129794,0.010139,0.100694,0.625958,1.284235,0.220528,Bareilly_2021.csv
0,BR08,0.035425,1.215799e+14,0.003145,0.056082,0.002190,0.046792,3.067773,1.100475,1.000000,Bareilly_2021.csv
...,...,...,...,...,...,...,...,...,...,...,...
0,BR48,0.057819,5.738910e+13,0.009802,0.099003,0.006024,0.077617,0.915366,0.817169,1.000000,Bareilly_2020.csv
0,BR49,0.059765,1.654043e+13,0.009715,0.098566,0.005991,0.077400,0.666762,0.751306,1.000000,Bareilly_2020.csv
0,BR50,0.079957,7.237399e+13,0.014764,0.121506,0.009794,0.098967,0.899994,1.103203,1.000000,Bareilly_2020.csv
0,BR51,0.117952,4.377427e+13,0.029983,0.173155,0.016222,0.127364,0.605140,0.908497,1.000000,Bareilly_2020.csv


In [8]:
metrics_all_files_df.describe()*100

  sqr = _ensure_numeric((avg - values) ** 2)


Unnamed: 0,mae,mape,mse,rmse,msle,rmsle,nrmse,nrmse_eve,sMAPE
count,8400.0,8400.0,8400.0,8400.0,8400.0,8400.0,8400.0,8400.0,8400.0
mean,6.762985,7205939000000000.0,1.311893,10.75863,0.805848,8.511681,inf,120.299168,90.952066
std,2.717005,4927470000000000.0,0.890419,3.953124,0.50882,2.869517,,69.190573,24.888392
min,2.735863,402604400000000.0,0.086108,2.934409,0.083436,2.888536,30.499864,60.708473,13.714342
25%,4.918905,2711880000000000.0,0.653107,8.081464,0.462895,6.800917,66.741042,85.042152,100.0
50%,6.332844,6791762000000000.0,1.016836,10.083812,0.656375,8.101686,83.465642,103.48939,100.0
75%,8.663483,1.06938e+16,1.732583,13.162731,1.084644,10.414609,135.507054,126.745198,100.0
max,13.742429,2.087746e+16,3.748475,19.360979,2.282204,15.106965,inf,449.732535,100.0
