In [1]:
cd lag-llama

/home/user/energygpt/lagllama/lag-llama


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import os
import glob
from collections import defaultdict
from datetime import datetime
from itertools import islice

from matplotlib import pyplot as plt
import matplotlib.dates as mdates

import torch
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.repository.datasets import get_dataset

from gluonts.dataset.pandas import PandasDataset
import pandas as pd

from lag_llama.gluon.estimator import LagLlamaEstimator

import warnings  
warnings.filterwarnings('ignore') 


In [3]:
def get_lag_llama_predictions(dataset, prediction_length, device, context_length=168, use_rope_scaling=False, num_samples=100):
    ckpt = torch.load("./checkpoints/lag-llama.ckpt", map_location=device) # Uses GPU since in this Colab we use a GPU.
    estimator_args = ckpt["hyper_parameters"]["model_kwargs"]

    rope_scaling_arguments = {
        "type": "linear",
        "factor": max(1.0, (context_length + prediction_length) / estimator_args["context_length"]),
    }

    estimator = LagLlamaEstimator(
        ckpt_path="./checkpoints/lag-llama.ckpt",
        prediction_length=prediction_length,
        context_length=context_length, # Lag-Llama was trained with a context length of 32, but can work with any context length

        # estimator args
        input_size=estimator_args["input_size"],
        n_layer=estimator_args["n_layer"],
        n_embd_per_head=estimator_args["n_embd_per_head"],
        n_head=estimator_args["n_head"],
        scaling=estimator_args["scaling"],
        time_feat=estimator_args["time_feat"],
        rope_scaling=rope_scaling_arguments if use_rope_scaling else None,

        batch_size=32,
        num_parallel_samples=100,
        device=device,
    )

    lightning_module = estimator.create_lightning_module()
    transformation = estimator.create_transformation()
    predictor = estimator.create_predictor(transformation, lightning_module)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset,
        predictor=predictor,
        num_samples=num_samples
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)

    return forecasts, tss

In [None]:
# Data pipelining
def get_batched_data_fn(sub_df,
    batch_size: int = 128, 
    context_len: int = 168, 
    horizon_len: int = 24):
    
    examples = defaultdict(list)
    num_examples = 0
    for start in range(0, len(sub_df) - (context_len + horizon_len), horizon_len):
      num_examples += 1
      #examples["country"].append(country)
      examples["inputs"].append(sub_df["y"][start:(context_end := start + context_len)].tolist())
      #examples["gen_forecast"].append(sub_df["gen_forecast"][start:context_end + horizon_len].tolist())
      #examples["week_day"].append(sub_df["week_day"][start:context_end + horizon_len].tolist())
      examples["outputs"].append(sub_df["y"][context_end:(context_end + horizon_len)].tolist())
      examples['inputs_ts'].append(sub_df.index[start:(context_end := start + context_len)])
      examples["outputs_ts"].append(sub_df.index[context_end:(context_end + horizon_len)])

    return examples

In [None]:
def forecast_building(df):

    # Set numerical columns as float32
    for col in df.columns:
        # Check if column is not of string type
        if df[col].dtype != 'object' and pd.api.types.is_string_dtype(df[col]) == False:
            df[col] = df[col].astype('float32')
    
    # Create the Pandas
    dataset = PandasDataset.from_long_dataframe(df, target="target", item_id="item_id")
    
    backtest_dataset = dataset
    prediction_length = 24  # Define your prediction length. We use 24 here since the data is of hourly frequency
    num_samples = 10 # number of samples sampled from the probability distribution for each timestep
    device = torch.device("cuda:0") # You can switch this to CPU or other GPUs if you'd like, depending on your environment    
    
    ckpt = torch.load("./checkpoints/lag-llama.ckpt", map_location=device) # Uses GPU since in this Colab we use a GPU.

    forecasts, tss = get_lag_llama_predictions(backtest_dataset, prediction_length, device, num_samples=num_samples)

    evaluator = Evaluator()
    agg_metrics, ts_metrics = evaluator(iter(tss), iter(forecasts))     
    
    res_all = []
    for ts, fc in zip(tss, forecasts):
        res = ts[ts.index.isin(fc.index)]
        res.columns = ['y_true']
        res.insert(1, 'y_pred', fc.median)
        res_all.append(res)
    res_all_df = pd.concat(res_all).sort_index()
    return res_all_df, agg_metrics, ts_metrics 

    
def process_building(df): 
    building_name = df.columns[0]
    df.columns = ['y']
    input_data = get_batched_data_fn(df, batch_size=500)
    
    windows_all = []
    counter = 1
    for inputs_ts, inputs, outputs_ts, outputs in zip(input_data['inputs_ts'], 
                                                      input_data['inputs'], 
                                                      input_data['outputs_ts'], 
                                                      input_data['outputs']):
        
        input_df = pd.DataFrame({'timestamp': inputs_ts, 
                                 'target': inputs})
        
        output_df = pd.DataFrame({'timestamp': outputs_ts, 
                                 'target': outputs})
        combined = pd.concat([input_df, output_df], axis=0)
        combined['item_id'] = str(building_name) + '_' + str(counter)
        combined['item_id_no'] = counter
        counter += 1
        windows_all.append(combined)
        
    windows_all_df = pd.concat(windows_all)
    windows_all_df.timestamp = pd.to_datetime(windows_all_df.timestamp)
    windows_all_df.set_index('timestamp', inplace=True)

    res, agg_metrics, ts_metrics = forecast_building(windows_all_df)
    return res, agg_metrics, ts_metrics


# Benchmark
batch_size = 32
context_len = 168
horizon_len = 24

def process_file(filename):
    df = pd.read_csv(filename)
    df = df.set_index(['timestamp'])

    if df.shape[1] < 2:
        return None
        
    print(datetime.now(), df.shape, flush=True)

    res_all = []
    agg_metrics_all = []
    ts_metrics_all = []
    
    i = 0
    for building_name in df.columns:
        print(datetime.now(), i, '/', len(df.columns), building_name, flush=True)
        df1 = df[[building_name]]#.head(24*200)

        res, agg_metrics, ts_metrics = process_building(df1)
        res['building'] = building_name
        res['filename'] = filename
        res_all.append(res)
        
        ts_metrics.insert(0, 'building', building_name)
        ts_metrics.insert(0, 'filename', filename)
        ts_metrics = ts_metrics.sort_values(['forecast_start'])
        ts_metrics_all.append(ts_metrics)
        
        agg_metrics_df = pd.DataFrame([agg_metrics])
        agg_metrics_df.insert(0, 'building', building_name)
        agg_metrics_df.insert(0, 'filename', filename)
        agg_metrics_all.append(agg_metrics_df)

        i += 1
        if i % 10 == 0:
            print(datetime.now(), 'Saving...')
            res_all_df = pd.concat(res_all).round(6)
            res_all_df = res_all_df.reset_index()
            res_all_df = res_all_df.rename(columns={res_all_df.columns[0]: "timestamp" })
            res_all_df.to_csv(f'../forecasts/{dataset}/{os.path.basename(filename)}', index=False)            

            ts_metrics_all_df = pd.concat(ts_metrics_all).round(6)
            ts_metrics_all_df.to_csv(f'../results/{dataset}/ts_metrics_{os.path.basename(filename)}', index=False)            

            agg_metrics_all_df = pd.concat(agg_metrics_all).round(6)            
            agg_metrics_all_df.to_csv(f'../results/{dataset}/agg_metrics_{os.path.basename(filename)}', index=False)            
    
    
    res_all_df = pd.concat(res_all).round(6)
    res_all_df = res_all_df.reset_index()
    res_all_df = res_all_df.rename(columns={res_all_df.columns[0]: "timestamp" })
    res_all_df.to_csv(f'../forecasts/{dataset}/{os.path.basename(filename)}', index=False)            

    ts_metrics_all_df = pd.concat(ts_metrics_all).round(6)    
    ts_metrics_all_df.to_csv(f'../results/{dataset}/ts_metrics_{os.path.basename(filename)}', index=False)            

    agg_metrics_all_df = pd.concat(agg_metrics_all).round(6)   
    agg_metrics_all_df.to_csv(f'../results/{dataset}/agg_metrics_{os.path.basename(filename)}', index=False)                

    return res_all_df, ts_metrics_all_df, agg_metrics_all_df

In [18]:
files_list = glob.glob('/home/user/New_Buildings_Datasets/Enernoc/csv-only/processed/*.csv')

dataset = 'Enernoc'
os.makedirs(f'../forecasts/{dataset}/', exist_ok = True)
os.makedirs(f'../results/{dataset}/', exist_ok = True)

for filename in files_list:
    print(datetime.now(), filename)
    results = process_file(filename)
    # if results is not None:
    #     results.to_csv(f'../forecasts/{dataset}/{os.path.basename(filename)}', index=False)
    print('')

2024-10-26 09:14:41.001636 /home/user/New_Buildings_Datasets/Enernoc/csv-only/processed/enernoc.csv
2024-10-26 09:14:41.120945 (8785, 100)
2024-10-26 09:14:41.121935 0 / 100 767


Running evaluation: 359it [00:00, 7185.49it/s]
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error


2024-10-26 09:15:20.119480 1 / 100 304


Running evaluation: 359it [00:00, 8140.80it/s]


2024-10-26 09:15:59.311033 2 / 100 399


Running evaluation: 359it [00:00, 8086.24it/s]


2024-10-26 09:16:38.337258 3 / 100 21


Running evaluation: 359it [00:00, 8454.26it/s]


2024-10-26 09:17:15.821050 4 / 100 805


Running evaluation: 359it [00:00, 8471.20it/s]


2024-10-26 09:17:53.495742 5 / 100 14


Running evaluation: 359it [00:00, 7860.04it/s]


2024-10-26 09:18:30.981611 6 / 100 404


Running evaluation: 359it [00:00, 7562.69it/s]


2024-10-26 09:19:08.319520 7 / 100 78


Running evaluation: 359it [00:00, 7647.39it/s]


2024-10-26 09:19:45.719769 8 / 100 731


Running evaluation: 359it [00:00, 7142.58it/s]


2024-10-26 09:20:23.021996 9 / 100 218


Running evaluation: 359it [00:00, 7706.53it/s]


2024-10-26 09:21:00.525866 Saving...
2024-10-26 09:21:01.211069 10 / 100 366


Running evaluation: 359it [00:00, 7071.36it/s]


2024-10-26 09:21:38.682412 11 / 100 766


Running evaluation: 359it [00:00, 7889.11it/s]


2024-10-26 09:22:15.962937 12 / 100 197


Running evaluation: 359it [00:00, 7791.51it/s]


2024-10-26 09:22:53.522421 13 / 100 30


Running evaluation: 359it [00:00, 7211.09it/s]


2024-10-26 09:23:30.996074 14 / 100 742


Running evaluation: 359it [00:00, 7349.99it/s]


2024-10-26 09:24:08.236741 15 / 100 32


Running evaluation: 359it [00:00, 7519.53it/s]


2024-10-26 09:24:45.634137 16 / 100 137


Running evaluation: 359it [00:00, 7894.57it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:25:23.212426 17 / 100 36


Running evaluation: 359it [00:00, 7436.89it/s]


2024-10-26 09:26:01.332486 18 / 100 9


Running evaluation: 359it [00:00, 7828.00it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:26:39.568330 19 / 100 808


Running evaluation: 359it [00:00, 7994.20it/s]


2024-10-26 09:27:16.913583 Saving...
2024-10-26 09:27:18.242584 20 / 100 391


Running evaluation: 359it [00:00, 8072.41it/s]


2024-10-26 09:27:55.474530 21 / 100 213


Running evaluation: 359it [00:00, 8016.93it/s]


2024-10-26 09:28:34.340434 22 / 100 236


Running evaluation: 359it [00:00, 8027.78it/s]


2024-10-26 09:29:12.061688 23 / 100 6


Running evaluation: 359it [00:00, 7496.50it/s]


2024-10-26 09:29:50.610193 24 / 100 224


Running evaluation: 359it [00:00, 7928.78it/s]


2024-10-26 09:30:29.193223 25 / 100 45


Running evaluation: 359it [00:00, 8584.84it/s]


2024-10-26 09:31:06.867238 26 / 100 771


Running evaluation: 359it [00:00, 7018.59it/s]


2024-10-26 09:31:46.826132 27 / 100 492


Running evaluation: 359it [00:00, 7606.44it/s]


2024-10-26 09:32:27.563125 28 / 100 384


Running evaluation: 359it [00:00, 8176.25it/s]


2024-10-26 09:33:04.859523 29 / 100 153


Running evaluation: 359it [00:00, 8228.08it/s]


2024-10-26 09:33:43.545356 Saving...
2024-10-26 09:33:45.436390 30 / 100 136


Running evaluation: 359it [00:00, 7929.16it/s]


2024-10-26 09:34:25.406422 31 / 100 386


Running evaluation: 359it [00:00, 7644.90it/s]


2024-10-26 09:35:04.635624 32 / 100 51


Running evaluation: 359it [00:00, 8585.13it/s]


2024-10-26 09:35:43.697803 33 / 100 472


Running evaluation: 359it [00:00, 7444.87it/s]


2024-10-26 09:36:22.318891 34 / 100 281


Running evaluation: 359it [00:00, 7177.30it/s]


2024-10-26 09:37:00.999083 35 / 100 474


Running evaluation: 359it [00:00, 8231.95it/s]


2024-10-26 09:37:38.399341 36 / 100 697


Running evaluation: 359it [00:00, 8313.53it/s]


2024-10-26 09:38:15.273499 37 / 100 49


Running evaluation: 359it [00:00, 7604.52it/s]


2024-10-26 09:38:52.362330 38 / 100 755


Running evaluation: 359it [00:00, 7729.35it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:39:29.438300 39 / 100 228


Running evaluation: 359it [00:00, 7992.84it/s]


2024-10-26 09:40:06.390718 Saving...
2024-10-26 09:40:08.775744 40 / 100 427


Running evaluation: 359it [00:00, 7763.27it/s]


2024-10-26 09:40:45.726272 41 / 100 454


Running evaluation: 359it [00:00, 8125.43it/s]


2024-10-26 09:41:22.837373 42 / 100 690


Running evaluation: 359it [00:00, 7808.39it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:41:59.681426 43 / 100 703


Running evaluation: 359it [00:00, 7417.11it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:42:37.618752 44 / 100 259


Running evaluation: 359it [00:00, 7571.21it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:43:15.278938 45 / 100 648


Running evaluation: 359it [00:00, 7935.34it/s]


2024-10-26 09:43:52.955334 46 / 100 341


Running evaluation: 359it [00:00, 7415.47it/s]


2024-10-26 09:44:31.362557 47 / 100 44


Running evaluation: 359it [00:00, 8119.47it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:45:11.467221 48 / 100 275


Running evaluation: 359it [00:00, 7720.00it/s]


2024-10-26 09:45:50.672100 49 / 100 718


Running evaluation: 359it [00:00, 7799.82it/s]


2024-10-26 09:46:29.032237 Saving...
2024-10-26 09:46:32.176921 50 / 100 25


Running evaluation: 359it [00:00, 7212.85it/s]


2024-10-26 09:47:12.547765 51 / 100 65


Running evaluation: 359it [00:00, 8347.22it/s]


2024-10-26 09:47:53.201052 52 / 100 455


Running evaluation: 359it [00:00, 7605.71it/s]


2024-10-26 09:48:33.017275 53 / 100 101


Running evaluation: 359it [00:00, 7935.26it/s]


2024-10-26 09:49:11.832177 54 / 100 673


Running evaluation: 359it [00:00, 8290.78it/s]


2024-10-26 09:49:50.827919 55 / 100 31


Running evaluation: 359it [00:00, 7838.31it/s]


2024-10-26 09:50:29.522515 56 / 100 10


Running evaluation: 359it [00:00, 8155.88it/s]


2024-10-26 09:51:08.042967 57 / 100 761


Running evaluation: 359it [00:00, 7951.77it/s]


2024-10-26 09:51:45.988480 58 / 100 478


Running evaluation: 359it [00:00, 7584.94it/s]


2024-10-26 09:52:24.567425 59 / 100 100


Running evaluation: 359it [00:00, 8137.33it/s]


2024-10-26 09:53:03.710013 Saving...
2024-10-26 09:53:07.407310 60 / 100 22


Running evaluation: 359it [00:00, 7879.08it/s]
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error


2024-10-26 09:53:46.229754 61 / 100 41


Running evaluation: 359it [00:00, 7997.17it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:54:23.267789 62 / 100 496


Running evaluation: 359it [00:00, 7870.39it/s]


2024-10-26 09:55:00.255517 63 / 100 217


Running evaluation: 359it [00:00, 8157.78it/s]


2024-10-26 09:55:37.364656 64 / 100 144


Running evaluation: 359it [00:00, 7552.45it/s]


2024-10-26 09:56:14.672953 65 / 100 214


Running evaluation: 359it [00:00, 8513.49it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-26 09:56:52.263392 66 / 100 512


Running evaluation: 359it [00:00, 7472.73it/s]


2024-10-26 09:57:32.137761 67 / 100 654


Running evaluation: 359it [00:00, 7735.79it/s]


2024-10-26 09:58:12.499523 68 / 100 363


Running evaluation: 359it [00:00, 7720.55it/s]


2024-10-26 09:58:53.385654 69 / 100 109


Running evaluation: 359it [00:00, 7795.90it/s]


2024-10-26 09:59:33.799856 Saving...
2024-10-26 09:59:38.318969 70 / 100 116


Running evaluation: 359it [00:00, 8692.08it/s]


2024-10-26 10:00:17.619749 71 / 100 111


Running evaluation: 359it [00:00, 6973.48it/s]


2024-10-26 10:00:54.746963 72 / 100 55


Running evaluation: 359it [00:00, 7363.72it/s]


2024-10-26 10:01:32.077814 73 / 100 832


Running evaluation: 359it [00:00, 7911.20it/s]


2024-10-26 10:02:09.969137 74 / 100 401


Running evaluation: 359it [00:00, 8134.60it/s]


2024-10-26 10:02:47.149666 75 / 100 99


Running evaluation: 359it [00:00, 8079.91it/s]


2024-10-26 10:03:24.876830 76 / 100 270


Running evaluation: 359it [00:00, 7428.78it/s]


2024-10-26 10:04:02.104554 77 / 100 29


Running evaluation: 359it [00:00, 7456.23it/s]


2024-10-26 10:04:39.066677 78 / 100 400


Running evaluation: 359it [00:00, 7478.00it/s]


2024-10-26 10:05:16.190711 79 / 100 786


Running evaluation: 359it [00:00, 7764.35it/s]


2024-10-26 10:05:53.390822 Saving...
2024-10-26 10:05:58.454650 80 / 100 186


Running evaluation: 359it [00:00, 8018.63it/s]


2024-10-26 10:06:38.974292 81 / 100 285


Running evaluation: 359it [00:00, 7616.44it/s]


2024-10-26 10:07:17.183489 82 / 100 12


Running evaluation: 359it [00:00, 7611.32it/s]


2024-10-26 10:07:55.350914 83 / 100 88


Running evaluation: 359it [00:00, 7375.33it/s]


2024-10-26 10:08:33.742301 84 / 100 744


Running evaluation: 359it [00:00, 8099.20it/s]


2024-10-26 10:09:14.558604 85 / 100 745


Running evaluation: 359it [00:00, 8473.10it/s]


2024-10-26 10:09:52.701634 86 / 100 13


Running evaluation: 359it [00:00, 8033.48it/s]


2024-10-26 10:10:30.865895 87 / 100 56


Running evaluation: 359it [00:00, 7081.34it/s]


2024-10-26 10:11:08.686824 88 / 100 103


Running evaluation: 359it [00:00, 8043.74it/s]


2024-10-26 10:11:47.298753 89 / 100 737


Running evaluation: 359it [00:00, 7706.76it/s]


2024-10-26 10:12:26.778555 Saving...
2024-10-26 10:12:32.793206 90 / 100 484


Running evaluation: 359it [00:00, 8426.40it/s]


2024-10-26 10:13:11.397742 91 / 100 674


Running evaluation: 359it [00:00, 8260.49it/s]


2024-10-26 10:13:49.139100 92 / 100 8


Running evaluation: 359it [00:00, 8158.75it/s]


2024-10-26 10:14:27.405258 93 / 100 92


Running evaluation: 359it [00:00, 7692.55it/s]


2024-10-26 10:15:05.989108 94 / 100 339


Running evaluation: 359it [00:00, 7417.55it/s]


2024-10-26 10:15:44.219171 95 / 100 887


Running evaluation: 359it [00:00, 7742.99it/s]


2024-10-26 10:16:24.085028 96 / 100 42


Running evaluation: 359it [00:00, 8064.41it/s]


2024-10-26 10:17:03.337845 97 / 100 475


Running evaluation: 359it [00:00, 7630.76it/s]


2024-10-26 10:17:42.300404 98 / 100 765


Running evaluation: 359it [00:00, 8096.68it/s]


2024-10-26 10:18:21.355677 99 / 100 716


Running evaluation: 359it [00:00, 7760.22it/s]


2024-10-26 10:19:01.897572 Saving...



## Metrics

In [19]:
#!pip install permetrics

In [20]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import root_mean_squared_log_error
from permetrics.regression import RegressionMetric

dataset = 'Enernoc'
files_list = glob.glob(f'../forecasts/{dataset}/*.csv')

metrics_all_files = []

for filename in files_list:
    res = pd.read_csv(filename)
    metrics_all = []
    for (g, data) in res.groupby(['building']):
        data = data.dropna()
        data = data[data.y_pred >= 0]
        #data = [data.y_pred >= 0]
        print(g)  
        if data.empty:
            continue 
            
        rmse= root_mean_squared_error(data.y_true, data.y_pred)
        mae= mean_absolute_error(data.y_true, data.y_pred)
        mape = mean_absolute_percentage_error(data.y_true, data.y_pred)
        mse= mean_squared_error(data.y_true, data.y_pred)
        msle= mean_squared_log_error(data.y_true, data.y_pred)
        rmsle= root_mean_squared_log_error(data.y_true, data.y_pred)

        evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
        nrmse = evaluator.normalized_root_mean_square_error()
    
        metrics = pd.DataFrame({'building_name': [g[0]], 
                           'mae': [mae],
                            'mape': [mape],
                           'mse': [mse], 'rmse': [rmse], 'msle': [msle], 'rmsle': [rmsle], 'nrmse' : [nrmse]})
        metrics_all.append(metrics)
    
    metrics_all_df = pd.concat(metrics_all)
    metrics_all_df.to_csv(f'../results/{dataset}/{os.path.basename(filename)}')

    metrics_all_df['filename'] = os.path.basename(filename)
    metrics_all_files.append(metrics_all_df)

metrics_all_files_df = pd.concat(metrics_all_files)

(6,)
(8,)
(9,)
(10,)
(12,)
(13,)
(14,)
(21,)
(22,)
(25,)
(29,)
(30,)
(31,)
(32,)
(36,)
(41,)
(42,)
(44,)
(45,)
(49,)
(51,)
(55,)
(56,)
(65,)
(78,)
(88,)
(92,)
(99,)
(100,)
(101,)
(103,)
(109,)
(111,)
(116,)
(136,)
(137,)
(144,)
(153,)
(186,)
(197,)
(213,)
(214,)
(217,)
(218,)
(224,)
(228,)
(236,)
(259,)
(270,)
(275,)
(281,)
(285,)
(304,)
(339,)
(341,)
(363,)
(366,)
(384,)
(386,)
(391,)
(399,)
(400,)
(401,)
(404,)
(427,)
(454,)
(455,)
(472,)
(474,)
(475,)
(478,)
(484,)
(492,)
(496,)
(512,)
(648,)
(654,)
(673,)
(674,)
(690,)
(697,)
(703,)
(716,)
(718,)
(731,)
(737,)
(742,)
(744,)
(745,)
(755,)
(761,)
(765,)
(766,)
(767,)
(771,)
(786,)
(805,)
(808,)
(832,)
(887,)


In [21]:
metrics_all_files_df.to_csv(f'../results/{dataset}/results_combined.csv')
metrics_all_files_df

Unnamed: 0,building_name,mae,mape,mse,rmse,msle,rmsle,nrmse,filename
0,6,77.071449,9.821077e+13,10183.713092,100.914385,0.125911,0.354840,0.804247,enernoc.csv
0,8,374.541271,4.610198e-01,309599.841100,556.416967,0.607808,0.779620,0.774691,enernoc.csv
0,9,88.059858,4.942137e+15,17615.122528,132.721975,0.328371,0.573036,1.156207,enernoc.csv
0,10,476.128321,1.500199e-01,431395.043480,656.806702,0.045551,0.213427,1.021069,enernoc.csv
0,12,107.583218,2.260965e-01,20520.742289,143.250628,0.169054,0.411162,1.018745,enernoc.csv
...,...,...,...,...,...,...,...,...,...
0,786,309.241782,4.337602e-01,312206.533010,558.754448,0.207229,0.455224,1.744368,enernoc.csv
0,805,40.535842,4.507198e-01,3351.147771,57.889099,0.781825,0.884208,1.056898,enernoc.csv
0,808,35.605432,5.054595e-01,3186.486935,56.448976,0.490049,0.700035,1.018001,enernoc.csv
0,832,335.700551,4.934140e-01,253328.744558,503.317737,0.718636,0.847724,0.799737,enernoc.csv
