In [1]:
!python --version

Python 3.10.12


In [2]:
cd uni2ts

/home/user/energygpt/moirai/uni2ts


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
import os
import torch
import matplotlib.pyplot as plt
import glob
import pandas as pd
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from tqdm.autonotebook import tqdm
import matplotlib.dates as mdates
from itertools import islice
from collections import defaultdict
import gluonts
from datetime import datetime

  from tqdm.autonotebook import tqdm


In [4]:
from uni2ts.eval_util.plot import plot_single
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
from uni2ts.eval_util.evaluation import evaluate_model

In [5]:
# Data pipelining
def get_batched_data_fn(sub_df,
    batch_size: int = 128, 
    context_len: int = 168, 
    horizon_len: int = 24):
    """Slice one series into consecutive (context, horizon) windows.

    Window starts advance by ``horizon_len`` rows, so the horizon parts of
    successive windows tile the series back to back while the context parts
    overlap.

    Args:
        sub_df: DataFrame with a ``"y"`` column holding the series values. Its
            index supplies the timestamps stored alongside each window.
        batch_size: Unused by this function; kept so that existing callers
            which pass it keep working.
        context_len: Number of rows in each input (context) window.
        horizon_len: Number of rows in each output (horizon) window; also the
            stride between consecutive window starts.

    Returns:
        A ``defaultdict(list)`` with one entry per window under the keys
        ``"inputs"`` and ``"outputs"`` (lists of values) and ``"inputs_ts"``
        and ``"outputs_ts"`` (slices of ``sub_df.index``). When the series is
        shorter than ``context_len + horizon_len`` no key is populated.
    """
    examples = defaultdict(list)
    values = sub_df["y"]
    timestamps = sub_df.index
    window_len = context_len + horizon_len

    # "+ 1" keeps the window that ends exactly on the last row; without it a
    # series of exactly `window_len` rows would produce no window at all.
    for start in range(0, len(sub_df) - window_len + 1, horizon_len):
        context_end = start + context_len
        horizon_end = context_end + horizon_len

        # .iloc makes the slicing positional regardless of the index type.
        examples["inputs"].append(values.iloc[start:context_end].tolist())
        examples["outputs"].append(values.iloc[context_end:horizon_end].tolist())
        examples["inputs_ts"].append(timestamps[start:context_end])
        examples["outputs_ts"].append(timestamps[context_end:horizon_end])

    return examples

In [None]:
def forecast_building(df,
                      prediction_length: int = 24,
                      context_length: int = 168,
                      num_samples: int = 100,
                      batch_size: int = 32,
                      model_name: str = "Salesforce/moirai-1.0-R-small",
                      device=None):
    """Forecast every item in a long-format frame with a pretrained Moirai model.

    Args:
        df: Long-format DataFrame with a ``"target"`` column and an
            ``"item_id"`` column identifying each series. It is not modified.
        prediction_length: Forecast horizon in rows. The default of 24 matches
            one day of hourly data.
        context_length: Number of past rows the model conditions on.
        num_samples: Number of sample paths drawn per forecast.
        batch_size: Batch size used by the predictor.
        model_name: Identifier passed to ``MoiraiModule.from_pretrained``.
        device: Device string passed to ``create_predictor``. When ``None``,
            ``"cuda:0"`` is used if CUDA is available, otherwise ``"cpu"``.

    Returns:
        A tuple ``(res_all_df, agg_metrics, ts_metrics)``:
        ``res_all_df`` has columns ``y_true`` and ``y_pred`` (the forecast
        median) for the rows of each series that fall inside its forecast
        window, sorted by index; ``agg_metrics`` and ``ts_metrics`` are the two
        values returned by the gluonts ``Evaluator``.
    """
    # Cast numeric columns to float32 on a new frame so the caller's data keeps
    # its original dtypes.
    numeric_cols = [col for col in df.columns
                    if pd.api.types.is_numeric_dtype(df[col])]
    df = df.astype({col: 'float32' for col in numeric_cols})

    # Build the gluonts dataset: one series per distinct item_id.
    dataset = PandasDataset.from_long_dataframe(df, target="target", item_id="item_id")
    backtest_dataset = dataset

    if device is None:
        # Fall back to CPU instead of failing on machines without a GPU.
        device = "cuda:0" if torch.cuda.is_available() else "cpu"

    model = MoiraiForecast(
        module=MoiraiModule.from_pretrained(model_name),
        prediction_length=prediction_length,
        context_length=context_length,
        patch_size='auto',
        target_dim=1,
        feat_dynamic_real_dim=backtest_dataset.num_feat_dynamic_real,
        past_feat_dynamic_real_dim=backtest_dataset.num_past_feat_dynamic_real,
    )
    predictor = model.create_predictor(batch_size=batch_size, device=device)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=backtest_dataset,
        predictor=predictor,
        num_samples=num_samples
    )

    # Materialise both iterators: they are consumed once by the evaluator and
    # once more below to assemble the prediction frame.
    forecasts = list(forecast_it)
    tss = list(ts_it)

    evaluator = Evaluator()
    agg_metrics, ts_metrics = evaluator(iter(tss), iter(forecasts))

    res_all = []
    for ts, fc in zip(tss, forecasts):
        # Keep only the ground-truth rows covered by the forecast; the explicit
        # copy avoids writing into a view of `ts`.
        res = ts[ts.index.isin(fc.index)].copy()
        res.columns = ['y_true']
        res.insert(1, 'y_pred', fc.median)
        res_all.append(res)

    res_all_df = pd.concat(res_all).sort_index()
    return res_all_df, agg_metrics, ts_metrics


In [7]:
def process_building(df, dump_path=None):
    """Window one building's series and forecast every window.

    Args:
        df: DataFrame whose first column holds the building's series and whose
            index holds the timestamps. It is not modified.
        dump_path: Optional CSV path. When given, the long-format window frame
            is written there before forecasting (useful for debugging). The
            default ``None`` writes nothing.

    Returns:
        The ``(res, agg_metrics, ts_metrics)`` tuple returned by
        ``forecast_building`` for the windowed frame.

    Raises:
        ValueError: If the series is too short to produce a single window.
    """
    building_name = df.columns[0]

    # Rename on a copy so the caller's frame keeps its column name.
    series = df.iloc[:, [0]].copy()
    series.columns = ['y']

    input_data = get_batched_data_fn(series)

    windows_all = []
    window_fields = zip(input_data['inputs_ts'],
                        input_data['inputs'],
                        input_data['outputs_ts'],
                        input_data['outputs'])
    for counter, (inputs_ts, inputs, outputs_ts, outputs) in enumerate(window_fields, start=1):
        # Each window becomes its own item: context rows followed by horizon rows.
        combined = pd.DataFrame({'timestamp': inputs_ts.append(outputs_ts),
                                 'target': inputs + outputs})
        combined['item_id'] = str(building_name) + '_' + str(counter)
        combined['item_id_no'] = counter
        windows_all.append(combined)

    if not windows_all:
        raise ValueError(
            f'Series for building {building_name!r} has {len(series)} rows, '
            'which is too short to build a single context/horizon window.'
        )

    windows_all_df = pd.concat(windows_all, ignore_index=True)
    windows_all_df['timestamp'] = pd.to_datetime(windows_all_df['timestamp'])
    windows_all_df = windows_all_df.set_index('timestamp')

    if dump_path is not None:
        windows_all_df.to_csv(dump_path)

    res, agg_metrics, ts_metrics = forecast_building(windows_all_df)
    return res, agg_metrics, ts_metrics

In [None]:
def _save_outputs(res_all, ts_metrics_all, agg_metrics_all, filename, dataset_name):
    """Concatenate the accumulated per-building frames and write them to CSV.

    Returns the three concatenated frames (forecasts, per-series metrics,
    aggregate metrics) in that order.
    """
    base_name = os.path.basename(filename)
    forecasts_dir = f'../forecasts/{dataset_name}'
    results_dir = f'../results/{dataset_name}'
    # Create the targets here so the function does not rely on another cell
    # having created them.
    os.makedirs(forecasts_dir, exist_ok=True)
    os.makedirs(results_dir, exist_ok=True)

    res_all_df = pd.concat(res_all).round(6).reset_index()
    # The former index column is unnamed after reset_index; call it "timestamp".
    res_all_df = res_all_df.rename(columns={res_all_df.columns[0]: "timestamp"})
    res_all_df.to_csv(f'{forecasts_dir}/{base_name}', index=False)

    ts_metrics_all_df = pd.concat(ts_metrics_all).round(6)
    ts_metrics_all_df.to_csv(f'{results_dir}/ts_metrics_{base_name}', index=False)

    agg_metrics_all_df = pd.concat(agg_metrics_all).round(6)
    agg_metrics_all_df.to_csv(f'{results_dir}/agg_metrics_{base_name}', index=False)

    return res_all_df, ts_metrics_all_df, agg_metrics_all_df


def process_file(filename, dataset_name=None):
    """Forecast every building column of one CSV file and save the results.

    The CSV must contain a ``timestamp`` column; every other column is treated
    as one building's series. Results are checkpointed to disk after every 10
    buildings and written once more when all buildings are done.

    Args:
        filename: Path of the CSV file to process.
        dataset_name: Name of the sub-directory under ``../forecasts`` and
            ``../results`` that receives the output. When ``None``, the
            notebook-level global ``dataset`` is used, as before.

    Returns:
        ``None`` when the file has fewer than two building columns; otherwise
        the tuple ``(res_all_df, ts_metrics_all_df, agg_metrics_all_df)``.
    """
    if dataset_name is None:
        # Backward compatibility: fall back to the global set in the driver
        # cell. Resolving it up front fails fast, before any forecasting.
        dataset_name = dataset

    df = pd.read_csv(filename)
    df = df.set_index(['timestamp'])

    # NOTE(review): this also skips files with exactly one building column;
    # confirm that is intended.
    if df.shape[1] < 2:
        return None

    print(datetime.now(), df.shape, flush=True)

    res_all = []
    agg_metrics_all = []
    ts_metrics_all = []

    total = len(df.columns)
    for i, building_name in enumerate(df.columns):
        print(datetime.now(), i, '/', total, building_name, flush=True)

        res, agg_metrics, ts_metrics = process_building(df[[building_name]])
        res['building'] = building_name
        res['filename'] = filename
        res_all.append(res)

        ts_metrics.insert(0, 'building', building_name)
        ts_metrics.insert(0, 'filename', filename)
        ts_metrics_all.append(ts_metrics.sort_values(['forecast_start']))

        agg_metrics_df = pd.DataFrame([agg_metrics])
        agg_metrics_df.insert(0, 'building', building_name)
        agg_metrics_df.insert(0, 'filename', filename)
        agg_metrics_all.append(agg_metrics_df)

        done = i + 1
        # Checkpoint every 10 buildings; skip it on the last building because
        # the final save below would immediately rewrite the same files.
        if done % 10 == 0 and done < total:
            print(datetime.now(), 'Saving...')
            _save_outputs(res_all, ts_metrics_all, agg_metrics_all, filename, dataset_name)

    return _save_outputs(res_all, ts_metrics_all, agg_metrics_all, filename, dataset_name)

In [10]:
# glob returns files in arbitrary order; sort so runs and logs are reproducible.
files_list = sorted(glob.glob('/home/user/New_Buildings_Datasets/Enernoc/csv-only/processed/*.csv'))

# `dataset` names the output sub-directory and is read by process_file when it
# builds its output paths.
dataset = 'Enernoc'
os.makedirs(f'../forecasts/{dataset}/', exist_ok = True)
os.makedirs(f'../results/{dataset}/', exist_ok = True)

for filename in files_list:
    print(datetime.now(), filename)
    # process_file writes the forecast and metric CSVs itself; it returns None
    # for files it skips and a tuple of three DataFrames otherwise.
    results = process_file(filename)
    print('')

2024-10-23 10:57:32.658389 /home/user/New_Buildings_Datasets/Enernoc/csv-only/processed/enernoc.csv
2024-10-23 10:57:32.781264 (8785, 100)
2024-10-23 10:57:32.782156 0 / 100 767


Running evaluation: 359it [00:00, 5737.61it/s]
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return arr.astype(dtype, copy=True)


2024-10-23 10:57:39.476665 1 / 100 304


Running evaluation: 359it [00:00, 7637.46it/s]


2024-10-23 10:57:44.633441 2 / 100 399


Running evaluation: 359it [00:00, 7714.07it/s]


2024-10-23 10:57:50.089505 3 / 100 21


Running evaluation: 359it [00:00, 7337.24it/s]


2024-10-23 10:57:55.121645 4 / 100 805


Running evaluation: 359it [00:00, 8125.51it/s]


2024-10-23 10:58:00.545646 5 / 100 14


Running evaluation: 359it [00:00, 7676.78it/s]


2024-10-23 10:58:05.622474 6 / 100 404


Running evaluation: 359it [00:00, 7989.40it/s]


2024-10-23 10:58:10.779858 7 / 100 78


Running evaluation: 359it [00:00, 8391.65it/s]


2024-10-23 10:58:15.856785 8 / 100 731


Running evaluation: 359it [00:00, 7823.81it/s]


2024-10-23 10:58:21.173233 9 / 100 218


Running evaluation: 359it [00:00, 8069.73it/s]


2024-10-23 10:58:26.299251 Saving...
2024-10-23 10:58:26.958954 10 / 100 366


Running evaluation: 359it [00:00, 8251.39it/s]


2024-10-23 10:58:31.909946 11 / 100 766


Running evaluation: 359it [00:00, 7867.59it/s]


2024-10-23 10:58:37.475670 12 / 100 197


Running evaluation: 359it [00:00, 7939.40it/s]


2024-10-23 10:58:42.470797 13 / 100 30


Running evaluation: 359it [00:00, 7381.59it/s]


2024-10-23 10:58:47.462024 14 / 100 742


Running evaluation: 359it [00:00, 7698.88it/s]


2024-10-23 10:58:52.681657 15 / 100 32


Running evaluation: 359it [00:00, 8233.84it/s]


2024-10-23 10:58:57.982655 16 / 100 137


Running evaluation: 359it [00:00, 8095.98it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return arr.astype(dtype, copy=True)


2024-10-23 10:59:03.089997 17 / 100 36


Running evaluation: 359it [00:00, 7506.03it/s]


2024-10-23 10:59:08.094732 18 / 100 9


Running evaluation: 359it [00:00, 7599.30it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return arr.astype(dtype, copy=True)


2024-10-23 10:59:13.443661 19 / 100 808


Running evaluation: 359it [00:00, 7661.86it/s]


2024-10-23 10:59:18.383277 Saving...
2024-10-23 10:59:19.674925 20 / 100 391


Running evaluation: 359it [00:00, 7483.69it/s]


2024-10-23 10:59:24.627281 21 / 100 213


Running evaluation: 359it [00:00, 7554.50it/s]


2024-10-23 10:59:29.997806 22 / 100 236


Running evaluation: 359it [00:00, 8042.45it/s]


2024-10-23 10:59:34.947321 23 / 100 6


Running evaluation: 359it [00:00, 7247.29it/s]


2024-10-23 10:59:40.082714 24 / 100 224


Running evaluation: 359it [00:00, 7570.18it/s]


2024-10-23 10:59:45.405998 25 / 100 45


Running evaluation: 359it [00:00, 7299.29it/s]


2024-10-23 10:59:50.473783 26 / 100 771


Running evaluation: 359it [00:00, 7269.51it/s]


2024-10-23 10:59:56.018656 27 / 100 492


Running evaluation: 359it [00:00, 7656.95it/s]


2024-10-23 11:00:01.082976 28 / 100 384


Running evaluation: 359it [00:00, 8280.43it/s]


2024-10-23 11:00:06.445006 29 / 100 153


Running evaluation: 359it [00:00, 7905.01it/s]


2024-10-23 11:00:11.643753 Saving...
2024-10-23 11:00:13.439621 30 / 100 136


Running evaluation: 359it [00:00, 7301.94it/s]


2024-10-23 11:00:18.569071 31 / 100 386


Running evaluation: 359it [00:00, 7354.91it/s]


2024-10-23 11:00:24.070043 32 / 100 51


Running evaluation: 359it [00:00, 7244.71it/s]


2024-10-23 11:00:28.991524 33 / 100 472


Running evaluation: 359it [00:00, 7418.10it/s]


2024-10-23 11:00:34.218104 34 / 100 281


Running evaluation: 359it [00:00, 8240.60it/s]


2024-10-23 11:00:39.347021 35 / 100 474


Running evaluation: 359it [00:00, 7513.07it/s]


2024-10-23 11:00:44.277742 36 / 100 697


Running evaluation: 359it [00:00, 7765.59it/s]


2024-10-23 11:00:49.439195 37 / 100 49


Running evaluation: 359it [00:00, 7307.61it/s]


2024-10-23 11:00:54.420130 38 / 100 755


Running evaluation: 359it [00:00, 7859.13it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return arr.astype(dtype, copy=True)


2024-10-23 11:00:59.565386 39 / 100 228


Running evaluation: 359it [00:00, 7769.75it/s]


2024-10-23 11:01:04.682966 Saving...
2024-10-23 11:01:06.985071 40 / 100 427


Running evaluation: 359it [00:00, 7634.32it/s]


2024-10-23 11:01:12.066560 41 / 100 454


Running evaluation: 359it [00:00, 7778.58it/s]


2024-10-23 11:01:16.963208 42 / 100 690


Running evaluation: 359it [00:00, 7643.16it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-23 11:01:22.088293 43 / 100 703


  return arr.astype(dtype, copy=True)
Running evaluation: 359it [00:00, 7306.44it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return arr.astype(dtype, copy=True)


2024-10-23 11:01:26.976287 44 / 100 259


Running evaluation: 359it [00:00, 7949.50it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return arr.astype(dtype, copy=True)


2024-10-23 11:01:32.485100 45 / 100 648


Running evaluation: 359it [00:00, 7965.23it/s]


2024-10-23 11:01:37.425643 46 / 100 341


Running evaluation: 359it [00:00, 7712.17it/s]


2024-10-23 11:01:42.825635 47 / 100 44


Running evaluation: 359it [00:00, 7190.43it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-23 11:01:47.870118 48 / 100 275


  return arr.astype(dtype, copy=True)
Running evaluation: 359it [00:00, 7369.16it/s]


2024-10-23 11:01:52.936929 49 / 100 718


Running evaluation: 359it [00:00, 7567.71it/s]


2024-10-23 11:01:58.388216 Saving...
2024-10-23 11:02:00.949893 50 / 100 25


Running evaluation: 359it [00:00, 7446.93it/s]


2024-10-23 11:02:05.966175 51 / 100 65


Running evaluation: 359it [00:00, 7552.19it/s]


2024-10-23 11:02:10.769060 52 / 100 455


Running evaluation: 359it [00:00, 7308.11it/s]


2024-10-23 11:02:16.017738 53 / 100 101


Running evaluation: 359it [00:00, 7446.93it/s]


2024-10-23 11:02:21.042047 54 / 100 673


Running evaluation: 359it [00:00, 6978.49it/s]


2024-10-23 11:02:26.622663 55 / 100 31


Running evaluation: 359it [00:00, 7095.52it/s]


2024-10-23 11:02:32.173802 56 / 100 10


Running evaluation: 359it [00:00, 7939.99it/s]


2024-10-23 11:02:37.493854 57 / 100 761


Running evaluation: 359it [00:00, 8042.92it/s]


2024-10-23 11:02:42.520591 58 / 100 478


Running evaluation: 359it [00:00, 7601.75it/s]


2024-10-23 11:02:47.777061 59 / 100 100


Running evaluation: 359it [00:00, 7840.35it/s]


2024-10-23 11:02:52.894530 Saving...
2024-10-23 11:02:56.206326 60 / 100 22


Running evaluation: 359it [00:00, 7951.69it/s]
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  return numerator / seasonal_error
  return np.mean(np.abs(target - forecast)) / seasonal_error
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return numerator / seasonal_error


2024-10-23 11:03:01.280729 61 / 100 41


  return arr.astype(dtype, copy=True)
Running evaluation: 359it [00:00, 8261.12it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  return arr.astype(dtype, copy=True)


2024-10-23 11:03:06.572286 62 / 100 496


Running evaluation: 359it [00:00, 7864.72it/s]


2024-10-23 11:03:11.819374 63 / 100 217


Running evaluation: 359it [00:00, 7123.55it/s]


2024-10-23 11:03:17.087243 64 / 100 144


Running evaluation: 359it [00:00, 7414.48it/s]


2024-10-23 11:03:22.049329 65 / 100 214


Running evaluation: 359it [00:00, 7949.50it/s]
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(
  metrics["ND"] = cast(float, metrics["abs_error"]) / cast(


2024-10-23 11:03:27.284258 66 / 100 512


  return arr.astype(dtype, copy=True)
Running evaluation: 359it [00:00, 7587.31it/s]


2024-10-23 11:03:32.297189 67 / 100 654


Running evaluation: 359it [00:00, 7862.58it/s]


2024-10-23 11:03:37.484971 68 / 100 363


Running evaluation: 359it [00:00, 7740.80it/s]


2024-10-23 11:03:42.760311 69 / 100 109


Running evaluation: 359it [00:00, 7238.30it/s]


2024-10-23 11:03:47.809929 Saving...
2024-10-23 11:03:51.827361 70 / 100 116


Running evaluation: 359it [00:00, 7614.94it/s]


2024-10-23 11:03:56.943977 71 / 100 111


Running evaluation: 359it [00:00, 7725.51it/s]


2024-10-23 11:04:02.279030 72 / 100 55


Running evaluation: 359it [00:00, 7091.65it/s]


2024-10-23 11:04:07.455339 73 / 100 832


Running evaluation: 359it [00:00, 7638.00it/s]


2024-10-23 11:04:12.812306 74 / 100 401


Running evaluation: 359it [00:00, 7491.32it/s]


2024-10-23 11:04:17.886311 75 / 100 99


Running evaluation: 359it [00:00, 7888.16it/s]


2024-10-23 11:04:23.428157 76 / 100 270


Running evaluation: 359it [00:00, 7890.89it/s]


2024-10-23 11:04:28.584810 77 / 100 29


Running evaluation: 359it [00:00, 8295.99it/s]


2024-10-23 11:04:33.532612 78 / 100 400


Running evaluation: 359it [00:00, 7877.22it/s]


2024-10-23 11:04:38.928429 79 / 100 786


Running evaluation: 359it [00:00, 8044.08it/s]


2024-10-23 11:04:43.852693 Saving...
2024-10-23 11:04:48.295946 80 / 100 186


Running evaluation: 359it [00:00, 8354.12it/s]


2024-10-23 11:04:53.853998 81 / 100 285


Running evaluation: 359it [00:00, 7316.95it/s]


2024-10-23 11:04:58.954862 82 / 100 12


Running evaluation: 359it [00:00, 7884.48it/s]


2024-10-23 11:05:04.032226 83 / 100 88


Running evaluation: 359it [00:00, 7475.14it/s]


2024-10-23 11:05:09.412862 84 / 100 744


Running evaluation: 359it [00:00, 7997.08it/s]


2024-10-23 11:05:14.357971 85 / 100 745


Running evaluation: 359it [00:00, 7945.60it/s]


2024-10-23 11:05:19.515157 86 / 100 13


Running evaluation: 359it [00:00, 7495.50it/s]


2024-10-23 11:05:24.719700 87 / 100 56


Running evaluation: 359it [00:00, 8087.46it/s]


2024-10-23 11:05:30.253612 88 / 100 103


Running evaluation: 359it [00:00, 7351.82it/s]


2024-10-23 11:05:35.362444 89 / 100 737


Running evaluation: 359it [00:00, 7191.01it/s]


2024-10-23 11:05:40.819129 Saving...
2024-10-23 11:05:45.782044 90 / 100 484


Running evaluation: 359it [00:00, 7855.98it/s]


2024-10-23 11:05:51.133562 91 / 100 674


Running evaluation: 359it [00:00, 7791.91it/s]


2024-10-23 11:05:56.139350 92 / 100 8


Running evaluation: 359it [00:00, 7786.95it/s]


2024-10-23 11:06:01.249646 93 / 100 92


Running evaluation: 359it [00:00, 6747.70it/s]


2024-10-23 11:06:06.744841 94 / 100 339


Running evaluation: 359it [00:00, 7514.39it/s]


2024-10-23 11:06:11.997977 95 / 100 887


Running evaluation: 359it [00:00, 7622.19it/s]


2024-10-23 11:06:17.473730 96 / 100 42


Running evaluation: 359it [00:00, 7606.71it/s]


2024-10-23 11:06:22.569135 97 / 100 475


Running evaluation: 359it [00:00, 7993.99it/s]


2024-10-23 11:06:27.802895 98 / 100 765


Running evaluation: 359it [00:00, 7557.19it/s]


2024-10-23 11:06:32.951143 99 / 100 716


Running evaluation: 359it [00:00, 7505.51it/s]


2024-10-23 11:06:38.010426 Saving...

