In [1]:
import sys
import os

# Make the directory above the current working directory importable, so the
# project's packages can be imported from this notebook.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), '..')
)
# Register the path only once, so re-running this cell does not add duplicates.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [2]:
from forcateri.model.dartsmodels.dartstcnmodel import DartsTCNModel
from forcateri.baltbestapi.baltbestaggregatedapidata import BaltBestAggregatedAPIData
import pandas as pd
from forcateri.data.dataprovider import DataProvider, SeriesRole
from darts.models import TCNModel
from darts.utils.likelihood_models import QuantileRegression
import forcateri

In [3]:
%load_ext autoreload

In [4]:
# Configure the BaltBest data source: which column is the target, which
# columns identify the group and the time axis, and which columns are passed
# as known / observed features.
# NOTE(review): local_copy is an absolute path on one machine; confirm it
# exists wherever this notebook is re-run.
baltbest = BaltBestAggregatedAPIData(
    name='test',
    url="baltbest_url",
    local_copy="/home/user/DFKI/forcateri/_data/showcase_data.csv",
    target='q_hca',
    group_col='room_id',
    time_col='datetime',
    known='temperature_outdoor_avg',
    observed=[
        'temperature_1_max',
        'temperature_2_max',
        'temperature_room_avg',
    ],
)

In [5]:
# Display the return value of get_data(); in the recorded output this is a
# list of TimeSeries objects with hourly, UTC-stamped rows.
baltbest.get_data()

datetime


[TimeSeries(data=feature                          q_hca temperature_1_max temperature_2_max  \
 representation                   value             value             value   
 offset time_stamp                                                            
 0 days 2021-01-01 00:00:00+00:00   0.0             16.78             16.54   
        2021-01-01 01:00:00+00:00   0.0             16.75             16.52   
        2021-01-01 02:00:00+00:00   0.0             16.74             16.50   
        2021-01-01 03:00:00+00:00   0.0             16.72             16.49   
        2021-01-01 04:00:00+00:00   0.0             16.71             16.47   
 ...                                ...               ...               ...   
        2021-01-09 19:00:00+00:00   0.0             16.40             16.12   
        2021-01-09 20:00:00+00:00   0.0             16.42             16.14   
        2021-01-09 21:00:00+00:00   0.0             16.43             16.16   
        2021-01-09 22:00:00+00:00   

In [6]:
from forcateri.data.timeseries import TimeSeries

In [7]:
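# Earlier draft of the role mapping (role -> column name(s)), kept for
# reference only; the active `roles` dict maps column name -> role instead.
# roles = {
#     SeriesRole.TARGET: 'q_hca', 
#     SeriesRole.KNOWN: 'temperature_outdoor_avg', 
#     SeriesRole.OBSERVED: ['temperature_1_max', 'temperature_2_max','temperature_room_avg'],}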

In [8]:
# Map every feature column to the role it is assigned: one target, one known
# feature, and three observed features that share the same role.
roles = {
    'q_hca': SeriesRole.TARGET,
    'temperature_outdoor_avg': SeriesRole.KNOWN,
    **dict.fromkeys(
        ('temperature_1_max', 'temperature_2_max', 'temperature_room_avg'),
        SeriesRole.OBSERVED,
    ),
}

In [9]:
# Window boundaries as timezone-aware UTC timestamps; positional arguments are
# (year, month, day, hour): 2021-01-03 04:00 and 2021-01-08 04:00.
# The zone is named explicitly rather than passed as the integer 0, which
# pandas would read as a UTC offset in seconds.
start = pd.Timestamp(2021, 1, 3, 4, tz="UTC")
end = pd.Timestamp(2021, 1, 8, 4, tz="UTC")
# Build the provider from the single BaltBest source and the column -> role map.
# With splits=(100, 150) the recorded outputs show a 100-row training set, a
# validation set starting at row 100 and a test set starting at row 150.
dataprovider = DataProvider(
    data_sources=[baltbest],
    roles=roles,
    splits=(100, 150),
)

datetime


In [10]:
# Display the test split. In the recorded output the target frame has 66 rows
# starting at 2021-01-07 06:00 UTC.
# NOTE(review): the displayed temperature_outdoor_avg rows are NaN; confirm
# that the known feature is actually populated in the source data.
dataprovider.get_test_set()

[(feature                          q_hca
  representation                   value
  offset time_stamp                     
  0 days 2021-01-07 06:00:00+00:00   0.0
         2021-01-07 07:00:00+00:00   0.0
         2021-01-07 08:00:00+00:00   0.0
         2021-01-07 09:00:00+00:00   0.0
         2021-01-07 10:00:00+00:00   0.0
  ...                                ...
         2021-01-09 19:00:00+00:00   0.0
         2021-01-09 20:00:00+00:00   0.0
         2021-01-09 21:00:00+00:00   0.0
         2021-01-09 22:00:00+00:00   0.0
         2021-01-09 23:00:00+00:00   0.0
  
  [66 rows x 1 columns],
  feature                          temperature_outdoor_avg
  representation                                     value
  offset time_stamp                                       
  0 days 2021-01-07 06:00:00+00:00                     NaN
         2021-01-07 07:00:00+00:00                     NaN
         2021-01-07 08:00:00+00:00                     NaN
         2021-01-07 09:00:00+00:00          

In [11]:
# Display the training split. In the recorded output the target frame has 100
# rows, from 2021-01-01 00:00 to 2021-01-05 03:00 UTC.
# NOTE(review): the displayed temperature_outdoor_avg rows are NaN here too.
dataprovider.get_train_set()

[(feature                          q_hca
  representation                   value
  offset time_stamp                     
  0 days 2021-01-01 00:00:00+00:00   0.0
         2021-01-01 01:00:00+00:00   0.0
         2021-01-01 02:00:00+00:00   0.0
         2021-01-01 03:00:00+00:00   0.0
         2021-01-01 04:00:00+00:00   0.0
  ...                                ...
         2021-01-04 23:00:00+00:00   0.0
         2021-01-05 00:00:00+00:00   0.0
         2021-01-05 01:00:00+00:00   0.0
         2021-01-05 02:00:00+00:00   0.0
         2021-01-05 03:00:00+00:00   0.0
  
  [100 rows x 1 columns],
  feature                          temperature_outdoor_avg
  representation                                     value
  offset time_stamp                                       
  0 days 2021-01-01 00:00:00+00:00                     NaN
         2021-01-01 01:00:00+00:00                     NaN
         2021-01-01 02:00:00+00:00                     NaN
         2021-01-01 03:00:00+00:00         

In [12]:
# Display the validation split. In the recorded output it starts at
# 2021-01-05 04:00 UTC, directly after the last training row.
dataprovider.get_val_set()

[(feature                          q_hca
  representation                   value
  offset time_stamp                     
  0 days 2021-01-05 04:00:00+00:00   0.0
         2021-01-05 05:00:00+00:00   0.0
         2021-01-05 06:00:00+00:00   0.0
         2021-01-05 07:00:00+00:00   0.0
         2021-01-05 08:00:00+00:00   0.0
         2021-01-05 09:00:00+00:00   0.0
         2021-01-05 10:00:00+00:00   0.0
         2021-01-05 11:00:00+00:00   0.0
         2021-01-05 12:00:00+00:00   0.0
         2021-01-05 13:00:00+00:00   0.0
         2021-01-05 14:00:00+00:00   0.0
         2021-01-05 15:00:00+00:00   0.0
         2021-01-05 16:00:00+00:00   0.0
         2021-01-05 17:00:00+00:00   0.0
         2021-01-05 18:00:00+00:00   0.0
         2021-01-05 19:00:00+00:00   0.0
         2021-01-05 20:00:00+00:00   0.0
         2021-01-05 21:00:00+00:00   0.0
         2021-01-05 22:00:00+00:00   0.0
         2021-01-05 23:00:00+00:00   0.0
         2021-01-06 00:00:00+00:00   0.0
         2021-01

In [13]:
# Read the hourly CSV and convert its 'rounded_ts' column to datetimes in one
# chained expression.
# NOTE(review): the CSV location is an absolute path on one machine.
df = (
    pd.read_csv('/home/user/DFKI/forcateri/_data/hourly_data.csv')
    .assign(rounded_ts=lambda frame: pd.to_datetime(frame['rounded_ts']))
)

# Build TimeSeries objects from the frame, grouped by 'room_id', using the
# three value columns at hourly frequency.
ts, ts_dict = TimeSeries.from_group_df(
    df,
    time_col='rounded_ts',
    group_col='room_id',
    value_cols=['delta', 'max_temperature_1', 'max_temperature_2'],
    freq='h',
)

In [14]:
# Display the result of from_group_df. The recorded output is a list of
# TimeSeries objects; the first one shown has the features delta,
# max_temperature_1 and max_temperature_2, with timestamps from
# 2019-06-01 00:00 to 2022-04-19 00:00 printed without a UTC offset.
ts

[TimeSeries(data=feature                    delta max_temperature_1 max_temperature_2
 representation             value             value             value
 offset time_stamp                                                   
 0 days 2019-06-01 00:00:00   0.0         20.615333         20.782000
        2019-06-01 01:00:00   0.0         20.603846         20.756923
        2019-06-01 02:00:00   0.0         20.578750         20.731250
        2019-06-01 03:00:00   0.0         20.581429         20.711429
        2019-06-01 04:00:00   0.0         20.619231         20.716154
 ...                          ...               ...               ...
        2022-04-18 20:00:00   0.0         18.198667         18.234667
        2022-04-18 21:00:00   0.0         18.257059         18.297647
        2022-04-18 22:00:00   0.0         18.283000         18.329000
        2022-04-18 23:00:00   0.0         18.297143         18.337857
        2022-04-19 00:00:00   0.0         18.310909         18.343636
 
 [