In [1]:
import sys
import os

# Put the parent of the current working directory on sys.path (once) so that
# packages located there can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [2]:
import sys

# Make the parent directory importable. The membership guard keeps this cell
# idempotent: re-running it no longer appends another ".." entry to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

from IPython.display import display, Markdown
import numpy as np
import pandas as pd

from forcateri import TimeSeries

def mprint(s):
    """Render the Markdown source string ``s`` as rich output in the notebook."""
    rendered = Markdown(s)
    display(rendered)

In [3]:
import logging

# Configure the logger registered under the forcateri TimeSeries module name.
logger = logging.getLogger("forcateri.data.timeseries")
logger.setLevel(logging.INFO)  # use logging.DEBUG for more verbosity

# Do not hand records on to ancestor loggers, so each message is shown once.
logger.propagate = False

# Attach a stream handler only when none is present; re-running this cell
# therefore never stacks up duplicate handlers.
if len(logger.handlers) == 0:
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)


In [4]:
%reload_ext autoreload

In [5]:
# Location of the hourly dataset. The default is the original hard-coded path;
# set the FORCATERI_HOURLY_CSV environment variable to read the file from
# somewhere else without editing the notebook.
HOURLY_CSV_PATH = os.environ.get(
    "FORCATERI_HOURLY_CSV",
    "/home/dior00002/dfki/forcateri/_data/hourly_data.csv",
)

# The first CSV column is used as the row index.
df = pd.read_csv(HOURLY_CSV_PATH, index_col=0)

In [6]:

# Keep only the rows of room 978, drop the now-constant room_id column and
# parse the rounded_ts column into datetimes.
df_978 = (
    df.loc[df['room_id'] == 978]
    .drop(columns=['room_id'])
    .assign(rounded_ts=lambda frame: pd.to_datetime(frame['rounded_ts']))
)

In [7]:
# Promote the timestamp column to the row index. The name is rebound instead
# of mutating in place, and the step is skipped when the index is already
# 'rounded_ts', so re-running this cell no longer fails with a KeyError on the
# column that the first run consumed.
if df_978.index.name != 'rounded_ts':
    df_978 = df_978.set_index('rounded_ts')

In [8]:
# Display the room-978 frame, now indexed by timestamp.
df_978

Unnamed: 0_level_0,sin_time_of_day,cos_time_of_day,sin_time_of_year,cos_time_of_year,Mon,Tue,Wed,Thu,Fri,Sat,Sun,max_temperature_1,max_temperature_2,outside_temp,room_temperature,delta
rounded_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-06-01 00:00:00,0.500000,1.000000,0.753708,0.069149,False,False,False,False,False,True,False,22.616923,22.709231,11.0,21.081304,0.0
2019-06-01 01:00:00,0.629410,0.982963,0.753708,0.069149,False,False,False,False,False,True,False,22.522000,22.626667,11.0,21.079091,0.0
2019-06-01 02:00:00,0.750000,0.933013,0.753708,0.069149,False,False,False,False,False,True,False,22.419423,22.545385,10.0,21.075185,0.0
2019-06-01 03:00:00,0.853553,0.853553,0.753708,0.069149,False,False,False,False,False,True,False,22.487115,22.570769,10.0,21.078500,0.0
2019-06-01 04:00:00,0.933013,0.750000,0.753708,0.069149,False,False,False,False,False,True,False,22.644615,22.668582,10.0,21.101111,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-18 20:00:00,0.066987,0.750000,0.980075,0.360257,True,False,False,False,False,False,False,23.859406,25.335315,14.0,20.499355,0.0
2022-04-18 21:00:00,0.146447,0.853553,0.980075,0.360257,True,False,False,False,False,False,False,24.066047,25.581538,12.0,20.510811,0.0
2022-04-18 22:00:00,0.250000,0.933013,0.980075,0.360257,True,False,False,False,False,False,False,24.100556,25.333889,7.0,20.440870,0.0
2022-04-18 23:00:00,0.370590,0.982963,0.980075,0.360257,True,False,False,False,False,False,False,23.485500,24.606500,6.0,20.396154,0.0


In [9]:
# Build a TimeSeries from the room-978 frame using the 'value' representation.
ts = TimeSeries(
    data=df_978,
    representation='value',
)

2025-05-22 15:49:19,446 - INFO - TimeSeries initialized from compatible-format DataFrame.


In [10]:
# Level names for the row index and the column index; presumably the layout
# TimeSeries uses internally — TODO confirm against the class.
expected_index_names = ['offset', 'time_stamp']
expected_column_names = ['feature', 'representation']
# Ask TimeSeries whether the plain room-978 frame already matches its format.
TimeSeries.is_matching_format(df_978)

False

In [11]:
# Select three temperature features from the TimeSeries and keep the first
# 5000 rows of the underlying data.
selected_features = ['max_temperature_1', 'max_temperature_2', 'outside_temp']
feature_slice = ts.get_feature_slice(index=selected_features)
test = feature_slice.data[:5000]
len(test)

2025-05-22 15:49:19,465 - INFO - TimeSeries initialized from internal-format DataFrame.


5000

In [12]:
# Fixed seed so that the random example frames below are identical on every
# run (an unseeded generator produces different tables each time).
SEED = 42
rng = np.random.default_rng(SEED)

# Dimensions of the toy frames and the hourly time axis they share.
n_cols, n_rows = 3, 12
index = pd.date_range(start="2000-01-01", freq="h", periods=n_rows)


In [13]:
# Random values arranged as n_rows x n_cols; the frame keeps pandas' default
# integer labels on both axes, i.e. it carries no time information.
random_values = rng.random(n_cols * n_rows).reshape(n_rows, n_cols)
raw_df = pd.DataFrame(data=random_values)

mprint("### Not compatible\nNo time information is provided:")
raw_df

### Not compatible
No time information is provided:

Unnamed: 0,0,1,2
0,0.991651,0.030234,0.486247
1,0.778222,0.968755,0.521583
2,0.86979,0.496439,0.012754
3,0.535557,0.72496,0.368568
4,0.920516,0.214438,0.989725
5,0.679227,0.437131,0.863927
6,0.892936,0.233465,0.38659
7,0.125556,0.859694,0.025554
8,0.570513,0.983784,0.31587
9,0.292036,0.605057,0.665735


In [14]:
# Same values as raw_df, but with the hourly DatetimeIndex as row index.
# set_index returns a new frame, so raw_df itself is left untouched.
dt_indexed_df = raw_df.set_index(index)

mprint("### Compatible\nThe column index represents deterministic features, the row index represents time steps")
dt_indexed_df

### Compatible
The column index represents deterministic features, the row index represents time steps

Unnamed: 0,0,1,2
2000-01-01 00:00:00,0.991651,0.030234,0.486247
2000-01-01 01:00:00,0.778222,0.968755,0.521583
2000-01-01 02:00:00,0.86979,0.496439,0.012754
2000-01-01 03:00:00,0.535557,0.72496,0.368568
2000-01-01 04:00:00,0.920516,0.214438,0.989725
2000-01-01 05:00:00,0.679227,0.437131,0.863927
2000-01-01 06:00:00,0.892936,0.233465,0.38659
2000-01-01 07:00:00,0.125556,0.859694,0.025554
2000-01-01 08:00:00,0.570513,0.983784,0.31587
2000-01-01 09:00:00,0.292036,0.605057,0.665735


In [15]:
# Copy the time-indexed frame and give it two-level columns: a single outer
# label "delta" above the inner labels 1, 5 and 9.
delta_columns = pd.MultiIndex.from_product([["delta"], [1, 5, 9]])
ambiguous_col_df = dt_indexed_df.copy()
ambiguous_col_df.columns = delta_columns

mprint("""
### Compatible but...\n
Unclear how to interpret the inner column index: As samples? As quantiles? Which quantiles?
Thus, the compatibility check should succeed but an error can still be thrown by the constructor
if `representation` and/or `quantiles` are not provided.
""")
ambiguous_col_df


### Compatible but...

Unclear how to interpret the inner column index: As samples? As quantiles? Which quantiles?
Thus, the compatibility check should succeed but an error can still be thrown by the constructor
if `representation` and/or `quantiles` are not provided.


Unnamed: 0_level_0,delta,delta,delta
Unnamed: 0_level_1,1,5,9
2000-01-01 00:00:00,0.991651,0.030234,0.486247
2000-01-01 01:00:00,0.778222,0.968755,0.521583
2000-01-01 02:00:00,0.86979,0.496439,0.012754
2000-01-01 03:00:00,0.535557,0.72496,0.368568
2000-01-01 04:00:00,0.920516,0.214438,0.989725
2000-01-01 05:00:00,0.679227,0.437131,0.863927
2000-01-01 06:00:00,0.892936,0.233465,0.38659
2000-01-01 07:00:00,0.125556,0.859694,0.025554
2000-01-01 08:00:00,0.570513,0.983784,0.31587
2000-01-01 09:00:00,0.292036,0.605057,0.665735


In [16]:
# Name the two column levels with the expected level names.
expected_column_names = ['feature', 'representation']
ambiguous_col_df.columns = ambiguous_col_df.columns.set_names(expected_column_names)
ambiguous_col_df

feature,delta,delta,delta
representation,1,5,9
2000-01-01 00:00:00,0.991651,0.030234,0.486247
2000-01-01 01:00:00,0.778222,0.968755,0.521583
2000-01-01 02:00:00,0.86979,0.496439,0.012754
2000-01-01 03:00:00,0.535557,0.72496,0.368568
2000-01-01 04:00:00,0.920516,0.214438,0.989725
2000-01-01 05:00:00,0.679227,0.437131,0.863927
2000-01-01 06:00:00,0.892936,0.233465,0.38659
2000-01-01 07:00:00,0.125556,0.859694,0.025554
2000-01-01 08:00:00,0.570513,0.983784,0.31587
2000-01-01 09:00:00,0.292036,0.605057,0.665735


In [18]:
# Resolve the ambiguity explicitly: the inner column level is to be read as
# quantiles, with the given quantile values passed alongside.
ts_ambiguous = TimeSeries(
    ambiguous_col_df,
    representation='quantile',
    quantiles=[0.1, 0.5, 0.9],
)

2025-05-22 15:49:35,347 - INFO - TimeSeries initialized from compatible-format DataFrame.


In [19]:
# Show the data held by the quantile TimeSeries built from ambiguous_col_df.
ts_ambiguous.data

Unnamed: 0_level_0,feature,delta,delta,delta
Unnamed: 0_level_1,representation,0.1,0.5,0.9
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0 days,2000-01-01 00:00:00,0.991651,0.030234,0.486247
0 days,2000-01-01 01:00:00,0.778222,0.968755,0.521583
0 days,2000-01-01 02:00:00,0.86979,0.496439,0.012754
0 days,2000-01-01 03:00:00,0.535557,0.72496,0.368568
0 days,2000-01-01 04:00:00,0.920516,0.214438,0.989725
0 days,2000-01-01 05:00:00,0.679227,0.437131,0.863927
0 days,2000-01-01 06:00:00,0.892936,0.233465,0.38659
0 days,2000-01-01 07:00:00,0.125556,0.859694,0.025554
0 days,2000-01-01 08:00:00,0.570513,0.983784,0.31587
0 days,2000-01-01 09:00:00,0.292036,0.605057,0.665735


In [21]:
# Work on an independent copy so later relabelling of the columns does not
# touch dt_indexed_df.
dt_quant = dt_indexed_df.copy()
dt_quant

Unnamed: 0,0,1,2
2000-01-01 00:00:00,0.991651,0.030234,0.486247
2000-01-01 01:00:00,0.778222,0.968755,0.521583
2000-01-01 02:00:00,0.86979,0.496439,0.012754
2000-01-01 03:00:00,0.535557,0.72496,0.368568
2000-01-01 04:00:00,0.920516,0.214438,0.989725
2000-01-01 05:00:00,0.679227,0.437131,0.863927
2000-01-01 06:00:00,0.892936,0.233465,0.38659
2000-01-01 07:00:00,0.125556,0.859694,0.025554
2000-01-01 08:00:00,0.570513,0.983784,0.31587
2000-01-01 09:00:00,0.292036,0.605057,0.665735


In [22]:
# Quantile levels associated with the three columns of dt_quant.
QUANTILES = [0.1, 0.5, 0.9]
# NOTE(review): this call passes representation='quantiles' (plural), while the
# earlier ts_ambiguous cell passes 'quantile' — confirm which spelling
# TimeSeries expects.
# NOTE(review): dt_quant still has flat columns here; the MultiIndex columns
# are only assigned in a later cell — confirm the intended cell order.
ts_q = TimeSeries(dt_quant,representation='quantiles', quantiles=QUANTILES)
ts_q.data

2025-05-22 15:49:59,662 - INFO - TimeSeries initialized from compatible-format DataFrame.


Unnamed: 0,0,1,2
2000-01-01 00:00:00,0.991651,0.030234,0.486247
2000-01-01 01:00:00,0.778222,0.968755,0.521583
2000-01-01 02:00:00,0.86979,0.496439,0.012754
2000-01-01 03:00:00,0.535557,0.72496,0.368568
2000-01-01 04:00:00,0.920516,0.214438,0.989725
2000-01-01 05:00:00,0.679227,0.437131,0.863927
2000-01-01 06:00:00,0.892936,0.233465,0.38659
2000-01-01 07:00:00,0.125556,0.859694,0.025554
2000-01-01 08:00:00,0.570513,0.983784,0.31587
2000-01-01 09:00:00,0.292036,0.605057,0.665735


In [None]:
# Two-level columns: one feature named 'target' with one column per quantile,
# using the expected names for the two levels.
quantile_columns = pd.MultiIndex.from_product(
    [['target'], QUANTILES],
    names=expected_column_names,
)
dt_quant.columns = quantile_columns

In [None]:
# Preview the first ten rows of the relabelled frame.
dt_quant.head(10)

feature,target,target,target
representation,0.1,0.5,0.9
2000-01-01 00:00:00,0.107522,0.41865,0.000366
2000-01-01 01:00:00,0.088974,0.015173,0.132114
2000-01-01 02:00:00,0.166463,0.365682,0.895873
2000-01-01 03:00:00,0.425247,0.385115,0.409578
2000-01-01 04:00:00,0.634288,0.950057,0.23577
2000-01-01 05:00:00,0.888428,0.710986,0.206225
2000-01-01 06:00:00,0.474942,0.341105,0.130066
2000-01-01 07:00:00,0.469205,0.824974,0.951154
2000-01-01 08:00:00,0.712546,0.690872,0.265396
2000-01-01 09:00:00,0.407924,0.492407,0.607698
