In [2]:
import os
import sys

# Treat the parent of the current working directory as the project root and
# make sure it is on sys.path exactly once.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [3]:
import sys

# Guarded so that re-running this cell does not keep appending duplicate
# ".." entries to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

from IPython.display import display, Markdown
import numpy as np
import pandas as pd

from forcateri import TimeSeries

def mprint(s):
    """Render the string ``s`` as Markdown in the cell output."""
    rendered = Markdown(s)
    display(rendered)

In [4]:
import logging

# Configure the "forcateri.data.timeseries" logger at INFO level
# (use logging.DEBUG instead for more detail).
logger = logging.getLogger("forcateri.data.timeseries")
logger.setLevel(logging.INFO)

# Attach one stream handler; the emptiness check keeps re-runs of this cell
# from stacking additional handlers on the same logger.
if len(logger.handlers) == 0:
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)

# Do not pass records on to ancestor loggers, so each message is printed once.
logger.propagate = False


In [5]:
%reload_ext autoreload

In [9]:
# Location of the hourly CSV. The default is the original hard-coded local
# path; set the FORCATERI_HOURLY_DATA environment variable to read the file
# from somewhere else without editing the notebook.
hourly_data_path = os.environ.get(
    "FORCATERI_HOURLY_DATA",
    "/home/user/DFKI/forcateri/_data/hourly_data.csv",
)
# The first CSV column is used as the row index.
df = pd.read_csv(hourly_data_path, index_col=0)

In [10]:

# Keep only the rows of room 978, drop the now-constant 'room_id' column and
# parse 'rounded_ts' into datetimes.
df_978 = (
    df.loc[df['room_id'] == 978]
    .drop(columns=['room_id'])
    .assign(rounded_ts=lambda frame: pd.to_datetime(frame['rounded_ts']))
)

In [11]:
# Promote 'rounded_ts' to the row index. Reassigning (instead of inplace=True)
# behind a column check makes re-running this cell a no-op rather than a
# KeyError on the already-consumed 'rounded_ts' column.
if 'rounded_ts' in df_978.columns:
    df_978 = df_978.set_index('rounded_ts')

In [12]:
# Display the room-978 frame.
df_978

Unnamed: 0_level_0,sin_time_of_day,cos_time_of_day,sin_time_of_year,cos_time_of_year,day_of_week,max_temperature_1,max_temperature_2,outside_temp,room_temperature,delta
rounded_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-06-01 00:00:00,0.500000,1.000000,0.753708,0.069149,5,22.616923,22.709231,10.8,21.081304,0.0
2019-06-01 01:00:00,0.629410,0.982963,0.753708,0.069149,5,22.522000,22.626667,10.8,21.079091,0.0
2019-06-01 02:00:00,0.750000,0.933013,0.753708,0.069149,5,22.413846,22.545385,9.8,21.075185,0.0
2019-06-01 03:00:00,0.853553,0.853553,0.753708,0.069149,5,22.425000,22.540000,9.8,21.078500,0.0
2019-06-01 04:00:00,0.933013,0.750000,0.753708,0.069149,5,22.549231,22.601538,9.8,21.101111,0.0
...,...,...,...,...,...,...,...,...,...,...
2022-04-18 20:00:00,0.066987,0.750000,0.980075,0.360257,0,23.687273,25.089091,9.2,20.499355,0.0
2022-04-18 21:00:00,0.146447,0.853553,0.980075,0.360257,0,24.031538,25.581538,9.3,20.510811,0.0
2022-04-18 22:00:00,0.250000,0.933013,0.980075,0.360257,0,24.100556,25.333889,8.3,20.440870,0.0
2022-04-18 23:00:00,0.370590,0.982963,0.980075,0.360257,0,23.381000,24.433000,6.8,20.396154,0.0


In [13]:
# Build a TimeSeries from the room-978 frame, passing 'value' as representation.
ts = TimeSeries(
    data=df_978,
    representation='value',
)

2025-05-28 15:06:08,728 - INFO - TimeSeries initialized from compatible-format DataFrame.


In [14]:
# Level-name lists; in this cell they are only assigned and are not passed to
# the check below.
expected_index_names = ['offset', 'time_stamp']
expected_column_names = ['feature', 'representation']
# Ask TimeSeries whether df_978 is in its matching format.
TimeSeries.is_matching_format(df_978)

False

In [15]:
# Slice three temperature features out of `ts` and keep the first 5000 rows
# of the resulting data.
selected_features = ['max_temperature_1', 'max_temperature_2', 'outside_temp']
feature_slice = ts.get_feature_slice(index=selected_features)
test = feature_slice.data[:5000]
len(test)

2025-05-28 15:06:08,754 - INFO - TimeSeries initialized from compatible-format DataFrame.


5000

In [16]:
# Fixed seed so that the random demo frames built from `rng` are identical on
# every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
# Demo data shape: 3 columns over 12 hourly time steps starting 2000-01-01.
n_cols, n_rows = 3, 12
index = pd.date_range(start="2000-01-01", freq="h", periods=n_rows)


In [17]:
# Frame of random floats with default integer row and column labels.
random_values = rng.random(n_cols * n_rows)
raw_df = pd.DataFrame(data=random_values.reshape(n_rows, n_cols))
mprint("### Not compatible\nNo time information is provided:")
raw_df

### Not compatible
No time information is provided:

Unnamed: 0,0,1,2
0,0.538511,0.154564,0.124714
1,0.706765,0.560033,0.324112
2,0.633571,0.50806,0.503798
3,0.1216,0.880068,0.257616
4,0.060897,0.33811,0.75586
5,0.773727,0.719406,0.736971
6,0.177364,0.897868,0.44287
7,0.567419,0.557371,0.490774
8,0.859099,0.200336,0.8594
9,0.001572,0.486803,0.3656


In [18]:
# New frame with the values of raw_df and the hourly `index` as row labels;
# raw_df itself is left untouched.
dt_indexed_df = raw_df.set_index(index)
mprint("### Compatible\nThe column index represents deterministic features, the row index represents time steps")
dt_indexed_df

### Compatible
The column index represents deterministic features, the row index represents time steps

Unnamed: 0,0,1,2
2000-01-01 00:00:00,0.538511,0.154564,0.124714
2000-01-01 01:00:00,0.706765,0.560033,0.324112
2000-01-01 02:00:00,0.633571,0.50806,0.503798
2000-01-01 03:00:00,0.1216,0.880068,0.257616
2000-01-01 04:00:00,0.060897,0.33811,0.75586
2000-01-01 05:00:00,0.773727,0.719406,0.736971
2000-01-01 06:00:00,0.177364,0.897868,0.44287
2000-01-01 07:00:00,0.567419,0.557371,0.490774
2000-01-01 08:00:00,0.859099,0.200336,0.8594
2000-01-01 09:00:00,0.001572,0.486803,0.3656


In [19]:
ambiguous_col_df = dt_indexed_df.copy()
# Two unnamed column levels: one outer label "delta" over the inner labels 1, 5, 9.
inner_labels = [1, 5, 9]
ambiguous_col_df.columns = pd.MultiIndex.from_product([["delta"], inner_labels])
mprint("""
### Compatible but...\n
Unclear how to interpret the inner column index: As samples? As quantiles? Which quantiles?
Thus, the compatibility check should succeed but an error can still be thrown by the constructor
if `representation` and/or `quantiles` are not provided.
""")
ambiguous_col_df


### Compatible but...

Unclear how to interpret the inner column index: As samples? As quantiles? Which quantiles?
Thus, the compatibility check should succeed but an error can still be thrown by the constructor
if `representation` and/or `quantiles` are not provided.


Unnamed: 0_level_0,delta,delta,delta
Unnamed: 0_level_1,1,5,9
2000-01-01 00:00:00,0.538511,0.154564,0.124714
2000-01-01 01:00:00,0.706765,0.560033,0.324112
2000-01-01 02:00:00,0.633571,0.50806,0.503798
2000-01-01 03:00:00,0.1216,0.880068,0.257616
2000-01-01 04:00:00,0.060897,0.33811,0.75586
2000-01-01 05:00:00,0.773727,0.719406,0.736971
2000-01-01 06:00:00,0.177364,0.897868,0.44287
2000-01-01 07:00:00,0.567419,0.557371,0.490774
2000-01-01 08:00:00,0.859099,0.200336,0.8594
2000-01-01 09:00:00,0.001572,0.486803,0.3656


In [20]:
# Give the two column levels of ambiguous_col_df the names 'feature' and
# 'representation'.
expected_column_names = ['feature', 'representation']
ambiguous_col_df.columns = ambiguous_col_df.columns.set_names(expected_column_names)
ambiguous_col_df

feature,delta,delta,delta
representation,1,5,9
2000-01-01 00:00:00,0.538511,0.154564,0.124714
2000-01-01 01:00:00,0.706765,0.560033,0.324112
2000-01-01 02:00:00,0.633571,0.50806,0.503798
2000-01-01 03:00:00,0.1216,0.880068,0.257616
2000-01-01 04:00:00,0.060897,0.33811,0.75586
2000-01-01 05:00:00,0.773727,0.719406,0.736971
2000-01-01 06:00:00,0.177364,0.897868,0.44287
2000-01-01 07:00:00,0.567419,0.557371,0.490774
2000-01-01 08:00:00,0.859099,0.200336,0.8594
2000-01-01 09:00:00,0.001572,0.486803,0.3656


In [21]:
# Resolve the ambiguous inner column level by stating explicitly that it
# holds the 0.1 / 0.5 / 0.9 quantiles.
ts_ambiguous = TimeSeries(
    ambiguous_col_df,
    representation='quantile',
    quantiles=[0.1, 0.5, 0.9],
)

2025-05-28 15:06:08,889 - INFO - TimeSeries initialized from compatible-format DataFrame.


In [22]:
# Display the frame held by ts_ambiguous.
ts_ambiguous.data

Unnamed: 0_level_0,feature,delta,delta,delta
Unnamed: 0_level_1,representation,0.1,0.5,0.9
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0 days,2000-01-01 00:00:00,0.538511,0.154564,0.124714
0 days,2000-01-01 01:00:00,0.706765,0.560033,0.324112
0 days,2000-01-01 02:00:00,0.633571,0.50806,0.503798
0 days,2000-01-01 03:00:00,0.1216,0.880068,0.257616
0 days,2000-01-01 04:00:00,0.060897,0.33811,0.75586
0 days,2000-01-01 05:00:00,0.773727,0.719406,0.736971
0 days,2000-01-01 06:00:00,0.177364,0.897868,0.44287
0 days,2000-01-01 07:00:00,0.567419,0.557371,0.490774
0 days,2000-01-01 08:00:00,0.859099,0.200336,0.8594
0 days,2000-01-01 09:00:00,0.001572,0.486803,0.3656


In [23]:
# Work on an independent copy so that dt_indexed_df keeps its plain columns.
dt_quant = dt_indexed_df.copy()
dt_quant

Unnamed: 0,0,1,2
2000-01-01 00:00:00,0.538511,0.154564,0.124714
2000-01-01 01:00:00,0.706765,0.560033,0.324112
2000-01-01 02:00:00,0.633571,0.50806,0.503798
2000-01-01 03:00:00,0.1216,0.880068,0.257616
2000-01-01 04:00:00,0.060897,0.33811,0.75586
2000-01-01 05:00:00,0.773727,0.719406,0.736971
2000-01-01 06:00:00,0.177364,0.897868,0.44287
2000-01-01 07:00:00,0.567419,0.557371,0.490774
2000-01-01 08:00:00,0.859099,0.200336,0.8594
2000-01-01 09:00:00,0.001572,0.486803,0.3656


In [24]:
QUANTILES = [0.1, 0.5, 0.9]
# NOTE(review): this call passes representation='quantiles', whereas the
# ts_ambiguous construction passes 'quantile' — confirm which spelling
# TimeSeries expects.
ts_q = TimeSeries(
    dt_quant,
    representation='quantiles',
    quantiles=QUANTILES,
)
ts_q.data

2025-05-28 15:06:08,937 - INFO - TimeSeries initialized from compatible-format DataFrame.


Unnamed: 0,0,1,2
2000-01-01 00:00:00,0.538511,0.154564,0.124714
2000-01-01 01:00:00,0.706765,0.560033,0.324112
2000-01-01 02:00:00,0.633571,0.50806,0.503798
2000-01-01 03:00:00,0.1216,0.880068,0.257616
2000-01-01 04:00:00,0.060897,0.33811,0.75586
2000-01-01 05:00:00,0.773727,0.719406,0.736971
2000-01-01 06:00:00,0.177364,0.897868,0.44287
2000-01-01 07:00:00,0.567419,0.557371,0.490774
2000-01-01 08:00:00,0.859099,0.200336,0.8594
2000-01-01 09:00:00,0.001572,0.486803,0.3656


In [25]:
# Relabel the columns as a single 'target' feature crossed with QUANTILES,
# using expected_column_names as the level names.
quantile_columns = pd.MultiIndex.from_product(
    [['target'], QUANTILES],
    names=expected_column_names,
)
dt_quant.columns = quantile_columns

In [26]:
import numpy as np
import pandas as pd

from forcateri import TimeSeries

# Two-level row index: each offset (0 and 1 hour) crossed with four hourly
# time stamps starting at 2000-01-01.
offsets = [pd.Timedelta(0), pd.Timedelta(1, "h")]
time_stamps = pd.date_range(start=pd.Timestamp("2000-01-01"), freq="h", periods=4)
rows = pd.MultiIndex.from_product(
    [offsets, time_stamps],
    names=["offset", "time_stamp"],
)
cols = ["feat0", "feat1"]

# All-zero values: one row per index entry, one column per feature.
data = np.zeros(shape=(len(rows), len(cols)))

# NOTE: `df` and `ts` rebind names that earlier cells assigned (the CSV frame
# and the room-978 TimeSeries).
df = pd.DataFrame(data, index=rows, columns=cols)
print(df)

ts = TimeSeries(data=df)
print(ts.data)

print(ts.representation)

2025-05-28 15:06:08,990 - INFO - Check index structure
2025-05-28 15:06:08,992 - INFO - Index is MultiIndex with datetime values.
2025-05-28 15:06:08,993 - INFO - Casting the index to datetime format
2025-05-28 15:06:08,996 - INFO - TimeSeries initialized from compatible-format DataFrame.


                                     feat0  feat1
offset          time_stamp                       
0 days 00:00:00 2000-01-01 00:00:00    0.0    0.0
                2000-01-01 01:00:00    0.0    0.0
                2000-01-01 02:00:00    0.0    0.0
                2000-01-01 03:00:00    0.0    0.0
0 days 01:00:00 2000-01-01 00:00:00    0.0    0.0
                2000-01-01 01:00:00    0.0    0.0
                2000-01-01 02:00:00    0.0    0.0
                2000-01-01 03:00:00    0.0    0.0
feature                             feat0 feat1
representation                      value value
offset          time_stamp                     
0 days 00:00:00 2000-01-01 00:00:00   0.0   0.0
                2000-01-01 01:00:00   0.0   0.0
                2000-01-01 02:00:00   0.0   0.0
                2000-01-01 03:00:00   0.0   0.0
0 days 01:00:00 2000-01-01 00:00:00   0.0   0.0
                2000-01-01 01:00:00   0.0   0.0
                2000-01-01 02:00:00   0.0   0.0
                2000

In [27]:
# Display the synthetic (offset, time_stamp)-indexed frame.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,feat0,feat1
offset,time_stamp,Unnamed: 2_level_1,Unnamed: 3_level_1
0 days 00:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 03:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 03:00:00,0.0,0.0


In [28]:
# Ask TimeSeries whether `df` is in a format it reports as compatible.
TimeSeries.is_compatible_format(df)

2025-05-28 15:06:09,027 - INFO - Check index structure
2025-05-28 15:06:09,029 - INFO - Index is MultiIndex with datetime values.


True

In [29]:
# NOTE(review): 'determ' is passed as representation here, while the recorded
# output labels the representation level 'value' — confirm the accepted values.
asd = TimeSeries(
    df,
    representation='determ',
)
asd.data

2025-05-28 15:06:09,041 - INFO - Check index structure
2025-05-28 15:06:09,043 - INFO - Index is MultiIndex with datetime values.
2025-05-28 15:06:09,043 - INFO - Casting the index to datetime format
2025-05-28 15:06:09,047 - INFO - TimeSeries initialized from compatible-format DataFrame.


Unnamed: 0_level_0,feature,feat0,feat1
Unnamed: 0_level_1,representation,value,value
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2
0 days 00:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 03:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 03:00:00,0.0,0.0


In [30]:
# Turn `df` into a frame whose 'time_stamp' level holds integers and whose
# columns carry the (feature, representation) levels. Both steps are guarded
# so that re-running the cell leaves an already-converted `df` unchanged;
# without the guard the already two-level columns would be fed back into
# from_product.
time_level_pos = df.index.names.index("time_stamp")
time_level = df.index.levels[time_level_pos]
if isinstance(time_level, pd.DatetimeIndex):
    # Explicit 64-bit width instead of the platform-sized `int`.
    df.index = df.index.set_levels(time_level.astype("int64"), level=time_level_pos)

if not isinstance(df.columns, pd.MultiIndex):
    df.columns = pd.MultiIndex.from_product(
        [df.columns, ["value"]],
        names=["feature", "representation"],
    )

In [31]:
# Display `df` after the index and column rewrite of the previous cell.
df

Unnamed: 0_level_0,feature,feat0,feat1
Unnamed: 0_level_1,representation,value,value
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2
0 days 00:00:00,946684800000000000,0.0,0.0
0 days 00:00:00,946688400000000000,0.0,0.0
0 days 00:00:00,946692000000000000,0.0,0.0
0 days 00:00:00,946695600000000000,0.0,0.0
0 days 01:00:00,946684800000000000,0.0,0.0
0 days 01:00:00,946688400000000000,0.0,0.0
0 days 01:00:00,946692000000000000,0.0,0.0
0 days 01:00:00,946695600000000000,0.0,0.0


In [32]:
# Build a TimeSeries from the frame whose 'time_stamp' level was rewritten as
# integers, then show the data it holds.
# NOTE(review): 'determ' is passed as representation — confirm it is an
# accepted value.
qq = TimeSeries(
    df,
    representation='determ',
)
qq.data

2025-05-28 15:06:09,097 - INFO - Check index structure
2025-05-28 15:06:09,099 - INFO - Check columns MultiIndex structure. One caveat is that df.index is not DateTimeIndex, casting to datetime is done in align_format().
2025-05-28 15:06:09,100 - INFO - Casting the index to datetime format
2025-05-28 15:06:09,102 - INFO - TimeSeries initialized from compatible-format DataFrame.


Unnamed: 0_level_0,feature,feat0,feat1
Unnamed: 0_level_1,representation,value,value
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2
0 days 00:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 03:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 03:00:00,0.0,0.0


In [33]:
# Inspect the 'time_stamp' level values of qq.data's index cast to datetime64[ns].
qq.data.index.get_level_values('time_stamp').astype('datetime64[ns]')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:00:00',
               '2000-01-01 02:00:00', '2000-01-01 03:00:00',
               '2000-01-01 00:00:00', '2000-01-01 01:00:00',
               '2000-01-01 02:00:00', '2000-01-01 03:00:00'],
              dtype='datetime64[ns]', name='time_stamp', freq=None)

In [34]:
# Rebuild the row MultiIndex of qq.data: 'offset' is carried over as is,
# 'time_stamp' is passed through pd.to_datetime, and the level names are kept.
current_index = qq.data.index
offset_values = current_index.get_level_values('offset')
time_stamp_values = pd.to_datetime(current_index.get_level_values('time_stamp'))
qq.data.index = pd.MultiIndex.from_arrays(
    [offset_values, time_stamp_values],
    names=current_index.names,
)
qq.data

Unnamed: 0_level_0,feature,feat0,feat1
Unnamed: 0_level_1,representation,value,value
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2
0 days 00:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 00:00:00,2000-01-01 03:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 00:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 01:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 02:00:00,0.0,0.0
0 days 01:00:00,2000-01-01 03:00:00,0.0,0.0
