In [1]:
import sys
sys.path.append("..")

from typing import Any, Callable, List, OrderedDict as OrderedDictType, Sequence, Union
from collections import OrderedDict

import numpy as np
import pandas as pd

from forcateri import TimeSeries

In [2]:
# Names of the two row-index levels of a TimeSeries frame ("offset", "time_stamp").
OFFSET, TIME_STEP = TimeSeries.ROW_INDEX_NAMES
# Names of the two column-index levels of a TimeSeries frame ("feature", "representation").
FEATURE, REPRESENTATION = TimeSeries.COL_INDEX_NAMES

### Example Data

In [3]:
# Size of the toy example: time stamps per forecast offset, and number of offsets.
n_time_steps = 4
n_offsets = 2
# Row index: every combination of (offset, time stamp), offset-major, hourly resolution.
rows = pd.MultiIndex.from_product(
    [[pd.Timedelta(i, "h") for i in range(n_offsets)], pd.date_range(start=pd.Timestamp("2000-01-01"), freq="h", periods=n_time_steps)],
    names=TimeSeries.ROW_INDEX_NAMES
)

# Deterministic columns: one "value" representation per feature.
det_cols = pd.MultiIndex.from_product(
    [["feat0", "feat1"], ["value"]], 
    names=TimeSeries.COL_INDEX_NAMES
)

# Probabilistic columns: the 0.1 and 0.9 quantile per feature (feature-major order).
prob_cols = pd.MultiIndex.from_product(
    [["feat0", "feat1"], [.1, .9]], 
    names=TimeSeries.COL_INDEX_NAMES
)


# Base signals: f0 = 10, 20, ... and f1 = 10 * f0. They hold
# n_time_steps + n_offsets - 1 samples so that each offset can take its own
# length-n_time_steps window, shifted by one step per offset.
f0 = (np.arange(n_time_steps + n_offsets - 1) + 1) * 10
f1 = f0 * 10

# Shape (2, n_time_steps + n_offsets - 1): one row per feature.
det_data = np.array([f0, f1])
# One (2, n_time_steps) window per offset -> shape (1, n_offsets, 2, n_time_steps).
det_data = np.lib.stride_tricks.sliding_window_view(det_data, window_shape=(2, n_time_steps))
# Drop the leading singleton axis, lay the per-offset windows side by side along
# the time axis, then transpose to (n_offsets * n_time_steps, 2) so that the rows
# line up with `rows` (offset-major) and the columns with `det_cols`.
det_data = np.concatenate(np.concatenate(det_data, axis=0), axis=1).transpose()
# Synthetic quantile forecast: 0.8x / 1.2x of the ground truth per feature,
# in the same column order as `prob_cols` (feat0: 0.1, 0.9; feat1: 0.1, 0.9).
pred_data = np.array([
    det_data[:, 0] * 0.8,
    det_data[:, 0] * 1.2,
    det_data[:, 1] * 0.8,
    det_data[:, 1] * 1.2,
]).transpose()

# Ground truth as a deterministic TimeSeries.
ts_gt = TimeSeries(
    data=pd.DataFrame(index=rows, columns=det_cols, data=det_data)    
)

# Prediction as a quantile TimeSeries with the 0.1 and 0.9 quantiles.
ts_pred = TimeSeries(
    data=pd.DataFrame(index=rows, columns=prob_cols, data=pred_data),
    representation=TimeSeries.QUANTILE_REP, 
    quantiles=[.1, .9]
)

display(ts_gt.data)
display(ts_pred.data)

Unnamed: 0_level_0,feature,feat0,feat1
Unnamed: 0_level_1,representation,value,value
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2
0 days 00:00:00,2000-01-01 00:00:00,10,100
0 days 00:00:00,2000-01-01 01:00:00,20,200
0 days 00:00:00,2000-01-01 02:00:00,30,300
0 days 00:00:00,2000-01-01 03:00:00,40,400
0 days 01:00:00,2000-01-01 00:00:00,20,200
0 days 01:00:00,2000-01-01 01:00:00,30,300
0 days 01:00:00,2000-01-01 02:00:00,40,400
0 days 01:00:00,2000-01-01 03:00:00,50,500


Unnamed: 0_level_0,feature,feat0,feat0,feat1,feat1
Unnamed: 0_level_1,representation,0.1,0.9,0.1,0.9
offset,time_stamp,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0 days 00:00:00,2000-01-01 00:00:00,8.0,12.0,80.0,120.0
0 days 00:00:00,2000-01-01 01:00:00,16.0,24.0,160.0,240.0
0 days 00:00:00,2000-01-01 02:00:00,24.0,36.0,240.0,360.0
0 days 00:00:00,2000-01-01 03:00:00,32.0,48.0,320.0,480.0
0 days 01:00:00,2000-01-01 00:00:00,16.0,24.0,160.0,240.0
0 days 01:00:00,2000-01-01 01:00:00,24.0,36.0,240.0,360.0
0 days 01:00:00,2000-01-01 02:00:00,32.0,48.0,320.0,480.0
0 days 01:00:00,2000-01-01 03:00:00,40.0,60.0,400.0,600.0


### Flatten the data, to be able to treat rows and columns equally

In [4]:
# Move the outermost column level (level 0, the feature) into the row index so
# that only the representation remains as columns. Ground truth and prediction
# then share the same (offset, time_stamp, feature) row structure.
flat_gt = ts_gt.data.copy().stack(level=0, future_stack=True)
flat_pred = ts_pred.data.copy().stack(level=0, future_stack=True)

display(flat_pred, flat_gt)
print(type(flat_gt))

Unnamed: 0_level_0,Unnamed: 1_level_0,representation,0.1,0.9
offset,time_stamp,feature,Unnamed: 3_level_1,Unnamed: 4_level_1
0 days 00:00:00,2000-01-01 00:00:00,feat0,8.0,12.0
0 days 00:00:00,2000-01-01 00:00:00,feat1,80.0,120.0
0 days 00:00:00,2000-01-01 01:00:00,feat0,16.0,24.0
0 days 00:00:00,2000-01-01 01:00:00,feat1,160.0,240.0
0 days 00:00:00,2000-01-01 02:00:00,feat0,24.0,36.0
0 days 00:00:00,2000-01-01 02:00:00,feat1,240.0,360.0
0 days 00:00:00,2000-01-01 03:00:00,feat0,32.0,48.0
0 days 00:00:00,2000-01-01 03:00:00,feat1,320.0,480.0
0 days 01:00:00,2000-01-01 00:00:00,feat0,16.0,24.0
0 days 01:00:00,2000-01-01 00:00:00,feat1,160.0,240.0


Unnamed: 0_level_0,Unnamed: 1_level_0,representation,value
offset,time_stamp,feature,Unnamed: 3_level_1
0 days 00:00:00,2000-01-01 00:00:00,feat0,10
0 days 00:00:00,2000-01-01 00:00:00,feat1,100
0 days 00:00:00,2000-01-01 01:00:00,feat0,20
0 days 00:00:00,2000-01-01 01:00:00,feat1,200
0 days 00:00:00,2000-01-01 02:00:00,feat0,30
0 days 00:00:00,2000-01-01 02:00:00,feat1,300
0 days 00:00:00,2000-01-01 03:00:00,feat0,40
0 days 00:00:00,2000-01-01 03:00:00,feat1,400
0 days 01:00:00,2000-01-01 00:00:00,feat0,20
0 days 01:00:00,2000-01-01 00:00:00,feat1,200


<class 'pandas.core.frame.DataFrame'>


### Helper method to get level values from rows or columns in a uniform way
May be unnecessary after flattening.

In [5]:
def get_level_values(df: pd.DataFrame, axis: str) -> pd.Index:
    """Return the values of the index level named ``axis``, from rows or columns.

    The row index is searched first; the column index is only consulted when
    the row index has no level with that name.

    Args:
        df: Frame whose row and/or column index carries named levels.
        axis: Name of the index level to look up.

    Returns:
        The level values as returned by ``Index.get_level_values`` (one entry
        per row, or per column, of ``df``).

    Raises:
        ValueError: If neither the row index nor the column index has a level
            named ``axis``.
    """
    if axis in df.index.names:
        return df.index.get_level_values(axis)
    if axis in df.columns.names:
        return df.columns.get_level_values(axis)
    # Name the missing level and the available ones so that a typo is easy to spot.
    raise ValueError(
        f"Axis {axis!r} found in neither the row index levels {list(df.index.names)} "
        f"nor the column index levels {list(df.columns.names)}."
    )

### Actual metric computation
Note that the arguments of the function presented below are not arguments for `Metric.__call__()` but for `Metric.__init__()`.
`Metric.__call__()` expects the prediction and the ground truth.

In [6]:
def compute_metric(
    axes: List[str],
    reduction: Callable[[np.ndarray, np.ndarray], Union[np.ndarray, float]],
) -> pd.DataFrame:
    """Reduce ground truth and prediction over ``axes`` and tabulate the result.

    Operates on the module-level flattened frames ``flat_gt`` and ``flat_pred``.
    Both are grouped by every axis of (offset, time step, feature) that is NOT
    listed in ``axes``, and ``reduction`` is applied to the values of each pair
    of matching groups.

    Args:
        axes: Names of the index levels to reduce over.
        reduction: Callable receiving the ground-truth values and the predicted
            values of one group (as 2-D arrays) and returning the reduced
            value(s) for that group.

    Returns:
        A frame with one column per representation of ``flat_pred``. When at
        least one axis is left for grouping, it has one row per combination of
        the remaining axes' values; otherwise it has a single row.
    """
    print(f"Reducing axes {axes}")
    # The representation level is never grouped on: ground truth and prediction
    # may have a different number of representations, so it gets special
    # treatment and always stays as the column axis of the result.
    group_by = sorted({OFFSET, TIME_STEP, FEATURE} - set(axes))
    representations = get_level_values(flat_pred, REPRESENTATION)

    if not group_by:
        print("No axes left for grouping. Reducing entire data frames.")
        reduced = reduction(flat_gt.values, flat_pred.values)
        # reshape(1, -1) instead of a hard-coded width, so that any number of
        # representation columns (not just two quantiles) yields one result row.
        return pd.DataFrame(
            data=np.asarray(reduced).reshape(1, -1),
            columns=representations,
        )

    print(f"=> grouping_by {group_by}")

    # Build the result index from the same flattened frame that is grouped below.
    reduced_index = pd.MultiIndex.from_product(
        [get_level_values(flat_gt, axis).unique() for axis in group_by]
    )
    # Preallocate as float so that the metric values do not end up in object columns.
    reduced_df = pd.DataFrame(
        index=reduced_index,
        columns=representations,  # quantile loss would have only one column
        dtype=float,
    )

    grouped_pairs = zip(flat_gt.groupby(group_by), flat_pred.groupby(group_by))
    for (gt_label, gt), (pred_label, pred) in grouped_pairs:
        # Both frames have the identical row structure and the same group_by,
        # so the groups must come out in the same order.
        assert gt_label == pred_label
        reduced_df.loc[pred_label] = reduction(gt.values, pred.values)

    return reduced_df

### Examples of how the computation works
Note that a reduction that returns only one value, rather than one per representation, needs to be supported as well. 
One prominent example for it would be the quantile loss.
The above implementation will need to be adjusted to account for that.

In [7]:
def column_wise_mae(gt: np.ndarray, pred: np.ndarray) -> np.ndarray:
    """Mean absolute error between ``gt`` and ``pred``, averaged over axis 0.

    ``gt`` and ``pred`` are subtracted with normal NumPy broadcasting, so a
    single ground-truth column is compared against every prediction column.
    The result holds one value per column of the broadcast difference.
    """
    return np.mean(np.abs(np.subtract(gt, pred)), axis=0)

# Show the column-wise MAE for several combinations of reduced axes, from a
# single axis up to all three (which collapses everything into one row).
for reduced_axes in (
    [OFFSET],
    [TIME_STEP],
    [OFFSET, TIME_STEP],
    [TIME_STEP, FEATURE],
    [OFFSET, TIME_STEP, FEATURE],
):
    display(compute_metric(axes=reduced_axes, reduction=column_wise_mae))
    print("\n")

Reducing axes ['offset']
=> grouping_by ['feature', 'time_stamp']


Unnamed: 0_level_0,representation,0.1,0.9
feature,time_stamp,Unnamed: 2_level_1,Unnamed: 3_level_1
feat0,2000-01-01 00:00:00,3.0,3.0
feat0,2000-01-01 01:00:00,5.0,5.0
feat0,2000-01-01 02:00:00,7.0,7.0
feat0,2000-01-01 03:00:00,9.0,9.0
feat1,2000-01-01 00:00:00,30.0,30.0
feat1,2000-01-01 01:00:00,50.0,50.0
feat1,2000-01-01 02:00:00,70.0,70.0
feat1,2000-01-01 03:00:00,90.0,90.0




Reducing axes ['time_stamp']
=> grouping_by ['feature', 'offset']


Unnamed: 0_level_0,representation,0.1,0.9
feature,offset,Unnamed: 2_level_1,Unnamed: 3_level_1
feat0,0 days 00:00:00,5.0,5.0
feat0,0 days 01:00:00,7.0,7.0
feat1,0 days 00:00:00,50.0,50.0
feat1,0 days 01:00:00,70.0,70.0




Reducing axes ['offset', 'time_stamp']
=> grouping_by ['feature']


representation,0.1,0.9
feature,Unnamed: 1_level_1,Unnamed: 2_level_1
feat0,6.0,6.0
feat1,60.0,60.0




Reducing axes ['time_stamp', 'feature']
=> grouping_by ['offset']


representation,0.1,0.9
offset,Unnamed: 1_level_1,Unnamed: 2_level_1
0 days 00:00:00,27.5,27.5
0 days 01:00:00,38.5,38.5




Reducing axes ['offset', 'time_stamp', 'feature']
No axes left for grouping. Reducing entire data frames.


representation,0.1,0.9
0,33.0,33.0






In [8]:
from forcateri.reporting.dimwiseaggregatedmetric import DimwiseAggregatedMetric
from forcateri.reporting.metric import Metric

In [9]:
# Library counterpart of the prototype above, configured with a single
# reduction registered under the name 'mae'.
dimwise = DimwiseAggregatedMetric(reductions = {'mae':Metric.column_wise_mae})

In [11]:
# Reduce over the time-step axis with the library implementation; the printed
# table matches the prototype's result for axes=[TIME_STEP] shown earlier.
# NOTE(review): `reduction` is passed again here although `reductions` was
# already supplied to the constructor - confirm which one the library uses.
dimwise.compute_metric(axes=[DimwiseAggregatedMetric.TIME_STEP],reduction=Metric.column_wise_mae, ts_gt=ts_gt,ts_pred=ts_pred)

Reducing axes ['time_stamp']
=> grouping_by ['feature', 'offset']


Unnamed: 0_level_0,representation,0.1,0.9
feature,offset,Unnamed: 2_level_1,Unnamed: 3_level_1
feat0,0 days 00:00:00,5.0,5.0
feat0,0 days 01:00:00,7.0,7.0
feat1,0 days 00:00:00,50.0,50.0
feat1,0 days 01:00:00,70.0,70.0
