
# CumulantTransformer: Evaluation

## Setup

In [None]:
import plotly.express as px
from culearn.data import *
from culearn.learn import *

# --- Data source ---------------------------------------------------------
# The LCL source is constructed with the path '../data/LCL'.
source = LCL('../data/LCL')
# Alternative sources (swap the line above for one of these):
# source = REFIT('../data/REFIT')
# source = SGSC('../data/SGSC')
# source = UMass('../data/UMass')

# --- Dataset -------------------------------------------------------------
# Loading might take a while the first time.
ds = source.dataset()

# --- Time encoders -------------------------------------------------------
# These encoders are used to aggregate time series values before clustering.
# DayType is built from the calendar exposed by the selected source.
transform_encoders = TimeEncoders(
    MonthOfYear(),
    DayType(source.calendar),
    TimeOfDay(),
)

# --- Transformer ---------------------------------------------------------
# The transformer approximates and clusters time series values using the
# encoders configured above.
transformer = CumulantTransform(encoder=transform_encoders)

# Optional: process the input data as streaming time series by passing the
# 'approx' parameter to the transformer, e.g.:
# approx=lambda _: StreamApproximator(struct=MultiSeriesCSV(_, f'{source.directory}/approx'))
# This is particularly useful for high-resolution time series data such as
# REFIT data.

## Transformations

### Time series of cluster-level cumulants

In [None]:
# Fit the transformer on the dataset's `y` member and transform it in one
# step, using a 30-minute time resolution over the source's interval.
cumulants = transformer.fit_transform(
    ds.y,
    TimeResolution(minutes=30),
    source.interval,
)

# Show every resulting series, preceded by its identifier.
# (Presumably one series per cluster -- confirm against the culearn docs.)
for cumulant_series in cumulants:
    print(cumulant_series.ts_id)
    display(cumulant_series)

### Time series of cluster-level prediction intervals

In [None]:
# Inspect the first cumulant series with its zero entries filtered out:
# zeros are turned into NaN and the NaN entries are then dropped.
# NOTE(review): assumes pandas-like `replace`/`dropna` semantics and that
# `np` is made available by the star imports of the setup cell -- confirm.
(
    cumulants[0]
    .replace(0, np.nan)
    .dropna()
)

In [None]:
# Map the cumulants back through the transformer, requesting the
# p-levels 0.5, 0.75 and 0.99.
intervals = transformer.inverse_transform(
    cumulants,
    p=[0.5, 0.75, 0.99],
)

# Show each result as a frame, preceded by its identifier.
for interval_series in intervals:
    print(interval_series.ts_id)
    display(interval_series.to_frame())

## Evaluation

In [None]:
# Evaluate the transformer at every percentile level: 0.01, 0.02, ..., 0.99.
p = [percent / 100 for percent in range(1, 100)]

# `evaluate` yields two results, unpacked here as the pinball score and the
# Winkler score (both are plotted in the cells below).
pinball_score, winkler_score = transformer.evaluate(
    cumulants,
    p,
)

### Pinball score

In [None]:
# Average the pinball score along axis 0 and plot the result without a legend.
# NOTE(review): assumes a pandas-like object -- confirm.
(
    pinball_score
    .mean(axis=0)
    .plot(legend=False)
)

### Winkler score

In [None]:
# Average the Winkler score along axis 0 and plot the result without a legend.
# NOTE(review): assumes a pandas-like object -- confirm.
(
    winkler_score
    .mean(axis=0)
    .plot(legend=False)
)

### Clustering score

In [None]:
# Bar chart of the transformer's clustering score: 'k' on the x-axis,
# 'score' on the y-axis, bars coloured by the 'selected' column.
# The index is reset first so that it becomes available as a regular column.
px.bar(
    transformer.clustering_score.reset_index(),
    x='k',
    y='score',
    color='selected',
)

### Feature extraction score

In [None]:
# Bar chart of the transformer's extractor score: 'feature' on the x-axis,
# 'score' on the y-axis, bars coloured by the 'selected' column.
# The index is reset first so that it becomes available as a regular column.
px.bar(
    transformer.extractor_score.reset_index(),
    x='feature',
    y='score',
    color='selected',
)

### Cluster-level prediction intervals for the last week

In [None]:
# Interval ending at the end of the source interval and starting
# `timedelta(7)` earlier.
# NOTE(review): 7 days if `timedelta` is `datetime.timedelta`; the name is
# presumably provided by the star imports of the setup cell -- confirm.
last_week = TimeInterval(
    source.interval.end - timedelta(7),
    source.interval.end,
)

# Restrict every cumulant series to that interval and build the figure for
# the p-levels 0.5, 0.75 and 0.99.
fig = transformer.figure(
    [series.select(last_week) for series in cumulants],
    p=[0.5, 0.75, 0.99],
)
# If you also want to show load measurements, simply add 'show_actual=True'.
# However, note that it might consume a lot of memory for large clusters.
fig.show()