# CumulantLearner: Evaluation

## Setup

In [None]:
import plotly.express as px
from culearn.data import *
from culearn.learn import *

# Trial API key for worldweatheronline.com weather data (consumed by the WorldWeather class).
# The MeteoWeather class (meteostat.net) is an alternative that works without a key.
wwo_key = '436e5017a4b34bc5bbb182353223011'

# Data source. Other configurations you can try instead:
#   source = LCL('../data/LCL')  # Uses MeteoWeather by default.
#   source = REFIT('../data/REFIT', WorldWeather, api_key=wwo_key)
#   source = REFIT('../data/REFIT')  # Uses MeteoWeather by default.
#   source = SGSC('../data/SGSC', WorldWeather, api_key=wwo_key)
#   source = SGSC('../data/SGSC')  # Uses MeteoWeather by default.
#   source = UMass('../data/UMass')  # Uses UMass weather data.
source = LCL(
    '../data/LCL',
    WorldWeather,
    api_key=wwo_key,
)

# Load the dataset from the data source (the first call might take a while).
ds = source.dataset()

# Time encoders used to aggregate time series values before clustering.
transform_encoders = TimeEncoders(
    MonthOfYear(),
    DayType(source.calendar),
    TimeOfDay(),
)

# Transformer that approximates and clusters time series values.
# To process the input data as streaming time series, adjust the 'approx' parameter, e.g.:
#   approx=lambda _: StreamApproximator(struct=MultiSeriesCSV(_, f'{source.directory}/approx'))
# This is particularly useful for high-resolution time series data such as REFIT data.
transformer = CumulantTransform(encoder=transform_encoders)

# Time encoders that provide the input time features for regression.
regressor_encoders = TimeEncoders(
    MonthOfYear(),
    DayOfWeek(),
    TimeOfDay(),
    Holiday(source.calendar),
)


def regressor():
    """Build a regressor that predicts time series patterns 48 time steps ahead.

    The underlying regression model can be changed through the 'base' parameter
    of TimeSeriesRegressor.
    """
    return TimeSeriesRegressor(48, t_encoder=regressor_encoders)


# Learner that predicts half-hour cluster-level cumulants.
# Combined with a regressor that predicts 48 time steps ahead, it provides a day-ahead forecast.
learner = CumulantLearner(ds, TimeResolution(minutes=30), transformer, regressor)

## Evaluation

In [None]:
# Initial training covers the first 80% of the history (truncated to whole days);
# the remaining 20% is used for testing.
history = source.interval
train_days = int(history.delta.days * 0.8)
fit_interval = TimeInterval(history.start, history.start + timedelta(train_days))
pred_interval = TimeInterval(fit_interval.end, history.end)

# Incremental update every 15 prediction intervals (every 15 days).
update_interval = 15

# Evaluate at percentile level: 0.01, 0.02, ..., 0.99.
p = [percent / 100 for percent in range(1, 100)]

# Run the evaluation (might take a while).
e = learner.evaluate(fit_interval, pred_interval, update_interval, p)

# Optionally, save results to CSV:
# e.to_csv(source.directory, type(learner).__name__)

### Pinball score

In [None]:
# Average the pinball scores along axis 0 and plot the result without a legend.
mean_pinball_score = e.pinball_score.mean(axis=0)
mean_pinball_score.plot(legend=False)

### Winkler score

In [None]:
# Average the Winkler scores along axis 0 and plot the result without a legend.
mean_winkler_score = e.winkler_score.mean(axis=0)
mean_winkler_score.plot(legend=False)

### Clustering score

In [None]:
# Bar chart of the clustering score per 'k', colored by the 'selected' column.
clustering_scores = e.clustering_score.reset_index()
px.bar(clustering_scores, x='k', y='score', color='selected')

### Feature extraction score

In [None]:
# Bar chart of the extractor score per 'feature', colored by the 'selected' column.
extractor_scores = e.extractor_score.reset_index()
px.bar(extractor_scores, x='feature', y='score', color='selected')

### Feature selection score

In [None]:
# 3D scatter of the feature selection score over ('x', 'cluster'), colored by the 'selected' column.
x_selector_scores = e.x_selector_score.reset_index()
px.scatter_3d(x_selector_scores, x='x', y='cluster', z='score', color='selected')

### Lag selection score

In [None]:
# 3D scatter of the lag selection score over ('lag', 'cluster'), colored by the 'selected' column.
y_selector_scores = e.y_selector_score.reset_index()
px.scatter_3d(y_selector_scores, x='lag', y='cluster', z='score', color='selected')

### Regressor scores

In [None]:
# Show the regressor scores obtained for each cluster during initial training and incremental updates.
# The score table is flattened with reset_index(); one subplot is drawn for every combination
# of the distinct values found in its first two columns.

# Imported explicitly so this cell does not rely on a wildcard import re-exporting the name.
from plotly.subplots import make_subplots

rs = e.regressor_score.reset_index()
ax_cols = sorted(set(rs.iloc[:, 0]))  # distinct values of the first column -> subplot columns
ax_rows = sorted(set(rs.iloc[:, 1]))  # distinct values of the second column -> subplot rows
ax_value = 3  # positional index of the column plotted on the y-axis

rs_fig = make_subplots(rows=len(ax_rows), cols=len(ax_cols))

for i_col, col_key in enumerate(ax_cols):
    in_col = rs.iloc[:, 0] == col_key  # loop-invariant for the inner loop
    for i_row, row_key in enumerate(ax_rows):
        rs_values = rs[in_col & (rs.iloc[:, 1] == row_key)].iloc[:, ax_value]
        # Label the trace with the actual key values it was filtered on (not the loop positions),
        # so the legend stays correct even when the keys are not 0..n-1.
        rs_fig.add_scatter(y=rs_values, row=i_row + 1, col=i_col + 1,
                           name=f'{rs.columns[0]}={col_key}, {rs.columns[1]}={row_key}')

rs_fig.update_layout(height=800)
rs_fig.show()

## Cluster-level prediction intervals for the last day

In [None]:
# The figure is requested for the time point one day before the end of the source interval.
last_day = source.interval.end - timedelta(1)
# To also show load measurements, simply add 'show_actual=True' to the call below.
# However, note that it might consume a lot of memory for large clusters.
fig = learner.figure(last_day, p=[0.5, 0.75, 0.99])
fig.show()