# Evaluate

In [None]:
%load_ext autoreload
%autoreload 2

from datetime import datetime, timedelta, timezone
import pandas as pd
import numpy as np
import cufflinks
import plotly.express as px
from IPython.display import Markdown, display
import util.performance_metrics

# Switch cufflinks to offline plotting so figures render inside the notebook.
# NOTE(review): the cells visible here plot through plotly.express only, so this
# call (and the cufflinks import) may no longer be needed — confirm before removing.
cufflinks.go_offline()

## Settings

In [None]:
# Select time range (timezone-aware, UTC). Both bounds are used to slice the
# loaded forecast and measurement series.
start_time = datetime(2020, 1, 1, tzinfo=timezone.utc)
end_time = datetime(2020, 10, 26, tzinfo=timezone.utc)

# Set time horizon for forecasts (should match the forecast horizon from the backtest)
known_ahead_targets = [timedelta(hours=24)]

# Select forecast percentile (integer, e.g. 10 for P10)
percentile = 10
# Derive the column name from the percentile so the two settings cannot drift apart.
# NOTE(review): assumes the backtest columns are named "quantile_PXX" with a
# zero-padded two-digit percentile — confirm against the pickle's columns.
quantile_column_name = f"quantile_P{percentile:02d}"

# Set case name (only used in plot titles)
case_name = "sun_heavy"

# Congestion limit and the fraction of it that counts as a peak.
# The peak threshold used throughout the notebook is `limit * limit_scale`.
# TODO(review): the original author was unsure which limit (raw or scaled) is
# applied where downstream — confirm the sign and units of `limit`.
limit_scale = 0.9
limit = -2.5

# Length of the rolling evaluation window for metrics, in days
eval_window = 14



## Loading data

In [None]:
# Load the backtest results once; the forecast quantile and the realised
# measurement are two columns of the same pickled frame.
# NOTE: unpickling can execute arbitrary code — only load pickle files from a
# trusted source.
backtest_results = pd.read_pickle("data/forecast_raycast.pkl")

# Select each series and restrict it to [start_time, end_time] in one step, so
# re-running this cell always starts from the freshly loaded data.
# Label-based slicing includes both bounds.
# NOTE(review): assumes the frame has a sorted, timezone-aware datetime index — confirm.
forecast = backtest_results[quantile_column_name][start_time:end_time]
measurement = backtest_results["realised"][start_time:end_time]

In [None]:
# Put measurement and forecast side by side in one frame so they are drawn as
# two lines in the same figure.
comparison = pd.concat([measurement, forecast], axis=1)
plot_title = f'Forecast and measurement in {case_name}<br><sup>Forecast percentile = {percentile}</sup>'
fig = px.line(comparison, title=plot_title)

# Mark the scaled congestion limit. This call is the cell's last expression,
# so its return value is what the notebook displays.
fig.add_hline(y=limit * limit_scale)

## Performance for entire date range

In [None]:
# Metrics over the full selected date range, using the scaled limit as the
# peak threshold. Judging by the column names used for the same helper further
# down, the two discarded values are the F10 score and the number of predicted
# peaks.
peak_threshold = limit * limit_scale
precision, recall, _, number_peak_days, _ = util.performance_metrics.get_performance_metrics(
    measurement, forecast, peak_threshold
)

# Render the summary as Markdown, one line per statement.
summary_lines = [
    f"There were <b>{number_peak_days} days</b> with measured peaks using <b>{limit_scale * 100}%</b> of the technical limit.",
    f"<b>{round(precision * 100, 1)}% of predicted peaks correspond to actual peaks</b>. Precision is {precision}",
    f"<b>{round(recall * 100, 1)}% of actual peaks are predicted</b>. Recall is {recall}",
]
for summary_line in summary_lines:
    display(Markdown(summary_line))


## Performance evolution

In [None]:
# Rolling evaluation: compute the performance metrics for every window of
# `eval_window` days, moving the window start forward one day at a time.
window_length = timedelta(days=eval_window)
total_days = (end_time - start_time).days

# "+ 1" so the last window ends exactly at end_time; without it the final day
# of the selected range is never evaluated. range() is simply empty when the
# date range is shorter than one window.
start_days = [start_time + timedelta(days=offset) for offset in range(total_days - eval_window + 1)]

performance_metrics_list = []
for start_day in start_days:
    window_end = start_day + window_length
    # Half-open window [start_day, window_end). Label slicing with
    # `series[start:end]` would also include a sample stamped exactly at
    # window_end, which belongs to the next day and would leak into this window.
    measurement_window = measurement[(measurement.index >= start_day) & (measurement.index < window_end)]
    forecast_window = forecast[(forecast.index >= start_day) & (forecast.index < window_end)]
    metrics = util.performance_metrics.get_performance_metrics(measurement_window, forecast_window, limit * limit_scale)
    performance_metrics_list.append(metrics)

# One row per window; each row is labelled with the END of its window so a
# value at time t describes the `eval_window` days before t.
performance_metrics = pd.DataFrame(
    performance_metrics_list,
    columns=["precision", "recall", "F10", "number of peaks", "number of predicted peaks"],
)
performance_metrics.index = [start_day + window_length for start_day in start_days]

In [None]:
# Subtitle shared by all three figures: forecast percentile, forecast horizon
# in hours, and the peak definition.
shared_subtitle = (
    f"<br><sup>Forecast percentile = {percentile}</sup>"
    f"<br><sup>T-ahead = {known_ahead_targets[0].total_seconds() // 3600} hours</sup>"
    f"<br><sup>Peak = exceeding {limit_scale * 100}% of the congestion limit</sup>"
)
window_description = f" in the previous {eval_window} days in {case_name}"

# Precision and recall per evaluation window, as grouped bars.
precision_recall_plot = px.bar(
    performance_metrics[["precision", "recall"]],
    barmode='group',
    title="Precision and recall of peaks" + window_description + shared_subtitle,
)

# F10 score per evaluation window, with the value rounded to 3 decimals
# printed on each bar.
f10_labels = performance_metrics["F10"].apply(lambda score: np.round(score, 3))
f10_plot = px.bar(
    performance_metrics[["F10"]],
    text=f10_labels,
    title="F10 score" + window_description + shared_subtitle,
)

# Measured versus predicted peak counts per evaluation window.
number_peaks_plot = px.bar(
    performance_metrics[["number of peaks", "number of predicted peaks"]],
    barmode='group',
    title="Number of peaks" + window_description + shared_subtitle,
)

# Render the figures in the same order as they were built.
for figure in (precision_recall_plot, f10_plot, number_peaks_plot):
    figure.show()