### Base evaluation template

In [None]:
import pfceval
import polars as pl

# Index columns passed as the second argument to pfceval.Calculator.
INDEX_COLS = ["valid_time", "station_id", "step"]
# Thresholds for which a threshold-weighted CRPS and a Brier score are added.
EVAL_THS = [5, 10]
# When True, the evaluation reports are constructed and saved further below.
CONSTRUCT_EVAL_REPORTS = True
# Directory under which each constructed report is saved.
SAVE_DIR = "."

def get_base_evaluation_report(forecast, experiment_name):
    """Build the base evaluation report for a single forecast.

    A ``pfceval.Calculator`` is created for ``forecast`` (indexed by
    ``INDEX_COLS``), the error, probabilistic and threshold-based metrics
    are registered on it, and the calculator is handed to
    ``pfceval.Evaluation.fill_evaluation``.

    Args:
        forecast: Forecast object passed to ``pfceval.Calculator``.
        experiment_name: Name forwarded to ``fill_evaluation`` as
            ``experiment_name``.

    Returns:
        The object returned by ``pfceval.Evaluation.fill_evaluation``.
    """
    metric_calculator = pfceval.Calculator(forecast, INDEX_COLS)

    # Error metrics first, then the probabilistic ones (spread and CRPS).
    metric_calculator.add_absolute_error()
    metric_calculator.add_squared_error()
    metric_calculator.add_spread()
    metric_calculator.add_crps()

    # One threshold-weighted CRPS and one Brier score per configured threshold.
    for threshold in EVAL_THS:
        metric_calculator.add_twcrps(threshold)
        metric_calculator.add_brier(threshold)

    evaluation_options = {
        "experiment_name": experiment_name,
        "calculator": metric_calculator,
        "lead_time_col": "step",
        "location_id_col": "station_id",
        "bootstrap": True,
        "n_iter": 1000,
        "CI": 0.9,
        "location_metrics": True,
    }
    return pfceval.Evaluation.fill_evaluation(**evaluation_options)

if CONSTRUCT_EVAL_REPORTS:
    # Subset label -> value of the "unseen_sta" column that selects it.
    station_subsets = {"seen": False, "unseen": True}

    forecast_paths = pfceval.utils.get_example_forecast_paths()
    for idx, fc_path in enumerate(forecast_paths):
        forecast = pfceval.Forecast(
            fc_path=fc_path,
            ensemble_prefix="pred_q",
            obs_col="wind_speed",
            bootstrap_cols="run_id",
        )
        exp_name = f"BaseEvaluationTemplate_{idx}"

        # Metrics computed over the unfiltered forecast.
        report = get_base_evaluation_report(
            forecast=forecast,
            experiment_name=exp_name,
        )

        # Metrics for the stations that are present in the training data
        # ("seen") and for those that are not ("unseen"), in that order.
        subset_reports = {
            label: get_base_evaluation_report(
                forecast=forecast.filter(pl.col("unseen_sta") == flag),
                experiment_name=exp_name,
            )
            for label, flag in station_subsets.items()
        }

        # Collect every subset into the common report under its label.
        for label, subset_report in subset_reports.items():
            report.extend(subset_report, label)

        # Save the combined report to disk.
        report.save_results(f"{SAVE_DIR}/{exp_name}")

In [None]:
# Names of the experiments whose saved reports are compared.
# NOTE(review): the construction loop numbers experiments from 0
# (enumerate default) - confirm that "_1"/"_2" are the reports intended here.
exp_names = ["BaseEvaluationTemplate_1", "BaseEvaluationTemplate_2"]

# Load one report per experiment. The comprehension wraps the whole
# load_report call so that each path is loaded separately, and the reports
# are read from the same directory they were saved to.
reports = [
    pfceval.Evaluation.load_report(f"{SAVE_DIR}/{name}") for name in exp_names
]

# Stack the overall metrics of all loaded reports and order them by "mae".
overall = pfceval.plotting.stack_overall_metrics(*reports)
overall.sort("mae")

In [None]:
# Stack the "unseen_overall_metrics" table of every report, ordered by "mae".
# The reports are unpacked so each one is its own positional argument, in
# line with the other stacking and plotting calls in this notebook.
unseen_overall = pfceval.plotting.stack_overall_metrics(
    *reports, table_name="unseen_overall_metrics"
)
unseen_overall.sort("mae")

In [None]:
# Stack the "seen_overall_metrics" table of every report, ordered by "mae".
# The reports are unpacked so each one is its own positional argument, in
# line with the other stacking and plotting calls in this notebook.
seen_overall = pfceval.plotting.stack_overall_metrics(
    *reports, table_name="seen_overall_metrics"
)
seen_overall.sort("mae")

In [None]:
# Plot the "unseen_bootstraped_lead_time_metrics" table for all loaded reports.
pfceval.plotting.plot_lead_time_metrics(
    *reports,
    table_name="unseen_bootstraped_lead_time_metrics",
)