# Combine predictions from multiple horizons/time slices into one file for market evaluation

## Setup

In [None]:
import pickle
from pathlib import Path

import pandas as pd

from rich.progress import track

from forecasting_cfs.eval_model import ForecastingResult
from proprietary_data import KEY_FEATURE_NAMES, get_adjusted_inverse_transform


In [None]:
# Root directory of the forecast run; its integer-named sub-directories hold
# the per-split predictions that are combined below.
OUT_PATH = Path("forecast_baselines", "all_features_False-static_covariates_False")


## Load data

In [None]:
# One element per split directory, in visiting order. Each element maps a
# model name to that split's list of forecasting results.
predictions: list[dict[str, list[ForecastingResult]]] = []

# `sorted` already returns a list, so no extra `list(...)` wrapper is needed.
# NOTE(review): paths sort lexicographically, so a directory named "10" is
# visited before "2" -- confirm that the split order does not matter downstream.
for split_directory in track(
    sorted(OUT_PATH.iterdir()), description="Loading predictions"
):
    # Only entries whose name parses as an integer are treated as splits;
    # everything else in OUT_PATH is skipped.
    try:
        int(split_directory.name)
    except ValueError:
        continue

    # NOTE: unpickling can execute arbitrary code -- only load files that were
    # produced by trusted runs of this project.
    with open(split_directory / "result_data" / "example_predictions.pkl", "rb") as f:
        predictions.append(pickle.load(f))

# Number of splits that were loaded (displayed as the cell output).
len(predictions)


In [None]:
# Build the inverse transform for the key features. It is later called on the
# raw forecast values to produce `forecasts_original`.
# NOTE(review): presumably this undoes the preprocessing/scaling applied before
# training -- confirm against `proprietary_data`.
inverse_transform = get_adjusted_inverse_transform(features=KEY_FEATURE_NAMES)


In [None]:
# Model names found in the first loaded split (iterating a dict yields its keys).
# NOTE(review): the other splits are assumed to contain the same models.
all_models = list(predictions[0])
all_models


In [None]:
# Model whose forecasts are aggregated and saved below; it must be a key of
# every per-split predictions dict.
model = "ARMA_1Q"


In [None]:
# Flatten the selected model's per-split result lists into a single list,
# keeping the order in which the splits were loaded.
all_for_model = [
    result
    for split_predictions in predictions
    for result in split_predictions[model]
]

# Sanity check on the total count: 2485 entries times the number of splits.
# NOTE(review): 2485 is presumably the number of series per split -- confirm.
assert len(all_for_model) == 2485 * len(predictions)
(model, len(all_for_model))


## Process forecasts

In [None]:
def _index_key(result: ForecastingResult) -> tuple[int, object]:
    """Return the (companyid, last_known_date) pair that labels one forecast."""
    return int(result.meta_data["companyid"]), result.ts_past.time_index[-1]


# Stack every forecast frame into one DataFrame. The two key components become
# the outer index levels, placed on top of each frame's own index.
# NOTE: results sharing the same key overwrite each other in the dict, so only
# the last one is kept.
forecasts = pd.concat(
    {_index_key(result): result.ts_forecast.pd_dataframe() for result in all_for_model},
    names=["companyid", "last_known_date"],
)
forecasts


In [None]:
# Apply the inverse transform to the raw forecast values, then re-attach the
# original row index and column labels.
forecasts_original = pd.DataFrame(
    data=inverse_transform(forecasts.to_numpy()),
    index=forecasts.index,
    columns=forecasts.columns,
)
forecasts_original

In [None]:
# Spot-check: display the inverse-transformed forecasts of a single company.
forecasts_original.query("companyid == 18527")

## Save forecasts

In [None]:
path_aggregated = OUT_PATH / "aggregated"
path_aggregated.mkdir(exist_ok=True)
forecasts_original.to_pickle(path_aggregated / f"{model}_forecasts_original.pkl")

!ls -lsh {path_aggregated}
