In [None]:
import optuna, random
import numpy as np
import pandas as pd

from utils.data_preprocessing import preprocess_data
from utils.data_processing import get_signs_from_returns
from utils.evaluation import evaluate_return_predictions, evaluate_sign_predictions

### Define Functions

In [None]:
def validate(mts, n_validations, sp) -> tuple[float, float, float]:
    """Score AutoARIMA forecasts on randomly drawn training windows.

    For every series in ``mts.names``, ``n_validations`` start indices are
    drawn at random. A fresh ``AutoARIMA`` is fitted on the window at that
    index and forecasts ``len(mts.y_test)`` steps ahead; the forecast is
    compared with the last element of each of the following windows.

    Args:
        mts: Dataset object exposing ``names``, ``x_train``, ``y_test`` and
            ``get_returns_from_features``. ``x_train`` is indexed here as
            ``[window, step, series]``.
        n_validations: Number of random windows evaluated per series.
        sp: Seasonal period forwarded to ``AutoARIMA``.

    Returns:
        ``(mean MAE, mean RMSE, mean F1)`` averaged over every series and
        every sampled window.
    """
    # NOTE(review): `AutoARIMA` is not imported in the visible import cell;
    # presumably sktime's `sktime.forecasting.arima.AutoARIMA` -- confirm and
    # add the import.
    test_days = len(mts.y_test)
    # Forecast horizon and the largest admissible start index do not change
    # between iterations, so compute them once.
    horizon = range(1, test_days + 1)
    # `random.randint` is inclusive on both ends; this bound keeps the
    # ground-truth slice below inside `x_train`.
    last_start = len(mts.x_train) - 1 - test_days
    mae_lst, rmse_lst, f1_lst = [], [], []
    for i, ts_name in enumerate(mts.names):
        print(f"Validating arima_{ts_name}")
        for _ in range(n_validations):
            trial_i = random.randint(0, last_start)
            model = AutoARIMA(sp=sp, stationary=True, suppress_warnings=True)
            # Ground truth: last element of the `test_days` windows that follow
            # the fitted one (assumes consecutive windows are shifted by one
            # step -- TODO confirm against preprocess_data).
            y_true = mts.x_train[trial_i + 1 : trial_i + 1 + test_days, -1, i]
            y_pred = np.squeeze(
                model.fit_predict(mts.x_train[trial_i, :, i], fh=horizon)
            )
            assert y_true.shape == y_pred.shape == (test_days,)
            gt = mts.get_returns_from_features(y_true)
            pr = mts.get_returns_from_features(y_pred)
            metrics = evaluate_return_predictions(gt, pr)
            metrics_sign = evaluate_sign_predictions(
                get_signs_from_returns(gt), get_signs_from_returns(pr)
            )
            mae_lst.append(metrics["MAE"])
            rmse_lst.append(metrics["RMSE"])
            f1_lst.append(metrics_sign["F1"])
    return float(np.mean(mae_lst)), float(np.mean(rmse_lst)), float(np.mean(f1_lst))

In [None]:
def objective(trial):
    """Optuna objective: validate AutoARIMA for one hyper-parameter choice.

    Samples a look-back window size and a seasonal period from ``trial``,
    preprocesses ``exp.csv`` with that window size and runs ``validate`` with
    100 random windows per series.

    Args:
        trial: Optuna trial used to sample the two hyper-parameters.

    Returns:
        ``(mae, f1)`` -- the mean MAE and mean F1 reported by ``validate``;
        the RMSE it also returns is discarded.
    """
    # `step` is passed by keyword: Optuna 3.x declares it keyword-only and
    # deprecates the positional form.
    lbws = trial.suggest_int("look_back_window_size", 20, 1040, step=5)
    sp = trial.suggest_int("seasonal_period", 1, 5, step=1)
    mts = preprocess_data("exp.csv", look_back_window_size=lbws)
    mae, _, f1 = validate(mts, 100, sp)
    return mae, f1

### Run Study

In [None]:
# Candidate values for each tuned parameter; the grid sampler draws its
# trial parameters from exactly these lists.
search_space = {
    "look_back_window_size": [20, 65, 130, 260, 520, 1040],
    "seasonal_period": [1, 5],
}
grid_sampler = optuna.samplers.GridSampler(search_space)

# Two objectives, in the order `objective` returns them:
# the first (MAE) is minimised, the second (F1) is maximised.
study = optuna.create_study(
    sampler=grid_sampler,
    directions=["minimize", "maximize"],
)
study.optimize(objective)

### Process Results

In [None]:
# One row per finished trial, tagged with the model name.
df = study.trials_dataframe()
df["Model"] = "arima"
# Optuna prefixes sampled parameters with "params_" and, for a multi-objective
# study, names the objective columns "values_<index>". The parameter is called
# "look_back_window_size" (see `objective`), so the column must match it.
df = df[
    [
        "Model",
        "params_look_back_window_size",
        "params_seasonal_period",
        "values_0",
        "values_1",
    ]
]
# values_0 / values_1 follow the order returned by `objective`: (MAE, F1).
df.columns = ["Model", "LookBackWindowSize", "SeasonalPeriod", "MAE", "F1-Score"]
# Reassign instead of sorting in place: `df` is a column selection of the
# trials frame, and in-place mutation of such a selection can trigger
# SettingWithCopyWarning.
df = df.sort_values("F1-Score")
# `to_csv` is a DataFrame method; pandas has no module-level `pd.to_csv`.
df.to_csv("../results/tuning/autoarima.csv", sep=";", index=False)
