In [None]:
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

from helpers.evals import evaluate_component_model
from helpers.features import get_min_max_sparam, process_dataset
from helpers.loaders import prepare_data_for_extrapolation
from helpers.loss import huber_logcosh_loss
from helpers.models import ComponentModel
from helpers.trainers import train_component_model
from helpers.types import ActivationTypes, Hyperparameters, ModelDict, SchedulerTypes

### Config


In [None]:
# Run-mode switches (not referenced by the cells visible in this notebook section).
ANALYSIS, VERBOSE = False, True

# CSV file read by the data-loading cell.
DATASET_FILE_PATH = "dataset.csv"

# Output locations. MODELS and SUBFOLDER are joined into `model_dir` further down.
GRAPH_FOLDER, MODELS, PREDICTIONS = "graphs", "models", "predictions"
SUBFOLDER = "baseline"

### Data


In [None]:
# Read the raw table from the configured CSV path.
df = pd.read_csv(DATASET_FILE_PATH)

# process_dataset yields eight values: the train/test feature and target sets,
# two scalers, and the two index collections later handed to ComponentModel.
X_train, Y_train, X_test, Y_test, voltage_scaler, freq_scaler, freq_idx, other_idx = (
    process_dataset(df, mute=True, split_mode="extrapolation")
)

In [None]:
# Value bounds computed from the training split; forwarded to
# evaluate_component_model in the training loop.
# NOTE(review): the real pair is unpacked as (max, min) while the imaginary pair
# is unpacked as (min, max). Confirm this matches the return order of
# get_min_max_sparam -- if the helper returns (real_min, real_max, ...), then
# real_min and real_max are swapped everywhere they are used below.
real_max, real_min, imag_min, imag_max = get_min_max_sparam(X_train, Y_train)


### Training


In [None]:
# Path <MODELS>/<SUBFOLDER>; not referenced by the other cells visible here.
model_dir = Path(MODELS, SUBFOLDER)

# Use the GPU when CUDA reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Settings used by both S11 entries (real and imaginary) in models_to_train.
# The training loop forwards these to ComponentModel (sizes, dropout, activation),
# optim.AdamW (learning rate, weight decay), the data-preparation helper
# (batch size) and train_component_model (epochs, patience, scheduler).
s11_params: Hyperparameters = {
    "hidden_sizes": [256, 512, 1024, 512],
    "freq_hidden_sizes": None,
    "other_hidden_sizes": None,
    "learning_rate": 1e-3,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5,
    "batch_size": 1024,
    "epochs": 200,
    "patience": 30,
    "lr_scheduler_type": SchedulerTypes.REDUCE_ON_PLATEAU,
    "activation": ActivationTypes.GELU,
}
# Settings used by both S12 entries (real and imaginary) in models_to_train.
# Compared with s11_params: one more hidden size, a higher learning rate,
# and a longer epoch/patience budget.
s12_params: Hyperparameters = {
    "hidden_sizes": [384, 768, 1536, 768, 384],
    "freq_hidden_sizes": None,
    "other_hidden_sizes": None,
    "learning_rate": 2e-3,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5,
    "batch_size": 1024,
    "epochs": 300,
    "patience": 40,
    "lr_scheduler_type": SchedulerTypes.REDUCE_ON_PLATEAU,
    "activation": ActivationTypes.GELU,
}

# Settings for the S21_real entry only. Unlike the S11/S12/S22 configurations,
# this one supplies explicit freq_hidden_sizes / other_hidden_sizes lists,
# a higher dropout rate, and the cosine-annealing scheduler.
s21_real_params: Hyperparameters = {
    "hidden_sizes": [2048, 2048, 2048, 1024, 512, 256],
    "freq_hidden_sizes": [512, 1024, 2048, 1024],
    "other_hidden_sizes": [1024, 2048, 2048, 1024],
    "learning_rate": 1e-3,
    "dropout_rate": 0.3,
    "weight_decay": 2e-5,
    "batch_size": 1024,
    "epochs": 350,
    "patience": 40,
    "lr_scheduler_type": SchedulerTypes.COSINE_ANNEALING,
    "activation": ActivationTypes.GELU,
}

# Settings for the S21_imag entry only. It supplies explicit
# freq_hidden_sizes / other_hidden_sizes lists and uses the one-cycle scheduler.
s21_imag_params: Hyperparameters = {
    "hidden_sizes": [2048, 2048, 2048, 2048, 1024, 512],
    "freq_hidden_sizes": [512, 1024, 2048, 1024],
    "other_hidden_sizes": [1024, 2048, 2048, 1024],
    "learning_rate": 1e-3,
    "dropout_rate": 0.1,
    "weight_decay": 2e-5,
    "batch_size": 1024,
    "epochs": 350,
    "patience": 40,
    "lr_scheduler_type": SchedulerTypes.ONE_CYCLE,
    "activation": ActivationTypes.GELU,
}

# Settings used by both S22 entries (real and imaginary) in models_to_train.
# Same schedule, epochs and patience as s11_params, with larger hidden sizes
# and a higher learning rate.
s22_params: Hyperparameters = {
    "hidden_sizes": [1024, 1536, 2048, 1536, 1024],
    "freq_hidden_sizes": None,
    "other_hidden_sizes": None,
    "learning_rate": 2e-3,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5,
    "batch_size": 1024,
    "epochs": 200,
    "patience": 30,
    "lr_scheduler_type": SchedulerTypes.REDUCE_ON_PLATEAU,
    "activation": ActivationTypes.GELU,
}

In [None]:
# One entry per trained model: (model name, target column in Y_train/Y_test,
# hyperparameter dict). The real/imag entries of S11, S12 and S22 point at the
# SAME dict object, so mutating one entry's "hparams" affects its sibling.
models_to_train: list[ModelDict] = [
    {"model_name": name, "label": column, "hparams": params}
    for name, column, params in (
        ("S11_real", "S_deemb(1,1)_real", s11_params),
        ("S11_imag", "S_deemb(1,1)_imag", s11_params),
        ("S12_real", "S_deemb(1,2)_real", s12_params),
        ("S12_imag", "S_deemb(1,2)_imag", s12_params),
        ("S21_real", "S_deemb(2,1)_real", s21_real_params),
        ("S21_imag", "S_deemb(2,1)_imag", s21_imag_params),
        ("S22_real", "S_deemb(2,2)_real", s22_params),
        ("S22_imag", "S_deemb(2,2)_imag", s22_params),
    )
]

In [None]:
# Evaluation metrics and predictions for every trained model, keyed by model name.
results = {}
predictions = {}

for model_to_train in models_to_train:
    model_name = model_to_train["model_name"]
    label = model_to_train["label"]
    # Treated as read-only. The S21_real dropout rate (0.3) and cosine-annealing
    # scheduler come straight from s21_real_params; the loop no longer rewrites
    # the shared config dict in place, so the config cell is the single source
    # of truth for hyperparameters.
    hparams = model_to_train["hparams"]

    # Each model is fitted to a single target column (kept 2-D via the list index).
    y_train_pair = Y_train[[label]]
    y_test_pair = Y_test[[label]]

    print(f"Training {model_name} model")

    (
        X_train_tensor,
        Y_train_tensor,
        X_test_tensor,
        Y_test_tensor,
        loader,
        x_scaler,
        y_scaler,
    ) = prepare_data_for_extrapolation(
        X_train,
        y_train_pair,
        X_test,
        y_test_pair,
        batch_size=hparams["batch_size"],
    )

    model = ComponentModel(
        hparams["hidden_sizes"],
        hparams["dropout_rate"],
        freq_idx,
        other_idx,
        hparams["activation"],
        model_name,
        freq_hidden_sizes=hparams["freq_hidden_sizes"],
        other_hidden_sizes=hparams["other_hidden_sizes"],
    )

    optimizer = optim.AdamW(
        model.parameters(),
        lr=hparams["learning_rate"],
        weight_decay=hparams["weight_decay"],
    )

    # S21_real is the only model trained with SmoothL1; every other model uses
    # the project's huber_logcosh_loss.
    if model_name == "S21_real":
        criterion = nn.SmoothL1Loss(beta=0.05)
    else:
        criterion = huber_logcosh_loss

    trained_model = train_component_model(
        model,
        model_name,
        loader,
        X_test_tensor,
        Y_test_tensor,
        criterion,
        optimizer,
        device,
        hparams["epochs"],
        hparams["patience"],
        hparams["lr_scheduler_type"],
    )

    # NOTE(review): the full Y_test frame is passed together with [label];
    # presumably the helper selects the target column itself -- confirm.
    metrics, model_predictions = evaluate_component_model(
        trained_model,
        X_test_tensor,
        Y_test,
        [label],
        model_name,
        real_min,
        real_max,
        imag_min,
        imag_max,
        device,
        y_scaler,
    )
    results[model_name] = metrics
    predictions[model_name] = model_predictions

### Results


In [None]:
# Report the stored metrics for every model, in training order.
for model_to_train in models_to_train:
    model_name = model_to_train["model_name"]
    metrics = results[model_name]

    # Two-line separator ahead of each model's report.
    print("--" * 20, "--" * 20, sep="\n")
    print(f"Performance metrics for {model_name}:")
    print(f"\tRMSE: {metrics['rmse']:.6f}")
    print(f"\tR²: {metrics['r2']:.6f}")
    print(f"\tMAE: {metrics['mae']:.6f}")
    # Show SMAPE when the metrics dict carries it, otherwise MAPE.
    print(
        f"\tSMAPE: {metrics['smape']:.2f}%"
        if "smape" in metrics
        else f"\tMAPE: {metrics['mape']:.2f}%"
    )

# Closing separator after the last report.
print("--" * 20, "--" * 20, sep="\n")