In [1]:
from enum import Enum
from pathlib import Path
from typing import TypedDict

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

from helpers.evals import evaluate_component_model
from helpers.features import process_dataset
from helpers.loaders import prepare_data_for_extrapolation
from helpers.loss import huber_logcosh_loss
from helpers.models import ComponentModel
from helpers.trainers import train_component_model

### Config


In [2]:
# Run-level flags. No cell shown in this notebook reads them.
ANALYSIS = False
VERBOSE = True

# CSV loaded into `df` by the data cell; the path is relative to the working
# directory.
DATASET_FILE_PATH = "dataset.csv"

# Output folder names. MODELS and SUBFOLDER are joined into `model_dir`;
# GRAPH_FOLDER and PREDICTIONS are not referenced in the cells shown here.
GRAPH_FOLDER = "graphs"
MODELS = "models"
PREDICTIONS = "predictions"
SUBFOLDER = "baseline"

### Data


In [3]:
# Read the raw dataset, then let the project helper build the extrapolation
# split. The helper's result is unpacked into eight names used by later cells.
df = pd.read_csv(DATASET_FILE_PATH)
dataset_split = process_dataset(df, split_mode="extrapolation")

(
    X_train, Y_train, X_test, Y_test,
    voltage_scaler, freq_scaler, freq_idx, other_idx,
) = dataset_split

Found 74 unique frequency values from 0.10 GHz to 65.00 GHz
Training on 49 unique frequencies from 0.10 GHz to 40.00 GHz
Training set: 103439 samples
Testing on 25 unique frequencies: [41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54. 55. 56. 57. 58.
 59. 60. 61. 62. 63. 64. 65.] GHz
Test set: 52775 samples
Identified 19 frequency-related features and 13 other features


In [4]:
# Extract the two S21 training target columns as raw value arrays; the next
# cell reuses both names.
s21_real_train, s21_imag_train = (
    Y_train[column].values
    for column in ("S_deemb(2,1)_real", "S_deemb(2,1)_imag")
)

# Report the observed min/max of each component over the training set.
print(
    f"S21 real training range: {s21_real_train.min():.6f} to {s21_real_train.max():.6f}"  # type: ignore
)
print(
    f"S21 imag training range: {s21_imag_train.min():.6f} to {s21_imag_train.max():.6f}"  # type: ignore
)

S21 real training range: -39.844800 to 0.557864
S21 imag training range: -2.177070 to 19.843800


In [5]:
# Work on a copy of the training features with the S21 targets attached, so
# rows can be filtered by frequency without modifying X_train.
X_train_s21 = X_train.copy()
X_train_s21["S21_real"] = s21_real_train
X_train_s21["S21_imag"] = s21_imag_train

# High-frequency subset: rows at or above the 80th percentile of "freq".
high_freq_threshold = np.percentile(X_train_s21["freq"], 80)
high_freq_data = X_train_s21[X_train_s21["freq"] >= high_freq_threshold]

# The filter above is inclusive, so the message says ">=". Dividing by 1e9 for
# the GHz label treats the "freq" column as Hz.
print(
    f"Using high-frequency training data (>={high_freq_threshold / 1e9:.1f} GHz) for stabilization"
)

# Mean S21 value over the high-frequency subset (reported only; not used by
# the bounds below).
high_freq_real_mean = high_freq_data["S21_real"].mean()
high_freq_imag_mean = high_freq_data["S21_imag"].mean()

print(
    f"High-frequency S21 means: real={high_freq_real_mean:.6f}, imag={high_freq_imag_mean:.6f}"
)

# Component-specific bounds: take an inner percentile band of the training
# targets and widen it on both sides by a fraction of the band's width.

# Real part: 10th-90th percentile band, widened by 20% of its width
# (tighter, because this component has been the problematic one).
real_p10 = np.percentile(s21_real_train, 10)  # type: ignore
real_p90 = np.percentile(s21_real_train, 90)  # type: ignore
real_range = real_p90 - real_p10
real_min = real_p10 - 0.2 * real_range
real_max = real_p90 + 0.2 * real_range

# Imaginary part: 5th-95th percentile band, widened by 30% of its width
# (more relaxed, since this component behaves better).
imag_p05 = np.percentile(s21_imag_train, 5)  # type: ignore
imag_p95 = np.percentile(s21_imag_train, 95)  # type: ignore
imag_range = imag_p95 - imag_p05
imag_min = imag_p05 - 0.3 * imag_range
imag_max = imag_p95 + 0.3 * imag_range

print("Setting component-specific bounds:")
print(f"  Real: [{real_min:.6f}, {real_max:.6f}]")
print(f"  Imaginary: [{imag_min:.6f}, {imag_max:.6f}]")


Using high-frequency training data (>31.0 GHz) for stabilization
High-frequency S21 means: real=-0.232408, imag=1.771155
Setting component-specific bounds:
  Real: [-9.855051, 1.816362]
  Imaginary: [-2.254964, 9.799980]


### Training


In [6]:
# Path built from the MODELS and SUBFOLDER config values.
model_dir = Path(MODELS, SUBFOLDER)

# Use CUDA when PyTorch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [7]:
class SchedulerTypes(str, Enum):
    """String-valued names for the learning-rate schedule of a training run.

    The selected member is stored under ``lr_scheduler_type`` in a
    ``Hyperparameters`` dict and handed to ``train_component_model``.
    Because the enum mixes in ``str``, each member compares equal to its
    plain string value.
    """

    # NOTE(review): the names presumably map onto torch.optim.lr_scheduler
    # classes inside helpers.trainers — confirm there.
    REDUCE_ON_PLATEAU = "reduce_on_plateau"
    STEP = "step"
    COSINE_ANNEALING = "cosine_annealing"
    ONE_CYCLE = "one_cycle"
    EXPONENTIAL = "exponential"
    NONE = "none"


class ActivationTypes(str, Enum):
    """String-valued names for the activation function of a model.

    The selected member is stored under ``activation`` in a
    ``Hyperparameters`` dict and passed positionally to ``ComponentModel``.
    """

    GELU = "gelu"
    RELU = "relu"
    SILU = "silu"


class Hyperparameters(TypedDict):
    """Per-model configuration read by the training loop.

    All keys are required. The comments below record where the training
    loop forwards each value.
    """

    # Forwarded to ComponentModel.
    hidden_sizes: list[int]
    # Forwarded to ComponentModel as keyword arguments; may be None.
    freq_hidden_sizes: list[int] | None
    other_hidden_sizes: list[int] | None
    # Forwarded to ComponentModel.
    dropout_rate: float
    # `lr` of the AdamW optimizer.
    learning_rate: float
    # Forwarded to ComponentModel.
    activation: ActivationTypes
    # Forwarded to train_component_model.
    lr_scheduler_type: SchedulerTypes
    # `weight_decay` of the AdamW optimizer.
    weight_decay: float
    # Forwarded to train_component_model (the logs show early stopping).
    epochs: int
    patience: int
    # Forwarded to prepare_data_for_extrapolation.
    batch_size: int


class ModelDict(TypedDict):
    """One entry of ``models_to_train``: a single model to fit."""

    # Identifier used as the key into `results` / `predictions` and passed to
    # the model, trainer and evaluator helpers.
    model_name: str
    # Name of the target column selected from Y_train / Y_test.
    label: str
    # Training configuration for this model.
    hparams: Hyperparameters


In [8]:
# S11: one configuration shared by the S11_real and S11_imag models.
# freq_hidden_sizes / other_hidden_sizes are None here and forwarded as such
# to ComponentModel (presumably meaning no separate frequency / other
# branches — confirm in helpers.models).
s11_params: Hyperparameters = {
    "hidden_sizes": [256, 512, 1024, 512],
    "freq_hidden_sizes": None,
    "other_hidden_sizes": None,
    "learning_rate": 0.001,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5,
    "batch_size": 1024,
    "epochs": 200,
    "patience": 30,
    "lr_scheduler_type": SchedulerTypes.REDUCE_ON_PLATEAU,
    "activation": ActivationTypes.GELU,
}
# S12: shared by the S12_real and S12_imag models.
s12_params: Hyperparameters = {
    "hidden_sizes": [384, 768, 1536, 768, 384],
    "freq_hidden_sizes": None,
    "other_hidden_sizes": None,
    "learning_rate": 0.002,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5,
    "batch_size": 1024,
    "epochs": 300,
    "patience": 40,
    "lr_scheduler_type": SchedulerTypes.REDUCE_ON_PLATEAU,
    "activation": ActivationTypes.GELU,
}

# S21 real part: only the two S21 configurations set freq_hidden_sizes and
# other_hidden_sizes. Compared with the imaginary part this one uses a
# narrower tail of hidden sizes, a higher dropout rate (0.3 vs 0.1) and
# cosine annealing instead of one-cycle.
s21_real_params: Hyperparameters = {
    "hidden_sizes": [2048, 2048, 2048, 1024, 512, 256],
    "freq_hidden_sizes": [512, 1024, 2048, 1024],
    "other_hidden_sizes": [1024, 2048, 2048, 1024],
    "learning_rate": 0.001,
    "dropout_rate": 0.3,
    "weight_decay": 1e-5 * 2,
    "batch_size": 1024,
    "epochs": 350,
    "patience": 40,
    "lr_scheduler_type": SchedulerTypes.COSINE_ANNEALING,
    "activation": ActivationTypes.GELU,
}

# S21 imaginary part.
s21_imag_params: Hyperparameters = {
    "hidden_sizes": [2048, 2048, 2048, 2048, 1024, 512],
    "freq_hidden_sizes": [512, 1024, 2048, 1024],
    "other_hidden_sizes": [1024, 2048, 2048, 1024],
    "learning_rate": 0.001,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5 * 2,
    "batch_size": 1024,
    "epochs": 350,
    "patience": 40,
    "lr_scheduler_type": SchedulerTypes.ONE_CYCLE,
    "activation": ActivationTypes.GELU,
}

# S22: shared by the S22_real and S22_imag models.
s22_params: Hyperparameters = {
    "hidden_sizes": [1024, 1536, 2048, 1536, 1024],
    "freq_hidden_sizes": None,
    "other_hidden_sizes": None,
    "learning_rate": 0.002,
    "dropout_rate": 0.1,
    "weight_decay": 1e-5,
    "batch_size": 1024,
    "epochs": 200,
    "patience": 30,
    "lr_scheduler_type": SchedulerTypes.REDUCE_ON_PLATEAU,
    "activation": ActivationTypes.GELU,
}

In [9]:
# One model per S-parameter component, in training order. Each row is
# (model name, target column, hyperparameter dict); real/imag pairs share a
# dict except for S21, which has a dedicated dict per part.
models_to_train: list[ModelDict] = [
    {"model_name": name, "label": column, "hparams": hparams}
    for name, column, hparams in (
        ("S11_real", "S_deemb(1,1)_real", s11_params),
        ("S11_imag", "S_deemb(1,1)_imag", s11_params),
        ("S12_real", "S_deemb(1,2)_real", s12_params),
        ("S12_imag", "S_deemb(1,2)_imag", s12_params),
        ("S21_real", "S_deemb(2,1)_real", s21_real_params),
        ("S21_imag", "S_deemb(2,1)_imag", s21_imag_params),
        ("S22_real", "S_deemb(2,2)_real", s22_params),
        ("S22_imag", "S_deemb(2,2)_imag", s22_params),
    )
]

In [10]:
# Per-model outputs, keyed by model name (e.g. "S21_real").
results = {}
predictions = {}

for model_to_train in models_to_train:
    model_name = model_to_train["model_name"]
    label = model_to_train["label"]
    hparams = model_to_train["hparams"]

    # Each model fits a single target column; the list-style selection keeps
    # the result as a one-column frame rather than a 1-D series (assuming
    # Y_train / Y_test are pandas DataFrames).
    y_train_pair = Y_train[[label]]
    y_test_pair = Y_test[[label]]

    # The S21_real dropout rate and scheduler come from s21_real_params alone;
    # the loop never patches the shared hyperparameter dicts, so re-running
    # this cell always starts from the configuration as written.

    print(f"Training {model_name} model")

    (
        X_train_tensor,
        Y_train_tensor,
        X_test_tensor,
        Y_test_tensor,
        loader,
        x_scaler,
        y_scaler,
    ) = prepare_data_for_extrapolation(
        X_train,
        y_train_pair,
        X_test,
        y_test_pair,
        batch_size=hparams["batch_size"],
    )

    model = ComponentModel(
        hparams["hidden_sizes"],
        hparams["dropout_rate"],
        freq_idx,
        other_idx,
        hparams["activation"],
        model_name,
        freq_hidden_sizes=hparams["freq_hidden_sizes"],
        other_hidden_sizes=hparams["other_hidden_sizes"],
    )

    optimizer = optim.AdamW(
        model.parameters(),
        lr=hparams["learning_rate"],
        weight_decay=hparams["weight_decay"],
    )

    # S21_real is trained with a Smooth L1 loss; every other component uses
    # the project's huber_logcosh_loss.
    if model_name == "S21_real":
        criterion = nn.SmoothL1Loss(beta=0.05)
    else:
        criterion = huber_logcosh_loss

    # NOTE(review): the *test* tensors are handed to the trainer, and its
    # progress bar reports "Val Loss" and early stopping. If they drive early
    # stopping / LR scheduling, the test set influences model selection —
    # confirm, and consider a validation split carved from the training data.
    trained_model = train_component_model(
        model,
        model_name,
        loader,
        X_test_tensor,
        Y_test_tensor,
        criterion,
        optimizer,
        device,
        hparams["epochs"],
        hparams["patience"],
        hparams["lr_scheduler_type"],
    )

    # NOTE(review): real_min/real_max/imag_min/imag_max were derived from the
    # S21 training targets but are passed for every component — confirm that
    # evaluate_component_model only applies them where intended.
    metrics, model_predictions = evaluate_component_model(
        trained_model,
        X_test_tensor,
        Y_test,
        [label],
        model_name,
        real_min,
        real_max,
        imag_min,
        imag_max,
        device,
        y_scaler,
    )
    results[model_name] = metrics
    predictions[model_name] = model_predictions

Training S11_real model


Training Epochs:  18%|█▊        | 36/200 [00:23<01:47,  1.53it/s, Epoch=36, Val Loss=0.023775, Best=0.015972, LR=6.25e-5] 


Early stopping triggered.
Training S11_imag model


Training Epochs:  18%|█▊        | 36/200 [00:22<01:42,  1.60it/s, Epoch=36, Val Loss=0.028711, Best=0.018593, LR=6.25e-5] 


Early stopping triggered.
Training S12_real model


Training Epochs:  15%|█▌        | 46/300 [00:30<02:48,  1.51it/s, Epoch=46, Val Loss=0.132343, Best=0.084533, LR=3.13e-5] 


Early stopping triggered.
Training S12_imag model


Training Epochs:  21%|██        | 63/300 [00:43<02:42,  1.46it/s, Epoch=63, Val Loss=0.063551, Best=0.048017, LR=1.56e-5] 


Early stopping triggered.
Training S21_real model


Training Epochs:  28%|██▊       | 99/350 [02:28<06:17,  1.50s/it, Epoch=99, Val Loss=0.028103, Best=0.007482, LR=0.001]


Early stopping triggered.
Training S21_imag model


Training Epochs:  30%|███       | 105/350 [02:17<05:20,  1.31s/it, Epoch=105, Val Loss=0.007031, Best=0.003586, LR=0.01]   


Early stopping triggered.
Training S22_real model


Training Epochs:  16%|█▋        | 33/200 [00:25<02:07,  1.31it/s, Epoch=33, Val Loss=0.028101, Best=0.021849, LR=0.000125]


Early stopping triggered.
Training S22_imag model


Training Epochs:  34%|███▍      | 69/200 [00:52<01:40,  1.31it/s, Epoch=69, Val Loss=0.049927, Best=0.031266, LR=1.56e-5] 

Early stopping triggered.





### Results


In [None]:
# Separator line; printed twice before each model's report and twice at the end.
rule = "--" * 20

for model_name in (spec["model_name"] for spec in models_to_train):
    metrics = results[model_name]

    print(rule)
    print(rule)
    print(f"Performance metrics for {model_name}:")
    print(f"\tRMSE: {metrics['rmse']:.6f}")
    print(f"\tR²: {metrics['r2']:.6f}")
    print(f"\tMAE: {metrics['mae']:.6f}")

    # Report SMAPE when the metrics contain it; otherwise fall back to MAPE.
    percentage_line = (
        f"\tSMAPE: {metrics['smape']:.2f}%"
        if "smape" in metrics
        else f"\tMAPE: {metrics['mape']:.2f}%"
    )
    print(percentage_line)

print(rule)
print(rule)

ValueError: too many values to unpack (expected 2)