In [None]:
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

from helpers.split import create_frequency_based_split

### Configuration


In [None]:
# Set to True to print dataset diagnostics (null counts, shapes, statistics).
ANALYSIS = False

# CSV file holding both the input features and the S-parameter targets.
DATASET_FILE_PATH = "dataset.csv"

# Output folder names. NOTE(review): only MODELS / SUBFOLDER is used by the
# visible cells; GRAPH_FOLDER and PREDICTIONS are presumably used elsewhere.
GRAPH_FOLDER = "graphs"
MODELS = "models"
PREDICTIONS = "predictions"
SUBFOLDER = "baseline"

VERBOSE = True
# Referenced by the LR-scheduler setup cell; the training loop itself reads
# its epoch count from `hyperparams["epochs"]`.
EPOCHS = 100

# Seed the global RNGs so weight initialisation, dropout masks and DataLoader
# shuffling are repeatable across "Restart & Run All" (some GPU kernels may
# still be non-deterministic).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Loading and Preprocessing


In [None]:
# Read the raw CSV at DATASET_FILE_PATH into a DataFrame (no cleaning yet).
df = pd.read_csv(filepath_or_buffer=DATASET_FILE_PATH)

In [None]:
# Input columns fed to the network.
features = ["freq", "vb", "vc", "DEV_GEOM_L", "NUM_OF_TRANS_RF"]

# Output columns: real and imaginary part of each de-embedded S-parameter,
# ordered (1,1), (1,2), (2,1), (2,2) with "real" before "imag".
targets = [
    f"S_deemb({row},{col})_{part}"
    for row in (1, 2)
    for col in (1, 2)
    for part in ("real", "imag")
]

In [None]:
if ANALYSIS:
    # Missing-value count per input column; columns without nulls are hidden.
    print("Checking for null values in features:")
    feature_nulls = df[features].isna().sum()
    print(feature_nulls.loc[feature_nulls > 0])

    # Missing-value count for every target column (zeros are shown too).
    print("\nChecking for null values in labels:")
    label_nulls = df[targets].isna().sum()
    print(label_nulls)

In [None]:
# Keep only rows where every selected feature and every target is present.
df_clean = df.dropna(subset=[*features, *targets])

if ANALYSIS:
    # Report how many rows the null filter removed.
    print(f"\nOriginal dataset shape: {df.shape}")
    print(f"Cleaned dataset shape: {df_clean.shape}")
    print(f"Removed {len(df) - len(df_clean)} rows with null values")

In [None]:
# Apply improved frequency-based split.
# NOTE(review): the returned masks are used to index X and Y row-wise in the
# next cell, so they are presumably boolean row masks aligned with df_clean —
# confirm in helpers.split.
train_mask, test_mask = create_frequency_based_split(
    df_clean, test_size=0.2, random_state=42
)


# Create separate dataframes for features and labels
X = df_clean[features].copy()
Y = df_clean[targets].copy()

# Encode categorical features as integer category codes
X["DEV_GEOM_L"] = X["DEV_GEOM_L"].astype("category").cat.codes
X["NUM_OF_TRANS_RF"] = X["NUM_OF_TRANS_RF"].astype("category").cat.codes

# Scale freq, vb, vc.
# The scaler is fitted on the TRAINING rows only and then applied to every
# row; fitting on the full frame would leak test-set min/max into training.
# Test rows may therefore fall slightly outside [0, 1].
scaled_columns = ["freq", "vb", "vc"]
scaler = MinMaxScaler()
scaler.fit(X[train_mask][scaled_columns])
X[scaled_columns] = scaler.transform(X[scaled_columns])

if ANALYSIS:
    print(f"\nFeature dataset shape: {X.shape}")
    print(f"S-parameter labels shape: {Y.shape}")

    print("\nFeature statistics (first 5 columns):")
    print(X.iloc[:, :5].describe())

    print("\nS-parameter statistics (first 4 columns):")
    print(Y.iloc[:, :4].describe())

    print("\nFeature and label separation complete!")

In [None]:
# Partition features and targets into train / test subsets with the masks
# produced by the frequency-based split.
Y = df_clean[targets]
X_train = X[train_mask]
X_test = X[test_mask]
Y_train = Y[train_mask]
Y_test = Y[test_mask]

In [None]:
# Standardise the targets (for improved convergence). Statistics come from
# the training targets only; the test targets reuse the fitted scaler.
y_scaler = StandardScaler().fit(Y_train)
Y_train_scaled = y_scaler.transform(Y_train)
Y_test_scaled = y_scaler.transform(Y_test)

### Model


In [None]:
# Model / optimisation settings read by the model, scheduler and training cells.
hyperparams = dict(
    hidden_sizes=[384, 768, 1536],  # widths of the branch, first and second shared layers
    dropout_rate=0.1,
    activation="gelu",
    lr=0.002,
    epochs=300,  # upper bound on training epochs
    patience=40,  # epochs without improvement before early stopping
    batch_size=512,
    lr_scheduler_type="reduce_on_plateau",
)

In [None]:
class FrequencyAwareNetwork(nn.Module):
    """Two-branch MLP that embeds frequency columns separately from the rest.

    The input columns selected by ``freq_indices`` go through ``freq_net`` and
    the columns selected by ``other_indices`` go through ``other_net``. Both
    branch outputs (each ``hidden_sizes[0]`` wide) are concatenated and passed
    through the ``shared`` trunk, which ends in a linear layer of width
    ``output_size``.

    ``set_feature_indices`` must be called before the first forward pass.
    """

    # Supported activation names mapped to their module classes.
    _ACTIVATIONS = {"silu": nn.SiLU, "relu": nn.ReLU, "gelu": nn.GELU}

    def __init__(
        self,
        freq_features,
        other_features,
        hidden_sizes,
        dropout_rate=0.2,
        activation="gelu",
        output_size=8,
    ):
        """Build the two branches and the shared trunk.

        Args:
            freq_features: Number of input columns fed to the frequency branch.
            other_features: Number of input columns fed to the other branch.
            hidden_sizes: Sequence with at least three widths: branch width,
                first shared layer width, second shared layer width.
            dropout_rate: Dropout probability applied before the output layer.
            activation: One of ``"silu"``, ``"relu"`` or ``"gelu"``.
            output_size: Width of the final linear layer (defaults to 8, the
                value that was previously hard-coded).

        Raises:
            ValueError: If ``activation`` is not a supported name.
        """
        super().__init__()
        if activation not in self._ACTIVATIONS:
            raise ValueError(f"Unsupported activation function: {activation}")
        # These activations hold no parameters or buffers, so one instance can
        # safely be reused in every Sequential below.
        act_fn = self._ACTIVATIONS[activation]()

        self.freq_net = nn.Sequential(nn.Linear(freq_features, hidden_sizes[0]), act_fn)

        self.other_net = nn.Sequential(
            nn.Linear(other_features, hidden_sizes[0]), act_fn
        )

        # Layer order is kept fixed so state_dict keys (shared.0 / shared.2 /
        # shared.5) stay compatible with previously saved checkpoints.
        self.shared = nn.Sequential(
            nn.Linear(2 * hidden_sizes[0], hidden_sizes[1]),
            act_fn,
            nn.Linear(hidden_sizes[1], hidden_sizes[2]),
            act_fn,
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_sizes[2], output_size),
        )

        self.freq_indices = None
        self.other_indices = None

    def set_feature_indices(self, freq_indices, other_indices):
        """Record which input columns feed the frequency and the other branch."""
        self.freq_indices = freq_indices
        self.other_indices = other_indices

    def forward(self, x):
        """Run both branches on their column subsets and merge them in the trunk.

        Args:
            x: 2-D tensor; columns are selected via the stored index lists.

        Returns:
            Tensor with ``output_size`` columns, one row per input row.

        Raises:
            RuntimeError: If ``set_feature_indices`` has not been called.
        """
        if self.freq_indices is None or self.other_indices is None:
            # Indexing with None would silently insert a new axis and fail
            # later with an unrelated shape error.
            raise RuntimeError(
                "Feature indices are not set; call set_feature_indices() before forward()."
            )
        x_freq = x[:, self.freq_indices]
        x_other = x[:, self.other_indices]
        f_out = self.freq_net(x_freq)
        o_out = self.other_net(x_other)
        x_cat = torch.cat((f_out, o_out), dim=1)
        return self.shared(x_cat)

In [None]:
# Split feature indices: "freq" feeds the frequency branch, every other
# column of X feeds the second branch.
freq_indices = [X.columns.get_loc("freq")]
other_indices = [i for i in range(X.shape[1]) if i not in freq_indices]

# Convert to tensors (targets use the standardised versions)
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train_scaled, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test_scaled, dtype=torch.float32)

# Batch size comes from the hyperparameter dict instead of a duplicated
# literal, so changing hyperparams["batch_size"] actually takes effect.
train_loader = DataLoader(
    TensorDataset(X_train_tensor, Y_train_tensor),
    batch_size=hyperparams["batch_size"],
    shuffle=True,
)

### Training


In [None]:
# Initialize model and tell it which input columns go to which branch
model = FrequencyAwareNetwork(
    freq_features=len(freq_indices),
    other_features=len(other_indices),
    hidden_sizes=hyperparams["hidden_sizes"],
    dropout_rate=hyperparams["dropout_rate"],
    activation=hyperparams["activation"],
).to(device)
model.set_feature_indices(freq_indices, other_indices)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=hyperparams["lr"])

# Scheduler horizons must match the number of epochs the training loop runs
# (hyperparams["epochs"]); using the separate EPOCHS constant would end the
# cosine / one-cycle schedule after a different number of epochs.
n_epochs = hyperparams["epochs"]

if hyperparams["lr_scheduler_type"] == "reduce_on_plateau":
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.85, patience=5, min_lr=5e-7
    )
elif hyperparams["lr_scheduler_type"] == "cosine_annealing":
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=n_epochs, eta_min=1e-6
    )
elif hyperparams["lr_scheduler_type"] == "one_cycle":
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=optimizer.param_groups[0]["lr"],
        steps_per_epoch=len(train_loader),
        epochs=n_epochs,
    )
else:
    # Any other value disables LR scheduling.
    scheduler = None

In [None]:
# Training loop with early stopping on the average training loss.
best_loss = float("inf")
best_model_state = None
counter = 0  # epochs since the last improvement
start_time = time.time()

# Schedulers differ in how they are stepped:
#   * ReduceLROnPlateau: once per epoch, with the monitored metric
#   * OneCycleLR: once per optimizer step (i.e. per batch), no argument
#   * everything else (e.g. CosineAnnealingLR): once per epoch, no argument
steps_per_batch = isinstance(scheduler, torch.optim.lr_scheduler.OneCycleLR)
steps_on_metric = isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)

for epoch in range(hyperparams["epochs"]):
    model.train()
    total_loss = 0.0
    loop = tqdm(
        train_loader, desc=f"Epoch [{epoch + 1}/{hyperparams['epochs']}]", leave=False
    )
    for xb, yb in loop:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        if steps_per_batch:
            scheduler.step()
        total_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(train_loader)
    # Terminate the line so a following "Early stopping" message is not
    # glued onto the loss value.
    print(f"Epoch {epoch + 1}: Avg Loss = {avg_loss:.6f}")

    if scheduler is not None and not steps_per_batch:
        if steps_on_metric:
            scheduler.step(avg_loss)
        else:
            scheduler.step()

    if avg_loss < best_loss:
        best_loss = avg_loss
        # state_dict() returns references to the live parameters; clone them
        # so later optimizer steps cannot overwrite the best snapshot.
        best_model_state = {
            key: value.detach().clone() for key, value in model.state_dict().items()
        }
        counter = 0
    else:
        counter += 1
        if counter >= hyperparams["patience"]:
            print(f"Early stopping at epoch {epoch + 1}")
            break

# Roll the model back to the best epoch so that saving and evaluation use the
# weights that achieved `best_loss`, not whatever the last epoch left behind.
if best_model_state is not None:
    model.load_state_dict(best_model_state)

print(
    f"Training complete in {time.time() - start_time:.2f} seconds. Best loss: {best_loss:.6f}"
)

                                                                              

Epoch 1: Avg Loss = 0.051038

                                                                              

Epoch 2: Avg Loss = 0.051200

                                                                              

Epoch 3: Avg Loss = 0.050686

                                                                              

Epoch 4: Avg Loss = 0.051306

                                                                              

Epoch 5: Avg Loss = 0.049064

                                                                              

Epoch 6: Avg Loss = 0.051155

                                                                              

Epoch 7: Avg Loss = 0.049329

                                                                              

Epoch 8: Avg Loss = 0.049317

                                                                              

Epoch 9: Avg Loss = 0.048813

Epoch [10/300]:  31%|███       | 77/252 [00:00<00:00, 251.72it/s, loss=0.0473]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7faca6389a90>>
Traceback (most recent call last):
  File "/home/w01f/ml4rf/env/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 
                                                                               

Epoch 10: Avg Loss = 0.049101

                                                                               

Epoch 11: Avg Loss = 0.049042

                                                                               

Epoch 12: Avg Loss = 0.049999

                                                                               

Epoch 13: Avg Loss = 0.050266

                                                                               

Epoch 14: Avg Loss = 0.047869

                                                                               

Epoch 15: Avg Loss = 0.048905

                                                                               

Epoch 16: Avg Loss = 0.047888

                                                                               

Epoch 17: Avg Loss = 0.049683

                                                                               

Epoch 18: Avg Loss = 0.048878

                                                                               

Epoch 19: Avg Loss = 0.048019

                                                                               

Epoch 20: Avg Loss = 0.048153

Epoch [21/300]:  90%|█████████ | 227/252 [00:00<00:00, 278.51it/s, loss=0.0508]

In [None]:
# Persist the model weights as <MODELS>/<SUBFOLDER>/baseline_model.pt,
# creating the folder hierarchy when it does not exist yet.
model_dir = Path(MODELS, SUBFOLDER)
model_dir.mkdir(parents=True, exist_ok=True)
model_path = model_dir.joinpath("baseline_model.pt")

torch.save(model.state_dict(), model_path)

### Evaluation


In [None]:
# Evaluation on the held-out test rows.
# Switch to eval mode first: otherwise the Dropout layer stays active and the
# predictions (and every metric below) become noisy and non-deterministic.
model.eval()
with torch.no_grad():
    preds_scaled = model(X_test_tensor.to(device)).cpu().numpy()
    # Undo the target standardisation so metrics are in original units.
    preds = y_scaler.inverse_transform(preds_scaled)

# One value per target column (multioutput="raw_values").
r2 = r2_score(Y_test, preds, multioutput="raw_values")
rmse = np.sqrt(mean_squared_error(Y_test, preds, multioutput="raw_values"))
mae = mean_absolute_error(Y_test, preds, multioutput="raw_values")

for i, name in enumerate(targets):
    print(f"{name}: R²={r2[i]:.4f}, RMSE={rmse[i]:.4f}, MAE={mae[i]:.4f}")