In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [3]:
# Step 1: Load the dataset
file_path = "dataset.csv"
df = pd.read_csv(file_path)

In [4]:
# Step 2: Define features and labels
# Features - based on your provided images (excluding ib and ic which are now labels)
feature_columns = [
    "freq",  # Frequency
    "vb",
    "vc",  # Voltage parameters
    "DEV_GEOM_L",
    "NUM_OF_TRANS_RF",  # Device geometry
]

# Labels - de-embedded S-parameters

s_parameter_labels = [
    "S_deemb(1,1)_real",
    "S_deemb(1,1)_imag",
    "S_deemb(1,2)_real",
    "S_deemb(1,2)_imag",
    "S_deemb(2,1)_real",
    "S_deemb(2,1)_imag",
    "S_deemb(2,2)_real",
    "S_deemb(2,2)_imag",
]

In [5]:
# Step 3: Check for null values in both features and labels
print("Checking for null values in features:")
feature_nulls = df[feature_columns].isnull().sum()
print(feature_nulls[feature_nulls > 0])  # Only show features with nulls

print("\nChecking for null values in labels:")
label_nulls = df[s_parameter_labels].isnull().sum()
print(label_nulls)

Checking for null values in features:
Series([], dtype: int64)

Checking for null values in labels:
S_deemb(1,1)_real    39886
S_deemb(1,1)_imag    39886
S_deemb(1,2)_real    39886
S_deemb(1,2)_imag    39886
S_deemb(2,1)_real    39886
S_deemb(2,1)_imag    39886
S_deemb(2,2)_real    39886
S_deemb(2,2)_imag    39886
dtype: int64


In [6]:
# Step 4: Filter rows with any null values in features or labels
df_clean = df.dropna(subset=feature_columns + s_parameter_labels)

print(f"\nOriginal dataset shape: {df.shape}")
print(f"Cleaned dataset shape: {df_clean.shape}")
print(f"Removed {df.shape[0] - df_clean.shape[0]} rows with null values")


Original dataset shape: (196100, 49)
Cleaned dataset shape: (156214, 49)
Removed 39886 rows with null values


In [7]:
# Step 4: Filter rows with any null values in features or labels
df_clean = df.dropna(subset=feature_columns + s_parameter_labels)

print(f"\nOriginal dataset shape: {df.shape}")
print(f"Cleaned dataset shape: {df_clean.shape}")
print(f"Removed {df.shape[0] - df_clean.shape[0]} rows with null values")


Original dataset shape: (196100, 49)
Cleaned dataset shape: (156214, 49)
Removed 39886 rows with null values


In [8]:
# Step 5: Create separate dataframes for features and labels
X = df_clean[feature_columns].copy()
Y = df_clean[s_parameter_labels].copy()

# Print shapes to confirm
print(f"\nFeature dataset shape: {X.shape}")
print(f"S-parameter labels shape: {Y.shape}")

# Step 6: Basic statistics for all datasets
print("\nFeature statistics (first 5 columns):")
print(X.iloc[:, :5].describe())


print("\nS-parameter statistics (first 4 columns):")
print(Y.iloc[:, :4].describe())

# Optional: Save cleaned datasets to files
# X.to_csv("hbt_features.csv", index=False)
# Y.to_csv("hbt_sparam_labels.csv", index=False)

print("\nFeature and label separation complete!")


Feature dataset shape: (156214, 5)
S-parameter labels shape: (156214, 8)

Feature statistics (first 5 columns):
               freq             vb             vc     DEV_GEOM_L  \
count  1.562140e+05  156214.000000  156214.000000  156214.000000   
mean   2.904730e+10       0.462686       0.859285       2.833728   
std    2.054359e+10       0.640656       0.646469       1.751900   
min    1.000000e+08      -1.800000      -0.600000       0.900000   
25%    1.000000e+10       0.000000       0.280000       0.900000   
50%    2.850000e+10       0.770000       1.000000       2.500000   
75%    4.700000e+10       0.860000       1.310000       5.000000   
max    6.500000e+10       1.040000       2.370000       5.000000   

       NUM_OF_TRANS_RF  
count    156214.000000  
mean          1.476551  
std           0.775818  
min           1.000000  
25%           1.000000  
50%           1.000000  
75%           2.000000  
max           4.000000  

S-parameter statistics (first 4 columns):
      

In [9]:
def plot_feature_vs_label_correlations(X, y, target_names, filename):
    """Create a heatmap of correlations between features and labels"""
    # Calculate correlations
    combined = pd.concat([X, y], axis=1)
    correlation = combined.corr()

    # Extract only the correlations between features and labels
    feature_target_corr = correlation.loc[X.columns, target_names]

    # Plot heatmap
    plt.figure(figsize=(12, 10))
    sns.heatmap(feature_target_corr, annot=True, cmap="coolwarm", fmt=".2f")
    plt.title("Feature-Target Correlations")
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()

    return feature_target_corr

In [10]:
# Plot correlations for selected S-parameters (using just S11 as example)
s11_labels = ["S_deemb(1,1)_real", "S_deemb(1,1)_imag"]
s11_corr = plot_feature_vs_label_correlations(
    X, Y[s11_labels], s11_labels, "s11_correlations.png"
)
print("\nTop 5 features correlated with S11 parameters:")
for label in s11_labels:
    top_features = s11_corr[label].abs().sort_values(ascending=False).head(5)
    print(f"\nTop features for {label}:")
    print(top_features)


Top 5 features correlated with S11 parameters:

Top features for S_deemb(1,1)_real:
freq               0.650297
vb                 0.443714
vc                 0.306241
NUM_OF_TRANS_RF    0.092477
DEV_GEOM_L         0.077022
Name: S_deemb(1,1)_real, dtype: float64

Top features for S_deemb(1,1)_imag:
freq               0.451187
vb                 0.082900
vc                 0.042034
DEV_GEOM_L         0.021004
NUM_OF_TRANS_RF    0.000268
Name: S_deemb(1,1)_imag, dtype: float64


In [11]:
def create_frequency_based_split(df, test_size=0.2, random_state=42):
    """
    Create a train-test split where:
    1. No two consecutive frequency values are in the test set
    2. Test set frequencies are evenly distributed across frequency bands

    Parameters:
    -----------
    df : pandas DataFrame
        DataFrame containing a 'freq' column
    test_size : float, default=0.2
        Proportion of unique frequency values to include in test set
    random_state : int, default=42
        Random seed for reproducibility

    Returns:
    --------
    train_mask : numpy array
        Boolean mask for training data
    test_mask : numpy array
        Boolean mask for test data
    """
    # Set random seed for reproducibility
    np.random.seed(random_state)

    # Get sorted unique frequency values
    unique_freqs = np.sort(df["freq"].unique())
    n_freqs = len(unique_freqs)
    print(f"Found {n_freqs} unique frequency values")

    # Define band boundaries
    band_boundaries = [
        (0, 1e9),  # Band 1: < 1 GHz
        (1e9, 6e9),  # Band 2: 1-6 GHz
        (6e9, 20e9),  # Band 3: 6-20 GHz
        (20e9, 40e9),  # Band 4: 20-40 GHz
        (40e9, float("inf")),  # Band 5: > 40 GHz
    ]

    # Assign frequencies to bands
    freq_bands = np.zeros(n_freqs, dtype=int)

    for i, freq in enumerate(unique_freqs):
        for band_idx, (lower, upper) in enumerate(band_boundaries):
            if lower <= freq < upper or (band_idx == 4 and freq >= lower):
                freq_bands[i] = band_idx
                break

    # Count frequencies in each band
    band_counts = np.zeros(5, dtype=int)
    for band in freq_bands:
        band_counts[band] += 1

    for band_idx, count in enumerate(band_counts):
        print(f"Band {band_idx + 1}: {count} frequency values")

    # Simple but effective approach: select every k-th frequency as test set
    # This guarantees no consecutive frequencies in test set
    k = int(1 / test_size)  # If test_size is 0.2, k=5 means select every 5th frequency

    # Start with a base selection
    test_indices = np.arange(0, n_freqs, k)
    print(f"Base selection gives {len(test_indices)} test frequencies (every {k}th)")

    # Calculate target test frequencies per band
    target_per_band = np.zeros(5, dtype=int)
    for i, count in enumerate(band_counts):
        target_per_band[i] = max(1, int(round(count * test_size)))

    print("Target test frequencies per band:")
    for i, target in enumerate(target_per_band):
        print(f"Band {i + 1}: {target}")

    # Calculate how many frequencies we actually selected per band
    actual_per_band = np.zeros(5, dtype=int)
    for idx in test_indices:
        band = freq_bands[idx]
        actual_per_band[band] += 1

    print("Actual initial test frequencies per band:")
    for i, actual in enumerate(actual_per_band):
        print(f"Band {i + 1}: {actual}")

    # Adjust selection to better match target distribution
    # First, identify bands that need more frequencies
    for band in range(5):
        if actual_per_band[band] < target_per_band[band]:
            # Get candidate indices in this band that aren't already selected
            band_candidates = [
                i
                for i in range(n_freqs)
                if freq_bands[i] == band
                and i not in test_indices
                and i - 1 not in test_indices
                and i + 1 not in test_indices
            ]

            # How many more do we need?
            n_needed = target_per_band[band] - actual_per_band[band]

            # Select additional frequencies if we have enough candidates
            if len(band_candidates) >= n_needed:
                # Choose candidates with roughly equal spacing
                step = max(1, len(band_candidates) // n_needed)
                selected = band_candidates[::step][:n_needed]
                test_indices = np.append(test_indices, selected)
                actual_per_band[band] += len(selected)

    # If we over-selected in some bands, remove frequencies to match target
    for band in range(5):
        if actual_per_band[band] > target_per_band[band]:
            # How many to remove
            n_remove = actual_per_band[band] - target_per_band[band]

            # Get indices in this band that were selected
            band_selected = [i for i in test_indices if freq_bands[i] == band]

            # Choose which ones to remove (spaced out)
            if band_selected:
                step = max(1, len(band_selected) // n_remove)
                to_remove = band_selected[::step][:n_remove]
                test_indices = np.array([i for i in test_indices if i not in to_remove])
                actual_per_band[band] -= len(to_remove)

    print("Final test frequencies per band after adjustment:")
    for i, actual in enumerate(actual_per_band):
        print(f"Band {i + 1}: {actual} (target: {target_per_band[i]})")

    # Sort the indices
    test_indices = np.sort(test_indices)

    # Verify that no consecutive frequencies are in test set
    for i in range(len(test_indices) - 1):
        if test_indices[i + 1] - test_indices[i] == 1:
            print(
                f"WARNING: Consecutive frequencies in test set: {unique_freqs[test_indices[i]]} and {unique_freqs[test_indices[i + 1]]}"
            )

    # Create test frequencies set
    test_freqs = unique_freqs[test_indices]

    # Create train and test masks
    test_mask = df["freq"].isin(test_freqs)
    train_mask = ~test_mask

    print(f"Final training set: {train_mask.sum()} samples")
    print(f"Final test set: {test_mask.sum()} samples")

    return train_mask, test_mask

In [12]:
# Replace your current train-test split with the frequency-based approach
train_mask, test_mask = create_frequency_based_split(
    df_clean, test_size=0.2, random_state=42
)

# Use the masks to split features and labels
X_raw_train = X[train_mask].copy()
X_raw_test = X[test_mask].copy()
Y_raw_train = Y[train_mask].copy()
Y_raw_test = Y[test_mask].copy()

Found 74 unique frequency values
Band 1: 9 frequency values
Band 2: 5 frequency values
Band 3: 14 frequency values
Band 4: 20 frequency values
Band 5: 26 frequency values
Base selection gives 15 test frequencies (every 5th)
Target test frequencies per band:
Band 1: 2
Band 2: 1
Band 3: 3
Band 4: 4
Band 5: 5
Actual initial test frequencies per band:
Band 1: 2
Band 2: 1
Band 3: 3
Band 4: 4
Band 5: 5
Final test frequencies per band after adjustment:
Band 1: 2 (target: 2)
Band 2: 1 (target: 1)
Band 3: 3 (target: 3)
Band 4: 4 (target: 4)
Band 5: 5 (target: 5)
Final training set: 124549 samples
Final test set: 31665 samples


In [13]:
# For training data
X_train = X_raw_train.copy()
X_train["vb_is_zero"] = (X_train["vb"] == 0).astype(int)
X_train["vb_is_high"] = ((X_train["vb"] >= 0.7) & (X_train["vb"] <= 0.9)).astype(int)
X_train["vc_is_zero"] = (X_train["vc"] == 0).astype(int)
X_train["vc_is_1_2V"] = ((X_train["vc"] >= 1.1) & (X_train["vc"] <= 1.3)).astype(int)
X_train["vc_is_1_5V"] = ((X_train["vc"] >= 1.4) & (X_train["vc"] <= 1.6)).astype(int)

# For test data
X_test = X_raw_test.copy()
X_test["vb_is_zero"] = (X_test["vb"] == 0).astype(int)
X_test["vb_is_high"] = ((X_test["vb"] >= 0.7) & (X_test["vb"] <= 0.9)).astype(int)
X_test["vc_is_zero"] = (X_test["vc"] == 0).astype(int)
X_test["vc_is_1_2V"] = ((X_test["vc"] >= 1.1) & (X_test["vc"] <= 1.3)).astype(int)
X_test["vc_is_1_5V"] = ((X_test["vc"] >= 1.4) & (X_test["vc"] <= 1.6)).astype(int)

# STEP 3: Initialize and fit scaler ONLY on training data
voltage_scaler = MinMaxScaler(feature_range=(-1, 1))
voltage_scaler.fit(X_train[["vb", "vc"]])  # Fit only on training data

# STEP 4: Transform both datasets using the fitted scaler
X_train[["vb", "vc"]] = voltage_scaler.transform(X_train[["vb", "vc"]])
X_test[["vb", "vc"]] = voltage_scaler.transform(X_test[["vb", "vc"]])

# STEP 5: Save the scaler for future use
import joblib

joblib.dump(voltage_scaler, "voltage_scaler.pkl")

['voltage_scaler.pkl']

In [14]:
# Process training data
X_train = X_raw_train.copy()
X_train.loc[:, "DEV_L_0_9um"] = (X_train["DEV_GEOM_L"] == 0.9).astype(int)
X_train.loc[:, "DEV_L_2_5um"] = (X_train["DEV_GEOM_L"] == 2.5).astype(int)
X_train.loc[:, "DEV_L_5_0um"] = (X_train["DEV_GEOM_L"] == 5.0).astype(int)

# Drop the original column from training data
X_train = X_train.drop("DEV_GEOM_L", axis=1)

# Process test data with the same transformations
X_test = X_raw_test.copy()
X_test.loc[:, "DEV_L_0_9um"] = (X_test["DEV_GEOM_L"] == 0.9).astype(int)
X_test.loc[:, "DEV_L_2_5um"] = (X_test["DEV_GEOM_L"] == 2.5).astype(int)
X_test.loc[:, "DEV_L_5_0um"] = (X_test["DEV_GEOM_L"] == 5.0).astype(int)

# Drop the original column from test data
X_test = X_test.drop("DEV_GEOM_L", axis=1)

In [15]:
# STEP 2: Process training data
X_train = X_raw_train.copy()
X_train.loc[:, "TRANS_1"] = (X_train["NUM_OF_TRANS_RF"] == 1).astype(int)
X_train.loc[:, "TRANS_2"] = (X_train["NUM_OF_TRANS_RF"] == 2).astype(int)
X_train.loc[:, "TRANS_4"] = (X_train["NUM_OF_TRANS_RF"] == 4).astype(int)

# Drop the original column from training data
X_train = X_train.drop("NUM_OF_TRANS_RF", axis=1)

# STEP 3: Process test data with the same transformations
X_test = X_raw_test.copy()
X_test.loc[:, "TRANS_1"] = (X_test["NUM_OF_TRANS_RF"] == 1).astype(int)
X_test.loc[:, "TRANS_2"] = (X_test["NUM_OF_TRANS_RF"] == 2).astype(int)
X_test.loc[:, "TRANS_4"] = (X_test["NUM_OF_TRANS_RF"] == 4).astype(int)

# Drop the original column from test data
X_test = X_test.drop("NUM_OF_TRANS_RF", axis=1)

In [16]:
import importlib

import frequency_preprocessing

importlib.reload(frequency_preprocessing)
from frequency_preprocessing import preprocess_frequency

# Then try using it
X_train, X_test = preprocess_frequency(X_train, X_test, fit_mode=True)

In [17]:
# Fill NaN values with 0 for freq_pos_in_band columns
for i in range(1, 6):
    X_train[f"freq_pos_in_band_{i}"] = X_train[f"freq_pos_in_band_{i}"].fillna(0)
    if X_test is not None:
        X_test[f"freq_pos_in_band_{i}"] = X_test[f"freq_pos_in_band_{i}"].fillna(0)

# Fill any remaining NaN values in other columns
X_train = X_train.fillna(0)
if X_test is not None:
    X_test = X_test.fillna(0)

In [30]:
import os

import pandas as pd
import torch
from sklearn.discriminant_analysis import StandardScaler

# Create directory for results
os.makedirs("freq_aware_results", exist_ok=True)


# Define SMAPE function for better handling of small values
def symmetric_mean_absolute_percentage_error(y_true, y_pred, epsilon=1e-10):
    """Calculate SMAPE with protection against division by zero."""
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0 + epsilon
    numerator = np.abs(y_true - y_pred)
    smape = numerator / denominator
    return np.mean(smape) * 100


# Define mean absolute percentage error function
def mean_absolute_percentage_error(y_true, y_pred, epsilon=1e-10):
    """Calculate MAPE with protection against division by zero."""
    non_zero = np.abs(y_true) > epsilon
    if non_zero.sum() == 0:
        return np.nan
    percentage_errors = (
        np.abs(
            (y_true[non_zero] - y_pred[non_zero]) / (np.abs(y_true[non_zero]) + epsilon)
        )
        * 100
    )
    return np.mean(percentage_errors)


# Frequency-aware neural network
class FrequencyAwareNetwork(nn.Module):
    def __init__(
        self,
        freq_features,
        other_features,
        hidden_sizes=[64, 128, 256],
        dropout_rate=0.2,
        activation="silu",
    ):
        super().__init__()

        if activation == "silu":
            activation_fn = nn.SiLU()
        elif activation == "relu":
            activation_fn = nn.ReLU()
        elif activation == "gelu":
            activation_fn = nn.GELU()
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

        # Frequency-specific processing branch
        freq_layers = []
        prev_size = freq_features
        for h_size in hidden_sizes[:2]:  # First two hidden sizes for branches
            freq_layers.append(nn.Linear(prev_size, h_size))
            freq_layers.append(
                activation_fn
            )  # Using SiLU (Swish) activation for better performance
            freq_layers.append(nn.BatchNorm1d(h_size))
            freq_layers.append(nn.Dropout(dropout_rate))
            prev_size = h_size

        self.freq_branch = nn.Sequential(*freq_layers)

        # Other parameters branch
        other_layers = []
        prev_size = other_features
        for h_size in hidden_sizes[:2]:
            other_layers.append(nn.Linear(prev_size, h_size))
            other_layers.append(activation_fn)
            other_layers.append(nn.BatchNorm1d(h_size))
            other_layers.append(nn.Dropout(dropout_rate))
            prev_size = h_size

        self.other_branch = nn.Sequential(*other_layers)

        # Combined processing with residual connections
        combined_layers = []
        prev_size = hidden_sizes[1] * 2  # Output size from both branches combined

        for h_size in hidden_sizes[2:]:
            combined_layers.append(nn.Linear(prev_size, h_size))
            combined_layers.append(activation_fn)
            combined_layers.append(nn.BatchNorm1d(h_size))
            combined_layers.append(nn.Dropout(dropout_rate))
            prev_size = h_size

        # Final output layer for real and imaginary components
        combined_layers.append(nn.Linear(prev_size, 2))

        self.combined = nn.Sequential(*combined_layers)

        # Store feature indices for processing
        self.freq_indices = None
        self.other_indices = None

    def forward(self, x):
        # Split input into frequency and other features
        if self.freq_indices is None or self.other_indices is None:
            raise ValueError(
                "Feature indices not set. Call set_feature_indices() first."
            )

        freq_input = x[:, self.freq_indices]
        other_input = x[:, self.other_indices]

        # Process through branches
        freq_features = self.freq_branch(freq_input)
        other_features = self.other_branch(other_input)

        # Combine and output
        combined = torch.cat([freq_features, other_features], dim=1)
        return self.combined(combined)

    def set_feature_indices(self, freq_indices, other_indices):
        """Set indices for frequency and other features."""
        self.freq_indices = freq_indices
        self.other_indices = other_indices


# Helper function to identify frequency-related features
def identify_frequency_features(X_columns):
    """Identify frequency-related features in the dataset."""
    freq_features = [
        i
        for i, col in enumerate(X_columns)
        if "freq" in col.lower() or "band" in col.lower()
    ]
    other_features = [i for i in range(len(X_columns)) if i not in freq_features]

    print(
        f"Identified {len(freq_features)} frequency-related features and {len(other_features)} other features"
    )
    return freq_features, other_features


# Modified prepare_data_for_pytorch to handle scaling
def prepare_data_for_pytorch_with_scaling(
    X_train, Y_train, X_test, Y_test, components, batch_size=128, scale_y=True
):
    """Prepare data for PyTorch models with optional Y-scaling."""

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train.values)
    X_test_tensor = torch.FloatTensor(X_test.values)

    # Handle Y data scaling if requested
    if scale_y:
        # Create scaler for Y values
        y_scaler = StandardScaler()
        Y_train_values = Y_train[components].values
        Y_test_values = Y_test[components].values

        # Fit scaler and transform data
        Y_train_scaled = y_scaler.fit_transform(Y_train_values)
        Y_test_scaled = y_scaler.transform(Y_test_values)

        # Convert to tensors
        Y_train_tensor = torch.FloatTensor(Y_train_scaled)
        Y_test_tensor = torch.FloatTensor(Y_test_scaled)

        # Save scaler for later use
        component_str = "_".join(components)
        joblib.dump(y_scaler, f"freq_aware_results/{component_str}_scaler.pkl")

        # Create data loaders
        train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        return (
            X_train_tensor,
            Y_train_tensor,
            X_test_tensor,
            Y_test_tensor,
            train_loader,
            y_scaler,
        )

    else:
        # No scaling
        Y_train_tensor = torch.FloatTensor(Y_train[components].values)
        Y_test_tensor = torch.FloatTensor(Y_test[components].values)

        # Create data loaders
        train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        return (
            X_train_tensor,
            Y_train_tensor,
            X_test_tensor,
            Y_test_tensor,
            train_loader,
            None,
        )


def train_model(
    model,
    train_loader,
    X_test_tensor,
    Y_test_tensor,
    criterion,
    optimizer,
    device,
    epochs=100,
    early_stopping_patience=15,
    verbose=True,
    lr_scheduler_type="reduce_on_plateau",
    warmup_epochs=5,
):
    """Train a PyTorch model with early stopping and learning rate scheduling."""
    model = model.to(device)

    # Set up learning rate scheduler based on specified type
    if lr_scheduler_type == "reduce_on_plateau":
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="min", factor=0.85, patience=5, min_lr=5e-7
        )
    elif lr_scheduler_type == "cosine_annealing":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=epochs, eta_min=1e-6
        )
    elif lr_scheduler_type == "one_cycle":
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=optimizer.param_groups[0]["lr"],
            steps_per_epoch=len(train_loader),
            epochs=epochs,
        )
    else:
        scheduler = None

    # For early stopping
    best_loss = float("inf")
    best_model_state = None
    patience_counter = 0

    # Track losses and learning rates for plotting
    train_losses = []
    val_losses = []
    learning_rates = []

    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        # Apply learning rate warmup if needed
        if warmup_epochs > 0 and epoch < warmup_epochs and scheduler is None:
            lr_multiplier = (epoch + 1) / warmup_epochs
            for param_group in optimizer.param_groups:
                param_group["lr"] = optimizer.param_groups[0]["lr"] * lr_multiplier

        # Record current learning rate
        current_lr = optimizer.param_groups[0]["lr"]
        learning_rates.append(current_lr)

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            # Step OneCycleLR scheduler here if being used
            if lr_scheduler_type == "one_cycle":
                scheduler.step()

            running_loss += loss.item()

        # Calculate average training loss
        avg_train_loss = running_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation loss
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_test_tensor.to(device))
            val_loss = criterion(val_outputs, Y_test_tensor.to(device)).item()
            val_losses.append(val_loss)

        # Print progress
        if verbose and (epoch + 1) % 10 == 0:
            print(
                f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {val_loss:.6f}, LR: {current_lr:.8f}"
            )

        # Learning rate scheduler step (except for OneCycleLR which is done per iteration)
        if scheduler is not None:
            if lr_scheduler_type == "reduce_on_plateau":
                scheduler.step(val_loss)
            elif lr_scheduler_type == "cosine_annealing":
                scheduler.step()

        # Check for early stopping
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_state = model.state_dict().copy()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                if verbose:
                    print(f"Early stopping at epoch {epoch + 1}")
                break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Plot learning rate schedule
    plt.figure(figsize=(10, 4))
    plt.plot(learning_rates)
    plt.xlabel("Epochs")
    plt.ylabel("Learning Rate")
    plt.title("Learning Rate Schedule")
    plt.yscale("log")
    plt.savefig("freq_aware_results/learning_rate_schedule.png")
    plt.close()

    return model, train_losses, val_losses


# Modified evaluate_model function to handle scaling
def evaluate_model_with_scaling(
    model, X_test_tensor, Y_test_tensor, Y_test, components, device, y_scaler=None
):
    """Evaluate a trained model and calculate performance metrics."""
    model.eval()
    with torch.no_grad():
        predictions = model(X_test_tensor.to(device)).cpu().numpy()

    # Inverse transform if scaler was used
    if y_scaler is not None:
        predictions_original = y_scaler.inverse_transform(predictions)
        y_test_original = Y_test[components].values
    else:
        predictions_original = predictions
        y_test_original = Y_test[components].values

    # Calculate metrics
    metrics = {}

    for i, component in enumerate(components):
        y_true = y_test_original[:, i]
        y_pred = predictions_original[:, i]

        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)

        # Use SMAPE instead of MAPE for S12
        if "S12" in component or "S_deemb(1,2)" in component:
            smape_val = symmetric_mean_absolute_percentage_error(y_true, y_pred)
            metrics[component] = {
                "mse": mse,
                "rmse": rmse,
                "r2": r2,
                "mae": mae,
                "smape": smape_val,
            }
        else:
            # Regular MAPE for other S-parameters
            metrics[component] = {
                "mse": mse,
                "rmse": rmse,
                "r2": r2,
                "mae": mae,
                "mape": mean_absolute_percentage_error(y_true, y_pred),
            }

    # Calculate average metrics
    avg_metrics = {
        "rmse": np.mean([metrics[comp]["rmse"] for comp in components]),
        "r2": np.mean([metrics[comp]["r2"] for comp in components]),
        "mae": np.mean([metrics[comp]["mae"] for comp in components]),
    }

    # Add SMAPE or MAPE average depending on which components were evaluated
    if any("S12" in comp or "S_deemb(1,2)" in comp for comp in components):
        avg_metrics["smape"] = np.mean([metrics[comp]["smape"] for comp in components])
    else:
        avg_metrics["mape"] = np.mean([metrics[comp]["mape"] for comp in components])

    return metrics, avg_metrics, predictions_original


def plot_learning_curves(train_losses, val_losses, model_name):
    """Plot the learning curves."""
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label="Training Loss")
    plt.plot(val_losses, label="Validation Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title(f"Learning Curves for {model_name}")
    plt.legend()
    plt.savefig(f"freq_aware_results/learning_curves_{model_name}.png")
    plt.close()


def plot_predictions(Y_test, predictions, components, model_name):
    """Plot predictions vs actual values."""
    fig, axes = plt.subplots(1, len(components), figsize=(15, 5))

    for i, component in enumerate(components):
        ax = axes[i] if len(components) > 1 else axes
        y_true = Y_test[component].values
        y_pred = predictions[:, i]

        ax.scatter(y_true, y_pred, alpha=0.3)
        ax.plot([min(y_true), max(y_true)], [min(y_true), max(y_true)], "r--")
        ax.set_xlabel("Actual")
        ax.set_ylabel("Predicted")
        ax.set_title(f"{component}")

    plt.tight_layout()
    plt.savefig(f"freq_aware_results/predictions_{model_name}.png")
    plt.close()


def plot_error_distribution(Y_test, predictions, components, model_name):
    """Plot error distributions."""
    fig, axes = plt.subplots(1, len(components), figsize=(15, 5))

    for i, component in enumerate(components):
        ax = axes[i] if len(components) > 1 else axes
        y_true = Y_test[component].values
        y_pred = predictions[:, i]

        errors = y_pred - y_true

        sns.histplot(errors, kde=True, ax=ax)
        ax.set_xlabel("Prediction Error")
        ax.set_ylabel("Frequency")
        ax.set_title(f"{component} Error Distribution")

    plt.tight_layout()
    plt.savefig(f"freq_aware_results/error_dist_{model_name}.png")
    plt.close()


# Modified train_frequency_aware_models function
def train_frequency_aware_models(
    X_train, X_test, Y_train, Y_test, hyperparameters=None, selected_features=None
):
    """
    Train frequency-aware models for each S-parameter with conditional scaling.
    """
    # S-parameter definitions
    s_parameter_models = {
        "S22": ["S_deemb(2,2)_real", "S_deemb(2,2)_imag"],
    }

    # 'S12': ['S_deemb(1,2)_real', 'S_deemb(1,2)_imag']

    # Set default hyperparameters if not provided
    if hyperparameters is None:
        hyperparameters = {
            "hidden_sizes": [64, 128, 256],
            "dropout_rate": 0.2,
            "learning_rate": 0.001,
            "batch_size": 256,
            "epochs": 150,
            "early_stopping_patience": 15,
            "activation": "gelu",
            "lr_scheduler_type": "one_cycle",
        }

    # Filter features if requested
    if selected_features is not None:
        X_train = X_train[selected_features]
        X_test = X_test[selected_features]
        print(f"Using {len(selected_features)} selected features")

    # Check for GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Identify frequency-related features
    freq_indices, other_indices = identify_frequency_features(X_train.columns)

    # Store results and models
    models = {}
    all_results = {}
    all_predictions = {}
    scalers = {}  # Store scalers for each model

    # Record start time
    start_time = time.time()

    # Train a model for each S-parameter
    for model_name, components in s_parameter_models.items():
        print(f"\n{'=' * 50}")
        print(f"Training frequency-aware model for {model_name}")
        print(f"{'=' * 50}")

        # Decide whether to scale Y data (only for S12)
        scale_y = model_name == "S12"

        # Prepare data with conditional scaling
        prep_results = prepare_data_for_pytorch_with_scaling(
            X_train,
            Y_train,
            X_test,
            Y_test,
            components,
            hyperparameters["batch_size"],
            scale_y=scale_y,
        )

        if scale_y:
            (
                X_train_tensor,
                Y_train_tensor,
                X_test_tensor,
                Y_test_tensor,
                train_loader,
                y_scaler,
            ) = prep_results
            scalers[model_name] = y_scaler
            print("Applied StandardScaler to Y values for S12")
        else:
            (
                X_train_tensor,
                Y_train_tensor,
                X_test_tensor,
                Y_test_tensor,
                train_loader,
                _,
            ) = prep_results

        # Initialize model
        model = FrequencyAwareNetwork(
            len(freq_indices),
            len(other_indices),
            hyperparameters["hidden_sizes"],
            hyperparameters["dropout_rate"],
            hyperparameters.get("activation", "gelu"),
        )
        model.set_feature_indices(freq_indices, other_indices)

        # Loss and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=hyperparameters["learning_rate"])

        # Train model (use your existing train_model function)
        trained_model, train_losses, val_losses = train_model(
            model,
            train_loader,
            X_test_tensor,
            Y_test_tensor,
            criterion,
            optimizer,
            device,
            hyperparameters["epochs"],
            hyperparameters["early_stopping_patience"],
            lr_scheduler_type=hyperparameters.get("lr_scheduler_type", "one_cycle"),
        )

        # Plot learning curves
        plot_learning_curves(train_losses, val_losses, model_name)

        # Evaluate model with proper scaling handling
        metrics, avg_metrics, predictions = evaluate_model_with_scaling(
            trained_model,
            X_test_tensor,
            Y_test_tensor,
            Y_test,
            components,
            device,
            scalers.get(model_name),
        )

        # Plot predictions and error distributions
        plot_predictions(Y_test, predictions, components, model_name)
        plot_error_distribution(Y_test, predictions, components, model_name)

        # Print results
        print(f"\nPerformance metrics for {model_name}:")
        for component, metric in metrics.items():
            print(f"  {component}:")
            print(f"    RMSE: {metric['rmse']:.6f}")
            print(f"    R²: {metric['r2']:.6f}")
            print(f"    MAE: {metric['mae']:.6f}")
            if "smape" in metric:
                print(f"    SMAPE: {metric['smape']:.2f}%")
            else:
                print(f"    MAPE: {metric['mape']:.2f}%")

        print(f"\nAverage metrics for {model_name}:")
        print(f"  R²: {avg_metrics['r2']:.6f}")
        print(f"  RMSE: {avg_metrics['rmse']:.6f}")
        print(f"  MAE: {avg_metrics['mae']:.6f}")
        if "smape" in avg_metrics:
            print(f"  SMAPE: {avg_metrics['smape']:.2f}%")
        else:
            print(f"  MAPE: {avg_metrics['mape']:.2f}%")

        # Store results
        models[model_name] = trained_model
        all_results[model_name] = {
            "component_metrics": metrics,
            "avg_metrics": avg_metrics,
        }
        all_predictions[model_name] = predictions

    # Record total training time
    train_time = time.time() - start_time
    print(f"\nTotal training time: {train_time:.2f} seconds")

    # Save models
    for model_name, model in models.items():
        torch.save(model.state_dict(), f"freq_aware_results/{model_name}_model.pth")

    print("Models and results saved to freq_aware_results/")

    return models, all_results, all_predictions, scalers


# Function to experiment with different hyperparameters
def hyperparameter_tuning(X_train, X_test, Y_train, Y_test, param_grid):
    """
    Perform hyperparameter tuning by training models with different configurations.

    Parameters:
    -----------
    X_train, X_test : pd.DataFrame
        Preprocessed feature datasets
    Y_train, Y_test : pd.DataFrame
        Target S-parameter datasets
    param_grid : dict
        Dictionary of hyperparameter values to try

    Returns:
    --------
    results : dict
        Dictionary of results for each configuration
    """
    results = {}

    # Generate all hyperparameter combinations
    param_keys = list(param_grid.keys())
    param_values = list(param_grid.values())

    def generate_combinations(index, current_params):
        if index == len(param_keys):
            # Train model with current parameter combination
            config_name = "_".join([f"{k}={v}" for k, v in current_params.items()])
            print(f"\n\n{'#' * 70}")
            print(f"# Testing configuration: {config_name}")
            print(f"{'#' * 70}\n")

            # Train models
            _, all_results, _ = train_frequency_aware_models(
                X_train, X_test, Y_train, Y_test, hyperparameters=current_params
            )

            # Store results
            avg_r2 = np.mean(
                [result["avg_metrics"]["r2"] for result in all_results.values()]
            )
            results[config_name] = {
                "params": current_params.copy(),
                "avg_r2": avg_r2,
                "detailed_results": all_results,
            }
            return

        # Recursive exploration of parameter combinations
        for value in param_values[index]:
            current_params[param_keys[index]] = value
            generate_combinations(index + 1, current_params)

    # Start generating combinations
    generate_combinations(0, {})

    # Rank results
    ranked_results = sorted(results.items(), key=lambda x: x[1]["avg_r2"], reverse=True)

    # Print summary
    print("\n\n" + "=" * 80)
    print("HYPERPARAMETER TUNING RESULTS")
    print("=" * 80)

    for i, (config_name, result) in enumerate(ranked_results):
        print(f"\n{i + 1}. Configuration: {config_name}")
        print(f"   Average R²: {result['avg_r2']:.6f}")
        print(f"   Parameters: {result['params']}")

    return results


# Function to test different feature subsets
def feature_selection_experiment(X_train, X_test, Y_train, Y_test, feature_sets):
    """
    Test different feature subsets to find optimal combinations.

    Parameters:
    -----------
    X_train, X_test : pd.DataFrame
        Complete feature datasets
    Y_train, Y_test : pd.DataFrame
        Target S-parameter datasets
    feature_sets : dict
        Dictionary mapping set names to lists of feature columns

    Returns:
    --------
    results : dict
        Dictionary of results for each feature set
    """
    results = {}

    for set_name, features in feature_sets.items():
        print(f"\n\n{'#' * 70}")
        print(f"# Testing feature set: {set_name} ({len(features)} features)")
        print(f"{'#' * 70}\n")

        # Train models with this feature set
        _, all_results, _ = train_frequency_aware_models(
            X_train, X_test, Y_train, Y_test, selected_features=features
        )

        # Store results
        avg_r2 = np.mean(
            [result["avg_metrics"]["r2"] for result in all_results.values()]
        )
        results[set_name] = {
            "features": features,
            "feature_count": len(features),
            "avg_r2": avg_r2,
            "detailed_results": all_results,
        }

    # Rank results
    ranked_results = sorted(results.items(), key=lambda x: x[1]["avg_r2"], reverse=True)

    # Print summary
    print("\n\n" + "=" * 80)
    print("FEATURE SELECTION RESULTS")
    print("=" * 80)

    for i, (set_name, result) in enumerate(ranked_results):
        print(f"\n{i + 1}. Feature Set: {set_name}")
        print(f"   Features: {len(result['features'])}")
        print(f"   Average R²: {result['avg_r2']:.6f}")

    return results


# Example usage


# Example of running with all features and default hyperparameters
# models, results, predictions = train_frequency_aware_models(
#     X_train, X_test, Y_raw_train, Y_raw_test,
#     hyperparameters=default_hyperparameters
# )

# Example of hyperparameter tuning
# param_grid = {
#     'learning_rate': [0.0001, 0.001, 0.01],
#     'dropout_rate': [0.1, 0.2, 0.3],
#     'batch_size': [128, 256, 512]
# }
# tuning_results = hyperparameter_tuning(X_train, X_test, Y_raw_train, Y_raw_test, param_grid)

# Example of feature selection experiment
# core_features = ['freq', 'vb', 'vc', 'gm_abs_log']
# freq_features = [col for col in X_train.columns if 'freq' in col]
# impedance_features = [col for col in X_train.columns if 'Zin' in col or 'Zout' in col]

# feature_sets = {
#     'all_features': X_train.columns.tolist(),
#     'frequency_only': freq_features,
#     'core_plus_frequency': core_features + freq_features,
#     'core_plus_impedance': core_features + impedance_features,
#     'optimized_set': ['freq', 'freq_log', 'freq_log_norm', 'vb', 'vc', 'gm_abs_log',
#                       'Zin_real_log', 'Zin_imag_log', 'Zout_real_log']
# }
# feature_results = feature_selection_experiment(X_train, X_test, Y_raw_train, Y_raw_test, feature_sets)

## For S(1,1):


In [24]:
best_hyperparameters = {
    "learning_rate": 0.001,
    "dropout_rate": 0.1,
    "batch_size": 1024,
    "epochs": 200,
    "early_stopping_patience": 30,
    "hidden_sizes": [256, 512, 1024, 512],
    "lr_scheduler_type": "reduce_on_plateau",
    "activation": "gelu",
}

# Train with scaling for S11
models, results, predictions, scalers = train_frequency_aware_models(
    X_train, X_test, Y_raw_train, Y_raw_test, hyperparameters=best_hyperparameters
)

# You can also save the scalers for future use
joblib.dump(scalers, "freq_aware_results/all_scalers.pkl")

Using device: cuda
Identified 15 frequency-related features and 6 other features

Training frequency-aware model for S11
Epoch 10/200, Train Loss: 0.009793, Val Loss: 0.007758, LR: 0.00100000
Epoch 20/200, Train Loss: 0.008762, Val Loss: 0.006603, LR: 0.00085000
Epoch 30/200, Train Loss: 0.008016, Val Loss: 0.006164, LR: 0.00072250
Epoch 40/200, Train Loss: 0.007579, Val Loss: 0.006691, LR: 0.00061412
Epoch 50/200, Train Loss: 0.007467, Val Loss: 0.006433, LR: 0.00052201
Epoch 60/200, Train Loss: 0.007675, Val Loss: 0.006039, LR: 0.00044371
Epoch 70/200, Train Loss: 0.007300, Val Loss: 0.006003, LR: 0.00037715
Epoch 80/200, Train Loss: 0.007039, Val Loss: 0.006051, LR: 0.00032058
Epoch 90/200, Train Loss: 0.007144, Val Loss: 0.006029, LR: 0.00027249
Epoch 100/200, Train Loss: 0.007060, Val Loss: 0.006259, LR: 0.00023162
Epoch 110/200, Train Loss: 0.006876, Val Loss: 0.006006, LR: 0.00019687
Epoch 120/200, Train Loss: 0.006805, Val Loss: 0.005912, LR: 0.00014224
Epoch 130/200, Train Los

['freq_aware_results/all_scalers.pkl']

## For S(1,2):


In [22]:
best_hyperparameters = {
    "hidden_sizes": [384, 768, 1536, 768, 384],
    "dropout_rate": 0.1,
    "learning_rate": 0.002,
    "batch_size": 1024,
    "epochs": 300,
    "early_stopping_patience": 40,
    "activation": "gelu",
    "lr_scheduler_type": "reduce_on_plateau",
}

# Train with scaling for S12
models, results, predictions, scalers = train_frequency_aware_models(
    X_train, X_test, Y_raw_train, Y_raw_test, hyperparameters=best_hyperparameters
)

# You can also save the scalers for future use
joblib.dump(scalers, "freq_aware_results/all_scalers.pkl")

Using device: cuda
Identified 15 frequency-related features and 6 other features

Training frequency-aware model for S12
Applied StandardScaler to Y values for S12
Epoch 10/300, Train Loss: 0.083003, Val Loss: 0.071906, LR: 0.00200000
Epoch 20/300, Train Loss: 0.079795, Val Loss: 0.070786, LR: 0.00170000
Epoch 30/300, Train Loss: 0.078014, Val Loss: 0.069310, LR: 0.00144500
Epoch 40/300, Train Loss: 0.077573, Val Loss: 0.073279, LR: 0.00122825
Epoch 50/300, Train Loss: 0.077067, Val Loss: 0.067784, LR: 0.00088741
Epoch 60/300, Train Loss: 0.075787, Val Loss: 0.067225, LR: 0.00088741
Epoch 70/300, Train Loss: 0.075418, Val Loss: 0.067471, LR: 0.00064115
Epoch 80/300, Train Loss: 0.075541, Val Loss: 0.066763, LR: 0.00054498
Epoch 90/300, Train Loss: 0.075101, Val Loss: 0.068795, LR: 0.00054498
Epoch 100/300, Train Loss: 0.074955, Val Loss: 0.067769, LR: 0.00046323
Epoch 110/300, Train Loss: 0.074161, Val Loss: 0.066910, LR: 0.00033469
Epoch 120/300, Train Loss: 0.074433, Val Loss: 0.0665

['freq_aware_results/all_scalers.pkl']

## For S(2,1):


In [28]:
best_hyperparameters = {
    "learning_rate": 0.002,
    "dropout_rate": 0.1,
    "batch_size": 1024,
    "epochs": 200,
    "early_stopping_patience": 30,
    "hidden_sizes": [1024, 2048, 2048, 1024],
    "lr_scheduler_type": "reduce_on_plateau",
    "activation": "gelu",
}

# Train with scaling for S11
models, results, predictions, scalers = train_frequency_aware_models(
    X_train, X_test, Y_raw_train, Y_raw_test, hyperparameters=best_hyperparameters
)

# You can also save the scalers for future use
joblib.dump(scalers, "freq_aware_results/all_scalers.pkl")

Using device: cuda
Identified 15 frequency-related features and 6 other features

Training frequency-aware model for S21
Epoch 10/200, Train Loss: 0.572542, Val Loss: 0.491979, LR: 0.00200000
Epoch 20/200, Train Loss: 0.491452, Val Loss: 0.390267, LR: 0.00200000
Epoch 30/200, Train Loss: 0.448655, Val Loss: 5.632973, LR: 0.00200000
Epoch 40/200, Train Loss: 0.443983, Val Loss: 0.370182, LR: 0.00170000
Epoch 50/200, Train Loss: 0.387322, Val Loss: 0.387145, LR: 0.00144500
Epoch 60/200, Train Loss: 0.378890, Val Loss: 0.413280, LR: 0.00144500
Epoch 70/200, Train Loss: 0.360920, Val Loss: 0.341764, LR: 0.00104401
Epoch 80/200, Train Loss: 0.353688, Val Loss: 0.367151, LR: 0.00104401
Epoch 90/200, Train Loss: 0.352128, Val Loss: 0.335613, LR: 0.00088741
Epoch 100/200, Train Loss: 0.349183, Val Loss: 0.339924, LR: 0.00064115
Epoch 110/200, Train Loss: 0.335221, Val Loss: 0.303835, LR: 0.00054498
Epoch 120/200, Train Loss: 0.329720, Val Loss: 0.333137, LR: 0.00046323
Epoch 130/200, Train Los

['freq_aware_results/all_scalers.pkl']

## For S(2,2)


In [31]:
best_hyperparameters = {
    "learning_rate": 0.002,
    "dropout_rate": 0.1,
    "batch_size": 1024,
    "epochs": 200,
    "early_stopping_patience": 30,
    "hidden_sizes": [1024, 1536, 2048, 1536, 1024],
    "lr_scheduler_type": "reduce_on_plateau",
    "activation": "gelu",
}

# Train with scaling for S11
models, results, predictions, scalers = train_frequency_aware_models(
    X_train, X_test, Y_raw_train, Y_raw_test, hyperparameters=best_hyperparameters
)

# You can also save the scalers for future use
joblib.dump(scalers, "freq_aware_results/all_scalers.pkl")

Using device: cuda
Identified 15 frequency-related features and 6 other features

Training frequency-aware model for S22
Epoch 10/200, Train Loss: 0.007110, Val Loss: 0.006922, LR: 0.00200000
Epoch 20/200, Train Loss: 0.005822, Val Loss: 0.005030, LR: 0.00200000
Epoch 30/200, Train Loss: 0.005383, Val Loss: 0.005435, LR: 0.00170000
Epoch 40/200, Train Loss: 0.004940, Val Loss: 0.004581, LR: 0.00144500
Epoch 50/200, Train Loss: 0.004763, Val Loss: 0.004272, LR: 0.00144500
Epoch 60/200, Train Loss: 0.004666, Val Loss: 0.004687, LR: 0.00104401
Epoch 70/200, Train Loss: 0.004576, Val Loss: 0.004091, LR: 0.00088741
Epoch 80/200, Train Loss: 0.004447, Val Loss: 0.003861, LR: 0.00075430
Epoch 90/200, Train Loss: 0.004437, Val Loss: 0.003941, LR: 0.00054498
Epoch 100/200, Train Loss: 0.004373, Val Loss: 0.003898, LR: 0.00046323
Epoch 110/200, Train Loss: 0.004296, Val Loss: 0.003755, LR: 0.00039375
Epoch 120/200, Train Loss: 0.004243, Val Loss: 0.003723, LR: 0.00028448
Epoch 130/200, Train Los

['freq_aware_results/all_scalers.pkl']