In [None]:
# Imports + ResidualAttention layer + custom loss
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib

import tensorflow as tf
from tensorflow.keras import Input, Model, layers
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

# ---------------------------------------------------
# Residual Attention Layer
# ---------------------------------------------------
class ResidualAttention(layers.Layer):
    """Learned feature-mixing layer with a skip connection.

    The layer owns one trainable square matrix of logits whose side equals the
    size of the input's last axis. A softmax along the last axis of that matrix
    produces mixing weights ``A``, and the output is ``x + x @ A^T``.

    The logits are initialised to zeros, so every row of ``A`` starts out
    uniform.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``(d, d)`` logit matrix, ``d`` being the last input dimension."""
        feature_dim = int(input_shape[-1])
        self.att_logits = self.add_weight(
            name="att_logits",
            shape=(feature_dim, feature_dim),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``x`` plus its softmax-weighted mix across the last axis."""
        # Each row of the softmaxed logits sums to one.
        mix_weights = tf.nn.softmax(self.att_logits, axis=-1)
        # transpose_b=True: output feature i = sum_j x[..., j] * mix_weights[i, j]
        mixed = tf.linalg.matmul(x, mix_weights, transpose_b=True)
        # Residual connection: keep the raw input alongside the mixed signal.
        return x + mixed

# ---------------------------------------------------
# Custom Loss
# ---------------------------------------------------
def custom_loss(y_true, y_pred):
    """Weighted mean squared error over the first three target columns.

    Column 0 (the magnitude term) is weighted by 2; columns 1 and 2 (the cos
    and sin terms) are weighted by 1. The weighted per-sample sums are then
    averaged into a single scalar.
    """
    mag_sq, cos_sq, sin_sq = [
        K.square(y_true[:, col] - y_pred[:, col]) for col in range(3)
    ]
    return K.mean(2.0 * mag_sq + cos_sq + sin_sq)



In [None]:
# Load & prepare data
data = pd.read_csv("output_Rx_PEC_lossy_2layers_angles_all.csv")

# Inputs are the first eight columns; targets come from the magnitude and
# phase columns.
X = data.iloc[:, :8].values
y_mag, y_phase = data["Zsmag"].values, data["Zsphase"].values
# NOTE: y_phase is passed straight to np.cos / np.sin, so it must be in radians.

# Targets: log10 of the magnitude plus the (cos, sin) pair of the phase.
y_log = np.log10(y_mag + 1e-9)  # the small offset keeps log10 finite at zero magnitude
y_cos, y_sin = np.cos(y_phase), np.sin(y_phase)
y = np.column_stack((y_log, y_cos, y_sin))

# Split: 98% train, then the remaining 2% is halved into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, train_size=0.98, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Scale: fit on the training inputs only, then apply the same transform to
# every split.
scaler = MinMaxScaler().fit(X_train)
X_train, X_val, X_test = [
    scaler.transform(split) for split in (X_train, X_val, X_test)
]

# Persist the fitted scaler to disk.
joblib.dump(scaler, "input_lossy_2layers_Residual_Attention3.pkl")

In [None]:
# Build model with residual attention

# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# that weight initialisation and dropout start from the same state on every
# run (the data split is already pinned with random_state=42). Some GPU
# kernels may still introduce small run-to-run differences.
tf.keras.utils.set_random_seed(42)

inputs = Input(shape=(8,))  # eight input features, matching the eight columns taken for X

# --- Residual Attention ---
x = ResidualAttention()(inputs)

# --- MLP ---
# Each stage is Dense(ReLU, He-normal init) -> BatchNorm, with dropout that
# tapers off (0.4 -> 0.3 -> 0.2 -> none) as the layers narrow.
x = Dense(512, activation='relu', kernel_initializer='he_normal')(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

x = Dense(256, activation='relu', kernel_initializer='he_normal')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)

x = Dense(128, activation='relu', kernel_initializer='he_normal')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(64, activation='relu', kernel_initializer='he_normal')(x)
x = BatchNormalization()(x)

# Three linear outputs, one per target column consumed by custom_loss.
outputs = Dense(3, activation='linear')(x)

model = Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss=custom_loss,
    metrics=["mae"]
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Callbacks tuned for a very large dataset.

# Stop once val_loss has gone 10 consecutive epochs without improving by at
# least 1e-4; restore_best_weights asks Keras to put back the best weights.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
)

# After 5 epochs without val_loss improvement, multiply the learning rate by
# 0.7 (a 30% cut), never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.7,
    patience=5,
    min_lr=1e-6,
    verbose=2,
)

# Up to 300 epochs; in practice early stopping decides when training ends.
# The large batch size is chosen for throughput on the big training set.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=1024,
    callbacks=[early_stopping, reduce_lr],
    verbose=2,
)

# Persist the trained model in the native Keras format.
model.save("Zsurf_model_lossy_2layers_Residual_Attention3.keras")

In [None]:
# Score the held-out test split. evaluate() returns the loss followed by the
# compiled metric (MAE); both are measured on the transformed targets
# (log-magnitude, cos, sin), not on the raw magnitude/phase.
test_loss, test_mae = model.evaluate(x=X_test, y=y_test, verbose=1)

for label, value in (("Test Loss:", test_loss), ("Test MAE :", test_mae)):
    print(label, value)