In [25]:
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Dropout, Dense, Flatten,Input,GlobalAveragePooling1D
from tensorflow.keras.metrics import R2Score,MeanAbsolutePercentageError
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models, regularizers
import tensorflow as tf

In [2]:
# Load the preprocessed signal table from the working directory.
# The columns used below are "result", "ppg_signal" and "ecg_signal"; the two
# signal columns hold JSON-encoded lists stored as strings.
df=pd.read_csv("pp_ecg_preprocessedSignalLarge.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,caseid,dt,result,ppg_tid,ecg_tid,ppg_signal,ecg_signal
0,0,1,3060.0,154.0,9acbed98f1f15c7827ee3bcc55eaef19f861b824,8c9161aaae8cb578e2aa7b60f44234d98d2b3344,"[-1.5116935159800502, -2.6001532337322755, -3....","[-4.635602658133209, -3.924501035151671, -2.77..."
1,0,1,3060.0,154.0,9acbed98f1f15c7827ee3bcc55eaef19f861b824,8c9161aaae8cb578e2aa7b60f44234d98d2b3344,"[-2.0239566627080916, -2.8439508903118993, -3....","[-4.698922745411354, -3.3754397274073, -1.7596..."
2,0,1,3060.0,154.0,9acbed98f1f15c7827ee3bcc55eaef19f861b824,8c9161aaae8cb578e2aa7b60f44234d98d2b3344,"[-2.1229598869289186, -3.148025636945019, -4.0...","[3.1132789651423773, 3.490500268912875, 3.7903..."
3,0,1,3060.0,154.0,9acbed98f1f15c7827ee3bcc55eaef19f861b824,8c9161aaae8cb578e2aa7b60f44234d98d2b3344,"[-2.5874447985985203, -3.394867110376111, -4.1...","[-0.83490506168045, -0.835910708782481, -0.835..."
4,0,1,3060.0,154.0,9acbed98f1f15c7827ee3bcc55eaef19f861b824,8c9161aaae8cb578e2aa7b60f44234d98d2b3344,"[-0.7960797549960681, -1.7471838487999183, -2....","[-1.8446166839141898, -1.418030027366001, -0.7..."


In [4]:
df.shape

(11532, 8)

In [7]:
def check_min_sample_size(df, expected_len=100):
    """Report labelled rows whose PPG or ECG signal has an unexpected length.

    Rows whose "result" is NaN are skipped. For every remaining row the
    JSON-encoded "ppg_signal" and "ecg_signal" columns are decoded, and the
    row is printed as ``<index> <ppg length> <ecg length>`` when either
    length differs from ``expected_len``.

    Args:
        df: DataFrame with "result", "ppg_signal" and "ecg_signal" columns.
        expected_len: Number of samples every signal should contain.

    Returns:
        None. Offending rows are reported on stdout only; no output means
        every labelled row has the expected length.
    """
    for index, row in df.iterrows():
        if np.isnan(row["result"]):
            continue
        ppg_len = len(json.loads(row["ppg_signal"]))
        ecg_len = len(json.loads(row["ecg_signal"]))
        if ppg_len != expected_len or ecg_len != expected_len:
            print(index, ppg_len, ecg_len)

In [5]:
# Remove the row whose index label is 9875 (label-based, not positional).
# NOTE(review): presumably the row flagged by check_min_sample_size — confirm.
# errors="ignore" keeps the cell re-runnable once the row is already gone.
df = df.drop(index=9875, errors="ignore")

In [8]:
# Prints one line per labelled row whose PPG/ECG length is not 100;
# no printed output means every labelled row passed the check.
check_min_sample_size(df)

In [9]:
def create_X_and_Y_from_df(df):
    """Build model inputs and targets from the signal table.

    Rows whose "result" is NaN are skipped. For every other row the
    JSON-encoded "ppg_signal" and "ecg_signal" columns are decoded and
    interleaved timestep by timestep into ``[ppg, ecg]`` pairs; the number of
    timesteps is taken from the PPG signal.

    Args:
        df: DataFrame with "result", "ppg_signal" and "ecg_signal" columns.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays: X stacks the per-row pair lists
        (shape ``(rows, timesteps, 2)`` when all signals share one length) and
        Y holds the matching "result" values.
    """
    samples, targets = [], []
    for _, record in df.iterrows():
        target = record["result"]
        if np.isnan(target):
            continue
        ppg = json.loads(record["ppg_signal"])
        ecg = json.loads(record["ecg_signal"])
        # Pair the two channels per timestep: [[ppg_0, ecg_0], [ppg_1, ecg_1], ...]
        samples.append([[ppg[t], ecg[t]] for t in range(len(ppg))])
        targets.append(target)
    return np.array(samples), np.array(targets)

In [10]:
X,Y=create_X_and_Y_from_df(df)

In [30]:
X.shape

(8951, 100, 2)

In [31]:
Y.shape

(8951,)

In [32]:
# Standardize each signal type separately
# Every slice X[:, :, i] is shifted and scaled in place with the mean and
# standard deviation taken over all samples and timesteps of that channel.
# NOTE(review): the statistics use the full X, before the train/test split in
# the next cell, so held-out samples influence the scaling — consider fitting
# the statistics on the training portion only.
for i in range(X.shape[2]):
    X[:,:,i] = (X[:,:,i] - X[:,:,i].mean()) / X[:,:,i].std()

In [37]:
# Hold out 20% of the samples as a test set; random_state fixes the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [45]:
# 1-D CNN regressor: three Conv1D stages (each followed by batch norm, pooling
# and dropout), global average pooling, one dense hidden layer and a single
# linear output unit.
model = Sequential([
    # Each sample is 100 timesteps x 2 features.
    Input(shape=(100, 2)),

    # Stage 1: 64 filters, kernel width 5
    Conv1D(64, 5, activation='relu', kernel_regularizer=l2(0.001),
           kernel_initializer='he_normal'),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.3),

    # Stage 2: 128 filters, kernel width 3
    Conv1D(128, 3, activation='relu', kernel_regularizer=l2(0.001),
           kernel_initializer='he_normal'),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.3),

    # Stage 3: 256 filters, kernel width 3, slightly heavier dropout
    Conv1D(256, 3, activation='relu', kernel_regularizer=l2(0.001),
           kernel_initializer='he_normal'),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.4),

    # Collapse the time axis by averaging (used here instead of Flatten).
    GlobalAveragePooling1D(),

    Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),

    # Regression head: one unbounded output value.
    Dense(1, activation='linear'),
])

In [48]:
# Callbacks consumed by the fit cell: halve the learning rate after 5 epochs
# without val_loss improvement (never below 1e-6), and stop after 15 such
# epochs while restoring the best weights seen.
lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
es = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Optimise mean squared error with Adam; MAE, MSE, R^2 and MAPE are tracked
# as additional metrics.
adam = Adam(learning_rate=0.0001)
model.compile(
    optimizer=adam,
    loss='mean_squared_error',
    metrics=['mae', 'mse', R2Score(), MeanAbsolutePercentageError()],
)

In [None]:
# Train for at most 200 epochs in batches of 64. validation_split=0.2 asks
# Keras to hold out part of X_train/Y_train for validation, which feeds the
# val_loss monitored by both callbacks.
history = model.fit(
    X_train, Y_train,
    epochs=200,
    batch_size=64,
    validation_split=0.2,
    callbacks=[lr_schedule, es]
)

In [11]:
# Lower-case name for the second experiment; this binds the same array object
# as Y (no copy is made here).
y=Y

In [12]:
# Keep only samples whose target lies in the closed range [70, 250].
# The same boolean mask is applied to X and y so they stay aligned.
valid_mask = (y >= 70) & (y <= 250)
X = X[valid_mask]
y = y[valid_mask]

In [13]:
def normalize_channels(X):
    """Standardise every channel (last axis) to zero mean and unit variance.

    The mean and population standard deviation of each channel are computed
    over all other axes, and a new array is returned; ``X`` is not modified.

    The result is always floating point: a floating input keeps its dtype,
    any other dtype is promoted to float64. (Allocating the output with
    ``np.zeros_like(X)`` would inherit an integer dtype and truncate the
    standardised values.) A channel with zero variance is mapped to all
    zeros instead of dividing by zero.

    Args:
        X: Array whose last axis indexes channels, e.g. (samples, timesteps,
            channels).

    Returns:
        Array of the same shape as ``X`` with each channel standardised.
    """
    X = np.asarray(X)
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
    if X.size == 0:
        # Nothing to standardise; avoid mean/std warnings on empty input.
        return X.astype(out_dtype)

    # Reduce over every axis except the trailing channel axis.
    reduce_axes = tuple(range(X.ndim - 1))
    mean = X.mean(axis=reduce_axes, dtype=np.float64)
    std = X.std(axis=reduce_axes, dtype=np.float64)
    # Constant channels: scale by 1 so they become exactly zero.
    std = np.where(std == 0, 1.0, std)
    return ((X - mean) / std).astype(out_dtype)

In [14]:
X = normalize_channels(X)

In [15]:
# 70% train; the remaining 30% is halved into validation and test (15% each).
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Give the fit targets an explicit (n, 1) shape matching the model's Dense(1)
# output. A later cell in this notebook applies the same reshape, but only
# after the fit cell in file order; doing it here keeps a top-to-bottom run
# consistent. NOTE(review): some Keras versions' R2Score rejects 1-D targets —
# confirm against the installed version. y_test is intentionally left 1-D.
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)

In [20]:
class SignalAugmenter(layers.Layer):
    """Adds zero-mean Gaussian noise to its inputs during training.

    When ``training`` is falsy (including ``None``) the layer is the identity.

    Args:
        noise_std: Standard deviation of the added noise.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, noise_std=0.05, **kwargs):
        super().__init__(**kwargs)
        self.noise_std = noise_std

    def call(self, inputs, training=None):
        if training:
            # Draw the noise in the inputs' dtype so the addition also works
            # when the inputs are not float32.
            noise = tf.random.normal(
                tf.shape(inputs), stddev=self.noise_std, dtype=inputs.dtype
            )
            return inputs + noise
        return inputs

    def get_config(self):
        # Include noise_std so a saved or cloned model rebuilds the layer
        # with the same setting instead of the default.
        config = super().get_config()
        config.update({"noise_std": self.noise_std})
        return config


In [21]:
from tensorflow.keras import layers, Model

def build_bgl_model(input_shape=(100, 2)):
    """Build the functional 1-D CNN regressor named "Enhanced_Model".

    Three same-padded Conv1D + BatchNormalization stages (only the first is
    followed by max pooling), global average pooling over time, a 128-unit
    dense layer with dropout, and a single-unit output.

    Args:
        input_shape: Shape of one sample, (timesteps, channels).

    Returns:
        The uncompiled Keras ``Model``.
    """
    # (filters, kernel_size, pool_after) for each convolutional stage.
    conv_stages = (
        (64, 7, True),
        (128, 5, False),
        (256, 3, False),
    )

    inp = layers.Input(shape=input_shape)

    x = inp
    for filters, kernel_size, pool_after in conv_stages:
        x = layers.Conv1D(filters, kernel_size=kernel_size, activation='relu', padding='same')(x)
        x = layers.BatchNormalization()(x)
        if pool_after:
            x = layers.MaxPooling1D(pool_size=2)(x)

    # Average over the time axis, then the dense regression head.
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    out = layers.Dense(1)(x)

    return Model(inputs=inp, outputs=out, name="Enhanced_Model")


In [22]:
model = build_bgl_model()
model.summary()

In [23]:
# Adam with a small learning rate and a small weight-decay coefficient.
optimizer = Adam(
    learning_rate=1e-4, 
    weight_decay=1e-6  # NOTE(review): optimizer-level weight decay, not an L2 penalty added to the loss — confirm against the installed Keras version
)

# Huber loss for training; MAE, MSE, R^2 and MAPE are tracked as metrics.
model.compile(
    optimizer=optimizer,
    loss='huber',
    metrics=['mae','mse', R2Score(),MeanAbsolutePercentageError()]
)

# Neither callback sets `monitor`, so both use the Keras default.
# EarlyStopping waits 30 epochs and restores the best weights;
# ReduceLROnPlateau multiplies the learning rate by 0.2 after 10 epochs.
callbacks = [
    EarlyStopping(patience=30, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.2, patience=10, verbose=1)
]


In [24]:
# Train for at most 200 epochs in batches of 64, validating on the explicit
# (X_val, y_val) hold-out rather than a validation_split of the training data.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=64,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/200
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 115ms/step - loss: 113.9970 - mae: 114.4970 - mean_absolute_percentage_error: 98.1490 - mse: 14245.3311 - r2_score: -11.6674 - val_loss: 112.9866 - val_mae: 113.4866 - val_mean_absolute_percentage_error: 98.4183 - val_mse: 13946.5352 - val_r2_score: -12.0523 - learning_rate: 1.0000e-04
Epoch 2/200
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 89ms/step - loss: 106.6824 - mae: 107.1824 - mean_absolute_percentage_error: 91.8945 - mse: 12653.1094 - r2_score: -10.2665 - val_loss: 108.2647 - val_mae: 108.7647 - val_mean_absolute_percentage_error: 94.0901 - val_mse: 12890.7656 - val_r2_score: -11.0642 - learning_rate: 1.0000e-04
Epoch 3/200
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 88ms/step - loss: 97.2908 - mae: 97.7908 - mean_absolute_percentage_error: 83.0504 - mse: 10765.3281 - r2_score: -8.7201 - val_loss: 94.7234 - val_mae: 95.2234 - val_mean_absolute_percentage_error

In [100]:
print(X_train.shape)
print(y_train.shape)


(6072, 100, 2)
(6072, 1)


In [99]:
# Reshape the training and validation targets to column vectors of shape (n, 1).
# NOTE(review): in file order this cell comes after the fit cell above, so a
# top-to-bottom run trains before this reshape is applied.
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)


In [102]:
# Re-wrap the training and validation arrays with np.array().
# NOTE(review): these presumably already are ndarrays (they come from
# train_test_split on ndarrays), in which case this only makes copies — confirm
# whether the cell is still needed.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_val = np.array(X_val)
y_val = np.array(y_val)
