In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, BatchNormalization, Dropout, LSTM, SpatialDropout1D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Attention, GlobalAveragePooling1D

import wandb
df = pd.read_pickle('/Users/athenasaghi/VSProjects/CognitiveFatigueDetection/processed_data_full.pkl')

X_raweeg = np.stack(df['raweeg'].values)
X_features = np.stack(df['features'].values)
Y_labels = df['label'].values

# Remove class 3
mask = Y_labels != 3
X_raweeg = X_raweeg[mask]
X_features = X_features[mask]
Y_labels = Y_labels[mask]

# Update labels: 0 -> 0, 1 and 2 -> 1
# Y_labels[Y_labels == 1] = 1
Y_labels[Y_labels == 2] = 1

num_classes = len(np.unique(Y_labels))
Y_labels = to_categorical(Y_labels, num_classes=num_classes)

scaler = StandardScaler()
X_raweeg = scaler.fit_transform(X_raweeg.reshape(-1, X_raweeg.shape[-1])).reshape(X_raweeg.shape)
X_features = scaler.fit_transform(X_features.reshape(-1, X_features.shape[-1])).reshape(X_features.shape)

# Replace padding zeros with small noise to avoid learning issues
X_raweeg[X_raweeg == 0] = np.random.normal(loc=0, scale=1e-6, size=np.sum(X_raweeg == 0))

# Oversample minority classes
X_resampled, Y_resampled, Xf_resampled = [], [], []
labels = np.argmax(Y_labels, axis=1)
for cls in np.unique(labels):
    cls_indices = np.where(labels == cls)[0]
    X_cls_raweeg = X_raweeg[cls_indices]
    X_cls_features = X_features[cls_indices]
    Y_cls = Y_labels[cls_indices]
    X_cls_raweeg, Y_cls, X_cls_features = resample(X_cls_raweeg, Y_cls, X_cls_features, n_samples=max([len(np.where(labels == c)[0]) for c in np.unique(labels)]), random_state=42)
    X_resampled.append(X_cls_raweeg)
    Y_resampled.append(Y_cls)
    Xf_resampled.append(X_cls_features)

X_raweeg = np.vstack(X_resampled)
Y_labels = np.vstack(Y_resampled)
X_features = np.vstack(Xf_resampled)

# Split into train and test sets
X_train_raweeg, X_test_raweeg, X_train_features, X_test_features, Y_train, Y_test = train_test_split(
    X_raweeg, X_features, Y_labels, test_size=0.2, random_state=42, stratify=np.argmax(Y_labels, axis=1)
)


In [38]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import AdamW
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D


scaler_raweeg = MinMaxScaler()
scaler_features = MinMaxScaler()

X_train_raweeg = scaler_raweeg.fit_transform(X_train_raweeg.reshape(-1, X_train_raweeg.shape[-1])).reshape(X_train_raweeg.shape)
X_test_raweeg = scaler_raweeg.transform(X_test_raweeg.reshape(-1, X_test_raweeg.shape[-1])).reshape(X_test_raweeg.shape)

X_train_features = scaler_features.fit_transform(X_train_features.reshape(-1, X_train_features.shape[-1])).reshape(X_train_features.shape)
X_test_features = scaler_features.transform(X_test_features.reshape(-1, X_test_features.shape[-1])).reshape(X_test_features.shape)

print(X_train_raweeg.shape, X_train_features.shape, Y_train.shape)
print(X_test_raweeg.shape, X_test_features.shape, Y_test.shape)

def add_noise(data, noise_level=0.05):
    return data + noise_level * np.random.randn(*data.shape)

X_train_raweeg = add_noise(X_train_raweeg, noise_level=0.001)
X_train_features = add_noise(X_train_features, noise_level=0.01)


# Encoder function with L2 normalization before output
def create_cnn_encoder(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(16, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(32, kernel_size=3, activation='relu')(x)
    x = GlobalAveragePooling1D()(x)
    x = Dense(32, activation=None)(x)
    model = Model(inputs=inputs, outputs=x)
    return model


def create_lstm_encoder(input_shape):
    inputs = Input(shape=input_shape)
    x = LSTM(32, return_sequences=False, kernel_regularizer=l2(1e-4))(inputs)
    x = Dropout(0.5)(x)
    x = Dense(32, activation=None, kernel_regularizer=l2(1e-4))(x)
    model = Model(inputs=inputs, outputs=x)
    return model


encoder_raweeg = create_cnn_encoder(X_train_raweeg.shape[1:])
encoder_features = create_cnn_encoder(X_train_features.shape[1:])


def contrastive_loss(z_raweeg, z_features, temperature=0.2):
    epsilon = 1e-6
    z_raweeg = tf.math.l2_normalize(z_raweeg + epsilon, axis=1)
    z_features = tf.math.l2_normalize(z_features + epsilon, axis=1)
    logits = tf.matmul(z_raweeg, z_features, transpose_b=True) / temperature
    batch_size = tf.shape(logits)[0]
    labels = tf.range(batch_size)
    loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
    return tf.reduce_mean(loss)



lr_schedule = ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=1000,
    decay_rate=0.96,
    staircase=True
)

optimizer = AdamW(learning_rate=lr_schedule, weight_decay=1e-3, clipnorm=0.6)
epochs = 400
batch_size = 64 


for epoch in range(epochs):
    idx = np.random.permutation(len(X_train_raweeg))
    X_raweeg_shuffled = X_train_raweeg[idx]
    X_features_shuffled = X_train_features[idx]
    losses = []

    # print(len(X_train_raweeg) - batch_size + 1, batch_size)
    for i in range(0, len(X_train_raweeg) - batch_size + 1, batch_size):  # Ensure full batches
        x_r_batch = X_raweeg_shuffled[i:i+batch_size]
        x_f_batch = X_features_shuffled[i:i+batch_size]
        with tf.GradientTape() as tape:
            z_r = encoder_raweeg(x_r_batch, training=True)
            z_f = encoder_features(x_f_batch, training=True)
            loss = contrastive_loss(z_r, z_f)

        grads = tape.gradient(loss, encoder_raweeg.trainable_variables + encoder_features.trainable_variables)

        # Check for NaN in gradients
        if any(tf.math.reduce_any(tf.math.is_nan(g)) for g in grads if g is not None):
            print("NaN detected in gradients! Stopping training.")
            break

        optimizer.apply_gradients(zip(grads, encoder_raweeg.trainable_variables + encoder_features.trainable_variables))
        losses.append(loss.numpy())

    print(f"Epoch {epoch+1}/{epochs}, Contrastive Loss: {np.mean(losses):.4f}")

# Check if embeddings are meaningful (should have variance)
z_r_sample = encoder_raweeg.predict(X_train_raweeg[:10])
print("Encoder Output - Mean:", np.mean(z_r_sample), "Std Dev:", np.std(z_r_sample))


(78, 1854, 4) (78, 202, 16) (78, 2)
(20, 1854, 4) (20, 202, 16) (20, 2)
Epoch 1/400, Contrastive Loss: 4.2200
Epoch 2/400, Contrastive Loss: 4.1952
Epoch 3/400, Contrastive Loss: 4.1598
Epoch 4/400, Contrastive Loss: 4.1591
Epoch 5/400, Contrastive Loss: 4.1595
Epoch 6/400, Contrastive Loss: 4.1574
Epoch 7/400, Contrastive Loss: 4.1572
Epoch 8/400, Contrastive Loss: 4.1565
Epoch 9/400, Contrastive Loss: 4.1563
Epoch 10/400, Contrastive Loss: 4.1564
Epoch 11/400, Contrastive Loss: 4.1547
Epoch 12/400, Contrastive Loss: 4.1550
Epoch 13/400, Contrastive Loss: 4.1542
Epoch 14/400, Contrastive Loss: 4.1532
Epoch 15/400, Contrastive Loss: 4.1516
Epoch 16/400, Contrastive Loss: 4.1469
Epoch 17/400, Contrastive Loss: 4.1442
Epoch 18/400, Contrastive Loss: 4.1387
Epoch 19/400, Contrastive Loss: 4.1327
Epoch 20/400, Contrastive Loss: 4.1172
Epoch 21/400, Contrastive Loss: 4.1088
Epoch 22/400, Contrastive Loss: 4.1108
Epoch 23/400, Contrastive Loss: 4.1167
Epoch 24/400, Contrastive Loss: 4.1082
E

In [39]:
from tensorflow.keras.layers import Concatenate
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
import numpy as np
import tensorow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense, BatchNormalization, Conv1D, MaxPooling1D, GlobalAveragePooling1D, Concatenate
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix
k = 5
epochs = 400
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
num_classes = Y_labels.shape[1]

metrics_all = { 'loss': [], 'val_loss': [], 'accuracy': [], 'val_accuracy': [], 'auc': [], 'val_auc': [],
                'precision': [], 'val_precision': [], 'recall': [], 'val_recall': [],
                'f1': [], 'val_f1': [], 'specificity': [], 'val_specificity': [], 'confusion_matrix': []}

metrics_per_class = { f'{metric}_class_{i}': [] for i in range(num_classes)
                      for metric in ['precision', 'recall', 'f1', 'specificity',
                                     'val_precision', 'val_recall', 'val_f1', 'val_specificity','val_acc'] }

true_labels = []
pred_labels = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_raweeg, np.argmax(Y_train, axis=1))):
    print(f"\n--------------- Fold {fold + 1}/{k}")

    X_train_raweeg_fold, X_val_raweeg_fold = X_train_raweeg[train_idx], X_train_raweeg[val_idx]
    X_train_features_fold, X_val_features_fold = X_train_features[train_idx], X_train_features[val_idx]
    Y_train_fold, Y_val_fold = Y_train[train_idx], Y_train[val_idx]

    input_raweeg = Input(shape=(X_train_raweeg.shape[1], X_train_raweeg.shape[2]))
    input_features = Input(shape=(X_train_features.shape[1], X_train_features.shape[2]))

    z_raweeg = encoder_raweeg(input_raweeg)
    z_features = encoder_features(input_features)

    combined = Concatenate()([z_raweeg, z_features])
    output = Dense(num_classes, activation='softmax')(combined)

    classifier = Model(inputs=[input_raweeg, input_features], outputs=output)
    classifier.compile(optimizer=AdamW(learning_rate=1e-2, weight_decay=1e-4),
                       loss='categorical_crossentropy',
                       metrics=['accuracy', tf.keras.metrics.AUC(name='auc'),
                                tf.keras.metrics.Precision(name='precision'),
                                tf.keras.metrics.Recall(name='recall'),
                                tf.keras.metrics.SpecificityAtSensitivity(0.5, name='specificity')] +
                                [tf.keras.metrics.Precision(name=f'precision_class_{i}', class_id=i) for i in range(num_classes)] +
                                [tf.keras.metrics.Recall(name=f'recall_class_{i}', class_id=i) for i in range(num_classes)])


    history = classifier.fit([X_train_raweeg_fold, X_train_features_fold], Y_train_fold,
                             validation_data=([X_val_raweeg_fold, X_val_features_fold], Y_val_fold),
                             epochs=epochs, batch_size=128, verbose=1,
                             callbacks=[ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)])



    for metric in metrics_all.keys():
        # print(metric)
        if metric in history.history:
            metrics_all[metric].append(history.history[metric])

    y_pred = classifier.predict([X_val_raweeg_fold, X_val_features_fold])
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(Y_val_fold, axis=1)

    true_labels.extend(y_true_classes)
    pred_labels.extend(y_pred_classes)

    cm = confusion_matrix(y_true_classes, y_pred_classes)
    cm_dict = {f"row_{i}": row.tolist() for i, row in enumerate(cm)}

    metrics_all['confusion_matrix'] = cm_dict

    for i in range(num_classes):
            metrics_per_class[f'precision_class_{i}'].append(history.history[f'precision_class_{i}'])
            metrics_per_class[f'recall_class_{i}'].append(history.history[f'recall_class_{i}'])
            f1_class = f1_score(y_true_classes, y_pred_classes, average=None)[i]
            metrics_per_class[f'f1_class_{i}'].append(f1_class)
            mask = (y_true_classes == i)
            metrics_per_class[f'val_acc_class_{i}'].append(accuracy_score(y_true_classes[mask], y_pred_classes[mask]))


ModuleNotFoundError: No module named 'tensorow'

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

y_pred_test = classifier.predict([X_test_raweeg, X_test_features])
y_pred_classes_test = np.argmax(y_pred_test, axis=1)
y_true_classes_test = np.argmax(Y_test, axis=1)

test_accuracy = accuracy_score(y_true_classes_test, y_pred_classes_test)
test_auc = roc_auc_score(Y_test, y_pred_test, multi_class='ovr')
conf_matrix = confusion_matrix(y_true_classes_test, y_pred_classes_test)

print(f"\nTest Accuracy: {test_accuracy:.4f}")
print(f"Test AUC: {test_auc:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step

Test Accuracy: 0.7000
Test AUC: 0.7900
