In [1]:
"""
Deep Learning Models for SCD vs NSR Classification
==================================================

This script assumes you have already:
  1. Extracted six 5‑minute waveform segments per subject
  2. Detected R‑peaks and computed 8 HRV time‑domain features for every segment
  3. Saved each segment’s features in a CSV file with names such as:
       SCD_<subject>_First_5_mins_before_SCD.csv
       NSR_<subject>_First_5_min.csv

We build three deep‑learning classifiers:
  • MLP on HRV features (tabular)
  • 1‑D CNN on raw ECG waveforms
  • Hybrid CNN‑LSTM on raw ECG waveforms

The code below shows:
  • Loading feature CSVs into a DataFrame
  • Label encoding (SCD=1, NSR=0)
  • Train /     test split (first segment per subject for training, remaining for test)
  • Training an MLP on features
  • Building raw‑signal datasets from numpy arrays (ecg_arrays dict)
  • CNN, LSTM, and CNN‑LSTM architectures
  • Metrics: accuracy, precision, recall, F1, AUC
"""

import os
import glob
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix, roc_curve)
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
############################################################
# 1. LOAD HRV FEATURE CSVs (tabular dataset)
############################################################

def load_feature_csvs(feature_dir):
    """Scan feature_dir for all CSV files, build a DataFrame with label."""
    rows = []
    for csv_path in glob.glob(os.path.join(feature_dir, '*.csv')):
        df = pd.read_csv(csv_path)
        # Determine label from filename
        fname = os.path.basename(csv_path)
        label = 1 if fname.startswith('SCD') else 0  # SCD positive class
        df['Label'] = label
        rows.append(df)
    if not rows:
        raise ValueError(f"No CSV files found in {feature_dir}")
    full_df = pd.concat(rows, ignore_index=True)
    return full_df

In [3]:
feature_dir_scd = 'SCD_Features_CSV_17apr'  # adjust if different
feature_dir_nsr = 'NSR_Features_CSV_17apr'

# Combine both directories
feature_df = pd.concat([
    load_feature_csvs(feature_dir_scd),
    load_feature_csvs(feature_dir_nsr)
], ignore_index=True)

In [7]:
feature_df

Unnamed: 0,MeanRR,RMSDD,pNN50,SDRR,CVRR,NN50,MinRR,MaxRR,Label
0,0.626046,0.456008,0.092662,0.305573,0.048810,0.442,0.257812,2.546875,1
1,0.676552,0.532766,0.091591,0.359284,0.053105,0.403,0.257812,2.328125,1
2,0.697758,0.924626,0.091569,0.676368,0.096934,0.391,0.257812,12.414062,1
3,0.649185,0.429469,0.093464,0.322436,0.049668,0.429,0.304688,2.781250,1
4,0.658207,0.522990,0.092511,0.371825,0.056491,0.420,0.257812,4.312500,1
...,...,...,...,...,...,...,...,...,...
211,0.549427,0.306124,0.075848,0.223507,0.040680,0.380,0.117188,2.070312,0
212,0.520122,0.243812,0.065854,0.190150,0.036559,0.378,0.187500,1.304688,0
213,0.565243,0.250536,0.063498,0.185425,0.032805,0.334,0.140625,1.632812,0
214,0.593890,0.342075,0.072962,0.252583,0.042530,0.367,0.125000,3.375000,0


In [8]:
X = feature_df.drop(columns=['Label']).values
y = feature_df['Label'].values

In [9]:
X

array([[ 0.62604603,  0.45600803,  0.09266247, ...,  0.442     ,
         0.2578125 ,  2.546875  ],
       [ 0.67655187,  0.53276556,  0.09159091, ...,  0.403     ,
         0.2578125 ,  2.328125  ],
       [ 0.69775847,  0.92462642,  0.09156909, ...,  0.391     ,
         0.2578125 , 12.4140625 ],
       ...,
       [ 0.56524253,  0.25053621,  0.0634981 , ...,  0.334     ,
         0.140625  ,  1.6328125 ],
       [ 0.59388951,  0.34207519,  0.07296223, ...,  0.367     ,
         0.125     ,  3.375     ],
       [ 0.56227889,  0.25644796,  0.06332703, ...,  0.335     ,
         0.1875    ,  1.625     ]])

In [10]:
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [12]:
X_scaled

array([[-0.3241981 , -0.33749618,  0.56370447, ...,  0.28709251,
         0.0982257 , -0.3516292 ],
       [-0.2292385 , -0.29918953,  0.48538361, ...,  0.06600582,
         0.0982257 , -0.36713535],
       [-0.18936647, -0.10362725,  0.4837886 , ..., -0.00202085,
         0.0982257 ,  0.34780906],
       ...,
       [-0.43851905, -0.44003904, -1.56792514, ..., -0.32514755,
        -1.06327533, -0.41642276],
       [-0.38465784, -0.39435555, -0.87619027, ..., -0.1380742 ,
        -1.21814213, -0.29292733],
       [-0.44409119, -0.43708872, -1.58042844, ..., -0.31947866,
        -0.59867491, -0.41697656]])

In [13]:
scaler

In [14]:
# Train/test split (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)

In [15]:
############################################################
# 2. MLP MODEL FOR TABULAR FEATURES
############################################################

mlp_model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
mlp_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
mlp_model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.1, verbose=0)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<keras.src.callbacks.history.History at 0x1ab254f1f40>

In [16]:
# Evaluate
y_prob_mlp = mlp_model.predict(X_test).ravel()
y_pred_mlp = (y_prob_mlp >= 0.5).astype(int)
print("MLP Metrics:")
print("  Accuracy :", accuracy_score(y_test, y_pred_mlp))
print("  Precision:", precision_score(y_test, y_pred_mlp))
print("  Recall   :", recall_score(y_test, y_pred_mlp))
print("  F1‑score :", f1_score(y_test, y_pred_mlp))
print("  AUC‑ROC  :", roc_auc_score(y_test, y_prob_mlp))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
MLP Metrics:
  Accuracy : 0.9772727272727273
  Precision: 0.9565217391304348
  Recall   : 1.0
  F1‑score : 0.9777777777777777
  AUC‑ROC  : 0.9979338842975207
