In [1]:
import wfdb
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy.signal import medfilt
import pywt
from ecgdetectors import Detectors
from scipy.signal import resample_poly
import csv

# WFDB record paths (directory + record name, no file extension) of the
# NSR recordings processed by this notebook.
_NSRDB_DIR = './data/nsrdb'
_NSR_RECORD_NAMES = (
    '16265', '16272', '16273', '16420', '16483', '16539',
    '16773', '16786', '16795', '17052', '17453', '18177',
    '18184', '19088', '19090', '19093', '19140', '19830',
)
nsr_data = [f'{_NSRDB_DIR}/{name}' for name in _NSR_RECORD_NAMES]

def extract_first_30min_and_segment(record_paths, fs=128):
    """
    Read the first 30 minutes of each WFDB record and cut it into six
    consecutive 5-minute segments.

    Parameters
    ----------
    record_paths : iterable of str
        WFDB record paths without extension, e.g. './data/nsrdb/16265'.
    fs : int, optional
        Expected sampling rate in Hz (default 128). It determines how many
        samples make up 30 and 5 minutes.

    Returns
    -------
    dict
        Maps record name (last path component) -> list of six 1-D NumPy
        arrays, each holding 5 * 60 * fs samples of the FIRST channel only
        (column 0 of the record's physical signal), in chronological order.

    Notes
    -----
    A record is skipped (with a printed message) when it cannot be read, has
    no physical signal, reports a sampling rate different from `fs`, or
    yields fewer than 30 minutes of samples. Skipped records do not appear
    in the returned dictionary.
    """
    n_segments = 6
    # Sample counts depend only on fs, so compute them once up front.
    num_samples_30min = int(30 * 60 * fs)  # 1800 s
    samples_5min = int(5 * 60 * fs)        # 300 s

    first_30min_segments = {}

    for record_path in record_paths:
        # e.g. record_path = "./data/nsrdb/16265" => record_name = "16265"
        record_name = os.path.basename(record_path)
        print(f"Processing {record_name} ...")

        # Read samples [0, num_samples_30min) only.
        try:
            rec = wfdb.rdrecord(record_path, sampfrom=0, sampto=num_samples_30min)
        except Exception as e:
            print(f"[ERROR] Could not read {record_path}: {e}")
            continue

        if rec.p_signal is None:
            print(f"[WARN] No signal found in {record_name}. Skipping.")
            continue

        # The sample counts above are only 30/5 minutes if the record really
        # is sampled at `fs`; otherwise the segment durations would be wrong.
        if rec.fs != fs:
            print(f"[WARN] {record_name} is sampled at {rec.fs} Hz, expected {fs} Hz. Skipping.")
            continue

        # Keep the first channel only.
        full_30min = rec.p_signal[:, 0]

        # Slicing past the end would silently give short or empty segments.
        if len(full_30min) < num_samples_30min:
            print(f"[WARN] {record_name} has fewer than 30 minutes of samples. Skipping.")
            continue

        segments_5min = [
            full_30min[k * samples_5min:(k + 1) * samples_5min]
            for k in range(n_segments)
        ]

        first_30min_segments[record_name] = segments_5min
        print(f"[OK] Extracted 6 segments of 5 min each from {record_name}.")

    return first_30min_segments


def denoise_signal(X, dwt_transform, dlevels, cutoff_low, cutoff_high):
    """
    Wavelet band-selection filter: decompose, zero the unwanted coefficient
    bands, reconstruct.

    Parameters
    ----------
    X : array-like
        Input signal (decomposed along its last axis).
    dwt_transform : str
        Wavelet name passed to PyWavelets, e.g. 'bior4.4' or 'rbio1.5'.
    dlevels : int
        Decomposition level passed to `pywt.wavedec`.
    cutoff_low : int
        Coefficient arrays with index < cutoff_low are zeroed.
    cutoff_high : int
        Coefficient arrays with index >= cutoff_high are zeroed.

    Returns
    -------
    numpy.ndarray
        Reconstructed signal, trimmed to the same number of samples as `X`.

    Notes
    -----
    `pywt.wavedec` returns coefficients ordered [cA_n, cD_n, ..., cD_1], so
    index 0 is the coarsest approximation and the last index is the finest
    detail band.
    """
    coeffs = pywt.wavedec(X, dwt_transform, level=dlevels)

    # Bands outside [cutoff_low, cutoff_high) are discarded.
    discarded = list(range(0, cutoff_low)) + list(range(cutoff_high, len(coeffs)))
    for band in discarded:
        coeffs[band] = np.zeros_like(coeffs[band])

    Y = pywt.waverec(coeffs, dwt_transform)  # inverse wavelet transform

    # The reconstruction can be one sample longer than an odd-length input;
    # trim so output indices line up with input indices.
    n_samples = np.shape(X)[-1]
    return Y[..., :n_samples]


def r_peak_finder(ecg_sig, fs=128):
    """
    Locate R-peaks in a 1-D ECG signal with the Engzee detector.

    Parameters
    ----------
    ecg_sig : array-like
        1-D ECG samples.
    fs : int, optional
        Sampling rate in Hz handed to `Detectors` (default 128).

    Returns
    -------
    numpy.ndarray
        Integer sample indices of the detected R-peaks, relative to the
        start of `ecg_sig`. Indices that fall outside `ecg_sig` after
        removing the padding offset are dropped.
    """
    # A short zero pad is prepended to help detect peaks near the start.
    signal_pad_samples = 10
    padded_sig = np.hstack((np.zeros(signal_pad_samples), ecg_sig))

    detector_obj = Detectors(fs)
    detected = detector_obj.engzee_detector(padded_sig)

    # Shift indices back so they refer to the unpadded signal.
    r_peaks = np.asarray(detected, dtype=int) - signal_pad_samples

    # A detection inside the pad would become a negative (invalid) index.
    in_signal = (r_peaks >= 0) & (r_peaks < len(ecg_sig))
    return r_peaks[in_signal]


def compute_hrv_features(r_peaks, fs=128):
    """
    Time-domain HRV features from R-peak sample indices.

    Parameters
    ----------
    r_peaks : array-like
        R-peak positions in samples.
    fs : int, optional
        Sampling rate in Hz used to convert sample differences to time
        (default 128).

    Returns
    -------
    dict
        Keys: MeanRR, RMSDD, pNN50, SDRR, CVRR, NN50, MinRR, MaxRR.
        RR statistics are computed in milliseconds, pNN50 and CVRR in
        percent and NN50 as a count; EVERY value is then divided by 1000
        before being stored. When fewer than two R-peaks are supplied (no
        RR interval can be formed) the all-zero defaults are returned.
    """
    features = {
        'MeanRR': 0.0,
        'RMSDD': 0.0,
        'pNN50': 0.0,
        'SDRR': 0.0,
        'CVRR': 0.0,
        'NN50': 0,
        'MinRR': 0.0,
        'MaxRR': 0.0
    }

    rr_samples = np.diff(np.asarray(r_peaks))
    # Without at least one RR interval, min/max/mean are undefined
    # (np.min raises on an empty array), so return the defaults.
    if rr_samples.size == 0:
        return features

    rr_ms = (rr_samples / fs) * 1000.0  # convert to ms

    mean_rr = np.mean(rr_ms)
    # Sample standard deviation needs at least two intervals.
    sdrr = np.std(rr_ms, ddof=1) if len(rr_ms) > 1 else 0.0
    min_rr = np.min(rr_ms)
    max_rr = np.max(rr_ms)

    # Successive-difference statistics need at least two intervals.
    rr_diffs = np.diff(rr_ms)
    if len(rr_diffs) > 0:
        rmssd = np.sqrt(np.mean(rr_diffs**2))
        nn50 = np.sum(np.abs(rr_diffs) > 50)
        pnn50 = (nn50 / len(rr_diffs)) * 100
    else:
        rmssd = 0.0
        nn50 = 0
        pnn50 = 0.0

    cvrr = (sdrr / mean_rr * 100.0) if mean_rr else 0.0

    # NOTE(review): dividing by 1000 turns the ms quantities into seconds,
    # but it is also applied to pNN50 and CVRR (percentages) and NN50 (a
    # count), which looks unintended. Kept as-is so values stay comparable
    # with previously generated feature files; confirm before changing.
    features['MeanRR'] = mean_rr / 1000.0
    features['RMSDD'] = rmssd / 1000.0
    features['pNN50'] = pnn50 / 1000.0
    features['SDRR'] = sdrr / 1000.0
    features['CVRR'] = cvrr / 1000.0
    features['NN50'] = nn50 / 1000.0
    features['MinRR'] = min_rr / 1000.0
    features['MaxRR'] = max_rr / 1000.0

    return features

In [2]:
# Ordinal names for the six consecutive 5-minute segments, indexed by segment position.
label_map = [
    "First",
    "Second",
    "Third",
    "Fourth",
    "Fifth",
    "Sixth",
]

def save_hrv_to_csv(features_dict, csv_path):
    """
    Write one HRV feature set to `csv_path` as a header line plus one data row.

    An existing file at `csv_path` is overwritten. Only these keys of
    `features_dict` are written, in this column order:
    MeanRR, RMSDD, pNN50, SDRR, CVRR, NN50, MinRR, MaxRR.
    A missing key raises KeyError.
    """
    columns = ["MeanRR", "RMSDD", "pNN50", "SDRR", "CVRR", "NN50", "MinRR", "MaxRR"]
    # newline='' lets the csv module control line endings itself.
    with open(csv_path, mode='w', newline='') as out_file:
        rows = csv.writer(out_file)
        rows.writerow(columns)
        rows.writerow([features_dict[name] for name in columns])
    print(f"  -> Saved HRV features to {csv_path}")

In [3]:
# Load the first 30 minutes of every NSR record and split each into six
# 5-minute segments (dict: record name -> list of segments).
nsr_segments  = extract_first_30min_and_segment(nsr_data)

Processing 16265 ...
[OK] Extracted 6 segments of 5 min each from 16265.
Processing 16272 ...
[OK] Extracted 6 segments of 5 min each from 16272.
Processing 16273 ...
[OK] Extracted 6 segments of 5 min each from 16273.
Processing 16420 ...
[OK] Extracted 6 segments of 5 min each from 16420.
Processing 16483 ...
[OK] Extracted 6 segments of 5 min each from 16483.
Processing 16539 ...
[OK] Extracted 6 segments of 5 min each from 16539.
Processing 16773 ...
[OK] Extracted 6 segments of 5 min each from 16773.
Processing 16786 ...
[OK] Extracted 6 segments of 5 min each from 16786.
Processing 16795 ...
[OK] Extracted 6 segments of 5 min each from 16795.
Processing 17052 ...
[OK] Extracted 6 segments of 5 min each from 17052.
Processing 17453 ...
[OK] Extracted 6 segments of 5 min each from 17453.
Processing 18177 ...
[OK] Extracted 6 segments of 5 min each from 18177.
Processing 18184 ...
[OK] Extracted 6 segments of 5 min each from 18184.
Processing 19088 ...
[OK] Extracted 6 segments of 5

In [4]:
# Directory that receives one HRV-feature CSV per (subject, segment);
# created if missing, left as-is if it already exists.
output_dir_nsr = "NSR_Features_CSV_17apr"
os.makedirs(output_dir_nsr, exist_ok=True)

In [5]:
# Process NSR data (normal order => 1st is earliest 5 min).
# For every subject and each of its six 5-min segments:
# denoise -> detect R-peaks -> compute HRV features -> write one CSV file.
for subj_id, seg_list in nsr_segments.items():
    print(f"\nProcessing NSR subject {subj_id}...")
    # seg_list[0] = first 5 min, seg_list[1] = second 5 min, ...
    for i in range(6):
        segment_label = f"NSR_{subj_id}_{label_map[i]}_5_min"  # e.g. "NSR_16265_First_5_min"
        ecg_signal = seg_list[i]

        # 1) Denoise: 9-level 'rbio1.5' wavelet decomposition; denoise_signal zeroes
        #    the coefficient arrays with index < 1 and index >= 7 before reconstructing
        denoised_signal = denoise_signal(ecg_signal, 'rbio1.5', 9, 1 , 7)
        # 2) Detect R-peaks on the denoised signal
        r_peaks = r_peak_finder(denoised_signal)
        # 3) Compute time-domain HRV features from the R-peak indices
        feats = compute_hrv_features(r_peaks, fs=128)
        # 4) Build CSV file name, e.g. "NSR_16265_First_5_min.csv"
        csv_filename = f"{segment_label}.csv"
        csv_path = os.path.join(output_dir_nsr, csv_filename)
        # 5) Save (overwrites any existing file with the same name)
        save_hrv_to_csv(feats, csv_path)


Processing NSR subject 16265...
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16265_First_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16265_Second_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16265_Third_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16265_Fourth_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16265_Fifth_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16265_Sixth_5_min.csv

Processing NSR subject 16272...
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16272_First_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16272_Second_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16272_Third_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16272_Fourth_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16272_Fifth_5_min.csv
  -> Saved HRV features to NSR_Features_CSV_17apr\NSR_16272_Sixth_5_min.csv

Processing NSR su