In [13]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pywt
import seaborn as sns
from scipy.signal import butter, filtfilt, medfilt, resample_poly
from wfdb import processing, rdrecord

In [5]:
# Every WFDB header file in the NSR database folder identifies one record.
nsr_files = glob.glob("./data/nsrdb/*.hea")

# A record path is the header path with its 4-character ".hea" suffix removed.
nsr_records = []
for header_path in nsr_files:
    nsr_records.append(header_path[: -len(".hea")])

In [7]:
# Inspect the record paths derived from the header files.
nsr_records

['./data/nsrdb\\16265',
 './data/nsrdb\\16272',
 './data/nsrdb\\16273',
 './data/nsrdb\\16420',
 './data/nsrdb\\16483',
 './data/nsrdb\\16539',
 './data/nsrdb\\16773',
 './data/nsrdb\\16786',
 './data/nsrdb\\16795',
 './data/nsrdb\\17052',
 './data/nsrdb\\17453',
 './data/nsrdb\\18177',
 './data/nsrdb\\18184',
 './data/nsrdb\\19088',
 './data/nsrdb\\19090',
 './data/nsrdb\\19093',
 './data/nsrdb\\19140',
 './data/nsrdb\\19830']

In [9]:
# Rebuild the record list with forward slashes only, so the paths look the
# same regardless of which separator glob returned.
nsr_records = []
for header_path in nsr_files:
    record_stem = header_path[: -len(".hea")]
    nsr_records.append(record_stem.replace("\\", "/"))

In [11]:
# Inspect the record paths after separator normalisation.
nsr_records

['./data/nsrdb/16265',
 './data/nsrdb/16272',
 './data/nsrdb/16273',
 './data/nsrdb/16420',
 './data/nsrdb/16483',
 './data/nsrdb/16539',
 './data/nsrdb/16773',
 './data/nsrdb/16786',
 './data/nsrdb/16795',
 './data/nsrdb/17052',
 './data/nsrdb/17453',
 './data/nsrdb/18177',
 './data/nsrdb/18184',
 './data/nsrdb/19088',
 './data/nsrdb/19090',
 './data/nsrdb/19093',
 './data/nsrdb/19140',
 './data/nsrdb/19830']

# Load ECG data

In [18]:
def load_ecg_record(record_path, desired_fs=128, channel=0):
    """
    Load one channel of a WFDB record and return it sampled at `desired_fs`.

    If the record is already at `desired_fs` the samples are returned
    unchanged. Otherwise (e.g. a 250 Hz record with the default 128 Hz
    target) the signal is resampled with a polyphase filter using the exact
    rational ratio desired_fs / original_fs.

    Args:
      record_path: WFDB record path without extension.
      desired_fs: target sampling frequency in Hz (integer-valued).
      channel: index of the channel to read from the record.

    Returns:
      ecg: 1-D numpy array of samples
      fs: sampling frequency of the returned samples

    Raises:
      ValueError: if either sampling frequency is non-positive or not
        integer-valued, so no exact rational resampling ratio exists.
    """
    record = rdrecord(record_path, channels=[channel])
    fs_original = record.fs
    # Only one channel was requested, so it is always column 0 of p_signal.
    ecg = record.p_signal[:, 0]

    if fs_original == desired_fs:
        # Already at the target rate: nothing to do.
        return ecg, fs_original

    # resample_poly needs integer up/down factors, so both rates must be
    # positive whole numbers.
    for rate in (fs_original, desired_fs):
        if rate <= 0 or not float(rate).is_integer():
            raise ValueError(f"Unexpected sampling frequency: {fs_original} Hz. "
                             f"Cannot resample to {desired_fs} Hz.")

    up, down = int(desired_fs), int(fs_original)
    # Reduce the ratio (250 -> 128 Hz becomes 64/125) to keep the filter short.
    common = int(np.gcd(up, down))
    ecg_resampled = resample_poly(ecg, up // common, down // common)
    return ecg_resampled, desired_fs

In [20]:
def segment_ecg_signal(ecg, fs, segment_length_sec=300):
    """
    Split the ECG into consecutive, non-overlapping fixed-length segments.

    With the default `segment_length_sec=300` each segment is 5 minutes long.
    Only complete segments are returned; trailing samples that do not fill a
    whole segment are dropped.

    Args:
      ecg: sequence of samples (anything supporting len() and slicing).
      fs: sampling frequency in Hz.
      segment_length_sec: segment duration in seconds.

    Returns:
      A list of segments, each a slice of `ecg` with
      int(segment_length_sec * fs) samples.

    Raises:
      ValueError: if the segment size works out to zero or fewer samples
        (a zero-length step would otherwise never advance through the signal).
    """
    samples_per_segment = int(segment_length_sec * fs)
    if samples_per_segment <= 0:
        raise ValueError(
            f"Segment size must be at least one sample, got {samples_per_segment} "
            f"(segment_length_sec={segment_length_sec}, fs={fs})."
        )

    # Number of complete segments that fit into the signal.
    n_segments = len(ecg) // samples_per_segment
    return [
        ecg[k * samples_per_segment:(k + 1) * samples_per_segment]
        for k in range(n_segments)
    ]

In [22]:
def denoise_signal_2(X, dwt_transform, dlevels, cutoff_low, cutoff_high):
    """
    Band-limit a signal by zeroing selected wavelet coefficient bands.

    X is decomposed with pywt.wavedec using wavelet `dwt_transform`
    (e.g. 'bior4.4') at `dlevels` levels. Coefficient arrays with list index
    in [0, cutoff_low) and in [cutoff_high, end) are multiplied by zero; the
    remaining bands are kept. The result is the inverse transform of the
    modified coefficients.

    NOTE(review): pywt.waverec may return one sample more than len(X) for
    odd-length input -- confirm callers tolerate that.
    """
    bands = pywt.wavedec(X, dwt_transform, level=dlevels)

    # Indices of the bands to suppress: the low end first, then the high end.
    suppressed = list(range(0, cutoff_low)) + list(range(cutoff_high, len(bands)))
    for band_idx in suppressed:
        bands[band_idx] = np.multiply(bands[band_idx], [0.0])

    return pywt.waverec(bands, dwt_transform)

# Remove Baseline Wander

In [27]:
def get_median_filter_width(sampling_rate, duration):
    """
    Convert a filter duration into an odd median-filter width in samples.

    The width is int(sampling_rate * duration); an even value is reduced by
    one so the result is odd. The result is never smaller than 1, so very
    short (or zero) durations still yield a usable width instead of a
    non-positive one.

    Args:
      sampling_rate: sampling frequency in Hz.
      duration: filter length in seconds.

    Returns:
      An odd integer >= 1.
    """
    width = int(sampling_rate * duration)
    if width % 2 == 0:
        width -= 1  # needs to be an odd number
    # Guard against 0 / negative products, which would give a width <= 0.
    return max(width, 1)


# Baseline fitting by median filtering
# === Filter parameters used for baseline fitting ===

BASIC_SRATE = 128
ms_flt_array = [0.2, 0.6]    # lengths of the baseline-fitting filters, in seconds

# mfa holds one odd filter width (in samples) per entry of ms_flt_array.
mfa = np.zeros(len(ms_flt_array), dtype='int')
for i, window_sec in enumerate(ms_flt_array):
    mfa[i] = get_median_filter_width(BASIC_SRATE, window_sec)

In [29]:
def filter_signal(X, filter_widths=None):
    """
    Remove baseline wander by subtracting a median-filtered baseline estimate.

    The baseline is estimated by applying median filters one after another
    (each on the output of the previous one); the estimate is then subtracted
    from the original signal.

    Args:
      X: 1-D signal (array-like).
      filter_widths: iterable of odd median-filter widths in samples. When
        omitted, the module-level `mfa` array is used.

    Returns:
      numpy array: X minus the estimated baseline.
    """
    widths = mfa if filter_widths is None else filter_widths

    baseline = X
    for width in widths:
        # Cascade: each filter runs on the previous filter's output.
        baseline = medfilt(baseline, int(width))

    return np.subtract(X, baseline)

# R-peak detection

In [34]:
def detect_r_peaks(ecg_segment, fs):
    """
    Run wfdb's XQRS detector on one ECG segment.

    Returns whatever processing.xqrs_detect returns for the segment at
    sampling frequency `fs` (the detected QRS sample indices), with the
    detector's verbose output switched off.
    """
    return processing.xqrs_detect(ecg_segment, fs=fs, verbose=False)

In [36]:
def extract_hrv_features(rr_intervals):
    """
    Summarise a series of RR intervals as 8 time-domain HRV statistics.

    The returned numpy array is ordered as
      [MeanRR, RMSSD, pNN50, SDRR, CVRR, NN50, MinRR, MaxRR]
    NN50 counts successive differences whose magnitude exceeds 0.05
    (50 ms when the intervals are expressed in seconds); pNN50 is that count
    as a percentage of all successive differences. SDRR is np.std of the
    intervals and CVRR is SDRR / MeanRR (0 when MeanRR is 0).

    With fewer than two intervals an all-zero array of length 8 is returned.
    """
    if len(rr_intervals) < 2:
        return np.zeros(8)

    # Successive differences drive RMSSD, NN50 and pNN50. For a 1-D series of
    # two or more intervals there is always at least one difference.
    successive = np.diff(rr_intervals)
    n_diffs = len(successive)

    avg_rr = np.mean(rr_intervals)
    spread_rr = np.std(rr_intervals)

    root_mean_sq = np.sqrt(np.mean(np.square(successive)))

    # 50 ms threshold => 0.05 s
    nn50_count = np.sum(np.abs(successive) > 0.05)
    pnn50_pct = (nn50_count / n_diffs) * 100

    if avg_rr != 0:
        coeff_var = spread_rr / avg_rr
    else:
        coeff_var = 0

    shortest = np.min(rr_intervals)
    longest = np.max(rr_intervals)

    return np.array([
        avg_rr,
        root_mean_sq,
        pnn50_pct,
        spread_rr,
        coeff_var,
        nn50_count,
        shortest,
        longest,
    ])



def compute_rr_intervals(r_peaks, fs):
    """
    Convert R-peak sample indices into RR intervals in seconds.

    Each interval is the difference between consecutive peak indices divided
    by the sampling frequency `fs`. An empty array is returned when fewer
    than two peaks are given.
    """
    if len(r_peaks) >= 2:
        return np.diff(r_peaks) / fs
    return np.array([])