# Initialize

In [1]:
import wfdb as wf
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, resample
from glob import glob
# from ecgdetectors.ecgdetectors import Detectors

# Method definitions

In [2]:
def get_records():
    """Return the sorted, extension-less record paths found in data/mitdb/.

    Every record is stored as several files sharing one base name; the
    ``*.atr`` file is used as the marker for "a record exists", and its
    four-character ``.atr`` suffix is cut off so the result can be used as a
    record base path.
    """
    return sorted(atr_path[:-4] for atr_path in glob('data/mitdb/*.atr'))

# Butterworth LowPass Filter
def butter_lowpass_filter(data, cutoff, fs, order):
    """Low-pass filter ``data`` with a digital Butterworth filter.

    The cutoff is expressed as a fraction of the Nyquist frequency (half the
    sampling rate ``fs``) before the coefficients are designed, and the
    filter is run through ``filtfilt``.

    Args:
        data: Samples to filter.
        cutoff: Cutoff frequency, in the same unit as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth design.

    Returns:
        The filtered samples as returned by ``filtfilt``.
    """
    nyquist = 0.5 * fs
    numerator, denominator = butter(
        order,
        cutoff / nyquist,
        btype='low',
        analog=False,
    )
    return filtfilt(numerator, denominator, data)

# Import and preprocess data

In [9]:
import os

# Fixed parameters of the beat-extraction pipeline. The sample counts below
# are tied to the 360 readings-per-second rate of the input records.
SAMPLING_RATE = 360        # Hz, handed to the low-pass filter
LOWPASS_CUTOFF = 50        # Hz
FILTER_ORDER = 2
WINDOW_START_OFFSET = 150  # window starts this many samples after the previous R-peak
WINDOW_END_OFFSET = 50     # window ends this many samples before the next R-peak
BEAT_LENGTH = 440          # every exported beat is cut/padded to this many samples
LABEL_SEARCH_RADIUS = 10   # samples around an R-peak searched for a class label
OUTPUT_DIR = 'data/ecg_beats/'

# Numeric class assigned to each annotation symbol; every symbol not listed
# here is treated as 0.0 (undetermined) and its beat is not exported.
#   1.0 = Normal beat (NOR)
#   2.0 = Left Bundle Branch Block beat (LBBB)
#   3.0 = Right Bundle Branch Block beat (RBBB)
#   4.0 = Premature Ventricular Contraction beat (PVC)
#   5.0 = Premature Atrial Contraction beat (PAC)
BEAT_CLASSES = {'N': 1.0, 'L': 2.0, 'R': 3.0, 'V': 4.0, 'A': 5.0}

# Get available data files.
records = get_records()
print('Total files: ', len(records))

# Annotation symbols that mark an actual beat (as opposed to rhythm or
# signal-quality annotations); they are used to locate the R-peaks.
realbeats = ['N','L','R','B','A','a','J','S','V','r',
             'F','e','j','n','E','/','f','Q','?']

# Make sure the CSV files have somewhere to go instead of failing on the
# first write when the directory has not been created yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Loop through each input file. Each file contains one record of ECG
# readings, sampled at 360 readings per second.
for path in records:
    # Record name = last path component, whichever separator glob produced.
    fn = path.split('/')[-1].split('\\')[-1]
    print('Loading file:', path)

    # Read in the signal data and its annotations.
    record = wf.rdsamp(path)
    annotation = wf.rdann(path, 'atr')

    # Print some meta information.
    print('    Sampling frequency used for this record:', record[1].get('fs'))
    print('    Shape of loaded data array:', record[0].shape)
    print('    Number of loaded annotations:', len(annotation.num))

    # One row per channel.
    data = record[0].transpose()

    # Sample positions of all annotations that denote a real beat; these are
    # taken as the R-peak positions.
    rpeaks_sample = np.array(
        [sample
         for sample, symbol in zip(annotation.sample, annotation.symbol)
         if symbol in realbeats],
        dtype='int')

    # Class value of every annotation (0.0 when the symbol is not one of the
    # exported classes), scattered onto the sample axis so the class can be
    # looked up by sample position.
    rate = np.array(
        [BEAT_CLASSES.get(symbol, 0.0) for symbol in annotation.symbol],
        dtype='float')
    rates = np.zeros_like(data[0], dtype='float')
    rates[annotation.sample] = rate

    last_idx = len(rpeaks_sample) - 1

    # Process each channel separately (2 per input file).
    for channelid, channel in enumerate(data):
        chname = record[1].get('sig_name')[channelid]
        print('    ECG channel type:', chname)

        signals = butter_lowpass_filter(channel, cutoff=LOWPASS_CUTOFF,
                                        fs=SAMPLING_RATE, order=FILTER_ORDER)

        # One exported row per kept beat: BEAT_LENGTH samples + class label.
        beats = []

        # Split into individual heartbeats. For each heartbeat record,
        # append the classification.
        for idx, idxval in enumerate(rpeaks_sample):
            # Skip the first and last beat: they lack a neighbour on one
            # side, so no window can be cut around them.
            if idx == 0 or idx == last_idx:
                continue

            # Get the classification value that is on or near the position
            # of the R-peak index.
            fromidx = max(idxval - LABEL_SEARCH_RADIUS, 0)
            toidx = idxval + LABEL_SEARCH_RADIUS
            catval = rates[fromidx:toidx].max()

            # Skip the beat if there is no classification.
            if catval == 0.0:
                continue

            # Window between the neighbouring R-peaks, truncated to the
            # fixed row length.
            start = rpeaks_sample[idx - 1] + WINDOW_START_OFFSET
            stop = rpeaks_sample[idx + 1] - WINDOW_END_OFFSET
            if stop <= start:
                # Neighbouring peaks are too close together: the window is
                # empty and cannot be normalized.
                continue
            beat = signals[start:stop][:BEAT_LENGTH]
            if beat.size == 0:
                continue

            # np.ptp is used because the ndarray.ptp method no longer exists
            # in NumPy 2.0. A flat window would divide by zero and export a
            # row of NaNs, so it is dropped as well.
            span = np.ptp(beat)
            if span == 0:
                continue

            # Normalize the readings to a 0-1 range for ML purposes.
            beat = (beat - beat.min()) / span

            # Pad with zeroes up to the fixed length (the truncation above
            # guarantees the beat is never longer than BEAT_LENGTH).
            zerocount = BEAT_LENGTH - beat.size
            beat = np.pad(beat, (0, zerocount), 'constant',
                          constant_values=(0.0, 0.0))

            # The five classes 1.0-5.0 are exported as 0.0-4.0, i.e. a
            # normal beat becomes 0.0 and the abnormal classes 1.0-4.0.
            # The label is stored as the last column of the row.
            beats.append(np.append(beat, catval - 1.0))

        # Save to CSV file.
        savedata = np.array(beats, dtype='float')
        outfn = OUTPUT_DIR+fn+'_'+chname+'.csv'
        print('    Generating ', outfn)
        with open(outfn, "wb") as fout:
            np.savetxt(fout, savedata, delimiter=",", fmt='%f')

Total files:  48
Loading file: data/mitdb\207
    Sampling frequency used for this record: 360
    Shape of loaded data array: (650000, 2)
    Number of loaded annotations: 2385
    ECG channel type: MLII
    Generating  data/ecg_beats/207_MLII.csv
    ECG channel type: V1
    Generating  data/ecg_beats/207_V1.csv
Loading file: data/mitdb\208
    Sampling frequency used for this record: 360
    Shape of loaded data array: (650000, 2)
    Number of loaded annotations: 3040
    ECG channel type: MLII
    Generating  data/ecg_beats/208_MLII.csv
    ECG channel type: V1
    Generating  data/ecg_beats/208_V1.csv
Loading file: data/mitdb\209
    Sampling frequency used for this record: 360
    Shape of loaded data array: (650000, 2)
    Number of loaded annotations: 3052
    ECG channel type: MLII
    Generating  data/ecg_beats/209_MLII.csv
    ECG channel type: V1
    Generating  data/ecg_beats/209_V1.csv
Loading file: data/mitdb\210
    Sampling frequency used for this record: 360
    Shap