<a href="https://colab.research.google.com/github/jefersonjlima/Kalman-and-Bayesian-Filters-in-Python/blob/master/features_v02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intro

This notebook converts [MITDB](https://physionet.org/physiobank/database/mitdb/) records to the Quoretech dataset format.

This is the Google Colab version of the code.


In [0]:
# install packages
import sys

try:
    import wfdb as wf
except:
    !pip install wfdb==1.3.9

In [0]:
from collections import Counter
import itertools
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import pandas as pd
import pickle
from scipy.signal import firwin, filtfilt, resample_poly, resample
import wfdb as wf
import math
from sklearn.preprocessing import LabelEncoder
import pdb

# MIT Annotation

In [0]:
# Beat annotations, one entry per line in the form
#   <code><TAB><TAB><description>
# The following cell splits every line on the double tab to build beatTable.
# NOTE(review): '~' and '?' are listed here although '~' is also present in the
# non-beat list below — confirm they are meant to be treated as beats.

beatList = \
'''N		Normal beat
L		Left bundle branch block beat
R		Right bundle branch block beat
B		Bundle branch block beat (unspecified)
A		Atrial premature beat
a		Aberrated atrial premature beat
J		Nodal (junctional) premature beat
S		Supraventricular premature or ectopic beat (atrial or nodal)
V		Premature ventricular contraction
r		R-on-T premature ventricular contraction
F		Fusion of ventricular and normal beat
e		Atrial escape beat
j		Nodal (junctional) escape beat
n		Supraventricular escape beat (atrial or nodal)
E		Ventricular escape beat
/		Paced beat
f		Fusion of paced and normal beat
Q		Unclassifiable beat
~		Change in signal quality
?		Beat not classified during learning'''


# Non-beat annotations, same "<code><TAB><TAB><description>" layout.
# The bracketed numbers ([1], [2], [3]) are part of the description text.
noBeatList = \
'''[		Start of ventricular flutter/fibrillation
!		Ventricular flutter wave
]		End of ventricular flutter/fibrillation
x		Non-conducted P-wave (blocked APC)
(		Waveform onset
)		Waveform end
p		Peak of P-wave
t		Peak of T-wave
u		Peak of U-wave
`		PQ junction
'		J-point
^		(Non-captured) pacemaker artifact
|		Isolated QRS-like artifact [1]
~		Change in signal quality [1]
+		Rhythm change [2]
s		ST segment change [2]
T		T-wave change [2]
*		Systole
D		Diastole
=		Measurement annotation [2]
"		Comment annotation [2]
@		Link to external data [3]'''

In [0]:
# Turn each tab-delimited listing into a 2-D array whose rows are
# [code, description]; column 0 is later used to recognise beat symbols.
beatTable, noBeatTable = (
    np.array(list(map(lambda entry: entry.split('\t\t'), listing.split('\n'))))
    for listing in (beatList, noBeatList)
)

# QRS Segmentation

In [0]:
class ecgDataFrame:
    '''
    Load one WFDB record, resample its first two channels to 200 Hz and cut
    the signal into per-beat frames.

    Attributes set by getSignal():
        fs              target sampling rate in Hz (always 200)
        resampleFactor  fs divided by the record's native sampling rate
        signal1         resampled first channel
        signal2         resampled second channel
        siglen          number of samples in signal1 after resampling
        signame         channel names reported by the record
        annot           annotation symbols that are listed in beatTable
        annotSamp       sample positions of those annotations, rescaled to fs
        annlen          number of retained annotations
    '''

    # Relabelling of consecutive ectopic beats: symbol -> (second beat, third and later).
    _RUN_LABELS = {'V': ('VC', 'VT'), 'S': ('SC', 'STVA')}

    def __init__(self, path):
        '''
        path: path to any file of the record (e.g. "mitdb/100.dat"); the
              directory and the extension-less base name identify the record.
        '''
        directory, filename = os.path.split(path)
        self._patch = directory
        self._sample = os.path.splitext(filename)[0]
        self.getSignal()

    def getSignal(self):
        '''Read the record and its 'atr' annotations and resample to self.fs.'''
        from fractions import Fraction  # local import keeps the class self-contained

        # os.path.join, unlike directory + '/' + name, does not turn an empty
        # directory into the root path "/<name>".
        record_path = os.path.join(self._patch, self._sample)
        _record = wf.rdsamp(record_path)

        self.fs = 200
        self.resampleFactor = self.fs / _record.fs

        # resample_poly expects the reduced up/down ratio (5/9 for 360 Hz).
        # Passing signal lengths instead forces it to design a filter with
        # millions of taps and only approximates the intended ratio.
        ratio = (Fraction(self.fs) / Fraction(float(_record.fs))).limit_denominator(1000)
        up, down = ratio.numerator, ratio.denominator
        self.signal1 = resample_poly(_record.p_signals[:, 0], up, down)
        self.signal2 = resample_poly(_record.p_signals[:, 1], up, down)

        self.siglen = self.signal1.size
        self.signame = _record.signame

        _ann = wf.rdann(record_path, 'atr')
        self.annot = []
        self.annotSamp = []

        # Keep only the symbols listed in beatTable.
        # NOTE(review): beatTable also lists '~' (signal quality change), so
        # those markers are kept as well — confirm this is intended.
        beat_symbols = set(beatTable[:, 0])
        for symbol, position in zip(_ann.symbol, _ann.sample):
            if symbol in beat_symbols:
                self.annot.append(symbol)
                self.annotSamp.append(int(position * self.resampleFactor))
        self.annlen = len(self.annot)

    def _relabelRun(self, idx):
        '''Rewrite self.annot[idx] in place when it continues a run of 'V' or 'S' beats.'''
        current = self.annot[idx]
        # The first beat has no predecessor; indexing idx-1 would wrap around
        # to the last beat of the record and could mislabel it.
        if idx == 0 or current not in self._RUN_LABELS:
            return
        second, later = self._RUN_LABELS[current]
        previous = self.annot[idx - 1]
        if previous == current:
            self.annot[idx] = second
        elif previous in (second, later):
            self.annot[idx] = later

    @staticmethod
    def _normaliseBeat(beat):
        '''Scale a beat to the range [0, 1] and then remove its mean.'''
        beat = beat - beat.min()
        beat = beat / beat.max()
        return beat - np.mean(beat)

    def createDataFrame(self, sig, nsig, dblist, QRSlen):
        '''
        Build one row per beat whose window fits inside the signal.

        sig:     1-D array to cut the windows from (same time base as annotSamp)
        nsig:    channel index, stored unchanged in the 'nsig' column
        dblist:  record identifier, stored unchanged in the 'sample' column
        QRSlen:  window length in samples; one third lies before the
                 annotation, two thirds after it

        Returns a DataFrame with the columns 'signal', 'annot', 'signame',
        'nsig', 'sample', 'rr_coefLast', 'annot_last', 'annot_mse', 'index'.
        It is empty when no beat fits.

        Side effect: consecutive 'V' / 'S' entries of self.annot are relabelled
        in place ('VC'/'VT' and 'SC'/'STVA').
        '''
        left_values = math.ceil(1/3*QRSlen)
        right_values = math.floor(2/3*QRSlen)
        # Fixed window used to compare the current QRS with the previous one.
        qrs_left, qrs_right = 10, 20

        # Rows are collected in a list and converted once: DataFrame.append
        # copied the frame on every call and no longer exists in pandas 2.
        records = []

        for idx in range(len(self.annot)):
            p0 = self.annotSamp[idx]
            if (p0 - left_values) < 0 or (p0 + right_values) >= self.siglen:
                continue  # window would leave the signal
            p0_last = self.annotSamp[idx - 1] if idx > 0 else p0

            self._relabelRun(idx)

            # Ratio of the RR interval before a beat to the one after it, for
            # the current beat and the four before it (most recent first),
            # together with the label of the beat preceding each of them.
            rr_coefLast = []
            annot_last = []
            for i in range(0, -5, -1):
                if ((idx-1+i) >= 0) and ((idx+1+i) < self.annlen):
                    rr_coefLast.append(
                        (self.annotSamp[idx+i] - self.annotSamp[idx-1+i]) /
                        (self.annotSamp[idx+1+i] - self.annotSamp[idx+i]))
                    annot_last.append(self.annot[idx-1+i])
                else:
                    # No neighbour available: random filler, flagged by '~'.
                    # NOTE(review): unseeded, so these values differ per run.
                    rr_coefLast.append(np.random.rand())
                    annot_last.append('~')

            # Mean-free window around the annotation.
            avg_sig = sig[(p0-left_values):(p0+right_values)]
            avg_sig = avg_sig - np.mean(avg_sig)

            if (p0_last - left_values) > 0:
                # Mean squared difference between the normalised current and
                # previous QRS complexes.
                last_beat = self._normaliseBeat(sig[(p0_last-qrs_left):(p0_last+qrs_right)])
                now_beat = self._normaliseBeat(sig[(p0-qrs_left):(p0+qrs_right)])
                annot_mse = np.square(now_beat - last_beat).mean(axis=0)
            else:
                annot_mse = 0

            records.append({
                'signal':   pd.Series(avg_sig),
                'annot':    self.annot[idx],
                # NOTE(review): always the first channel name, even when
                # nsig is 1 — confirm whether signame[nsig] was intended.
                'signame':  self.signame[0],
                'nsig':     nsig,
                'sample':   dblist,
                'rr_coefLast': rr_coefLast,
                'annot_last': annot_last,
                'annot_mse': annot_mse,
                'index': idx
            })

        return pd.DataFrame(records)

    def proj_fir(self, sig, low, high, npoles):
        '''
        Zero-phase FIR band-pass filter.

        sig:     1-D signal sampled at self.fs
        low:     lower cut-off frequency in Hz
        high:    upper cut-off frequency in Hz
        npoles:  number of FIR taps
        '''
        nyquist = self.fs / 2
        taps = firwin(npoles, [low/nyquist, high/nyquist], pass_zero=False)
        return filtfilt(taps, 1.0, sig)

# Download Dataset

In [0]:
#@title Select Database

dataset = 'mitdb' #@param ["mitdb", "ahadb", "svdb"] 


if dataset is 'svdb':
    patch = dataset + '/'
    leads = ['ECG1', 'ECG2']
    if os.path.exists(patch):
        print('SVDB Exist')
    else:
        # Copy SVDB database
        !wget -r -np 'https://physionet.org/physiobank/database/svdb/'
        !mv ./physionet.org/physiobank/database/svdb/ ./
        !rm -r ./physionet.org/    

elif dataset is 'ahadb':
    from google.colab import drive
    drive.mount('/content/drive')
    patch = '/content/drive/My Drive/MyDatasets/' + dataset + '/'
    leads = ['ECG0', 'ECG1']

elif dataset is 'mitdb':
    patch = dataset + '/'
    leads = ['MLII', 'V5']
    if os.path.exists(patch):
        print('MITDB Exist')
    else:
        # Copy MIT database
        !wget -r -np 'http://www.physionet.org/physiobank/database/mitdb/'
        !mv ./www.physionet.org/physiobank/database/mitdb/ ./
        !rm -r ./www.physionet.org/

In [0]:
import glob
# One entry per record signal file. Sorted because glob.glob returns matches
# in arbitrary, filesystem-dependent order, which would otherwise make the
# row order of the resulting dataset differ between runs.
listRecords = sorted(glob.glob(patch + '*.dat'))

### Extract Transform Load

In [0]:
#@title Filter Design
# Form parameters are read once, before the loop, so that they are defined
# even when no record is processed (qrsFrame is used again when saving).
poles = 50
hfilter = 0.67 #@param {type:"slider", min:0.67, max:5, step:0.01}
lfilter = 40   #@param {type:"slider", min:30, max:100, step:1}
filter_enable = True #@param {type:"boolean"}
qrsFrame = 140 #@param {type:"slider", min:100, max:200, step:1}

frames = []          # one beat frame per processed record
df = pd.DataFrame()  # concatenation of `frames`, refreshed after every record

for dblist in listRecords:
    print("File: %s"%(dblist))
    data = ecgDataFrame(dblist)
    print(Counter(data.annot))
    print(data.signame)

    # Skip the record when either channel contains NaN. The previous test,
    # `not A or B`, let records with NaN in the second channel through.
    if np.isnan(data.signal1).any() or np.isnan(data.signal2).any():
        print('Nan')
        continue

    # First preferred lead that this record actually provides.
    lead = next((name for name in leads if name in data.signame), None)
    if lead is None:
        # Previously this fell through and reused `sig` from the prior record
        # (or raised NameError on the first one).
        print('Skipped: none of %s found in %s' % (leads, data.signame))
        continue

    # Channel 0 -> signal1, any other channel -> signal2.
    nsig = 1 if data.signame.index(lead) else 0
    raw = data.signal2 if nsig else data.signal1
    sig = data.proj_fir(raw, hfilter, lfilter, poles) if filter_enable else raw

    frames.append(data.createDataFrame(sig, nsig, dblist, qrsFrame))
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
    # ignore_index=True gives the same 0..n-1 index as the old reset_index.
    df = pd.concat(frames, ignore_index=True)
    print(df.index)
    print(Counter(df['annot']))

### QRS Windows Example

In [0]:
def foo(x):
    '''
    Classify the first RR ratio of `x`.

    x: sequence whose first element is an RR-interval ratio.

    Returns -1 for a ratio below 0.8, 0 for a ratio in [0.8, 1.2] and 1 for a
    ratio above 1.2. The boundaries 0.8 and 1.2 belong to class 0; they used
    to match no branch and yield None. None is returned only for NaN.
    '''
    ratio = x[0]
    if ratio < 0.8:
        return -1
    if ratio <= 1.2:
        return 0
    if ratio > 1.2:
        return 1
    return None  # NaN compares False everywhere
    
# Class (-1 / 0 / 1) of the most recent RR ratio of every beat.
df['preco'] = df['rr_coefLast'].map(foo)

In [0]:
def foo2(x):
    '''
    Classify every RR ratio in `x`.

    x: array-like of RR-interval ratios.

    Returns a new float32 array of the same shape holding -1 where the ratio
    is below 0.8, 0 where it lies in [0.8, 1.2] and 1 where it exceeds 1.2.
    NaN inputs stay NaN. The input is never modified; previously the result
    aliased the converted input and the boundary values 0.8 and 1.2 were left
    unclassified in the output.
    '''
    ratios = np.asarray(x, dtype=np.float32)
    out = np.full(ratios.shape, np.nan, dtype=np.float32)
    out[ratios < 0.8] = -1
    out[(ratios >= 0.8) & (ratios <= 1.2)] = 0
    out[ratios > 1.2] = 1
    return out

# Per-beat array with the class (-1 / 0 / 1) of each stored RR ratio.
df['rr_coefLast_hot'] = df['rr_coefLast'].map(foo2)

# Save dataset to Google Drive

In [0]:
import datetime
from google.colab import drive
drive.mount('/content/drive')

# Absolute path below the mount point: the former relative 'drive/My Drive/...'
# only resolved correctly while the working directory was /content.
output_dir = '/content/drive/My Drive/MyDatasets/'
os.makedirs(output_dir, exist_ok=True)

# <dataset>_<window length>_<filter taps>_<filter enabled>_rhythm_<date>.pkl
file_name = '{}_{}_{}_{}_rhythm_{}.pkl'.format(
    dataset, qrsFrame, poles, filter_enable, datetime.date.today())
df.to_pickle(output_dir + file_name, protocol=2)

In [0]:
# Show the name of the pickle file written by the previous cell.
file_name