In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.signal as signal
import pywt
import wfdb
import peakutils
from sklearn.preprocessing import StandardScaler,MinMaxScaler

In [4]:
def smooth(data):
    lowpass = 200
    highpass = 0.5
    
    
    scalar = MinMaxScaler((-1,1))
    data = scalar.fit_transform(data)
    data.resize(len(data))
    

    a,b = signal.butter(6,(highpass,lowpass), btype='bandpass', analog=True)
    filtered = signal.lfilter(b,a,data)
    smoothed = signal.cspline1d(filtered, lamb=1000)
    
    return smoothed

In [5]:
def findRPeaks(smoothed):
    signal_slice = np.ndarray.flatten(smoothed)
    rPeaks = peakutils.indexes(signal_slice, thres=0.3, min_dist=200)
    return rPeaks.tolist()

In [6]:
def findTPeaks(smoothed, peak1, peak2):

    
    signal_slice = smoothed[peak1+20:peak1+140]
    t = peakutils.indexes(signal_slice, thres=0.2, min_dist=200)

    
    return t + peak1 + 20

In [7]:
def findPPeaks(smoothed, peak1, peak2):
    pPeaks = []

    signal_slice = smoothed[peak2-90:peak2-20]
    p = peakutils.indexes(signal_slice, thres=0.2, min_dist=200)
            

    
    return peak2 - 90 + p

In [8]:
def findQRSarea(smoothed, peak):
    
    left_x = peak
    right_x = peak
    top_x = peak
    
    for i in range (1,200):
        if (smoothed[peak-i]>smoothed[peak-i+1]): left_x = peak-i +1
        if (smoothed[peak+i]>smoothed[peak+i-1]): right_x = peak+i-1
            
    left_y = smoothed[left_x]
    right_y = smoothed[right_x]
    top_y = smoothed[peak]
    
    area = 0.5*abs(left_x*(top_y-right_y)+ top_x*(right_y-left_y)+ right_x*(left_y-top_y))
    
    return area

In [9]:
def augment(smoothed, rPeaks, x):
    data = []
    interval = []


    for i in range(1,len(rPeaks)-2):

        

        tpeak = findTPeaks(smoothed, rPeaks[i], rPeaks[i+1])
        ppeak = findPPeaks(smoothed, rPeaks[i], rPeaks[i+1])
        
        RR_dist = rPeaks[i+1]-rPeaks[i]
        RR_mean = (smoothed[rPeaks[i]]+ smoothed[rPeaks[i+1]])/2
        QRSarea = findQRSarea(smoothed,rPeaks[i])
        
        if x in range(100,200):
            label = 0
        elif x in range(200,300):
            label = 1
        

        
        if len(tpeak) == 0 and len(ppeak) == 0:
            data.append([RR_dist,RR_mean,-1,-1,-1,-1,-1,QRSarea,0,0,label])
            continue
            
        elif len(tpeak) == 0:
            ppeak = ppeak[0]
            

            
            PRb_dist = ppeak - rPeaks[i]
            PRa_dist = rPeaks[i+1] - ppeak
            PRb_PRa_ratio = PRb_dist/PRa_dist
            P_amp = smoothed[ppeak]
            
            data.append([RR_dist,RR_mean,-1,-1,PRb_PRa_ratio,-1,P_amp,QRSarea,1,0,label])
            continue
        else:
            tpeak = tpeak[0]
            
        if len(ppeak) == 0:
            
            TRb_dist = tpeak - rPeaks[i]
            TRa_dist = rPeaks[i+1] - tpeak
            TRb_TRa_ratio = TRb_dist/TRa_dist
            T_amp = smoothed[tpeak]
            
            data.append([RR_dist,RR_mean,-1,TRb_TRa_ratio,-1,T_amp,-1,QRSarea,0,1,label])
            continue
        else:
            ppeak = ppeak[0]
        
        RR_dist = rPeaks[i+1]-rPeaks[i]
        RR_mean = (smoothed[rPeaks[i]]+ smoothed[rPeaks[i+1]])/2
        
        
        PT_dist = tpeak-ppeak
        
        TRb_dist = tpeak - rPeaks[i]
        TRa_dist = rPeaks[i+1] - tpeak
        TRb_TRa_ratio = TRb_dist/TRa_dist
        
        PRb_dist = ppeak - rPeaks[i]
        PRa_dist = rPeaks[i+1] - ppeak
        PRb_PRa_ratio = PRb_dist/PRa_dist
        
        T_amp = smoothed[tpeak]
        P_amp = smoothed[ppeak]
        
        
        
        interval = []
        interval.append(RR_dist)
        interval.append(RR_mean)
        interval.append(PT_dist)
        interval.append(TRb_TRa_ratio)
        interval.append(PRb_PRa_ratio)
        interval.append(T_amp)
        interval.append(P_amp)
        interval.append(QRSarea)
        interval.append(1)
        interval.append(1)
        interval.append(label)

        
        data.append(interval)
        
    
    return data

In [10]:
sampleID = []

for i in range(100,125):
    if i not in [102,104,110,120, 122,111,107]:
        sampleID.append(i)

for i in range(200,235):
    if i not in [204,206,211,216,218,224,225,226,227,229, 234,210,217]:
        sampleID.append(i)

In [11]:
test = [122,111,107,234,210,217]
len(sampleID)

40

In [12]:
import random
groups = []
for i in range(8):
    group = []
    for i in range(5):
        num = random.randrange(len(sampleID))
        group.append(sampleID[num])
        sampleID.remove(sampleID[num])
    groups.append(group)
    



In [64]:
groups.extend([test])

In [65]:
groups

[[223, 103, 221, 106, 200],
 [105, 220, 203, 202, 222],
 [116, 208, 205, 124, 213],
 [231, 118, 101, 201, 233],
 [114, 214, 115, 228, 123],
 [230, 109, 113, 219, 121],
 [112, 117, 207, 212, 232],
 [119, 209, 108, 215, 100],
 [122, 111, 107, 234, 210, 217]]

In [15]:
for idx,group in enumerate(groups):
    augmented = []

    for i in group: 

        record = wfdb.rdsamp('mit-bih-arrhythmia-database-1.0-2.0/' + str(i))
        ch = [record[1]['sig_name'].index('MLII')]
        record = wfdb.rdsamp('mit-bih-arrhythmia-database-1.0-2.0/' + str(i), channels = ch)
        data = record[0]
        data = data.astype(np.float32)
        #data.resize(len(data))

        smoothed = smooth(data)

        rPeaks = findRPeaks(smoothed)


        aug_sample = augment(smoothed, rPeaks, i)

        augmented.extend(aug_sample)

        aug_sample = []
    
    augmented = np.array(augmented)
    df = pd.DataFrame(augmented)
    df.to_csv("group_3_{}.csv".format(idx+1), index=False)
    print(f"Done {idx+1}/{9}")
    

Done 1/9
Done 2/9
Done 3/9
Done 4/9
Done 5/9
Done 6/9
Done 7/9
Done 8/9
Done 9/9


In [2]:
groups

NameError: name 'groups' is not defined

In [None]:
augmented = np.array(augmented)

In [120]:
df = pd.DataFrame(augmented)

In [121]:
df.to_csv("trainfoundfeature.csv", index=False)

In [13]:
groups = [[123,215,121,230,233], [221,101,114,232,207],[201,103,200,112,113],[117,220,119,231,209],[106,214,208,105,202],[212,213,219,100,115],[222,203,124,109,108],[205,116,118,228,223],[122,111,107,234,210,217]]