In [1]:
import pyedflib
import numpy as np

from scipy import signal, stats
import matplotlib.pyplot as plt
import csv
import pandas as pd
import itertools as iter
import math

In [2]:
#read dataset
csv_file = 'mros-visit1-dataset-0.3.0.csv'
df = pd.read_csv(csv_file, dtype={"nsrrid": str, "poordi4": int}, low_memory=False)
AHI = df[['nsrrid', 'poordi4']] #select only two columns

#select only 50 patients from each group (normal/abnormal)
n_data = 100
group1 = AHI[AHI.poordi4 < 5][0:n_data/2] #group1 : normal
group1['class'] = 0
group2 = AHI[AHI.poordi4 >= 30][0:n_data/2] #group2 : OSA patient
group2['class'] = 1
all_data = pd.concat([group1, group2])

In [3]:
# notch function applied from what using in SSVEP
def notch_filter(ecg):
    nyq = 0.5 * sampling_rate
    low = 60 / nyq
    high = 61 / nyq
    order = 2
    b, a = signal.butter(order, [low, high], btype='band')
    return signal.lfilter(b, a, ecg)

In [4]:
#set params
sampling_rate = 512 #number of samplings per second
second_hour_start = sampling_rate * 60 * 60
fifth_hour_end = sampling_rate * 60 * 60 * 5
length = fifth_hour_end-second_hour_start
ECGs = np.zeros(shape=(n_data, length + 1))

In [None]:
## get ECGs ##
for i, it in enumerate(iter.izip(all_data['nsrrid'], all_data['class'])):
    try:
        #one loop per subject
        print 'round', i
        f_name = "../mros/polysomnography/edfs/visit1/mros-visit1-" + it[0].lower() + ".edf"
        f = pyedflib.EdfReader(f_name)

        '''
        #Uncomment to see shape of data
        if i == 0:
            n = f.signals_in_file
            signal_labels = f.getSignalLabels()
            print "This file includes", n, "signals :"
            print "(list of signal : number of samples in each channel)"
            for i, s in enumerate(signal_labels):
                print '\t', s, ':', f.getNSamples()[i]
        '''

        #select only ECG
        ecg_L = f.readSignal(9)

        #select only data from 2nd to 5th hours
        ecg_L_4hrs = ecg_L[second_hour_start : fifth_hour_end]
  
        #bandpass filter
        filtered_signal = notch_filter(ecg_L_4hrs)
        
        #add all patients' ECGs and class to numpy array
        ECGs[i] = np.append(filtered_signal, it[1])
        
        '''
        #write selected period of signal to file
        fw_name = "./ECGs/visit1/mros-visit1-" + it[0].lower() + "-4hours.edf"
        np.save(fw_name, np.array(filtered_signal))
        '''
        
        print "Select ECG data only from 2nd to 5th hours =", length, filtered_signal.shape, "samplings from all", len(ecg_L), "samplings."
    
    except Exception, e:
        print 'FAIL at', f_name, e
    finally:
        f._close()
    
print ECGs.shape

In [17]:
def get_ecg(pid, start, duration, subject_type = 'normal'):
    # get ECG from edf file
    f_name = "../mros/polysomnography/edfs/visit1/mros-visit1-" + pid.lower() + ".edf"
    try:
        f = pyedflib.EdfReader(f_name)

        #select only ECG
        ecg_L = f.readSignal(9)
        
        #filter signal

    except Exception, e:
        print 'Cannot read EDF file.', e
    finally:
        f._close()

    # apnea typically lasts 20 to 40 seconds (can up to more than 1 min)
    # try to select events which occur in comparable duration
    hypopnea_event_start = sampling_rate * int(start)
    hypopnea_event_end = sampling_rate * int((start + duration))

    print "Getting ECG from subject #", pid, "(", subject_type.upper() , ")"
    print "Start:", hypopnea_event_start, "End:", hypopnea_event_end, "duration:", duration
    
    ecg_L = ecg_L[hypopnea_event_start: hypopnea_event_end]

    #Log result's stat values
    print "S.D.:", round(np.std(ecg_L),4)
    print
    
    return ecg_L

In [19]:
test_list = [{
                'pid' : 'aa0027',
                'start' : 24241.6,
                'duration' : 40,
                'subject_type' : 'normal',
                'type' : 'hypopnea'
            },{
                'pid' : 'aa0029',
                'start' : 35564.2,
                'duration' : 40,
                'subject_type' : 'osa',
                'type' : 'hypopnea'
            },{
                'pid' : 'aa0027',
                'start' : 24585,
                'duration' : 18,
                'subject_type' : 'normal',
                'type' : 'hypopnea'
            },{
                'pid' : 'aa0029',
                'start' : 30749.3,
                'duration' : 18,
                'subject_type' : 'osa',
                'type' : 'hypopnea'
            },{
                'pid' : 'aa0027',
                'start' : 23605.2,
                'duration' : 22,
                'subject_type' : 'normal',
                'type' : 'hypopnea'
            },{
                'pid' : 'aa0029',
                'start' : 33527.1,
                'duration' : 22,
                'subject_type' : 'osa',
                'type' : 'hypopnea'
            }
            ,{
                'pid' : 'aa0027',
                'start' : 30286.2,
                'duration' : 22,
                'subject_type' : 'normal',
                'type' : 'obstuctive apnea'
            },{
                'pid' : 'aa0029',
                'start' : 38630.8,
                'duration' : 21.1,
                'subject_type' : 'osa',
                'type' : 'central apnea'
            }] #add more test case here

for t in test_list:
    ecg_L = get_ecg(t['pid'], t['start'], t['duration'], t['subject_type'])

Getting ECG from subject # aa0027 ( NORMAL )
Start: 12411392 End: 12431872 duration: 40
S.D.: 0.1892

Getting ECG from subject # aa0029 ( OSA )
Start: 18208768 End: 18229248 duration: 40
S.D.: 0.1623

Getting ECG from subject # aa0027 ( NORMAL )
Start: 12587520 End: 12596736 duration: 18
S.D.: 0.1701

Getting ECG from subject # aa0029 ( OSA )
Start: 15743488 End: 15752704 duration: 18
S.D.: 0.1633

Getting ECG from subject # aa0027 ( NORMAL )
Start: 12085760 End: 12097024 duration: 22
S.D.: 0.1925

Getting ECG from subject # aa0029 ( OSA )
Start: 17165824 End: 17177088 duration: 22
S.D.: 0.1582

Getting ECG from subject # aa0027 ( NORMAL )
Start: 15506432 End: 15517696 duration: 22
S.D.: 0.2051

Getting ECG from subject # aa0029 ( OSA )
Start: 19778560 End: 19789312 duration: 21.1
S.D.: 0.1595

