In [5]:
import pyedflib
import numpy as np

from scipy import signal
import matplotlib.pyplot as plt
import csv
import pandas as pd
import itertools as iter

In [6]:
#read dataset
# Only two columns of the visit-1 table are used below: the patient id
# ('nsrrid') and 'poordi4', the value the two groups are thresholded on.
csv_file = 'mros-visit1-dataset-0.3.0.csv'
df = pd.read_csv(csv_file, dtype={"nsrrid": str, "poordi4": int}, low_memory=False)
AHI = df[['nsrrid', 'poordi4']] #select only two columns

#select only 50 patients from each group (normal/abnormal)
n_data = 100
# Floor division keeps the slice bound an integer on Python 3 as well as 2.
n_per_group = n_data // 2

# .copy() detaches each group from the filtered frame, so adding the 'class'
# column below is a plain assignment on an independent DataFrame instead of
# a chained assignment on a slice (SettingWithCopyWarning).
group1 = AHI[AHI.poordi4 < 5].iloc[:n_per_group].copy() #group1 : normal
group1['class'] = 0
group2 = AHI[AHI.poordi4 >= 30].iloc[:n_per_group].copy() #group2 : OSA patient
group2['class'] = 1

# Rows keep their original index from the CSV; group1 rows come first.
all_data = pd.concat([group1, group2])

In [None]:
# notch function applied from what using in SSVEP
def notch_filter(ecg, fs=None):
    """Suppress the 60-61 Hz band of a 1-D signal with a Butterworth band-stop.

    Parameters
    ----------
    ecg : array_like
        Samples to filter.
    fs : float, optional
        Sampling frequency in Hz. When omitted, the module-level
        ``sampling_rate`` is used, so that variable must already be defined
        when the function is called without ``fs``.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same length as ``ecg``.
    """
    if fs is None:
        fs = sampling_rate
    nyq = 0.5 * fs
    # scipy.signal.butter takes critical frequencies normalised to Nyquist.
    low = 60.0 / nyq
    high = 61.0 / nyq
    order = 2
    # 'bandstop' rejects the band between low and high. The previous 'band'
    # setting did the opposite: it kept only 60-61 Hz and threw away the rest
    # of the signal.
    b, a = signal.butter(order, [low, high], btype='bandstop')
    # Causal single-pass filtering; the first samples contain a start-up
    # transient.
    return signal.lfilter(b, a, ecg)

In [11]:
## get ECGs ##
# Reads one channel from every selected patient's EDF recording, keeps the
# samples between the end of hour 1 and the end of hour 5, filters them and
# stores the result as one row of ECGs. Requires all_data, n_data and
# notch_filter from the earlier cells.

#set params
sampling_rate = 512 #number of samplings per second
second_hour_start = sampling_rate * 60 * 60
fifth_hour_end = sampling_rate * 60 * 60 * 5
length = fifth_hour_end-second_hour_start
# float64 by default: n_data * length * 8 bytes (about 5.9 GB for 100 rows).
ECGs = np.zeros(shape=(n_data, length))
# Class of each row (0/1 from all_data['class']); -1 marks rows that were
# never filled because loading failed.
ECG_labels = np.full(n_data, -1, dtype=int)

for i, (nsrrid, label) in enumerate(zip(all_data['nsrrid'], all_data['class'])):
    # Pre-bind both names so the except/finally clauses never touch an
    # unbound variable or the reader left over from the previous iteration.
    f = None
    f_name = None
    try:
        print('round %d' % i)
        f_name = "../mros/polysomnography/edfs/visit1/mros-visit1-" + nsrrid.lower() + ".edf"
        f = pyedflib.EdfReader(f_name)

        # Uncomment to see shape of data
        # if i == 0:
        #     n = f.signals_in_file
        #     signal_labels = f.getSignalLabels()
        #     print("This file includes %d signals :" % n)
        #     print("(list of signal : number of samples in each channel)")
        #     for ch, s in enumerate(signal_labels):
        #         print('\t%s : %s' % (s, f.getNSamples()[ch]))

        #select only ECG
        # NOTE(review): channel index 9 is hard-coded; confirm it is the ECG
        # channel in every file (f.getSignalLabels() lists the channels).
        ecg_L = f.readSignal(9)

        #select only data from 2nd to 5th hours
        ecg_L_4hrs = ecg_L[second_hour_start : fifth_hour_end]

        #filter the selected window
        filtered_signal = notch_filter(ecg_L_4hrs)

        #store the patient's ECG and class
        # The label is kept in its own array. Adding it to the signal (as was
        # done before) shifted every sample of class-1 rows by +1.
        # A recording shorter than five hours yields a shorter slice; the
        # assignment then raises and the row stays all zeros.
        ECGs[i] = filtered_signal
        ECG_labels[i] = label

        # write selected period of signal to file
        # fw_name = "./ECGs/visit1/mros-visit1-" + nsrrid.lower() + "-4hours.edf"
        # np.save(fw_name, np.array(filtered_signal))

        print("Select ECG data only from 2nd to 5th hours = {} {} samplings from all {} samplings.".format(
            length, filtered_signal.shape, len(ecg_L)))

    except Exception as e:
        # Best effort: report the failure and continue with the next patient.
        print('FAIL at %s %s' % (f_name, e))
    finally:
        # Only close a reader that was actually opened in this iteration.
        if f is not None:
            f._close()

print(ECGs.shape)

round 0
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 16128000 samplings.
round 1
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 22256640 samplings.
round 2
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 19046400 samplings.
round 3
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 20259840 samplings.
round 4
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 16204800 samplings.
round 5
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 22195200 samplings.
round 6
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 19184640 samplings.
round 7
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 22256640 samplings.
round 8
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 23040000 samplings.
r

Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 17617920 samplings.
round 75
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 17971200 samplings.
round 76
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 20044800 samplings.
round 77
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 32271360 samplings.
round 78
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 19722240 samplings.
round 79
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 19691520 samplings.
round 80
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 17817600 samplings.
round 81
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 22609920 samplings.
round 82
Select ECG data only from 2nd to 5th hours = 7372800 (7372800,) samplings from all 22256640 samplings.
r