In [39]:
import csv
import numpy
import sys
import wfdb
from wfdb import processing

ann_str_to_num  = {
    '(AFIB\x00': 0,
    '(ASYS\x00': 1,
    '(B\x00': 2,
    '(BI\x00': 3,
    '(HGEA\x00': 4,
    '(N\x00': 5,
    '(NSR\x00': 5,
    '(NOD\x00': 6,
    '(NOISE\x00': 7,
    '(PM\x00': 8,
    '(SBR\x00': 9,
    '(SVTA\x00': 10,
    '(VER\x00': 11,
    '(VF\x00': 12,
    '(VFIB\x00': 12,
    '(VFL\x00': 13,
    '(VT\x00': 14
}


VFDB_LOCATION = './vfdb/'
QRS_LOCATION = './vfdb-qrs/'
VFDB_RECORDS = [418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428,
                429, 430, 602, 605, 607, 609, 610, 611, 612, 614, 615]

def load_qrs_i(record):
    """Returns a list with the positions of Rs that pan_tompkins returned for
       a specific record.
    """
    filepath = QRS_LOCATION + record + '_1s_i.txt'
    qrs_i = numpy.genfromtxt(filepath, delimiter=',')
    return qrs_i

def load_qrs_amp(record):
    """Returns a list with the amp values that pan_tompkins returned for
       a specific record.
    """
    filepath = QRS_LOCATION + record + '_1s_amp.txt'
    qrs_amp = numpy.genfromtxt(filepath, delimiter=',')
    return qrs_amp

def get_annotation(record):
    """Returns the annotatation object for a given record
    """
    record_path = VFDB_LOCATION + record
    ann = wfdb.rdann(record_path, 'atr')
    return ann

def get_record(record):
    """Returns the annotatation object for a given record
    """
    record_path = VFDB_LOCATION + record
    record = wfdb.rdrecord(record_path)
    return record

def get_annotation_positions(ann):
    """ Takes an annotation object and returns a list of tuples:
        [
        ('<idx if annotaion position>', 'annotations aux_label'),
        ('<idx if annotaion position>', 'annotations aux_label'),
        ...
        ('<idx if annotaion position>', 'annotations aux_label')
        ]
    """
    return list(zip(ann.sample.tolist(), ann.aux_note))

def create_qrs_annotation_mapping(qrs_i, qrs_amp, ann_pos):
    """Assign annotations to every R peak.

    :param qrs_i: list with the r positions
    :param qrs_amp: amp values in the qrs_i positions
    :param ann_pos: list of tuples with the position of every annotation

    returns a ndarray (number of R, 3) Every row will contain the
            index of the R pick, the amp of the peak and the annotation
            as number. See the dict in the beggining of this file for the
            mapping.
            [
                [index_of_R_peak, value_of_amp, annotation_id],
                ...
                [index_of_R_peak, value_of_amp, annotation_id]
            ]
    """
    # From 0 to the first annotation set the annotation to normal.
    ann_pos = [(0, '(N\x00')] + ann_pos
    # number of annotation
    len_ann_pos = len(ann_pos)
    # Number of R peaks
    len_qrs_i = len(qrs_i)
    # Initialize a zeros array for the result
    res = numpy.zeros((len_qrs_i, 3))
    # For every annotation loop the R peaks
    i = 0
    while i < len_ann_pos:
        # Take the current annotation string and its position in the siganl
        pos, ann = ann_pos[i]
        # Take the posotion of the next annotation
        ann = ann_str_to_num[ann]
        # In the case of the last annotation loop until the end of the R peaks
        # array.
        if i == len_ann_pos - 1:
            next_pos = qrs_i[qrs_i.size - 1] + 1
        else:
            next_pos, next_ann = ann_pos[i + 1]
        # For evert peak:
        x = 0
        while x < len_qrs_i:
            # If the R index is after the index of the annotation and
            # before the next annotation (ot the end of the R array)
            # set the annotation string
            if pos <= qrs_i[x] and qrs_i[x] < next_pos:
                res[x][0] = qrs_i[x]
                res[x][1] = qrs_amp[x]
                # if the annotation is NOISE a.k.a 7, set the previous
                # annotation
                if ann == 7 and pos != 0:
                    ann = ann_str_to_num[ann_pos[i - 1][1]]
                res[x][2] = ann
            x = x +1
        i = i + 1

    return res

def create_rr_annotation(annotated_r_peaks):
    """Create RR segments and annotate them.

    :param qrs_i: np.array with the r positions
    :param ann_pos: list of tuples with the position of every annotation

    returns a length(qrs_i) by 4 array which looks like this:
    [
        [start_or_RR_segment, end_or_RR_segnemnt, RR_distane (aka end_or_RR_segnemnt - start_or_RR_segment), annotation_id]
        ...
        [start_or_RR_segment, end_or_RR_segnemnt, RR_distane (aka end_or_RR_segnemnt - start_or_RR_segment), annotation_id]
    ]
    """
    res = numpy.zeros((annotated_r_peaks[:,0].size - 1, 4))
    # Set the start of the RR segments
    # Take all R indices
    all_r_idx = annotated_r_peaks[:,0]
    # Take the annotations for all Rs
    all_annots = annotated_r_peaks[:,2]

    # For the start R take from the first to the second to last
    # position 0 to -1
    start_r = all_r_idx[0:-1]
    # For the end R take from the second to the end, position 1 to end
    end_r = all_r_idx[1:all_r_idx.size]
    if start_r.size != end_r.size:
        raise Exception("start_r and end_r have diffrent sizes.")
    # The 1st column is the start R peak
    res[:,0] = start_r
    # The 2nd column is the end R peak
    res[:,1] = end_r
    # The 3rd column is the difference of end - start Rs
    res[:,2] = end_r - start_r
    # The 4th column is the annotation of the RR segment, defined by the
    # annotation of the end R.
    res[:,3] = all_annots[1:all_annots.size]

    return res

def save_rr_segment_to_csv(record_id):
    """
    Takes a record id from vfdb and writes the RR segment with its annotation
    to a csv file.

    :param record_id: The record ID eg 418
    """

    qrs_i = load_qrs_i(record_id)
    #print type(qrs_i)
    qrs_amp = load_qrs_amp(record_id)
    ann = get_annotation(record_id)
    annot_positions = get_annotation_positions(ann)
    annotated_r_peaks = create_qrs_annotation_mapping(qrs_i, qrs_amp, annot_positions)
    annotated_rr_segments = create_rr_annotation(annotated_r_peaks)
    # numpy.set_printoptions(suppress=True)
    # print annotated_rr_segments
    numpy.savetxt(QRS_LOCATION + record_id + '_RR.txt', annotated_rr_segments, delimiter=",")


In [5]:
ann_str_to_num  = {
    '(AFIB\x00': 0,
    '(ASYS\x00': 1,
    '(B\x00': 2,
    '(BI\x00': 3,
    '(HGEA\x00': 4,
    '(N\x00': 5,
    '(NSR\x00': 5,
    '(NOD\x00': 6,
    '(NOISE\x00': 7,
    '(PM\x00': 8,
    '(SBR\x00': 9,
    '(SVTA\x00': 10,
    '(VER\x00': 11,
    '(VF\x00': 12,
    '(VFIB\x00': 12,
    '(VFL\x00': 13,
    '(VT\x00': 14
}

ann_num_to_str = {}
for k, v in ann_str_to_num.items():
    if v == 5:
        k = '(N\x00'
    elif v == 12:
        k = '(VF\x00'
    ann_num_to_str[v] = k
for item in ann_num_to_str.items():
    print (item)

(0, '(AFIB\x00')
(1, '(ASYS\x00')
(2, '(B\x00')
(3, '(BI\x00')
(4, '(HGEA\x00')
(5, '(N\x00')
(6, '(NOD\x00')
(7, '(NOISE\x00')
(8, '(PM\x00')
(9, '(SBR\x00')
(10, '(SVTA\x00')
(11, '(VER\x00')
(12, '(VF\x00')
(13, '(VFL\x00')
(14, '(VT\x00')


In [9]:
print (VFDB_RECORDS)

[418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 602, 605, 607, 609, 610, 611, 612, 614, 615]


In [12]:
import numpy
print ("\nrecord \t num_of_RR")
for record in VFDB_RECORDS:
    filepath = QRS_LOCATION + str(record) + '_RR.csv'
    rr = numpy.genfromtxt(filepath, delimiter=',')
    print (record, '\t', rr[:,0].size)


record 	 num_of_RR
418 	 4300
419 	 3905
420 	 2923
421 	 4525
422 	 4072
423 	 3537
424 	 3795
425 	 2128
426 	 3774
427 	 3894
428 	 3265
429 	 3613
430 	 4695
602 	 3307
605 	 1921
607 	 1847
609 	 2214
610 	 2166
611 	 3947
612 	 2489
614 	 3329
615 	 1592


In [14]:
# Find all different labels in every annotation
import pprint

records = [str(x) for x in VFDB_RECORDS]
for record in records:
    ann = get_annotation(record)
    #print record
    #pprint.pprint(set(ann.aux_note))
    labels = set(ann.aux_note)
    print (record, labels)

418 {'(N\x00', '(VFL\x00'}
419 {'(NOISE\x00', '(N\x00', '(VFL\x00'}
420 {'(NOISE\x00', '(N\x00', '(VT\x00'}
421 {'(NOISE\x00', '(N\x00', '(VT\x00'}
422 {'(NOISE\x00', '(N\x00', '(VT\x00', '(VFIB\x00'}
423 {'(VT\x00', '(NOD\x00', '(N\x00', '(AFIB\x00', '(ASYS\x00', '(NOISE\x00'}
424 {'(NOD\x00', '(VFL\x00', '(NSR\x00', '(N\x00', '(VFIB\x00', '(ASYS\x00', '(NOISE\x00'}
425 {'(NOISE\x00', '(N\x00', '(B\x00', '(VT\x00'}
426 {'(VT\x00', '(VF\x00', '(N\x00', '(SVTA\x00', '(NOISE\x00'}
427 {'(N\x00', '(VT\x00', '(ASYS\x00'}
428 {'(NOISE\x00', '(VT\x00', '(BI\x00'}
429 {'(NOISE\x00', '(VFL\x00', '(VT\x00', '(BI\x00'}
430 {'(SBR\x00', '(VT\x00', '(BI\x00', '(VER\x00', '(VFL\x00', '(VF\x00', '(HGEA\x00', '(ASYS\x00', '(NOISE\x00'}
602 {'(VT\x00', '(PM\x00', '(N\x00', '(ASYS\x00', '(NOISE\x00'}
605 {'(NOISE\x00', '(VT\x00'}
607 {'(SVTA\x00', '(VT\x00', '(NOD\x00', '(ASYS\x00'}
609 {'(VT\x00', '(VER\x00', '(VFL\x00', '(N\x00', '(AFIB\x00', '(HGEA\x00'}
610 {'(NOISE\x00', '(N\x00', '(VT\x00', '(HGE

In [44]:
from collections import Counter
import numpy
import pprint
#import vfdb_annotate_r

records = [str(x) for x in VFDB_RECORDS]
#records = ['418']
# each segment will have 100 RR
SEGMENT_LEN = 100

for record in records:
    filepath = QRS_LOCATION + str(record) + '_RR.csv'
    rrs = numpy.genfromtxt(filepath, delimiter=',')
    rows, columns = rrs.shape
    num_segments = rows / (SEGMENT_LEN / 2)
    i = 0
    l = []
    while i < num_segments:
        start = int(i * (SEGMENT_LEN / 2))
        if i == num_segments - 1:
            end = rows - 1
        else:
            end = start + SEGMENT_LEN
        # start is the idx of the first RR of this segment
        # end is the idx of the last RR in this segment
        
        labels = rrs[start:end, 3]
        data = Counter(labels)
        most_frequent = max(data)
        if data[most_frequent] >= 0.8 * SEGMENT_LEN:
            l.append(ann_num_to_str[most_frequent])
        else:
            l.append("NO_LABEL")
                
        #print [ann_num_to_str[x] for x in list(set(labels))]            
        i += 1
    print ("record", record, "has", num_segments, "segments", Counter(l))

record 418 has 86.0 segments Counter({'NO_LABEL': 49, '(N\x00': 37})
record 419 has 78.1 segments Counter({'(VFL\x00': 34, '(N\x00': 26, 'NO_LABEL': 19})
record 420 has 58.46 segments Counter({'(N\x00': 31, '(VT\x00': 25, 'NO_LABEL': 3})
record 421 has 90.5 segments Counter({'(N\x00': 57, '(VT\x00': 19, 'NO_LABEL': 15})
record 422 has 81.44 segments Counter({'(N\x00': 44, '(VF\x00': 24, '(VT\x00': 10, 'NO_LABEL': 4})
record 423 has 70.74 segments Counter({'(N\x00': 23, '(VT\x00': 23, '(AFIB\x00': 19, 'NO_LABEL': 6})
record 424 has 75.9 segments Counter({'(N\x00': 58, 'NO_LABEL': 11, '(VF\x00': 5, '(VFL\x00': 2})
record 425 has 42.56 segments Counter({'(N\x00': 27, 'NO_LABEL': 9, '(B\x00': 5, '(VT\x00': 2})
record 426 has 75.48 segments Counter({'(VF\x00': 42, '(N\x00': 15, 'NO_LABEL': 15, '(VT\x00': 4})
record 427 has 77.88 segments Counter({'(VT\x00': 44, '(ASYS\x00': 16, '(N\x00': 15, 'NO_LABEL': 3})
record 428 has 65.3 segments Counter({'(BI\x00': 56, 'NO_LABEL': 8, '(VT\x00': 2})
r

In [42]:
qrs_i = load_qrs_i(record_id)
#print type(qrs_i)
qrs_amp = load_qrs_amp(record_id)
ann = get_annotation(record_id)
annot_positions = get_annotation_positions(ann)
annotated_r_peaks = create_qrs_annotation_mapping(qrs_i, qrs_amp, annot_positions)
annotated_rr_segments = create_rr_annotation(annotated_r_peaks)
print(annotated_rr_segments)

[[  2.40000000e+01   1.28000000e+02   1.04000000e+02   5.00000000e+00]
 [  1.28000000e+02   2.61000000e+02   1.33000000e+02   5.00000000e+00]
 [  2.61000000e+02   3.45000000e+02   8.40000000e+01   5.00000000e+00]
 ..., 
 [  5.24600000e+05   5.24730000e+05   1.30000000e+02   5.00000000e+00]
 [  5.24730000e+05   5.24860000e+05   1.30000000e+02   5.00000000e+00]
 [  5.24860000e+05   5.24990000e+05   1.30000000e+02   5.00000000e+00]]
