In [260]:
import numpy as np
import pandas as pd
import wfdb
from matplotlib import pyplot as plt
from preprocess import *
from sampling import *
%matplotlib inline

In [36]:
# Settings for the 'mitdb' database handled through wfdb.
sample_rate = 360    # sampling rate of the recordings (presumably in Hz -- confirm)
directory = 'data/'  # concatenated directly with record names below, so keep the trailing slash
records = wfdb.get_record_list('mitdb')  # names of all records in the database

### Store signals, annotation positions and annotation symbols in dictionaries

In [92]:
# signal data
# SignalData maps "<record> <signal name>" to the samples of one channel.

SignalData = {}

for record in records:
    # rdsamp returns the sample matrix (indexed as [sample, channel] below)
    # together with a dict of header fields, including the channel names.
    signal, field = wfdb.rdsamp(directory + record)
    for channel in (0, 1):
        channel_name = field['sig_name'][channel]
        signal_key = ' '.join((record, channel_name))
        SignalData[signal_key] = signal[:, channel]

In [96]:
# annotation data
# AnnPos holds the annotation positions and AnnSym the annotation symbols,
# both keyed by record name.

AnnPos, AnnSym = {}, {}

for record in records:
    # Read the 'atr' annotation file that belongs to this record.
    annotation = wfdb.rdann(directory + record, 'atr')
    AnnPos[record], AnnSym[record] = annotation.sample, annotation.symbol

### Prepare segmentation positions
Segmentation positions are the midpoints between two neighboring beats. Each one serves as the starting position of the beat that follows it. For the first beat in a record, the starting position is set to 0.

In [None]:
# SegPos is a dictionary that contains all the segmentation data; it is
# computed from the annotation positions stored in AnnPos.
# NOTE(review): presumably keyed by record name like AnnPos -- confirm against segmentation().
SegPos = segmentation(AnnPos)

### Remove noise from the signal

In [3]:
from preprocess import scale, denoise

In [102]:
# Denoise every signal, then scale the denoised result.
# NOTE: the processed signal overwrites the original entry in SignalData, so
# re-running this cell processes already-processed signals again. Reload the
# signals first if this cell has to be re-run.
for key, raw_signal in SignalData.items():
    SignalData[key] = scale(denoise(raw_signal))

### Sample the training and testing data

Sample the data with a rolling window.

Each training example contains:

1. signals collected in a feature derivation window (FDW)
2. beat annotations in a forecast window (FW)

There is a time gap between the FDW and the FW. The goal is to use the signals (1) to predict the annotations (2).

In [246]:
# some parameters

# Position used for splitting the training and testing datasets.
split_pos = 450000

# Window widths, all expressed in number of beats.
FDW_width = 10  # feature derivation window
FW_width = 1    # forecast window
gap_width = 0   # gap between the FDW and the FW

# Distance (in number of beats) between two consecutive FDWs.
delay = 10

In [257]:
# Build the training/testing data and labels from the processed signals, the
# segmentation positions and the annotation symbols, using the split position
# and window parameters defined earlier in the notebook.
(TrainData, TrainLabel,
 TestData, TestLabel) = build_training_testing_dataset(
    SignalData,
    SegPos,
    AnnSym,
    split_pos,
    FDW_width,
    FW_width,
    gap_width,
    delay,
)