In [1]:
import scipy.io
import numpy as np
from scipy.signal import find_peaks

In [2]:
# Load in the .mat data files
# Retrieved from https://archive.ics.uci.edu/ml/datasets/Cuff-Less+Blood+Pressure+Estimation

# One file per part number; a single loop replaces twelve copy-pasted load statements
PART_NUMBERS = range(1, 13)
sample_files = [scipy.io.loadmat(f'data/part_{number}.mat') for number in PART_NUMBERS]

# Keep the individual names so that later cells can keep referring to them
(sample_file1, sample_file2, sample_file3, sample_file4,
 sample_file5, sample_file6, sample_file7, sample_file8,
 sample_file9, sample_file10, sample_file11, sample_file12) = sample_files

# The recordings of each file are stored under key 'p'; row 0 holds the sequence of records
(part_1, part_2, part_3, part_4,
 part_5, part_6, part_7, part_8,
 part_9, part_10, part_11, part_12) = (mat_file['p'][0] for mat_file in sample_files)

In [5]:
# Hand-picked record ranges from each part, selected for the absence of
# artifacts and irregularities. The order here is the order of the rows in `data`.
selected_ranges = [
    part_1[200:500],
    part_2[200:400],
    part_3[200:400],
    part_4[200:400],
    part_5[:309],
    part_5[311:700],
    part_6[100:300],
    part_6[800:1000],
    part_8[0:800],
    part_7[0:400],
    part_9[200:800],
    part_10[0:200],
    part_11[400:900],
    part_12[0:200],
]

# Join every selected range into a single sequence of recordings
data = np.concatenate(selected_ranges, axis=0)
print(data.shape)

(299,)


In [6]:
def _as_object_array(groups):
    """Pack a list of per-recording lists into a 1-D object array (one list per slot).

    The inner lists hold windows of different lengths, so they are ragged.
    Calling np.array() on such input raises ValueError on recent NumPy releases,
    so the slots are filled explicitly instead.
    """
    packed = np.empty(len(groups), dtype=object)
    for slot, group in enumerate(groups):
        packed[slot] = group
    return packed


# Initialize the per-recording lists of windows for each signal
ECGs = []
PPGs = []
ABPs = []

# Iterate through each group of signals in the data
for segment in data:
    # Split each group into PPG (row 0), ABP (row 1), and ECG (row 2) signals
    PPG = segment[0]
    ABP = segment[1]
    ECG = segment[2]

    # Find peaks in the PPG signal. Both ends are padded with the signal minimum
    # (presumably so that a peak on the first or last sample can still be found);
    # subtracting 1 afterwards undoes the index shift caused by the padding.
    ppg_floor = np.min(PPG)
    peaks, _ = find_peaks(np.concatenate(([ppg_floor], PPG, [ppg_floor])), prominence=0.3, distance=50)
    peaks = peaks - 1

    # Every other peak is a window boundary, so each window spans 2 heartbeat intervals
    peaks2 = peaks[::2]
    bounds = list(zip(peaks2[:-1], peaks2[1:]))

    # Add the list of 2 heartbeat windows from this recording to each signal
    ECGs.append([ECG[start:stop] for start, stop in bounds])
    PPGs.append([PPG[start:stop] for start, stop in bounds])
    ABPs.append([ABP[start:stop] for start, stop in bounds])

# Convert each signal to a numpy (object) array holding one list of windows per recording
ECGs = _as_object_array(ECGs)
PPGs = _as_object_array(PPGs)
ABPs = _as_object_array(ABPs)

In [7]:
def _fit_length(seq, max_len):
    """Flatten `seq`, cut or zero-pad its end to `max_len` samples, and return a (max_len, 1) column."""
    flat = np.reshape(seq, (-1,))[:max_len]
    return np.reshape(np.pad(flat, (0, max_len - len(flat))), (max_len, 1))


def pre_process(signal, max_len=256):
    """Resize every sequence to a fixed length and flag outlier sequences.

    Parameters
    ----------
    signal : sequence of mutable sequences
        One entry per recording; each entry holds that recording's sequences.
        The entries are modified IN PLACE: every sequence is replaced by its
        resized (max_len, 1) version.
    max_len : int, default 256
        Target number of samples. Longer sequences are cut, shorter ones are
        zero-padded at the end.

    Returns
    -------
    signal
        The same object that was passed in, now holding the resized sequences.
    del_inds : dict[int, list[int]]
        For each recording index, the positions of the sequences whose mean
        squared error against the recording's point-wise mean exceeds
        mean(errors) + std(errors). A recording with no sequences maps to [].
    """
    del_inds = {}
    for i, sec in enumerate(signal):
        # Replace each sequence with its fixed-length column version
        for j in range(len(sec)):
            sec[j] = _fit_length(sec[j], max_len)

        # Nothing to compare against in an empty recording
        if len(sec) == 0:
            del_inds[i] = []
            continue

        # Mean for each sample point across the recording's sequences
        stacked = np.stack(sec)[:, :, 0]
        means = stacked.mean(axis=0)

        # Mean squared error between each sequence and the point-wise mean
        errs = ((stacked - means) ** 2).mean(axis=1)

        # Flag sequences more than one standard deviation above the mean error.
        # The threshold is computed once rather than once per sequence.
        threshold = errs.mean() + errs.std()
        del_inds[i] = [j for j, err in enumerate(errs) if err > threshold]

    return signal, del_inds

In [8]:
# Resize the sequences of each signal type and collect the indices of its outliers
(ECGs, ECG_inds), (PPGs, PPG_inds), (ABPs, ABP_inds) = (
    pre_process(raw_signal) for raw_signal in (ECGs, PPGs, ABPs)
)

In [9]:
# Reshape signals into #,256,1 matrix after removing anomalies
seq_shape = (256, 1)

# Collect the cleaned recordings in lists and concatenate once at the end.
# Concatenating inside the loop would re-copy the growing result every iteration.
# Each list starts with an empty float matrix so the result keeps the expected
# shape and dtype even when no recording contributes any sequence.
kept_PPG = [np.empty((0,) + seq_shape)]
kept_ECG = [np.empty((0,) + seq_shape)]
kept_ABP = [np.empty((0,) + seq_shape)]

for i in range(len(PPGs)):
    # Get unique anomalies combined from each signal, so that the same
    # positions are dropped from all three signals and they stay aligned
    rmv = sorted(set(PPG_inds[i] + ECG_inds[i] + ABP_inds[i]))

    # Remove anomalies; the reshape keeps a recording with no sequences at (0, 256, 1)
    kept_PPG.append(np.delete(PPGs[i], rmv, axis=0).reshape((-1,) + seq_shape))
    kept_ECG.append(np.delete(ECGs[i], rmv, axis=0).reshape((-1,) + seq_shape))
    kept_ABP.append(np.delete(ABPs[i], rmv, axis=0).reshape((-1,) + seq_shape))

# Build each matrix in a single pass
PPG_seq = np.concatenate(kept_PPG)
ECG_seq = np.concatenate(kept_ECG)
ABP_seq = np.concatenate(kept_ABP)

In [10]:
# Show the final shape of each signal matrix on one line (PPG, ECG, ABP)
print(*(matrix.shape for matrix in (PPG_seq, ECG_seq, ABP_seq)))

(61938, 256, 1) (61938, 256, 1) (61938, 256, 1)


In [11]:
# Save Data: write each signal matrix to its own .npy file in the working directory
outputs = (
    ("PPG_test1.npy", PPG_seq),
    ("ECG_test1.npy", ECG_seq),
    ("ABP_test1.npy", ABP_seq),
)
for filename, matrix in outputs:
    np.save(filename, matrix)