In [3]:
import os
import csv
import wfdb
import heartpy as hp
import pandas as pd
import numpy as np
from scipy.signal import resample, butter, filtfilt

In [4]:
def funcBPFilter(ppg_signal, order, f1, f2, fs):
    """Band-pass filter a 1-D signal with a digital Butterworth filter.

    The filter is designed with ``scipy.signal.butter`` and applied with
    ``scipy.signal.filtfilt`` (a forward pass followed by a backward pass).

    Parameters
    ----------
    ppg_signal : array-like
        Samples to filter.
    order : int
        Order passed to ``butter``.
    f1, f2 : float
        Lower and upper band edges, in the same units as ``fs``.
    fs : float
        Sampling frequency of ``ppg_signal``.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same length as the input.
    """
    numerator, denominator = butter(
        order,
        [f1, f2],
        btype='bandpass',
        analog=False,
        output='ba',
        fs=fs,
    )
    return filtfilt(numerator, denominator, ppg_signal)

In [5]:
# Load the subject table. "ID" is read as a string so values such as 0001 keep
# their leading zeros.
# NOTE(review): dropna() discards a row when ANY column of the CSV is missing,
# not only "ID" / "Age_group" - confirm that this is intended.
subjects = (
    pd.read_csv(r"D:\Proga\AML\datasets\Autonomic_Aging\subject-info.csv", dtype={"ID": str})
    .dropna()
    .reset_index(drop=True)  # renumber rows 0..n-1 after the drop
    .loc[:, ["ID", "Age_group"]]
)
subjects

Unnamed: 0,ID,Age_group
0,0001,2.0
1,0002,2.0
2,0003,7.0
3,0004,4.0
4,0005,2.0
...,...,...
1091,1117,3.0
1092,1118,8.0
1093,1119,2.0
1094,1120,3.0


In [6]:
# Pull both columns out of the frame as arrays.
ids = subjects["ID"].values
age_groups = subjects["Age_group"].values

# Sanity check: both arrays must have the same length; also show the last entry of each.
print(len(ids), len(age_groups), ids[-1], age_groups[-1], sep="\n")

1096
1096
1121
3.0


In [7]:
# Outputs of this cell:
#   result_metadata - one dict per subject (heartpy measures, or an 'error' tag)
#   result_signals  - new_id -> filtered signal, only for successfully processed subjects
result = []
result_metadata = []
result_signals = {}

target_fs = 1000                        # every signal is brought to this sampling rate
five_minute_samples = 300 * target_fs   # number of samples in 300 s at target_fs
records_dir = 'D:\\Proga\\AML\\datasets\\Autonomic_Aging\\'
new_id = 0                              # stays 0 when `ids` is empty


def _log_failure(new_id, subject_id, reason):
    """Append an error row for a subject that could not be processed and echo it."""
    result_metadata.append({
        'id': new_id,
        'age_group': None,
        'error': reason
    })
    print((new_id, subject_id, None, None, None, reason))


# Walk ids and age_groups in lockstep: both arrays come from the same rows of
# `subjects`, so no per-iteration DataFrame lookup is needed.
for new_id, (subject_id, age_group) in enumerate(zip(ids, age_groups), start=1):
    try:
        record = wfdb.rdrecord(records_dir + subject_id)

        if 'NIBP' not in record.sig_name:
            # Without this guard list.index() would raise ValueError and abort the whole run.
            _log_failure(new_id, subject_id, "ChannelNotFound")
        else:
            channel_index = record.sig_name.index('NIBP')
            signal = record.p_signal[:, channel_index].astype(np.float32)
            fs = record.fs

            if fs != target_fs:
                num_samples = int(len(signal) * target_fs / fs)
                signal = resample(signal, num_samples).astype(np.float32)
                fs = target_fs

            # Keep at most the first five minutes, then band-pass 0.5-4.0 Hz.
            signal = signal[:five_minute_samples]
            signal = funcBPFilter(signal, order=2, f1=0.5, f2=4.0, fs=fs)

            try:
                wd, m = hp.process(signal, fs)
            except hp.exceptions.BadSignalWarning:
                _log_failure(new_id, subject_id, "BadSignalWarning")
            else:
                # `m` is the dict of measures returned by heartpy; it is flattened into the row.
                result_metadata.append({
                    'id': new_id,
                    'age_group': age_group,
                    **m
                })

                result_signals[new_id] = signal

                print((new_id, subject_id, age_group, signal, m))

    except FileNotFoundError:
        _log_failure(new_id, subject_id, "FileNotFound")

    print(f"Process end: {subject_id}")

(1, '0001', 2.0, array([-3.44318253, -3.4652588 , -3.48715573, ..., -0.06583556,
       -0.0558075 , -0.04675162]), {'bpm': 70.03786851420155, 'ibi': 856.6794117647058, 'sdnn': 57.48800529323622, 'sdsd': 34.19853757699058, 'rmssd': 54.74403994874611, 'pnn20': 0.7177177177177178, 'pnn50': 0.33933933933933935, 'hr_mad': 37.0, 'sd1': 38.708971849155795, 'sd2': 70.68649254898368, 's': 8596.030374671744, 'sd1/sd2': 0.5476148335176145, 'breathingrate': 0.13333333333333333})
Process end: 0001
(2, '0002', 2.0, array([ 1.06345403,  1.06206593,  1.06086265, ..., -0.00775216,
       -0.00656295, -0.00549129]), {'bpm': 65.71499387061984, 'ibi': 913.0336391437309, 'sdnn': 72.72798446730887, 'sdsd': 44.26448137095001, 'rmssd': 76.39953907290375, 'pnn20': 0.8159509202453987, 'pnn50': 0.5429447852760736, 'hr_mad': 51.0, 'sd1': 54.02085535632682, 'sd2': 87.3501293081049, 's': 14824.323420432522, 'sd1/sd2': 0.6184404738060808, 'breathingrate': 0.13333333333333333})
Process end: 0002
(3, '0003', 7.0, arr

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
  interp = UnivariateSpline(x, rrlist, k=3)


(4, '0004', 4.0, array([ 1.20169226e+01,  1.19567010e+01,  1.18945512e+01, ...,
       -4.96777070e-03, -4.17833467e-03, -3.47333123e-03]), {'bpm': 62.72575575823778, 'ibi': 956.5448717948718, 'sdnn': 53.73198481026203, 'sdsd': 21.058419113018463, 'rmssd': 35.21007236773987, 'pnn20': 0.6045016077170418, 'pnn50': 0.1607717041800643, 'hr_mad': 32.0, 'sd1': 24.895978626280534, 'sd2': 71.74090944290808, 's': 5611.073440205071, 'sd1/sd2': 0.34702624791915876, 'breathingrate': 0.2})
Process end: 0004
(5, '0005', 2.0, array([ 2.06434814,  2.01608705,  1.96731721, ..., -0.00919237,
       -0.0077573 , -0.00646917]), {'bpm': 65.20256262789734, 'ibi': 920.2092307692308, 'sdnn': 77.23251153413193, 'sdsd': 76.29787619723439, 'rmssd': 113.08152854805877, 'pnn20': 0.8858024691358025, 'pnn50': 0.654320987654321, 'hr_mad': 47.0, 'sd1': 79.96067265612626, 'sd2': 74.60793525114237, 's': 18741.801455458062, 'sd1/sd2': 1.0717448805809424, 'breathingrate': 0.26666666666666666})
Process end: 0005
(6, '0006'

  signal = record.p_signal[:, channel_index].astype(np.float32)


Process end: 0065
(63, '0066', 2.0, array([6.96258721, 6.83597938, 6.70797723, ..., 0.08973887, 0.07574253,
       0.06317922]), {'bpm': 73.01815060915509, 'ibi': 821.7134986225896, 'sdnn': 45.618498370121344, 'sdsd': 21.83891524833771, 'rmssd': 34.82981513157151, 'pnn20': 0.56353591160221, 'pnn50': 0.13535911602209943, 'hr_mad': 31.0, 'sd1': 24.628119603519764, 'sd2': 59.6823717780475, 's': 4617.715798882831, 'sd1/sd2': 0.41265316490955095, 'breathingrate': 0.13333333333333333})
Process end: 0066
(64, '0067', 2.0, array([ 5.08345672e+00,  5.00254770e+00,  4.92072905e+00, ...,
       -6.77790032e-03, -5.69796004e-03, -4.73401449e-03]), {'bpm': 67.55420923709987, 'ibi': 888.1755952380952, 'sdnn': 124.83365884487272, 'sdsd': 69.74843086995982, 'rmssd': 99.2601436599957, 'pnn20': 0.7880597014925373, 'pnn50': 0.4955223880597015, 'hr_mad': 116.49999999999989, 'sd1': 70.1841268034094, 'sd2': 161.78366962867676, 's': 35671.671150621514, 'sd1/sd2': 0.43381465486902887, 'breathingrate': 0.1})
P

  result = super().mean(axis=axis, dtype=dtype, **kwargs)[()]
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


(547, '0559', 4.0, array([6.41773820e+01, 6.40856598e+01, 6.39838200e+01, ...,
       6.74420557e-03, 5.68196273e-03, 4.73115107e-03]), {'bpm': 54.350063820150694, 'ibi': 1103.9545454545455, 'sdnn': 0.27146171010862, 'sdsd': masked, 'rmssd': nan, 'pnn20': nan, 'pnn50': nan, 'hr_mad': 0.0, 'sd1': nan, 'sd2': nan, 's': nan, 'sd1/sd2': nan, 'breathingrate': 0.1})
Process end: 0559
(548, '0560', 8.0, array([-9.40841645, -9.46058226, -9.51248184, ...,  0.01923279,
        0.01646661,  0.01392648]), {'bpm': 72.86230429546367, 'ibi': 823.4710743801653, 'sdnn': 33.262808849595295, 'sdsd': 32.612326170629345, 'rmssd': 38.01457266373818, 'pnn20': 0.3259668508287293, 'pnn50': 0.03038674033149171, 'hr_mad': 17.0, 'sd1': 26.88029391014818, 'sd2': 38.6539565588164, 's': 3264.2081135084622, 'sd1/sd2': 0.6954086024608309, 'breathingrate': 0.23333333333333334})
Process end: 0560
(549, '0561', 4.0, array([-4.33140701, -4.35729255, -4.3833924 , ..., -0.00929807,
       -0.00781992, -0.00649987]), {'bpm':

In [None]:
# Destination of the per-subject metadata table.
metadata_path = 'D:\\Proga\\AML\\PPG_dataset\\dataset.csv'

# One row per entry of result_metadata; keys missing from a row become NaN.
df_metadata = pd.DataFrame(data=result_metadata)
df_metadata.to_csv(path_or_buf=metadata_path, index=False)

In [None]:
# Destination of the signal table (one column per new_id, one row per sample).
signals_path = 'D:\\Proga\\AML\\PPG_dataset\\signals.csv'

# Wrap every array in a Series so pandas aligns them on the sample index.
# Passing raw arrays of different lengths (a recording shorter than five
# minutes) makes the DataFrame constructor raise ValueError; with Series the
# shorter columns are padded with NaN instead. When all signals have the same
# length the resulting frame is unchanged.
df_signals = pd.DataFrame({
    signal_id: pd.Series(samples)
    for signal_id, samples in result_signals.items()
})

df_signals.to_csv(signals_path, index=False)