In [13]:
import os
import wfdb
import numpy as np
import pandas as pd
from scipy import io
from skimage import transform
from scipy.ndimage import zoom

In [15]:
def resample_data(
    sigbufs, channel_labels, fs, target_fs, channels=8, channel_stoi=None,skimage_transform=True,
    interpolation_order=3
):
    """Resample a multi-channel signal along its first axis from ``fs`` to ``target_fs``.

    Parameters
    ----------
    sigbufs : 2-D array
        Input signal, indexed as ``sigbufs[:, i]`` for channel ``i``; ``len(sigbufs)``
        is the number of input timesteps.
    channel_labels : iterable of str
        Label of each column of ``sigbufs``; compared case-insensitively (lower-cased).
    fs : number
        Sampling frequency of ``sigbufs``.
    target_fs : number
        Desired sampling frequency of the result.
    channels : int, default 8
        Number of output columns when ``channel_stoi`` is given. Also the requested
        column count when ``channel_stoi`` is None and ``skimage_transform`` is True.
    channel_stoi : dict or None, default None
        Maps a lower-case channel label to its output column index. Labels that are
        missing from the mapping, or mapped to an index ``>= channels``, are skipped
        and leave their output column (if any) filled with zeros.
    skimage_transform : bool, default True
        If True use ``skimage.transform.resize``; otherwise use ``scipy.ndimage.zoom``.
    interpolation_order : int, default 3
        Spline order forwarded to the resampling routine.

    Returns
    -------
    numpy.ndarray of dtype float32
        With ``channel_stoi``: shape ``(timesteps_new, channels)``.
        Without ``channel_stoi``: the whole array resampled at once. Note that the
        skimage branch requests shape ``(timesteps_new, channels)`` while the zoom
        branch keeps the input's column count (zoom factor 1 on axis 1).
    """
    channel_labels = [c.lower() for c in channel_labels]
    timesteps_old = len(sigbufs)
    # Multiply before dividing: int(len * (target_fs / fs)) rounds twice and can land just
    # below an exact integer (e.g. 100 * (29 / 100) == 28.999999999999996 -> 28 samples
    # instead of 29). A single division of the exact product avoids that off-by-one.
    timesteps_new = int(timesteps_old * target_fs / fs)

    if channel_stoi is not None:
        # Columns for which no matching label exists in the input remain zero.
        data = np.zeros((timesteps_new, channels), dtype=np.float32)
        for i, cl in enumerate(channel_labels):
            if cl not in channel_stoi:
                continue
            column = channel_stoi[cl]
            if not column < channels:
                continue
            if skimage_transform:
                resampled = transform.resize(
                    sigbufs[:, i], (timesteps_new,), order=interpolation_order
                )
            else:
                # https://github.com/scipy/scipy/issues/7324 zoom issues: pass the ratio of
                # the integer lengths so zoom's own rounding yields timesteps_new samples.
                resampled = zoom(
                    sigbufs[:, i], timesteps_new / timesteps_old, order=interpolation_order
                )
            data[:, column] = resampled.astype(np.float32)
    elif skimage_transform:
        data = transform.resize(
            sigbufs, (timesteps_new, channels), order=interpolation_order
        ).astype(np.float32)
    else:
        data = zoom(
            sigbufs, (timesteps_new / timesteps_old, 1), order=interpolation_order
        ).astype(np.float32)
    return data

In [19]:
# Dataset root directory and the metadata CSV that lives inside it.
datadir = '/home/dxng/datasets/PTB-XL/data/'
path = '/home/dxng/datasets/PTB-XL/data/ptbxl_database.csv'

# Metadata table, one row per recording, indexed by the `ecg_id` column.
df = pd.read_csv(path, index_col='ecg_id')
print(df.shape)

# Record 1: its `filename_lr` entry is a path relative to `datadir`.
filepath = os.path.join(datadir, df.loc[1, 'filename_lr'])
print(df.loc[1, 'filename_lr'])
sigbufs, header = wfdb.rdsamp(filepath)

# Output column index for each (lower-case) channel label, in this fixed order.
channel_stoi_default = {
    label: column
    for column, label in enumerate((
        "i", "ii", "v1", "v2", "v3", "v4", "v5", "v6",
        "iii", "avr", "avl", "avf", "vx", "vy", "vz",
    ))
}

channels = 12
target_fs = 100

# Resample the record to `target_fs`, keeping only labels mapped below `channels`.
data = resample_data(
    sigbufs=sigbufs,
    channel_labels=header['sig_name'],
    fs=header['fs'],
    target_fs=target_fs,
    channels=channels,
    channel_stoi=channel_stoi_default,
    skimage_transform=True,
)
print(data.shape)

# Start offsets from `start_idx` up to (excluding) 250, advancing a quarter chunk each time.
start_idx = 0
chunk_length = 250
stride = chunk_length // 4
idx_start = [*range(start_idx, 250, stride)]
print(idx_start)

(21837, 27)
records100/00000/00001_lr
(1000, 12)
[0, 62, 124, 186, 248]


In [7]:
# Load the SCP statement table and show its size plus the first rows.
path_label = '/home/dxng/datasets/PTB-XL/data/scp_statements.csv'
label_df = pd.read_csv(filepath_or_buffer=path_label)
print(label_df.shape)
display(label_df.head(n=5))

(71, 13)


Unnamed: 0.1,Unnamed: 0,description,diagnostic,form,rhythm,diagnostic_class,diagnostic_subclass,Statement Category,SCP-ECG Statement Description,AHA code,aECG REFID,CDISC Code,DICOM Code
0,NDT,non-diagnostic T abnormalities,1.0,1.0,,STTC,STTC,other ST-T descriptive statements,non-diagnostic T abnormalities,,,,
1,NST_,non-specific ST changes,1.0,1.0,,STTC,NST_,Basic roots for coding ST-T changes and abnorm...,non-specific ST changes,145.0,MDC_ECG_RHY_STHILOST,,
2,DIG,digitalis-effect,1.0,1.0,,STTC,STTC,other ST-T descriptive statements,suggests digitalis-effect,205.0,,,
3,LNGQT,long QT-interval,1.0,1.0,,STTC,STTC,other ST-T descriptive statements,long QT-interval,148.0,,,
4,NORM,normal ECG,1.0,,,NORM,NORM,Normal/abnormal,normal ECG,1.0,,,F-000B7
