In [None]:
import pandas as pd
import numpy as np
import wfdb
from wfdb.io.convert.edf import read_edf,rdedfann
import torch
from scipy.io import loadmat
from scipy.signal import resample
import mne
from mne.io import concatenate_raws, read_raw_edf
import os

# Directory that the relative '../Data' paths in this notebook are resolved against.
# os.chdir('') raises FileNotFoundError, so the working directory is only changed
# when a path is actually configured; an empty value keeps the kernel's current directory.
PROJECT_DIR = ''
if PROJECT_DIR:
    os.chdir(PROJECT_DIR)

# Use the first CUDA device when torch reports one as available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

### [CPSC 2019](http://2019.icbeb.org/Challenge.html)
- 500Hz

In [None]:
# Convert the CPSC 2019 training set (.mat signals + R-peak references) to WFDB
# records with an "atr" annotation file per record.
dataset_path = '../Data'
dataset_name = 'cpsc_2019'
fs=500
root_path = os.path.join(dataset_path,dataset_name,'train/data')
ref_path = os.path.join(dataset_path,dataset_name,'train/ref')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
# exist_ok removes the check-then-create race and also creates missing parents.
os.makedirs(save_path, exist_ok=True)

# Sorted so that records are converted in a reproducible order.
for filename in sorted(os.listdir(root_path)):
    record_name, extension = os.path.splitext(filename)
    # Skip anything that is not a .mat file (e.g. .ipynb_checkpoints, .DS_Store);
    # loadmat would raise on those and abort the whole conversion.
    if extension.lower() != '.mat':
        continue
    file_path = os.path.join(root_path, filename)
    data = loadmat(file_path)
    signal = data['ecg']
    wfdb.wrsamp(record_name, fs=fs, units=['mV'], sig_name=['ECG'], p_signal=signal,write_dir=save_path)

    # The reference file is named 'R_' + the signal file name without its first 5 characters.
    ann = loadmat(os.path.join(ref_path,f'R_{filename[5:]}'))
    # First column of 'R_peak', cast to int like the sample indices written by the
    # other converters in this notebook.
    qrs_positions = ann['R_peak'][:,0].astype(int)
    # Every reference peak is stored with the beat symbol "N".
    wfdb.wrann(record_name, extension="atr",sample=qrs_positions,symbol=["N"] * len(qrs_positions),write_dir=save_path)


### [CPSC 2020](http://icbeb2020.pastconf.com/CSPC2020)
- 400Hz

In [None]:
# Convert the CPSC 2020 training set (.mat signals + beat references) to WFDB
# records with an "atr" annotation file per record.
dataset_path = '../Data'
dataset_name = 'cpsc_2020'
fs=400
root_path = os.path.join(dataset_path,dataset_name,'TrainingSet/data')
ref_path = os.path.join(dataset_path,dataset_name,'TrainingSet/ref')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
# exist_ok removes the check-then-create race and also creates missing parents.
os.makedirs(save_path, exist_ok=True)

# Sorted so that records are converted in a reproducible order.
for filename in sorted(os.listdir(root_path)):
    record_name, extension = os.path.splitext(filename)
    # Skip anything that is not a .mat file (e.g. .ipynb_checkpoints, .DS_Store);
    # loadmat would raise on those and abort the whole conversion.
    if extension.lower() != '.mat':
        continue
    file_path = os.path.join(root_path, filename)
    data = loadmat(file_path)
    signal = data['ecg']
    wfdb.wrsamp(record_name, fs=fs, units=['mV'], sig_name=['ECG'], p_signal=signal,write_dir=save_path)

    # The reference file is named 'R' + the signal file name without its first character.
    ann = loadmat(os.path.join(ref_path,f'R{filename[1:]}'))
    ref = ann['ref'][0][0]
    beat_samples = []
    beat_symbols = []
    # ref[0] is written with symbol "S" and ref[1] with symbol "V".
    for positions, symbol in ((ref[0], "S"), (ref[1], "V")):
        positions = np.asarray(positions)
        # Keep the first column as before; an empty array (possibly 0x0) has no
        # column to index, so it is flattened instead of raising IndexError.
        column = positions[:,0] if positions.size else positions.reshape(-1)
        beat_samples.append(column.astype(int))
        beat_symbols.append(np.full(column.shape, symbol))
    beat_samples = np.concatenate(beat_samples)
    beat_symbols = np.concatenate(beat_symbols)
    if beat_samples.size == 0:
        # Nothing to annotate for this record; the signal files were still written above.
        print(f'{record_name}: no S/V reference beats, annotation file not written')
        continue

    # A stable sort keeps "S" before "V" on equal sample indices, matching what
    # sorted() did on the combined (sample, symbol) list.
    order = np.argsort(beat_samples, kind='stable')
    qrs_positions = beat_samples[order]
    wfdb.wrann(record_name, extension="atr",sample=qrs_positions,symbol=beat_symbols[order].astype(str),write_dir=save_path)


### ADFECGDB
- 1000Hz

In [None]:
# Convert the ADFECGDB EDF recordings to WFDB records, writing the events that
# MNE derives from each file's annotations as an "atr" annotation file.
dataset_path = '../Data/DetectionDB'
dataset_name = 'ADFECGDB'
fs=1000
root_path = os.path.join(dataset_path,dataset_name,'data')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
if not os.path.exists(save_path):
    os.mkdir(save_path)

for filename in os.listdir(root_path):
    # Only .edf files are converted; every other entry in the folder is ignored.
    if not filename.endswith('.edf'):
        continue
    record_name = filename[:-4]
    file_path = os.path.join(root_path, filename)
    data = read_raw_edf(file_path)

    # Channel names and units are taken from MNE's parsed EDF header.
    # NOTE(review): this is a private MNE attribute and 'units' may hold numeric
    # scale factors rather than unit names — confirm the unit strings that end up
    # in the WFDB header.
    edf_header = data._raw_extras[0]
    sig_name = edf_header['ch_names']
    units = edf_header['units'].astype(str).tolist()

    # Indexing the Raw object returns (data, times); the data is transposed before writing.
    channel_data = data[sig_name][0]
    signal = channel_data.T
    wfdb.wrsamp(record_name, fs=fs, units=units, sig_name=sig_name, p_signal=signal,write_dir=save_path)

    # First column of the event array is used as the sample index; every event gets symbol "N".
    event_table = mne.events_from_annotations(data)[0]
    qrs_positions = event_table[:,0]
    beat_symbols = np.full(qrs_positions.shape,"N").astype(str)
    wfdb.wrann(record_name, extension="atr",sample=qrs_positions.astype(int),symbol=beat_symbols,write_dir=save_path)


### NIFECGDB
- 1000Hz

In [None]:
# Convert the NIFECGDB EDF recordings to WFDB records, writing the events that
# MNE derives from each file's annotations as an "atr" annotation file.
dataset_path = '../Data/DetectionDB'
dataset_name = 'NIFECGDB'
fs=1000
root_path = os.path.join(dataset_path,dataset_name,'data')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
if not os.path.exists(save_path):
    os.mkdir(save_path)

for filename in os.listdir(root_path):
    # Entries that do not end in '.edf' are skipped.
    if not filename.endswith('.edf'):
        continue
    record_name = filename[:-4]
    file_path = os.path.join(root_path, filename)
    data = read_raw_edf(file_path)

    # Channel names and units are taken from MNE's parsed EDF header.
    # NOTE(review): this is a private MNE attribute and 'units' may hold numeric
    # scale factors rather than unit names — confirm the unit strings that end up
    # in the WFDB header.
    header_info = data._raw_extras[0]
    sig_name = header_info['ch_names']
    units = header_info['units'].astype(str).tolist()

    # Indexing the Raw object returns (data, times); the data is transposed before writing.
    samples_by_channel, _times = data[sig_name]
    signal = samples_by_channel.T
    wfdb.wrsamp(record_name, fs=fs, units=units, sig_name=sig_name, p_signal=signal,write_dir=save_path)

    # First column of the event array is used as the sample index; every event gets symbol "N".
    events = mne.events_from_annotations(data)[0]
    qrs_positions = events[:,0]
    n_symbols = np.full(qrs_positions.shape,"N").astype(str)
    wfdb.wrann(record_name, extension="atr",sample=qrs_positions.astype(int),symbol=n_symbols,write_dir=save_path)


In [None]:
# Debug inspection: displays whatever `sig_name` currently holds in the kernel.
# Presumably the channel names of the last file handled by the preceding
# conversion loop — this depends on cell execution order.
sig_name

### BA-LABOUR
- Abdominal signals: 500Hz
- FECG: 1kHz

In [None]:
# Convert the B1 (pregnancy) part of BA-LABOUR from tab-separated text files to
# WFDB records, with separate fetal and maternal "atr" annotation files.
dataset_path = '../Data/DetectionDB'
dataset_name = 'BA-LABOUR'
fs=500
root_path = os.path.join(dataset_path,dataset_name,'data/B1_Pregnancy_dataset')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb/B1_Pregnancy_dataset')
if not os.path.exists(save_path):
    os.makedirs(save_path)


def _b1_first_column_as_int(path):
    """Read a headerless tab-separated file and return its first column as ints."""
    return pd.read_csv(path,sep='\t', header=None).values.astype(int)[:,0]


for i in range(1,11):
    # Recordings are numbered 01..10 in both folder and file names.
    rec_id = f'{i:02d}'
    record_name = f'B1_Pregnancy_{rec_id}'
    root = os.path.join(root_path, record_name)

    # String cells use a decimal comma; swap it for a dot before the float conversion.
    signal = pd.read_csv(os.path.join(root, f'B1_abSignals_{rec_id}.txt'),sep='\t', header=None)
    signal = signal.map(lambda cell: cell.replace(",", ".") if isinstance(cell, str) else cell)
    signal = signal.values.astype(float)

    abdomen_names = [f'Abdomen_{k}' for k in range(4)]
    indirect_names = [f'indirect_fecg_{k}' for k in range(4)]
    sig_name = abdomen_names + indirect_names
    wfdb.wrsamp(record_name, fs=fs, units=['mV']*(len(sig_name)), sig_name=sig_name, p_signal=signal,write_dir=save_path)

    # Fetal and maternal R positions go to separate annotation records, all with symbol "N".
    fetal_qrs_positions = _b1_first_column_as_int(os.path.join(root, f'B1_Fetal_R_{rec_id}.txt'))
    fetal_symbols = np.full(fetal_qrs_positions.shape,"N").astype(str)
    wfdb.wrann(f'{record_name}_Fetal', extension="atr",sample=fetal_qrs_positions,symbol=fetal_symbols,write_dir=save_path)

    maternal_qrs_positions = _b1_first_column_as_int(os.path.join(root, f'B1_Maternal_R_{rec_id}.txt'))
    maternal_symbols = np.full(maternal_qrs_positions.shape,"N").astype(str)
    wfdb.wrann(f'{record_name}_Maternal', extension="atr",sample=maternal_qrs_positions,symbol=maternal_symbols,write_dir=save_path)


In [None]:
from scipy.signal import resample

# Convert the B2 (labour) part of BA-LABOUR from tab-separated text files to WFDB
# records; the dFECG file is resampled to the abdominal signal length and appended
# as extra channels.
dataset_path = '../Data/DetectionDB'
dataset_name = 'BA-LABOUR'
fs=500
root_path = os.path.join(dataset_path,dataset_name,'data/B2_Labour_dataset')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb/B2_Labour_dataset')
if not os.path.exists(save_path):
    os.makedirs(save_path)


def _b2_comma_to_dot(cell):
    """Replace decimal commas with dots in string cells; other cells pass through."""
    return cell.replace(",", ".") if isinstance(cell, str) else cell


def _b2_first_column_as_int(path):
    """Read a headerless tab-separated file and return its first column as ints."""
    return pd.read_csv(path,sep='\t', header=None).values.astype(int)[:,0]


for i in range(1,11):
    # Recordings are numbered 01..10 in both folder and file names.
    rec_id = f'{i:02d}'
    record_name = f'B2_Labour_{rec_id}'
    root = os.path.join(root_path, record_name)

    signal = pd.read_csv(os.path.join(root, f'B2_abSignals_{rec_id}.txt'),sep='\t', header=None)
    signal = signal.map(_b2_comma_to_dot)
    signal = signal.values.astype(float)

    fecg_table = pd.read_csv(os.path.join(root, f'B2_dFECG_{rec_id}.txt'),sep='\t', header=None)
    fecg_signal = fecg_table.map(_b2_comma_to_dot).values.astype(float)
    # resample to 500Hz (i.e. to as many rows as the abdominal signals have)
    fecg_signal = resample(fecg_signal,num=len(signal))
    signal = np.concatenate([signal,fecg_signal],axis=1)

    abdomen_names = [f'Abdomen_{k}' for k in range(4)]
    indirect_names = [f'indirect_fecg_{k}' for k in range(4)]
    sig_name = abdomen_names + indirect_names + ['raw_fecg', 'fecg']
    wfdb.wrsamp(record_name, fs=fs, units=['mV']*(len(sig_name)), sig_name=sig_name, p_signal=signal,write_dir=save_path)

    # Fetal R positions are halved (integer division) before writing; maternal ones are used as-is.
    fetal_qrs_positions = _b2_first_column_as_int(os.path.join(root, f'B2_Fetal_R_{rec_id}.txt'))//2
    fetal_symbols = np.full(fetal_qrs_positions.shape,"N").astype(str)
    wfdb.wrann(f'{record_name}_Fetal', extension="atr",sample=fetal_qrs_positions,symbol=fetal_symbols,write_dir=save_path)

    maternal_qrs_positions = _b2_first_column_as_int(os.path.join(root, f'B2_Maternal_R_{rec_id}.txt'))
    maternal_symbols = np.full(maternal_qrs_positions.shape,"N").astype(str)
    wfdb.wrann(f'{record_name}_Maternal', extension="atr",sample=maternal_qrs_positions,symbol=maternal_symbols,write_dir=save_path)


### SensSmartTech 

In [None]:
# Merge the per-modality SensSmartTech WFDB records (<name>_ecg, <name>_ppg,
# <name>_pcg) into a single multi-channel WFDB record per recording.
dataset_path = '../Data/GenerationDB'
dataset_name = 'SensSmartTech'
wfdb_dir = os.path.join(dataset_path,dataset_name,'data','WFDB')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
# The output folder was never created in this cell; create it like the other converters do.
os.makedirs(save_path, exist_ok=True)

# Recording names are the file names minus the 8-character '_<mod>.<ext>' suffix.
# Only files with one of those suffixes are considered, so stray entries
# (e.g. .DS_Store, RECORDS) no longer produce bogus record names.
modality_suffixes = tuple(f'_{mod}.{ext}' for mod in ('ecg','ppg','pcg') for ext in ('hea','dat'))
file_set = {entry[:-8] for entry in os.listdir(wfdb_dir) if entry.endswith(modality_suffixes)}

# Sorted so that recordings are processed in a reproducible order.
for file_name in sorted(file_set):
    ecg_data = wfdb.rdrecord(os.path.join(wfdb_dir,file_name+'_ecg'))
    ppg_data = wfdb.rdrecord(os.path.join(wfdb_dir,file_name+'_ppg'))
    pcg_data = wfdb.rdrecord(os.path.join(wfdb_dir,file_name+'_pcg'))
    sig_name = ecg_data.sig_name+ppg_data.sig_name+pcg_data.sig_name
    # np.concatenate instead of np.concat: the latter only exists in NumPy >= 2.0.
    # NOTE(review): stacking along axis 1 and writing with the ECG fs presumes all three
    # records have the same length and sampling rate — confirm for this dataset.
    p_signals = np.concatenate([ecg_data.p_signal,ppg_data.p_signal,pcg_data.p_signal],axis=1)

    # Sampling rate and comments are taken from the ECG record.
    wfdb.wrsamp(file_name, fs=ecg_data.fs, units=ecg_data.units+ppg_data.units+pcg_data.units, sig_name=sig_name,comments=ecg_data.comments, p_signal=p_signals,write_dir=save_path)

In [None]:
# Debug inspection: shows the shape of whatever `p_signals` currently holds in the
# kernel. Presumably the merged signal array of the last recording processed by the
# preceding cell — this depends on cell execution order.
p_signals.shape

### DALIA
- PPG: 64Hz
- ECG: 700Hz

In [78]:
# Convert DALIA subjects S1..S15 (pickled dicts) into two-channel (ECG, PPG) WFDB
# records resampled to a common rate.
dataset_path = '../Data/GenerationDB'
dataset_name = 'DALIA'
ecg_fs = 700
ppg_fs = 64
std_fs = 100
root_path = os.path.join(dataset_path,dataset_name,'PPG_FieldStudy')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
# exist_ok removes the check-then-create race and also creates missing parents.
os.makedirs(save_path, exist_ok=True)

for i in range(1,16):
    file_name = f'S{i}'
    file_path = os.path.join(root_path,file_name,f'{file_name}.pkl')
    # NOTE: read_pickle unpickles the file, which can execute arbitrary code —
    # only run this on dataset files obtained from a trusted source.
    data = pd.read_pickle(file_path)
    ppg = data['signal']['wrist']['BVP']
    ecg = data['signal']['chest']['ECG']
    # Target length: number of whole ECG seconds (floor) times the output rate.
    std_length = int(ecg.shape[0]//ecg_fs*std_fs)
    # Both channels are resampled to the same number of samples.
    # NOTE(review): this presumes ECG and PPG cover the same time span — confirm;
    # ppg_fs is not used in the length computation.
    resampled_ecg = resample(ecg, std_length)
    resampled_ppg = resample(ppg, std_length)
    # np.concatenate instead of np.concat: the latter only exists in NumPy >= 2.0.
    p_signals = np.concatenate([resampled_ecg,resampled_ppg],axis=1)

    wfdb.wrsamp(file_name, fs=std_fs,units=['',''], sig_name=['ECG','PPG'], p_signal=p_signals,write_dir=save_path)


### WESAD
- PPG: 64Hz
- ECG: 700Hz

In [86]:
# Convert every WESAD subject folder (<name>/<name>.pkl) into a two-channel
# (ECG, PPG) WFDB record resampled to a common rate.
dataset_path = '../Data/GenerationDB'
dataset_name = 'WESAD'
ecg_fs = 700
ppg_fs = 64
std_fs = 100
root_path = os.path.join(dataset_path,dataset_name,'data')
save_path = os.path.join(dataset_path,dataset_name,'train_wfdb')
# exist_ok removes the check-then-create race and also creates missing parents.
os.makedirs(save_path, exist_ok=True)

# Sorted so that subjects are converted in a reproducible order.
for file_name in sorted(os.listdir(root_path)):
    file_path = os.path.join(root_path,file_name,f'{file_name}.pkl')
    # Skip entries that are not subject folders (readme files, .DS_Store, ...);
    # read_pickle would otherwise raise and abort the whole conversion.
    if not os.path.isfile(file_path):
        continue
    # NOTE: read_pickle unpickles the file, which can execute arbitrary code —
    # only run this on dataset files obtained from a trusted source.
    data = pd.read_pickle(file_path)
    ppg = data['signal']['wrist']['BVP']
    ecg = data['signal']['chest']['ECG']
    # Target length: number of whole ECG seconds (floor) times the output rate.
    std_length = int(ecg.shape[0]//ecg_fs*std_fs)
    # Both channels are resampled to the same number of samples.
    # NOTE(review): this presumes ECG and PPG cover the same time span — confirm;
    # ppg_fs is not used in the length computation.
    resampled_ecg = resample(ecg, std_length)
    resampled_ppg = resample(ppg, std_length)
    # np.concatenate instead of np.concat: the latter only exists in NumPy >= 2.0.
    p_signals = np.concatenate([resampled_ecg,resampled_ppg],axis=1)

    wfdb.wrsamp(file_name, fs=std_fs,units=['',''], sig_name=['ECG','PPG'], p_signal=p_signals,write_dir=save_path)
