In [1]:
import numpy as np
import biosppy
import scipy.io as sio
from scipy.io import savemat
import matplotlib.pyplot as plt
import pandas as pd
import random
import glob, os
import re
from tqdm import tqdm

In [2]:
def plot(signal, freq=300):
    """Plot a 1-D signal against time.

    Parameters
    ----------
    signal : array-like
        Samples to draw; converted with ``np.asarray`` so lists work too.
    freq : float, optional
        Number of samples per second, used to turn sample indices into
        seconds. Defaults to 300, the value previously hard-coded here.
    """
    signal = np.asarray(signal)
    # sample index -> seconds
    time = np.arange(signal.size) / freq
    plt.plot(time, signal)
    plt.xlabel('Time (s)')
    plt.show()

In [3]:
def split(list_a, chunk_size):
    """Lazily yield consecutive chunks of ``list_a`` as lists.

    Every chunk holds ``chunk_size`` items except possibly the last one,
    which holds whatever is left over.
    """
    starts = range(0, len(list_a), chunk_size)
    yield from (list(list_a[start:start + chunk_size]) for start in starts)

In [4]:
def synth_signal(signal_id, data_dir='training2017'):
    """Build a synthetic signal by shuffling patches of a recorded one.

    The record ``<data_dir>/<signal_id>.mat`` is loaded, peaks are located
    with biosppy's Gamboa segmenter, and the signal is cut into:

    * a head (everything before the first detected peak),
    * "peak patches" spanning groups of up to 6 consecutive peaks,
    * "spacers" between two consecutive patches,
    * a tail (everything from the last detected peak onwards).

    Patches and spacers are shuffled independently with the stdlib
    ``random`` module and re-interleaved between the untouched head and
    tail, so the result has the same length as the input.

    Parameters
    ----------
    signal_id : str
        Record name without the ``.mat`` extension.
    data_dir : str, optional
        Folder holding the source ``.mat`` files.

    Returns
    -------
    numpy.ndarray
        The re-ordered signal.

    Raises
    ------
    IndexError
        If the segmenter reports no peak, since there is nothing to shuffle.
    """
    signal = sio.loadmat(os.path.join(data_dir, signal_id + '.mat'))["val"][0]
    S = biosppy.signals.ecg.gamboa_segmenter(signal=signal, sampling_rate=300.0, tol=0.002)
    detected = S[0]

    # Same exception type the unguarded peaks[0] lookup used to raise,
    # but with a message that says which record is at fault.
    if len(detected) == 0:
        raise IndexError('no peaks detected in ' + signal_id + '; cannot build a synthetic signal')

    # identify the 6-peaks patches and their spacers in between
    peaks = [(chunk[0], chunk[-1]) for chunk in split(detected, 6)]
    spacers = [(left[1], right[0]) for left, right in zip(peaks, peaks[1:])]

    # head and tail are taken before shuffling: they depend on the original order
    head = signal[:peaks[0][0]]
    tail = signal[peaks[-1][1]:]

    # shuffle the order of peaks and spacers (peaks first, then spacers,
    # so the sequence of random draws is unchanged)
    random.shuffle(peaks)
    random.shuffle(spacers)

    # alternate the shuffled peaks and spacers
    core = [None] * (len(peaks) + len(spacers))
    core[::2] = peaks
    core[1::2] = spacers

    # reconstruct the signal
    pieces = [head] + [signal[start:stop] for start, stop in core] + [tail]
    return np.concatenate(pieces)

In [5]:
# Records excluded from sampling (filtered out of the label table below).
# NOTE(review): presumably records on which synth_signal fails - confirm the
# list also covers the '~' records now that R is drawn from that class.
errors = ['A00055', 'A00307', 'A00585', 'A00944', 'A01246', 'A01259', 'A01550', 'A01585', 'A02390', 'A02505', 'A03103', 'A03275', 'A03443', 'A03468', 'A03552', 'A03965', 'A04137', 'A04170', 'A04346', 'A04644', 'A04701', 'A05305', 'A05992', 'A06092', 'A06741', 'A06897', 'A07070', 'A07136', 'A07139', 'A07154', 'A07213', 'A08086', 'A08402']

# start from an empty output folder (created on a first run)
os.makedirs('syntheticECGs', exist_ok=True)
files = glob.glob('syntheticECGs/*')
for f in files:
    os.remove(f)

df = pd.read_csv('physionet_images/training2017_labels.csv', names=['mat', 'label'])
df = df[~df['mat'].isin(errors)]

# random.choices (here) and random.shuffle (in synth_signal) use the stdlib
# generator, which np.random.seed does not touch: seed it explicitly so the
# run is reproducible. The numpy seed is kept for any numpy-based randomness.
random.seed(2022)
np.random.seed(2022)
A = random.choices(df[df['label']=='A']['mat'].to_list(),k=4312) # random choice with replacement
O = random.choices(df[df['label']=='O']['mat'].to_list(),k=2594)
# R is written out with label "~", so it must be drawn from the '~' records
R = random.choices(df[df['label']=='~']['mat'].to_list(),k=4766)

def generate_synth(signal_id, lab):
    """Create one synthetic signal from ``signal_id`` and save it as a .mat file.

    The output is written to ``syntheticECGs/<signal_id>.<tag>.mat`` where
    ``tag`` is the number of files already present for that record, so
    repeated draws of the same record get distinct names. The file stores
    the signal under ``"val"`` and ``lab`` under ``"label"``.
    """
    # Count only files named "<signal_id>.<something>"; the id is escaped so
    # it is matched literally instead of being interpreted as a pattern.
    existing = glob.glob(os.path.join('syntheticECGs', glob.escape(signal_id) + '.*'))
    tag = str(len(existing))
    synth = synth_signal(signal_id)
    mdic = {"val": synth, "label": lab}
    savemat('syntheticECGs/' + signal_id + '.' + tag + '.mat', mdic)

# One pass per sampled list, each written out with its own label
# (one progress bar per list, in the order A, O, R).
for sampled_ids, lab in ((A, "A"), (O, "O"), (R, "~")):
    for signal_id in tqdm(sampled_ids, total=len(sampled_ids)):
        generate_synth(signal_id, lab)

100%|██████████| 4312/4312 [00:28<00:00, 150.17it/s]
100%|██████████| 2594/2594 [00:38<00:00, 67.24it/s]
100%|██████████| 4766/4766 [01:51<00:00, 42.67it/s]


In [7]:
lst = []
df = pd.read_csv('physionet_images/training2017_labels.csv', names=['mat', 'label'])

# Drop a REFERENCE.csv left by a previous run so the glob below only sees
# signal files; a missing file is fine, any other error should surface.
try:
    os.remove('syntheticECGs/REFERENCE.csv')
except FileNotFoundError:
    pass

# Source record id -> label, built once instead of scanning df for every
# file (first occurrence wins, as with the former `.values[0]` lookup).
label_by_id = df.drop_duplicates(subset='mat').set_index('mat')['label'].to_dict()

for i in tqdm(map(os.path.basename,glob.glob('syntheticECGs/*'))):
    mat = i.replace('.mat','')
    # file names look like "<source id>.<tag>.mat": the label is the source record's
    label = label_by_id[mat.split('.')[0]]
    lst.append([mat,label])

# passing the column names here also works when no file was found
df2 = pd.DataFrame.from_records(lst, columns=['mat','label'])

# reference table = original records followed by the synthetic ones, sorted by name
ref = pd.concat([df,df2])
ref = ref.sort_values(by=['mat'])
ref.to_csv('syntheticECGs/REFERENCE.csv',header=False,index=False)

11672it [00:07, 1642.19it/s]
