In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import platform
import random
import uuid
import os
import os.path
import skimage
import utils
import utils.wavelet
import utils.data
import utils.data.augmentation
import numpy as np
import scipy as sp
import scipy.signal
import pandas as pd
import networkx
import networkx.algorithms.approximation
import wfdb
import json
import tqdm
import matplotlib.pyplot as plt
from scipy.stats import norm
from utils.signal import StandardHeader

# Data loader to un-clutter code    
def load_data(filepath):
    """Read a comma-separated fiducial file into a dict of integer arrays.

    Each non-empty line has the form ``key,v1,v2,...``. Spaces and runs of
    double commas are stripped first, so padded rows with trailing commas
    are accepted.

    Parameters
    ----------
    filepath : str
        Path of the text file to read.

    Returns
    -------
    dict
        Maps the first field of every line to a 1-D integer ``np.ndarray``
        holding the remaining fields (empty if the line has no values).

    Raises
    ------
    ValueError
        If a value field cannot be converted to an integer.
    """
    dic = dict()
    with open(filepath) as f:
        for line in f:
            line = line.replace(' ','').replace('\n','').replace(',,','')
            # Lines that are blank (or only spaces/commas) carry no record;
            # skipping them also avoids indexing into an empty string below.
            if not line:
                continue
            if line[-1] == ',':
                line = line[:-1]
            fields = line.split(',')
            head = fields[0]
            tail = fields[1:]
            if tail == ['']:
                tail = []
            # astype(int) on an empty array yields an empty integer array, so
            # every entry has the same dtype whether or not it holds values.
            dic[head] = np.asarray(tail).astype(int)
    return dic


def trailonset(sig,on):
    """Return ``sig`` plus a linear ramp so that its first sample equals ``on``.

    The ramp starts at ``on - sig[0]`` and ends at that shifted value minus
    ``sig[0]`` plus ``sig[-1]`` (the end point is derived from the already
    shifted onset). The input array is not modified.
    """
    ramp_start = on-sig[0]
    ramp_stop = ramp_start-sig[0]+sig[-1]
    ramp = np.linspace(ramp_start,ramp_stop,sig.size)

    return sig+ramp

def getcorr(segments):
    """Build the pairwise similarity matrix of a list of segments.

    Parameters
    ----------
    segments : list of tuple
        Each tuple carries its samples (a 1-D array) at index 2.

    Returns
    -------
    np.ndarray
        Square matrix with one row/column per segment. The diagonal is 1;
        entry (i, j) is the maximum absolute value returned by
        ``utils.signal.xcorr`` for the pair, or 1 when both segments are a
        single sample. An empty input yields a (0, 0) array.
    """
    count = len(segments)
    if count == 0:
        return np.zeros((0,0))

    # Segments of different sizes are brought to a common length:
    # twice the size of the longest segment
    length = 2*max(seg[2].size for seg in segments)

    def resample(x):
        # A single sample cannot be interpolated; repeat it instead
        if x.size == 1:
            return np.full((length,),x[0])
        return sp.interpolate.interp1d(np.linspace(0,1,len(x)),x)(np.linspace(0,1,length))

    corr = np.zeros((count,count))

    for i in range(count):
        for j in range(count):
            if i == j:
                corr[i,j] = 1
                continue

            x1 = segments[i][2]
            x2 = segments[j][2]
            if x1.size != x2.size:
                x1 = resample(x1)
                x2 = resample(x2)

            if (x1.size == 1) and (x2.size == 1):
                corr[i,j] = 1
            else:
                c,_ = utils.signal.xcorr(x1,x2)
                corr[i,j] = np.max(np.abs(c))

    return corr

def getdelete(segments, threshold):
    """List the indices of segments that are too similar to an earlier kept one.

    Rows of the ``getcorr`` matrix are visited in order; a row already marked
    for deletion is skipped, otherwise every other column whose correlation
    exceeds ``threshold`` is marked. Indices are returned in marking order.
    """
    corr = getcorr(segments)
    rows, cols = corr.shape

    discarded = []

    for i in range(rows):
        if i in discarded:
            continue
        for j in range(cols):
            if j != i and corr[i,j] > threshold and j not in discarded:
                discarded.append(j)

    return discarded
    

# Define directories

In [3]:
# Root folder that holds the databases read below.
# The DELINEATOR_BASEDIR environment variable, when set, takes precedence so
# the notebook can run on machines other than the two hard-coded below.
if platform.system() in ['Linux', 'Linux2']:
    default_basedir = '/media/guille/DADES/DADES/Delineator'
else:
    default_basedir = r'C:\Users\Emilio\Documents\DADES\DADES\Delineator'
basedir = os.environ.get('DELINEATOR_BASEDIR', default_basedir)

# Load LUDB

In [4]:
dataset = {}
Pon = {}
Ppeak = {}
Poff = {}
QRSon = {}
QRSpeak = {}
QRSoff = {}
Ton = {}
Tpeak = {}
Toff = {}
group = {}

# Annotation symbol marking each wave -> (onset, peak, offset) dictionaries
wave_stores = {'p': (Pon, Ppeak, Poff),
               'N': (QRSon, QRSpeak, QRSoff),
               't': (Ton, Tpeak, Toff)}

# The filter coefficients are the same for every record: compute them once.
# NOTE(review): butter() cutoffs are fractions of the Nyquist frequency; at the
# 250 Hz rate obtained after decimation these values correspond to 0.25 Hz and
# 62.5 Hz -- confirm that this is the intended band.
highpass = sp.signal.butter(4,   0.5/250., 'high')
lowpass = sp.signal.butter(4, 125.0/250.,  'low')

for i in tqdm.tqdm(range(200)):
    record = os.path.join(basedir,'ludb','{}'.format(i+1))
    (signal, header) = wfdb.rdsamp(record)
    # Reorder the columns so that they follow StandardHeader
    sortOrder = np.where(np.array([x.upper() for x in header['sig_name']])[:,None] == StandardHeader)[1]
    signal = signal[:,sortOrder]
    # NOTE(review): an unexpected sampling rate is only reported; the record
    # is still decimated by 2 and filtered like the others.
    if header['fs'] != 500:
        print(header['fs'])
    signal = sp.signal.decimate(signal,2,axis=0)

    # 1st step: reduce noise
    signal = sp.signal.filtfilt(*highpass,signal.T).T
    signal = sp.signal.filtfilt(*lowpass,signal.T).T

    # 2nd step: retrieve onsets and offsets
    for j in range(len(StandardHeader)):
        lead = StandardHeader[j]
        name = str(i+1)+"_"+lead
        ann = wfdb.rdann(record,'atr_{}'.format(lead.lower()))
        dataset[name] = signal[:,j]

        ann_symbols = np.array(ann.symbol)
        for symbol, (onsets, peaks, offsets) in wave_stores.items():
            loc = np.where(ann_symbols == symbol)[0]
            # Keep only peaks that have an annotation on both sides. A mask
            # (rather than dropping the first/last element) also copes with a
            # lone peak sitting on the boundary, which used to leave an empty
            # array that was then indexed.
            loc = loc[(loc >= 1) & (loc+1 < len(ann.sample))]
            # Sample indices are halved to match the decimation by 2
            onsets[name] = ann.sample[loc-1]//2
            peaks[name] = ann.sample[loc]//2
            offsets[name] = ann.sample[loc+1]//2

        # Store group
        group[name] = str(i+1)

dataset = pd.DataFrame(dataset)

100%|██████████| 200/200 [00:17<00:00, 11.22it/s]


In [5]:
# Segments whose pairwise correlation reaches this value count as duplicates
threshold = 0.99

PsignalLUDB = {}
PQsignalLUDB = {}
QRSsignalLUDB = {}
STsignalLUDB = {}
TsignalLUDB = {}
TPsignalLUDB = {}

PgroupLUDB = {}
PQgroupLUDB = {}
QRSgroupLUDB = {}
STgroupLUDB = {}
TgroupLUDB = {}
TPgroupLUDB = {}

# Buggy files (skipped)
skipped = set(['116_{}'.format(h) for h in StandardHeader] +
              ['104_{}'.format(h) for h in StandardHeader] +
              ['103_III',])

# Onset symbol -> offset symbol that must follow it
closing = {'Pon': 'Poff', 'QRSon': 'QRSoff', 'Ton': 'Toff'}
# Fiducial symbol -> wave name (used for the segment kind and in messages)
wave_of = {'Pon': 'P', 'Poff': 'P', 'QRSon': 'QRS', 'QRSoff': 'QRS', 'Ton': 'T', 'Toff': 'T'}
# Offset -> onset gaps with their own kind; any other such gap is stored as TP
gap_kind = {('Poff', 'QRSon'): 'PQ', ('QRSoff', 'Ton'): 'ST'}
# Segment kind -> (signal, group) output dictionaries
stores = {'P': (PsignalLUDB, PgroupLUDB),
          'QRS': (QRSsignalLUDB, QRSgroupLUDB),
          'T': (TsignalLUDB, TgroupLUDB),
          'TP': (TPsignalLUDB, TPgroupLUDB),
          'PQ': (PQsignalLUDB, PQgroupLUDB),
          'ST': (STsignalLUDB, STgroupLUDB)}

for k in tqdm.tqdm(dataset.keys()):
    if k in skipped:
        continue
    pon = Pon.get(k,np.array([]))
    pof = Poff.get(k,np.array([]))
    qon = QRSon.get(k,np.array([]))
    qof = QRSoff.get(k,np.array([]))
    ton = Ton.get(k,np.array([]))
    tof = Toff.get(k,np.array([]))

    unordered_samples = np.concatenate([pon,pof,qon,qof,ton,tof,]).astype(float)
    unordered_symbols = np.concatenate([['Pon']*pon.size,['Poff']*pof.size,
                                        ['QRSon']*qon.size,['QRSoff']*qof.size,
                                        ['Ton']*ton.size,['Toff']*tof.size,])

    # Sort fiducials by sample. When two fiducials share a sample, the one
    # that closes the wave opened by the previously stored onset goes first.
    samples = []
    symbols = []
    for _ in range(unordered_samples.size):
        tied = np.where(unordered_samples == unordered_samples.min())[0]
        if tied.size == 1:
            chosen = tied[0]
        elif tied.size == 2:
            # A tie before anything was stored has no previous onset to
            # resolve it; treat it like any other unresolvable ordering.
            previous = str(symbols[-1]) if symbols else None
            if previous not in closing:
                raise ValueError("Should not happen at all")
            chosen = next((m for m in tied if unordered_symbols[m] == closing[previous]), None)
            if chosen is None:
                # Neither tied fiducial closes the open wave. The ordering can
                # make no further progress, so report it instead of silently
                # discarding the remaining fiducials.
                print("Check file {}. Tied fiducials could not be ordered; remaining fiducials ignored".format(k))
                break
        else:
            raise ValueError("Definitely should not happen. Check file {}".format(k))
        samples.append(int(unordered_samples[chosen]))
        symbols.append(unordered_symbols[chosen])
        unordered_samples[chosen] = np.inf  # mark as consumed
    samples = np.array(samples)
    symbols = np.array(symbols)

    # Extract segments between consecutive fiducials
    segments = {kind: [] for kind in stores}
    for i in range(samples.size-1):
        if samples[i] == samples[i+1]:
            continue  # zero-length span
        current, following = str(symbols[i]), str(symbols[i+1])
        if current not in wave_of:
            raise ValueError("This should definitely not happen")
        if current in closing:
            # Onset: only valid when followed by its own offset
            if following != closing[current]:
                print("Check file {}. {} onset not followed by offset".format(k, wave_of[current]))
                continue
            kind = wave_of[current]
        elif following in closing:
            # Offset followed by an onset: gap between two waves
            kind = gap_kind.get((current, following), 'TP')
        else:
            print("Check file {}. {} offset not followed by onset".format(k, wave_of[current]))
            continue
        segments[kind].append((k,group[k],dataset[k][samples[i]:samples[i+1]].values))

    # Filter out too similar segments, then store the survivors
    for kind, (signal_store, group_store) in stores.items():
        candidates = segments[kind]
        corr = getcorr(candidates)
        # Link segments that are dissimilar enough and keep an (approximate)
        # maximum clique, i.e. a set of mutually dissimilar segments.
        # from_numpy_array replaces from_numpy_matrix, removed in networkx 3.0.
        g = networkx.from_numpy_array(corr < threshold)
        nodesclique = networkx.algorithms.approximation.max_clique(g)
        kept = [seg for idx, seg in enumerate(candidates) if idx in nodesclique]
        for idx, (source, source_group, values) in enumerate(kept):
            key = k + '_' + str(idx)
            signal_store[key] = values
            group_store[key] = (source, source_group)


100%|██████████| 2400/2400 [04:01<00:00,  9.93it/s]


In [6]:
# Number of LUDB segments kept per kind, in the order P, PQ, QRS, ST, T, TP
for ludb_store in (PgroupLUDB, PQgroupLUDB, QRSgroupLUDB,
                   STgroupLUDB, TgroupLUDB, TPgroupLUDB):
    print(len(ludb_store))

13822
9193
11216
13567
12656
16565


# Load QTDB

In [7]:
#### LOAD DATASETS ####
qtdb_dir            = os.path.join(basedir,'QTDB')
dataset             = pd.read_csv(os.path.join(qtdb_dir,'Dataset.csv'), index_col=0).sort_index(axis=1)
labels              = np.asarray(list(dataset)) # In case no data augmentation is applied
description         = dataset.describe()
# Group of a column: its name up to (not including) the last underscore
group               = {k: k.rpartition('_')[0] for k in dataset}

# Standardise every described column (subtract its mean, divide by its std)
for key in description:
    stats           = description[key]
    dataset[key]    = (dataset[key] - stats['mean'])/stats['std']

# Filter the data: high-pass first, then low-pass, column by column
highpass = sp.signal.butter(4,   0.5/250., 'high')
lowpass = sp.signal.butter(4, 125.0/250.,  'low')
for col in dataset:
    detrended = sp.signal.filtfilt(*highpass,dataset[col])
    dataset[col] = sp.signal.filtfilt(*lowpass,detrended)

# Load fiducials
Pon, Poff, QRSon, QRSoff, Ton, Toff = [
    load_data(os.path.join(qtdb_dir,filename))
    for filename in ('PonNew.csv','PoffNew.csv','QRSonNew.csv',
                     'QRSoffNew.csv','TonNew.csv','ToffNew.csv')
]

In [8]:
# Segments whose pairwise correlation reaches this value count as duplicates
threshold = 0.99

PsignalQTDB = {}
PQsignalQTDB = {}
QRSsignalQTDB = {}
STsignalQTDB = {}
TsignalQTDB = {}
TPsignalQTDB = {}

PgroupQTDB = {}
PQgroupQTDB = {}
QRSgroupQTDB = {}
STgroupQTDB = {}
TgroupQTDB = {}
TPgroupQTDB = {}

# Buggy files (skipped)
skipped = ['sel232_0', 'sel232_1']

# Onset symbol -> offset symbol that must follow it
closing = {'Pon': 'Poff', 'QRSon': 'QRSoff', 'Ton': 'Toff'}
# Fiducial symbol -> wave name (used for the segment kind and in messages)
wave_of = {'Pon': 'P', 'Poff': 'P', 'QRSon': 'QRS', 'QRSoff': 'QRS', 'Ton': 'T', 'Toff': 'T'}
# Offset -> onset gaps with their own kind; any other such gap is stored as TP
gap_kind = {('Poff', 'QRSon'): 'PQ', ('QRSoff', 'Ton'): 'ST'}
# Segment kind -> (signal, group) output dictionaries
stores = {'P': (PsignalQTDB, PgroupQTDB),
          'QRS': (QRSsignalQTDB, QRSgroupQTDB),
          'T': (TsignalQTDB, TgroupQTDB),
          'TP': (TPsignalQTDB, TPgroupQTDB),
          'PQ': (PQsignalQTDB, PQgroupQTDB),
          'ST': (STsignalQTDB, STgroupQTDB)}

for k in tqdm.tqdm(dataset.keys()):
    if k in skipped:
        continue
    pon = Pon.get(k,np.array([]))
    pof = Poff.get(k,np.array([]))
    qon = QRSon.get(k,np.array([]))
    qof = QRSoff.get(k,np.array([]))
    ton = Ton.get(k,np.array([]))
    tof = Toff.get(k,np.array([]))

    unordered_samples = np.concatenate([pon,pof,qon,qof,ton,tof,]).astype(float)
    unordered_symbols = np.concatenate([['Pon']*pon.size,['Poff']*pof.size,
                                        ['QRSon']*qon.size,['QRSoff']*qof.size,
                                        ['Ton']*ton.size,['Toff']*tof.size,])

    # Sort fiducials by sample. When two fiducials share a sample, the one
    # that closes the wave opened by the previously stored onset goes first.
    samples = []
    symbols = []
    for _ in range(unordered_samples.size):
        tied = np.where(unordered_samples == unordered_samples.min())[0]
        if tied.size == 1:
            chosen = tied[0]
        elif tied.size == 2:
            # A tie before anything was stored has no previous onset to
            # resolve it; treat it like any other unresolvable ordering.
            previous = str(symbols[-1]) if symbols else None
            if previous not in closing:
                raise ValueError("Should not happen at all")
            chosen = next((m for m in tied if unordered_symbols[m] == closing[previous]), None)
            if chosen is None:
                # Neither tied fiducial closes the open wave. The ordering can
                # make no further progress, so report it instead of silently
                # discarding the remaining fiducials.
                print("Check file {}. Tied fiducials could not be ordered; remaining fiducials ignored".format(k))
                break
        else:
            raise ValueError("Definitely should not happen. Check file {}".format(k))
        samples.append(int(unordered_samples[chosen]))
        symbols.append(unordered_symbols[chosen])
        unordered_samples[chosen] = np.inf  # mark as consumed
    samples = np.array(samples)
    symbols = np.array(symbols)

    # Extract segments between consecutive fiducials
    segments = {kind: [] for kind in stores}
    for i in range(samples.size-1):
        if samples[i] == samples[i+1]:
            continue  # zero-length span
        current, following = str(symbols[i]), str(symbols[i+1])
        if current not in wave_of:
            raise ValueError("This should definitely not happen")
        if current in closing:
            # Onset: only valid when followed by its own offset
            if following != closing[current]:
                print("Check file {}. {} onset not followed by offset".format(k, wave_of[current]))
                continue
            kind = wave_of[current]
        elif following in closing:
            # Offset followed by an onset: gap between two waves
            kind = gap_kind.get((current, following), 'TP')
        else:
            print("Check file {}. {} offset not followed by onset".format(k, wave_of[current]))
            continue
        segments[kind].append((k,group[k],dataset[k][samples[i]:samples[i+1]].values))

    # Filter out too long TP segments (causing this to break)
    segments['TP'] = [seg for seg in segments['TP'] if seg[2].size < 250]

    # Filter out too similar segments, then store the survivors
    for kind, (signal_store, group_store) in stores.items():
        candidates = segments[kind]
        corr = getcorr(candidates)
        # Link segments that are dissimilar enough and keep an (approximate)
        # maximum clique, i.e. a set of mutually dissimilar segments.
        # from_numpy_array replaces from_numpy_matrix, removed in networkx 3.0.
        g = networkx.from_numpy_array(corr < threshold)
        nodesclique = networkx.algorithms.approximation.max_clique(g)
        kept = [seg for idx, seg in enumerate(candidates) if idx in nodesclique]
        for idx, (source, source_group, values) in enumerate(kept):
            key = k + '_' + str(idx)
            signal_store[key] = values
            group_store[key] = (source, source_group)


100%|██████████| 210/210 [06:18<00:00,  1.80s/it]


In [9]:
# Number of QTDB segments kept per kind, in the order P, PQ, QRS, ST, T, TP
for qtdb_store in (PgroupQTDB, PQgroupQTDB, QRSgroupQTDB,
                   STgroupQTDB, TgroupQTDB, TPgroupQTDB):
    print(len(qtdb_store))

4717
2672
1912
2732
2756
4710


# Load VT

In [None]:
#### LOAD DATASETS ####
# NOTE(review): this cell reads the same QTDB files as the earlier QTDB
# loading cell although it sits under the "Load VT" heading -- confirm the
# intended data source.
qtdb_dir            = os.path.join(basedir,'QTDB')
dataset             = pd.read_csv(os.path.join(qtdb_dir,'Dataset.csv'), index_col=0).sort_index(axis=1)
labels              = np.asarray(list(dataset)) # In case no data augmentation is applied
description         = dataset.describe()
# Group of a column: its name up to (not including) the last underscore
group               = {k: k.rpartition('_')[0] for k in dataset}

# Standardise every described column (subtract its mean, divide by its std)
for key in description:
    stats           = description[key]
    dataset[key]    = (dataset[key] - stats['mean'])/stats['std']

# Filter the data: high-pass first, then low-pass, column by column
highpass = sp.signal.butter(4,   0.5/250., 'high')
lowpass = sp.signal.butter(4, 125.0/250.,  'low')
for col in dataset:
    detrended = sp.signal.filtfilt(*highpass,dataset[col])
    dataset[col] = sp.signal.filtfilt(*lowpass,detrended)

# Load fiducials
Pon, Poff, QRSon, QRSoff, Ton, Toff = [
    load_data(os.path.join(qtdb_dir,filename))
    for filename in ('PonNew.csv','PoffNew.csv','QRSonNew.csv',
                     'QRSoffNew.csv','TonNew.csv','ToffNew.csv')
]

In [None]:
# NOTE(review): this cell sits under the "Load VT" heading but fills the
# *QTDB dictionaries again, replacing whatever the earlier QTDB extraction
# cell stored -- confirm which dataset and output names were intended.

# Segments whose pairwise correlation reaches this value count as duplicates
threshold = 0.99

PsignalQTDB = {}
PQsignalQTDB = {}
QRSsignalQTDB = {}
STsignalQTDB = {}
TsignalQTDB = {}
TPsignalQTDB = {}

PgroupQTDB = {}
PQgroupQTDB = {}
QRSgroupQTDB = {}
STgroupQTDB = {}
TgroupQTDB = {}
TPgroupQTDB = {}

# Buggy files (skipped)
skipped = ['sel232_0', 'sel232_1']

# Onset symbol -> offset symbol that must follow it
closing = {'Pon': 'Poff', 'QRSon': 'QRSoff', 'Ton': 'Toff'}
# Fiducial symbol -> wave name (used for the segment kind and in messages)
wave_of = {'Pon': 'P', 'Poff': 'P', 'QRSon': 'QRS', 'QRSoff': 'QRS', 'Ton': 'T', 'Toff': 'T'}
# Offset -> onset gaps with their own kind; any other such gap is stored as TP
gap_kind = {('Poff', 'QRSon'): 'PQ', ('QRSoff', 'Ton'): 'ST'}
# Segment kind -> (signal, group) output dictionaries
stores = {'P': (PsignalQTDB, PgroupQTDB),
          'QRS': (QRSsignalQTDB, QRSgroupQTDB),
          'T': (TsignalQTDB, TgroupQTDB),
          'TP': (TPsignalQTDB, TPgroupQTDB),
          'PQ': (PQsignalQTDB, PQgroupQTDB),
          'ST': (STsignalQTDB, STgroupQTDB)}

for k in tqdm.tqdm(dataset.keys()):
    if k in skipped:
        continue
    pon = Pon.get(k,np.array([]))
    pof = Poff.get(k,np.array([]))
    qon = QRSon.get(k,np.array([]))
    qof = QRSoff.get(k,np.array([]))
    ton = Ton.get(k,np.array([]))
    tof = Toff.get(k,np.array([]))

    unordered_samples = np.concatenate([pon,pof,qon,qof,ton,tof,]).astype(float)
    unordered_symbols = np.concatenate([['Pon']*pon.size,['Poff']*pof.size,
                                        ['QRSon']*qon.size,['QRSoff']*qof.size,
                                        ['Ton']*ton.size,['Toff']*tof.size,])

    # Sort fiducials by sample. When two fiducials share a sample, the one
    # that closes the wave opened by the previously stored onset goes first.
    samples = []
    symbols = []
    for _ in range(unordered_samples.size):
        tied = np.where(unordered_samples == unordered_samples.min())[0]
        if tied.size == 1:
            chosen = tied[0]
        elif tied.size == 2:
            # A tie before anything was stored has no previous onset to
            # resolve it; treat it like any other unresolvable ordering.
            previous = str(symbols[-1]) if symbols else None
            if previous not in closing:
                raise ValueError("Should not happen at all")
            chosen = next((m for m in tied if unordered_symbols[m] == closing[previous]), None)
            if chosen is None:
                # Neither tied fiducial closes the open wave. The ordering can
                # make no further progress, so report it instead of silently
                # discarding the remaining fiducials.
                print("Check file {}. Tied fiducials could not be ordered; remaining fiducials ignored".format(k))
                break
        else:
            raise ValueError("Definitely should not happen. Check file {}".format(k))
        samples.append(int(unordered_samples[chosen]))
        symbols.append(unordered_symbols[chosen])
        unordered_samples[chosen] = np.inf  # mark as consumed
    samples = np.array(samples)
    symbols = np.array(symbols)

    # Extract segments between consecutive fiducials
    segments = {kind: [] for kind in stores}
    for i in range(samples.size-1):
        if samples[i] == samples[i+1]:
            continue  # zero-length span
        current, following = str(symbols[i]), str(symbols[i+1])
        if current not in wave_of:
            raise ValueError("This should definitely not happen")
        if current in closing:
            # Onset: only valid when followed by its own offset
            if following != closing[current]:
                print("Check file {}. {} onset not followed by offset".format(k, wave_of[current]))
                continue
            kind = wave_of[current]
        elif following in closing:
            # Offset followed by an onset: gap between two waves
            kind = gap_kind.get((current, following), 'TP')
        else:
            print("Check file {}. {} offset not followed by onset".format(k, wave_of[current]))
            continue
        segments[kind].append((k,group[k],dataset[k][samples[i]:samples[i+1]].values))

    # Filter out too long TP segments (causing this to break)
    segments['TP'] = [seg for seg in segments['TP'] if seg[2].size < 250]

    # Filter out too similar segments, then store the survivors
    for kind, (signal_store, group_store) in stores.items():
        candidates = segments[kind]
        corr = getcorr(candidates)
        # Link segments that are dissimilar enough and keep an (approximate)
        # maximum clique, i.e. a set of mutually dissimilar segments.
        # from_numpy_array replaces from_numpy_matrix, removed in networkx 3.0.
        g = networkx.from_numpy_array(corr < threshold)
        nodesclique = networkx.algorithms.approximation.max_clique(g)
        kept = [seg for idx, seg in enumerate(candidates) if idx in nodesclique]
        for idx, (source, source_group, values) in enumerate(kept):
            key = k + '_' + str(idx)
            signal_store[key] = values
            group_store[key] = (source, source_group)


# Merge databases

In [10]:
# Build one dictionary per segment type (signal and group) holding the
# entries of both databases. QTDB entries are inserted first; an LUDB entry
# that shares a key with a QTDB entry replaces its value.
Psignal = {**PsignalQTDB, **PsignalLUDB}
Pgroup = {**PgroupQTDB, **PgroupLUDB}
PQsignal = {**PQsignalQTDB, **PQsignalLUDB}
PQgroup = {**PQgroupQTDB, **PQgroupLUDB}
QRSsignal = {**QRSsignalQTDB, **QRSsignalLUDB}
QRSgroup = {**QRSgroupQTDB, **QRSgroupLUDB}
STsignal = {**STsignalQTDB, **STsignalLUDB}
STgroup = {**STgroupQTDB, **STgroupLUDB}
Tsignal = {**TsignalQTDB, **TsignalLUDB}
Tgroup = {**TgroupQTDB, **TgroupLUDB}
TPsignal = {**TPsignalQTDB, **TPsignalLUDB}
TPgroup = {**TPgroupQTDB, **TPgroupLUDB}

In [11]:
# Number of entries in each merged group dictionary, one per line,
# in the order P, PQ, QRS, ST, T, TP.
print(*map(len, (Pgroup, PQgroup, QRSgroup, STgroup, Tgroup, TPgroup)), sep='\n')

18539
11865
13128
16299
15412
21275


# Delete too short or too long signals

In [12]:
# Length of every stored segment, keyed exactly like the signal dictionaries.
# Entries whose value is a bare float have no length and are left out.
Plength = {key: len(sig) for key, sig in Psignal.items() if not isinstance(sig, float)}
PQlength = {key: len(sig) for key, sig in PQsignal.items() if not isinstance(sig, float)}
QRSlength = {key: len(sig) for key, sig in QRSsignal.items() if not isinstance(sig, float)}
STlength = {key: len(sig) for key, sig in STsignal.items() if not isinstance(sig, float)}
Tlength = {key: len(sig) for key, sig in Tsignal.items() if not isinstance(sig, float)}
TPlength = {key: len(sig) for key, sig in TPsignal.items() if not isinstance(sig, float)}

In [13]:
# Drop unusable segments from every signal dictionary and, in lockstep, from
# the matching group dictionary.
# Each row: (signal dict, group dict, exclusive upper bound on the length).
# An entry is kept only when it is not a bare float and 1 < len(entry) < bound.
for seg_signals, seg_groups, seg_bound in (
    (Psignal, Pgroup, 45),
    (PQsignal, PQgroup, 35),
    (QRSsignal, QRSgroup, 50),
    (STsignal, STgroup, 65),
    (Tsignal, Tgroup, 100),
    (TPsignal, TPgroup, 250),
):
    # Iterate over a snapshot of the keys so entries can be removed on the fly.
    for k in list(seg_signals.keys()):
        # The float test comes first so len() is never called on a float.
        if isinstance(seg_signals[k], float) or not (1 < len(seg_signals[k]) < seg_bound):
            seg_signals.pop(k)
            seg_groups.pop(k)

In [14]:
# Number of entries left in each group dictionary, one per line,
# in the order P, PQ, QRS, ST, T, TP.
print(*map(len, (Pgroup, PQgroup, QRSgroup, STgroup, Tgroup, TPgroup)), sep='\n')

18429
11628
12971
16200
15303
21236


# Save files

In [15]:
# Persist every signal/group dictionary as its own file under ./pickle.
pickle_dir = os.path.join('.','pickle')

# Create the output folder when it is missing so the cell also works on a
# fresh checkout; exist_ok=True makes this a no-op if it is already there.
os.makedirs(pickle_dir, exist_ok=True)

# (file name, object) pairs, written in the same order as before.
# utils.pickledump is a project helper, called as (object, path) exactly as
# the original cell did.
for pkl_name, pkl_obj in (
    ('Psignal.pkl', Psignal),
    ('Pgroup.pkl', Pgroup),
    ('PQsignal.pkl', PQsignal),
    ('PQgroup.pkl', PQgroup),
    ('QRSsignal.pkl', QRSsignal),
    ('QRSgroup.pkl', QRSgroup),
    ('STsignal.pkl', STsignal),
    ('STgroup.pkl', STgroup),
    ('Tsignal.pkl', Tsignal),
    ('Tgroup.pkl', Tgroup),
    ('TPsignal.pkl', TPsignal),
    ('TPgroup.pkl', TPgroup),
):
    utils.pickledump(pkl_obj, os.path.join(pickle_dir, pkl_name))