In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import os.path
import skimage
import skimage.segmentation
import sklearn.preprocessing
import sklearn.model_selection
import math
import shutil
import pathlib
import glob
import shutil
import uuid
import random
import platform
import torch
import torchvision
import numpy as np
import scipy as sp
import scipy.io
import scipy.signal
import pandas as pd
import networkx
import wfdb
import fleetfmt
import json
import tqdm
import dill
import pickle
import time
import matplotlib.pyplot as plt

import scipy.stats

import src.data
import src.reader

import sak
import sak.signal.wavelet
import sak.data
import sak.data.augmentation
import sak.visualization
import sak.visualization.signal
import sak.torch
import sak.torch.nn
import sak.torch.nn as nn
import sak.torch.train
import sak.torch.data
import sak.data.preprocessing
import sak.torch.models
import sak.torch.models.lego
import sak.torch.models.variational
import sak.torch.models.classification

from sak.signal import StandardHeader

def smooth(x: np.ndarray, window_size: int, conv_mode: str = 'same') -> np.ndarray:
    """Smooth a 1-D signal by convolving it with a Hamming window.

    The signal is edge-padded by ``window_size`` samples on both sides before
    the convolution and the same number of samples is trimmed afterwards, so
    with the default ``conv_mode='same'`` the output has the same length as
    ``x`` and is not affected by zero-padding at the borders.

    Args:
        x: 1-D array to smooth. It is copied, never modified in place.
        window_size: Length of the Hamming window in samples (>= 1).
        conv_mode: ``mode`` forwarded to ``np.convolve``.

    Returns:
        The smoothed signal.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if window_size == 1:
        # A one-sample window is the identity filter. Without this guard the
        # normalisation below divides by window_size//2 == 0 and yields inf.
        return np.copy(x)

    x = np.pad(np.copy(x),(window_size,window_size),'edge')
    # NOTE(review): window_size//2 only approximates the sum of the Hamming
    # window, so the filter gain is close to, but not exactly, 1. Kept as-is
    # to preserve the amplitudes existing callers see.
    window = np.hamming(window_size)/(window_size//2)
    x = np.convolve(x, window, mode=conv_mode)
    x = x[window_size:-window_size]
    return x

# Float32, 250Hz

In [3]:
import ishneholterlib
import struct

# Conversion settings for this run.
window = 2048                     # signals/windows with fewer samples than this are not stored
max_size = window*10              # signals longer than this are cut into windows of this length
window_step = max_size-window//4  # hop between windows: consecutive windows overlap by window//4 samples
target_fs = 250                   # sampling rate used to compute the resampled length of every signal
target_dtype = "float32"          # dtype the samples are cast to right before being written

print("Writing numpy content to a new Fleet file.")
with pathlib.Path(f"/media/guille/DADES/DADES/ECG/unsupervised_{target_dtype}_{target_fs}hz.fleet").open('wb') as fhandle, fleetfmt.FileWriter(fhandle) as writer:
    # --- MUSE: CSV files, one column per lead, source rate hard-coded to 500 ---
    print("MUSE dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/ECGData/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            tmp = pd.read_csv(file)
            tmp = tmp.round(3)
            tmp.columns = map(lambda x: str(x).upper(), tmp.columns)
            fs = 500
            hea = list(tmp.columns)
            # Resample along axis 0, then transpose so that iterating yields one row per column
            tmp = sak.signal.interpolate.interp1d(tmp.values,target_fs*tmp.shape[0]//fs,axis=0).T
            for k,value in zip(hea,tmp):
                if value.size < window: continue
                # fname[5:] drops the first five characters of the file name from the key
                writer.append(f"MUSE/{fname[5:]}/{k}###0", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"MUSE: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Brugada: ISHNE Holter files, windowed per lead ---
    print("Brugada dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Brugada/Databases/HUVR/*.ecg")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            ecg = ishneholterlib.Holter(file)
            ecg.load_data()
            fs = ecg.sr
            for lead in ecg.lead:
                k = lead.spec_str().upper()
                data = lead.data.copy()
                data = sak.signal.interpolate.interp1d(data,target_fs*data.size//fs)
                if data.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    data = skimage.util.view_as_windows(data,(max_size,),step=window_step)
                else:
                    data = data[None,]
                for i,value in enumerate(data):
                    # The first window of every lead is always dropped, so a recording
                    # that fits in a single window contributes nothing.
                    if i == 0:
                        continue
                    if value.size < window: continue
                    writer.append(f"Brugada/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Brugada: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Fallot: CSV files with a "TIME IN SEC." column used to derive fs ---
    print("Fallot dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Fallot/ECGs/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            tmp = pd.read_csv(file)
            tmp = tmp.round(3)
            tmp.columns = map(lambda x: str(x).upper(), tmp.columns)
            # Sampling rate = inverse of the (unique) time step between rows
            fs = 1/np.unique(np.round(np.diff(tmp["TIME IN SEC."].values),5))
            assert fs.size == 1, "check fs!!!"
            # Round before casting: plain int() truncates, so e.g. 499.9999 would become 499
            fs = int(round(fs[0]))
            hea = list(tmp.columns)
            tmp = sak.signal.interpolate.interp1d(tmp.values,target_fs*tmp.shape[0]//fs,axis=0).T
            for k,value in zip(hea,tmp):
                if k == "TIME IN SEC.": continue  # the time axis is not a signal
                if value.size < window: continue
                writer.append(f"Fallot/{fname}/{k}###0", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure (including a failed fs
            # assertion) and move on. `Exception` lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Fallot: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- HCM: header-less CSV files, columns named after StandardHeader ---
    print("HCM dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/HCMData/CSV/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # Drop the last 500 rows and divide by 1000 (presumably a unit conversion; TODO confirm)
            tmp = pd.read_csv(file,header=None)[:-500]/1000
            tmp.columns = map(lambda x: str(x).upper(), StandardHeader)
            fs = 500
            tmp = sak.signal.interpolate.interp1d(tmp.values,target_fs*tmp.shape[0]//fs,axis=0).T
            for k,value in zip(StandardHeader,tmp):
                if value.size < window: continue
                writer.append(f"HCM/{fname}/{k}###0", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"HCM: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Challenge: WFDB records located through their .mat files ---
    print("Challenge dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Challenge/*.mat")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # rdsamp takes the record path without extension
            sig,hea = wfdb.rdsamp(os.path.join(root,fname),return_res=16)
            hea["sig_name"] = list(map(lambda x: str(x).upper(), hea["sig_name"]))
            fs = hea["fs"]
            sig = sak.signal.interpolate.interp1d(sig,target_fs*sig.shape[0]//fs,axis=0).T
            for k,lead in zip(hea["sig_name"],sig):
                if lead.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    data = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    data = lead[None,]
                for i,value in enumerate(data):
                    if value.size < window: continue
                    writer.append(f"Challenge/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Challenge: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- LongQT: Philips XML exports, leads stored as comma-separated integers ---
    print("LongQT dataset")
    time.sleep(0.5)
    import xml.etree.ElementTree as ET
    ET.register_namespace("","http://www3.medical.philips.com")
    all_files = (glob.glob("/home/guille/DADES/DADES/ECG/LONG_QT/LongQT_*/*.XML") + 
                 glob.glob("/home/guille/DADES/DADES/ECG/LONG_QT/LongQT_*/*/*.XML"))
    # `head` and `namespace` are not referenced again in this cell; kept in case
    # other cells rely on them.
    head = "{http://www3.medical.philips.com}"
    namespace = {"xmlns:ns0": "http://www3.medical.philips.com",
                 "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
                 "xsi:schemaLocation": "http://www3.medical.philips.com PhilipsECG.xsd"}
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            tree = ET.parse(file)
            # Files that contain a FullDisclosure element are skipped on purpose
            if len(tree.findall("FullDisclosure")) != 0:
                continue
            fs = float(tree.findall("StripData/SampleRate")[0].text)
            ecg = {}
            for lead in tree.findall("StripData/WaveformData"):
                ecg[lead.get("lead").upper()] = np.array(lead.text.strip().split(","),dtype=int)
            # Stack the leads in StandardHeader order; a missing lead raises KeyError
            ecg = np.array([ecg[k] for k in StandardHeader])
            # Resample to target_fs. This used to hard-code 250, which stored this
            # dataset at the wrong rate whenever target_fs was set to another value.
            ecg = sak.signal.interpolate.interp1d(ecg,round(ecg.shape[1]*target_fs/fs))
            for k,lead in zip(StandardHeader,ecg):
                if lead.size > max_size:
                    data = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    data = lead[None,]
                for i,value in enumerate(data):
                    if value.size < window: continue
                    # Samples are divided by 1000 before storing (presumably a unit conversion; TODO confirm)
                    writer.append(f"LongQT/{fname}/{k}###{i}", (value/1000).astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"LongQT: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- THEW: ISHNE Holter files grouped in one sub-folder per study ---
    print("THEW dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/THEWProject/*/*.ecg")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # The name of the containing folder becomes part of the record key
            rt,stu_id = os.path.split(root)
            ecg = ishneholterlib.Holter(file,check_valid=False)
            ecg.load_data()
            fs = ecg.sr
            for lead in ecg.lead:
                k = lead.spec_str().upper()
                data = lead.data.copy()
                data = sak.signal.interpolate.interp1d(data,target_fs*data.size//fs)
                if data.size > max_size:
                    data = skimage.util.view_as_windows(data,(max_size,),step=window_step)
                else:
                    data = data[None,]
                for i,value in enumerate(data):
                    if value.size < window: continue
                    # Skip windows whose sample-to-sample change never reaches 1e-6 (flat signal)
                    if np.all(np.abs(np.diff(value)) < 1e-6): continue
                    writer.append(f"THEW/{stu_id}_{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"THEW: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- DeepFake: space-separated .asc files without header ---
    print("DeepFake dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/DeepFake/*.asc")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            ecg = pd.read_csv(file,header=None,sep=" ")
            fs = 500
            ecg = sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs)
            # Rows are paired with StandardHeader names by position
            for k,value in zip(StandardHeader,ecg):
                k = k.upper()
                if value.size < window: continue
                # Samples are divided by 1000 before storing (presumably a unit conversion; TODO confirm)
                writer.append(f"DeepFake/{fname}/{k}###0", (value/1000).astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"DeepFake: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- SoO: CSV-formatted .txt files; fs is looked up in DATABASE_MANUAL.csv ---
    print("SoO dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Delineator/SoO/RETAG/*.txt")
    db = pd.read_csv('/home/guille/DADES/DADES/ECG/Delineator/SoO/DATABASE_MANUAL.csv')
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            ecg = pd.read_csv(file,header=0,sep=",",index_col=0)
            hea = list(map(lambda x: str(x).upper(), ecg.columns))
            # The integer before the first "-" in the file name is matched against db["ID"]
            filt_id = (db["ID"] == int(fname.split("-")[0]))
            fs = db[filt_id]["Sampling_Freq"].values[0]
            # Round the resampled values, then divide by 2**15 (presumably a 16-bit scale; TODO confirm)
            ecg = np.round(sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs))/(2**15)
            for k,lead in zip(hea,ecg):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"SoO/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"SoO: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- EDB: PhysioNet WFDB records ---
    print("EDB dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/PhysioNet/EDB/*.dat")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # file[:-4] strips the ".dat" suffix: rdsamp takes the bare record name
            ecg,hea = wfdb.rdsamp(file[:-4])
            fs = hea["fs"]
            ecg = sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs)
            for k,lead in zip(hea["sig_name"],ecg):
                k = k.upper()
                if lead.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"EDB/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"EDB: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- SVDB: PhysioNet WFDB records ---
    print("SVDB dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/PhysioNet/SVDB/*.dat")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # file[:-4] strips the ".dat" suffix: rdsamp takes the bare record name
            ecg,hea = wfdb.rdsamp(file[:-4])
            fs = hea["fs"]
            ecg = sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs)
            for k,lead in zip(hea["sig_name"],ecg):
                k = k.upper()
                if lead.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"SVDB/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"SVDB: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- PredictAF: .NHS files nested two folder levels deep ---
    print("PredictAF dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/PredictAF/*/*/*.NHS")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # The two enclosing folder names become the record key
            rt,pat_id = os.path.split(root)
            rt,stu_id = os.path.split(rt)
            ecg,fs = src.reader.readNHS(file)
            # NOTE(review): the sibling sections pass `ecg.T` and iterate the result
            # directly; here the result is transposed back before the zip below.
            # Confirm against readNHS/interp1d that each `lead` is a full channel.
            ecg = sak.signal.interpolate.interp1d(ecg.T,int(target_fs*ecg.shape[0]//fs)).T
            for k,lead in zip(StandardHeader,ecg):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    # The first five windows of every lead are always dropped
                    if i < 5:
                        continue
                    if value.size < window: continue
                    writer.append(f"PredictAF/{stu_id}_{pat_id}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"PredictAF: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Zhejiang: CSV files with a header row, source rate hard-coded to 2000 ---
    print("Zhejiang dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/RubenDoste/ZhejiangDatabase/PVCVTRawECGData/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # Files named "onsets"/"offsets" are skipped: they are not read as signals
            if fname in ["onsets","offsets"]: continue
            ecg = pd.read_csv(file,header=0)
            hea = list(ecg.columns)
            fs = 2000
            # Round the resampled values, then divide by 2**13 (presumably an ADC scale; TODO confirm)
            ecg = np.round(sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs))/(2**13)
            for k,lead in zip(hea,ecg):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"Zhejiang/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Zhejiang: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- HUVR_CARTO: CARTO3 ECG exports, source rate hard-coded to 2000 ---
    print("HUVR_CARTO dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/EGMDelineator/Databases/CARTOEXPORT/*/*ECG_Export.txt")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            rt,pat_id = os.path.split(root)
            fs = 2000
            signal,header = src.reader.Read_CARTO3_ECG(file)
            # Channel names: drop everything from "(" onwards, remove spaces, upper-case
            channels = np.array(sak.map_upper([c.split("(")[0].replace(" ","") for c in header["Channels"]]))
            # Row index of each StandardHeader lead; a missing lead raises IndexError
            idx_header = np.array([np.where(channels == c)[0][0] for c in StandardHeader])
            # Round the resampled values, then divide by 2**10 (presumably an ADC scale; TODO confirm)
            signal = np.round(sak.signal.interpolate.interp1d(signal[idx_header],target_fs*signal.shape[1]//fs))/(2**10)
            for k,lead in zip(StandardHeader,signal):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"HUVR_CARTO/{pat_id}_{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"HUVR_CARTO: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

# Runs after the `with` block has exited, i.e. once the output file is closed.
# The short sleeps presumably keep this message from interleaving with the last progress bar.
time.sleep(0.5)
print("Done.")
time.sleep(0.5)
# Shell magic: print the on-disk size of every generated Fleet file
!du -sh /media/guille/DADES/DADES/ECG/unsupervised*.fleet*

Writing numpy content to a new Fleet file.
MUSE dataset


100%|██████████| 10646/10646 [01:55<00:00, 92.52it/s]


Brugada dataset


100%|██████████| 7/7 [01:39<00:00, 14.26s/it]


Fallot dataset


100%|██████████| 1941/1941 [00:19<00:00, 98.59it/s] 


HCM dataset


100%|██████████| 153/153 [00:01<00:00, 129.64it/s]


Challenge dataset


100%|██████████| 41704/41704 [06:03<00:00, 114.69it/s]


LongQT dataset


100%|██████████| 807/807 [00:07<00:00, 106.86it/s]


THEW dataset


100%|██████████| 1700/1700 [2:48:23<00:00,  5.94s/it]  


DeepFake dataset


 24%|██▍       | 36246/150000 [03:27<09:56, 190.64it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 70%|██████▉   | 14354/20618 [26:50<10:12, 10.22it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [12]:
# Manual check: list the on-disk size of every Fleet file generated so far
print("Done!")
!du -sh /media/guille/DADES/DADES/ECG/unsupervised*.fleet*

Done!
116G	/media/guille/DADES/DADES/ECG/unsupervised_float16_250hz.fleet
195G	/media/guille/DADES/DADES/ECG/unsupervised_float32_250hz.fleet
390G	/media/guille/DADES/DADES/ECG/unsupervised_float32_500hz.fleet


# Float32, 500Hz

In [4]:
import ishneholterlib
import struct

# Conversion settings for this run.
window = 2048                     # signals/windows with fewer samples than this are not stored
max_size = window*10              # signals longer than this are cut into windows of this length
window_step = max_size-window//4  # hop between windows: consecutive windows overlap by window//4 samples
target_fs = 500                   # sampling rate used to compute the resampled length of every signal
target_dtype = "float32"          # dtype the samples are cast to right before being written

print("Writing numpy content to a new Fleet file.")
with pathlib.Path(f"/media/guille/DADES/DADES/ECG/unsupervised_{target_dtype}_{target_fs}hz.fleet").open('wb') as fhandle, fleetfmt.FileWriter(fhandle) as writer:
    # --- MUSE: CSV files, one column per lead, source rate hard-coded to 500 ---
    print("MUSE dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/ECGData/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            tmp = pd.read_csv(file)
            tmp = tmp.round(3)
            tmp.columns = map(lambda x: str(x).upper(), tmp.columns)
            fs = 500
            hea = list(tmp.columns)
            # Resample along axis 0, then transpose so that iterating yields one row per column
            tmp = sak.signal.interpolate.interp1d(tmp.values,target_fs*tmp.shape[0]//fs,axis=0).T
            for k,value in zip(hea,tmp):
                if value.size < window: continue
                # fname[5:] drops the first five characters of the file name from the key
                writer.append(f"MUSE/{fname[5:]}/{k}###0", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"MUSE: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Brugada: ISHNE Holter files, windowed per lead ---
    print("Brugada dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Brugada/Databases/HUVR/*.ecg")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            ecg = ishneholterlib.Holter(file)
            ecg.load_data()
            fs = ecg.sr
            for lead in ecg.lead:
                k = lead.spec_str().upper()
                data = lead.data.copy()
                data = sak.signal.interpolate.interp1d(data,target_fs*data.size//fs)
                if data.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    data = skimage.util.view_as_windows(data,(max_size,),step=window_step)
                else:
                    data = data[None,]
                for i,value in enumerate(data):
                    # The first window of every lead is always dropped, so a recording
                    # that fits in a single window contributes nothing.
                    if i == 0:
                        continue
                    if value.size < window: continue
                    writer.append(f"Brugada/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Brugada: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Fallot: CSV files with a "TIME IN SEC." column used to derive fs ---
    print("Fallot dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Fallot/ECGs/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            tmp = pd.read_csv(file)
            tmp = tmp.round(3)
            tmp.columns = map(lambda x: str(x).upper(), tmp.columns)
            # Sampling rate = inverse of the (unique) time step between rows
            fs = 1/np.unique(np.round(np.diff(tmp["TIME IN SEC."].values),5))
            assert fs.size == 1, "check fs!!!"
            # Round before casting: plain int() truncates, so e.g. 499.9999 would become 499
            fs = int(round(fs[0]))
            hea = list(tmp.columns)
            tmp = sak.signal.interpolate.interp1d(tmp.values,target_fs*tmp.shape[0]//fs,axis=0).T
            for k,value in zip(hea,tmp):
                if k == "TIME IN SEC.": continue  # the time axis is not a signal
                if value.size < window: continue
                writer.append(f"Fallot/{fname}/{k}###0", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure (including a failed fs
            # assertion) and move on. `Exception` lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Fallot: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- HCM: header-less CSV files, columns named after StandardHeader ---
    print("HCM dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/HCMData/CSV/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # Drop the last 500 rows and divide by 1000 (presumably a unit conversion; TODO confirm)
            tmp = pd.read_csv(file,header=None)[:-500]/1000
            tmp.columns = map(lambda x: str(x).upper(), StandardHeader)
            fs = 500
            tmp = sak.signal.interpolate.interp1d(tmp.values,target_fs*tmp.shape[0]//fs,axis=0).T
            for k,value in zip(StandardHeader,tmp):
                if value.size < window: continue
                writer.append(f"HCM/{fname}/{k}###0", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"HCM: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Challenge: WFDB records located through their .mat files ---
    print("Challenge dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Challenge/*.mat")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # rdsamp takes the record path without extension
            sig,hea = wfdb.rdsamp(os.path.join(root,fname),return_res=16)
            hea["sig_name"] = list(map(lambda x: str(x).upper(), hea["sig_name"]))
            fs = hea["fs"]
            sig = sak.signal.interpolate.interp1d(sig,target_fs*sig.shape[0]//fs,axis=0).T
            for k,lead in zip(hea["sig_name"],sig):
                if lead.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    data = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    data = lead[None,]
                for i,value in enumerate(data):
                    if value.size < window: continue
                    writer.append(f"Challenge/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Challenge: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- LongQT: Philips XML exports, leads stored as comma-separated integers ---
    print("LongQT dataset")
    time.sleep(0.5)
    import xml.etree.ElementTree as ET
    ET.register_namespace("","http://www3.medical.philips.com")
    all_files = (glob.glob("/home/guille/DADES/DADES/ECG/LONG_QT/LongQT_*/*.XML") + 
                 glob.glob("/home/guille/DADES/DADES/ECG/LONG_QT/LongQT_*/*/*.XML"))
    # `head` and `namespace` are not referenced again in this cell; kept in case
    # other cells rely on them.
    head = "{http://www3.medical.philips.com}"
    namespace = {"xmlns:ns0": "http://www3.medical.philips.com",
                 "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
                 "xsi:schemaLocation": "http://www3.medical.philips.com PhilipsECG.xsd"}
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            tree = ET.parse(file)
            # Files that contain a FullDisclosure element are skipped on purpose
            if len(tree.findall("FullDisclosure")) != 0:
                continue
            fs = float(tree.findall("StripData/SampleRate")[0].text)
            ecg = {}
            for lead in tree.findall("StripData/WaveformData"):
                ecg[lead.get("lead").upper()] = np.array(lead.text.strip().split(","),dtype=int)
            # Stack the leads in StandardHeader order; a missing lead raises KeyError
            ecg = np.array([ecg[k] for k in StandardHeader])
            # Resample to target_fs. This used to hard-code 250, so with the
            # target_fs = 500 configured for this cell the LongQT signals were
            # stored at half the intended rate.
            ecg = sak.signal.interpolate.interp1d(ecg,round(ecg.shape[1]*target_fs/fs))
            for k,lead in zip(StandardHeader,ecg):
                if lead.size > max_size:
                    data = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    data = lead[None,]
                for i,value in enumerate(data):
                    if value.size < window: continue
                    # Samples are divided by 1000 before storing (presumably a unit conversion; TODO confirm)
                    writer.append(f"LongQT/{fname}/{k}###{i}", (value/1000).astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"LongQT: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- THEW: ISHNE Holter files grouped in one sub-folder per study ---
    print("THEW dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/THEWProject/*/*.ecg")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # The name of the containing folder becomes part of the record key
            rt,stu_id = os.path.split(root)
            ecg = ishneholterlib.Holter(file,check_valid=False)
            ecg.load_data()
            fs = ecg.sr
            for lead in ecg.lead:
                k = lead.spec_str().upper()
                data = lead.data.copy()
                data = sak.signal.interpolate.interp1d(data,target_fs*data.size//fs)
                if data.size > max_size:
                    data = skimage.util.view_as_windows(data,(max_size,),step=window_step)
                else:
                    data = data[None,]
                for i,value in enumerate(data):
                    if value.size < window: continue
                    # Skip windows whose sample-to-sample change never reaches 1e-6 (flat signal)
                    if np.all(np.abs(np.diff(value)) < 1e-6): continue
                    writer.append(f"THEW/{stu_id}_{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"THEW: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- DeepFake: space-separated .asc files without header ---
    print("DeepFake dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/DeepFake/*.asc")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            ecg = pd.read_csv(file,header=None,sep=" ")
            fs = 500
            ecg = sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs)
            # Rows are paired with StandardHeader names by position
            for k,value in zip(StandardHeader,ecg):
                k = k.upper()
                if value.size < window: continue
                # Samples are divided by 1000 before storing (presumably a unit conversion; TODO confirm)
                writer.append(f"DeepFake/{fname}/{k}###0", (value/1000).astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"DeepFake: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- SoO: CSV-formatted .txt files; fs is looked up in DATABASE_MANUAL.csv ---
    print("SoO dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/Delineator/SoO/RETAG/*.txt")
    db = pd.read_csv('/home/guille/DADES/DADES/ECG/Delineator/SoO/DATABASE_MANUAL.csv')
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            ecg = pd.read_csv(file,header=0,sep=",",index_col=0)
            hea = list(map(lambda x: str(x).upper(), ecg.columns))
            # The integer before the first "-" in the file name is matched against db["ID"]
            filt_id = (db["ID"] == int(fname.split("-")[0]))
            fs = db[filt_id]["Sampling_Freq"].values[0]
            # Round the resampled values, then divide by 2**15 (presumably a 16-bit scale; TODO confirm)
            ecg = np.round(sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs))/(2**15)
            for k,lead in zip(hea,ecg):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"SoO/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"SoO: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- EDB: PhysioNet WFDB records ---
    print("EDB dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/PhysioNet/EDB/*.dat")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # file[:-4] strips the ".dat" suffix: rdsamp takes the bare record name
            ecg,hea = wfdb.rdsamp(file[:-4])
            fs = hea["fs"]
            ecg = sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs)
            for k,lead in zip(hea["sig_name"],ecg):
                k = k.upper()
                if lead.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"EDB/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"EDB: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- SVDB: PhysioNet WFDB records ---
    print("SVDB dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/PhysioNet/SVDB/*.dat")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # file[:-4] strips the ".dat" suffix: rdsamp takes the bare record name
            ecg,hea = wfdb.rdsamp(file[:-4])
            fs = hea["fs"]
            ecg = sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs)
            for k,lead in zip(hea["sig_name"],ecg):
                k = k.upper()
                if lead.size > max_size:
                    # Overlapping windows of max_size samples, window_step apart
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"SVDB/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"SVDB: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- PredictAF: .NHS files nested two folder levels deep ---
    print("PredictAF dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/PredictAF/*/*/*.NHS")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # The two enclosing folder names become the record key
            rt,pat_id = os.path.split(root)
            rt,stu_id = os.path.split(rt)
            ecg,fs = src.reader.readNHS(file)
            # NOTE(review): the sibling sections pass `ecg.T` and iterate the result
            # directly; here the result is transposed back before the zip below.
            # Confirm against readNHS/interp1d that each `lead` is a full channel.
            ecg = sak.signal.interpolate.interp1d(ecg.T,int(target_fs*ecg.shape[0]//fs)).T
            for k,lead in zip(StandardHeader,ecg):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    # The first five windows of every lead are always dropped
                    if i < 5:
                        continue
                    if value.size < window: continue
                    writer.append(f"PredictAF/{stu_id}_{pat_id}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"PredictAF: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- Zhejiang: CSV files with a header row, source rate hard-coded to 2000 ---
    print("Zhejiang dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/ECG/RubenDoste/ZhejiangDatabase/PVCVTRawECGData/*.csv")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            # Files named "onsets"/"offsets" are skipped: they are not read as signals
            if fname in ["onsets","offsets"]: continue
            ecg = pd.read_csv(file,header=0)
            hea = list(ecg.columns)
            fs = 2000
            # Round the resampled values, then divide by 2**13 (presumably an ADC scale; TODO confirm)
            ecg = np.round(sak.signal.interpolate.interp1d(ecg.T,target_fs*ecg.shape[0]//fs))/(2**13)
            for k,lead in zip(hea,ecg):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"Zhejiang/{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"Zhejiang: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

    # --- HUVR_CARTO: CARTO3 ECG exports, source rate hard-coded to 2000 ---
    print("HUVR_CARTO dataset")
    time.sleep(0.5)
    all_files = glob.glob("/home/guille/DADES/DADES/EGMDelineator/Databases/CARTOEXPORT/*/*ECG_Export.txt")
    failed = []  # (path, error) for every file that could not be converted
    for file in tqdm.tqdm(all_files):
        try:
            root,fname,ext = sak.splitrfe(file)
            rt,pat_id = os.path.split(root)
            fs = 2000
            signal,header = src.reader.Read_CARTO3_ECG(file)
            # Channel names: drop everything from "(" onwards, remove spaces, upper-case
            channels = np.array(sak.map_upper([c.split("(")[0].replace(" ","") for c in header["Channels"]]))
            # Row index of each StandardHeader lead; a missing lead raises IndexError
            idx_header = np.array([np.where(channels == c)[0][0] for c in StandardHeader])
            # Round the resampled values, then divide by 2**10 (presumably an ADC scale; TODO confirm)
            signal = np.round(sak.signal.interpolate.interp1d(signal[idx_header],target_fs*signal.shape[1]//fs))/(2**10)
            for k,lead in zip(StandardHeader,signal):
                k = k.upper()
                if lead.size > max_size:
                    lead = skimage.util.view_as_windows(lead,(max_size,),step=window_step)
                else:
                    lead = lead[None,]
                for i,value in enumerate(lead):
                    if value.size < window: continue
                    writer.append(f"HUVR_CARTO/{pat_id}_{fname}/{k}###{i}", value.astype(target_dtype))
        except Exception as exc:
            # Best-effort conversion: remember the failure and move on.
            # `Exception` (unlike a bare `except`) lets Ctrl-C / SystemExit propagate.
            failed.append((file, repr(exc)))
    if failed:
        print(f"HUVR_CARTO: skipped {len(failed)} of {len(all_files)} files; first failure: {failed[0]}")

# Runs after the `with` block has exited, i.e. once the output file is closed.
# The short sleeps presumably keep this message from interleaving with the last progress bar.
time.sleep(0.5)
print("Done.")
time.sleep(0.5)
# Shell magic: print the on-disk size of every generated Fleet file
!du -sh /media/guille/DADES/DADES/ECG/unsupervised*.fleet*

Writing numpy content to a new Fleet file.
MUSE dataset


100%|██████████| 10646/10646 [01:53<00:00, 93.46it/s]


Brugada dataset


100%|██████████| 7/7 [02:44<00:00, 23.49s/it]


Fallot dataset


100%|██████████| 1941/1941 [00:19<00:00, 101.26it/s]


HCM dataset


100%|██████████| 153/153 [00:01<00:00, 101.60it/s]


Challenge dataset


 54%|█████▍    | 22547/41704 [03:15<02:28, 129.30it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 23%|██▎       | 4843/20618 [10:16<33:25,  7.87it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

