In [1]:
import numpy as np
import pandas as pd
import wfdb
import os
import math
import time
from sklearn.model_selection import train_test_split

################################################################################################

##     ## ########    ###    ########  ######## ########  ##     ## ########  ########  ##    ## 
##     ## ##         ## ##   ##     ##    ##    ##     ## ##     ## ##     ## ##     ##  ##  ##  
##     ## ##        ##   ##  ##     ##    ##    ##     ## ##     ## ##     ## ##     ##   ####   
######### ######   ##     ## ########     ##    ########  ##     ## ##     ## ##     ##    ##    
##     ## ##       ######### ##   ##      ##    ##     ## ##     ## ##     ## ##     ##    ##    
##     ## ##       ##     ## ##    ##     ##    ##     ## ##     ## ##     ## ##     ##    ##    
##     ## ######## ##     ## ##     ##    ##    ########   #######  ########  ########     ##    

################################################################################################

RELPATH = "BIG_DATASET/" # Relative pathname to Physionet dataset

#==============
# LOAD RAW DATA
#==============

f = open(RELPATH + "RECORDS") # Open the file containing directory paths

lines = f.readlines() # Read file line by line

paths = [] # New list to hold our pathnames

for l in lines:
    paths.append(l.strip()) # Remove newline character and append to paths list

hea_files = [] # New list to hold all filenames with .hea extension

for path in paths: # Iterate through our paths
    for file in os.listdir("{}{}".format(RELPATH, path)): # List files in each path
        if file.endswith(".hea"): # Check if file has a .hea extension
            hea_files.append(RELPATH + path + file.strip(".hea")) # If so, append to hea_files list
            

      
def load_data(x, n):
    raw_data = []
    print("Loading signals {}-{}".format(x, x+n))
    for i in range(x, x+n):
        raw_data.append(wfdb.rdsamp(hea_files[i]))
    print("Done loading data.")
    return raw_data
    

#===============
# PREPROCESSING
#===============


diagnostics = []
ages = []
sex = []

# Separate raw_data into signals and meta_data

def separate_data(data):
    print("Parsing signals...")
    signals = np.empty((0,5000,12))
    for d in data: 
        signals = np.append(signals, [d[0]], axis=0) 
    print("Done parsing signals.")
    return signals

def separate_y_data(data):
    print("Parsing labels...")
    metadata = []
    for d in data: 
        metadata.append(d[1])
    print("Done parsing labels.")
    return metadata

# Parsing functions

def get_dx(dx):
    temp = dx[4:] # Remove Dx: from line
    temp = temp.split(",") # Split CSVs into list
    for i in range(0, len(temp)): # Convert to integers
        temp[i] = int(temp[i]) 
    return temp

def parse_age(a):
    temp = a[5:] # Remove Age: from line
    temp = float(temp)
    if math.isnan(temp):
        print("Error, NaN discovered, skipping.")
    else:
        return int(temp)
    
def parse_sex(s):
    temp = s[5:] # Remove Sex: from line
    sx = 1 if temp == 'Male' else 0 # Male = 1 Female = 0
    return sx

# Separate metadata into different categories

def get_diagnostics(metadata):
    diagnostics = []
    for m in metadata:
        dx = m.get('comments')[2] # Get Dx column in comments
        diagnostics.append(get_dx(dx))
    return diagnostics

def categories():
    for m in metadata:
        a = m.get('comments')[0] # Get age
        s = m.get('comments')[1] # Get sex
        dx = m.get('comments')[2] # Get Dx column in comments
        diagnostics.append(get_dx(dx))
        ages.append(parse_age(a))
        sex.append(parse_sex(s))

#============
# CONDITIONS
#============

conditions = pd.read_csv("{}ConditionNames_SNOMED-CT.csv".format(RELPATH)) # Load condition names/numbers
cond_acronyms = dict(zip(conditions['Snomed_CT'], conditions['Acronym Name'])) # Acronym dictionary
cond_names = dict(zip(conditions['Snomed_CT'], conditions['Full Name'])) # Name dictionary

def get_acronyms(l):
    temp = []
    for item in l:
        temp.append(cond_acronyms.get(item)) # Get cond acronym from diagnostic number
    return temp

def get_acronym(c):
    return cond_acronyms.get(c)

def get_names(l):
    temp = []
    for item in l:
        temp.append(cond_names.get(item)) # Get cond name from diagnostic number
    return temp

In [2]:
nums = [55827005, 164890007, 164934002, 426177001, 426783006, 427084000]
acros = get_acronym(nums)

for i in range(0, len(nums)):
    print(f'{nums[i]}: {acros[i]}')

TypeError: unhashable type: 'list'

In [24]:
first = load_data(200, 1)
metadata = separate_y_data(first)
x = separate_data(first)
dx = get_diagnostics(metadata)
acronyms = get_acronyms(dx[0])
x = x.reshape(1, 5000, 12, 1)
mask = ~np.isnan(x)
x = np.where(mask, x, -10)
acronyms

Loading signals 200-201
Done loading data.
Parsing labels...
Done parsing labels.
Parsing signals...
Done parsing signals.


['AFIB', 'QTIE', 'RBBB', 'UW']

In [6]:
import tensorflow as tf
model = tf.keras.models.load_model('BIG_DATASET/CNNS/CNN_AFIB.h5')
pred = model.predict(x)
pred

array([[6.0196705e-15, 1.0000000e+00]], dtype=float32)

In [3]:
diag_acronyms = []

for d in diagnostics:
    name = get_acronym(d)
    diag_acronyms.append(name)

In [7]:
x_matt = np.load('BIG_DATASET/XDATA/data-0.npy')
x_matt = x_matt[0]

np.save('BIG_DATASET/XDATA/data-100.npy', x_matt)

In [3]:
# TEST TRAIN SPLIT (FINALLY)

start = time.time()

paths = ['XDATA', 'YDATA', 'XTEST', 'YTEST', 'XVALIDATE', 'YVALIDATE']

# MAKE PATHS

for p in paths:
    fullpath = f'{RELPATH}{p}'
    if not os.path.exists(fullpath):
        os.makedirs(fullpath)

for i in range(0, 1):
    rd = load_data(i*500, 500)
    X = separate_data(rd) # XDATA
    metadata = separate_y_data(rd) # RAW Y DATA
    diagnostics = [] # DIAGNOSTICS
    for m in metadata:
        dx = m.get('comments')[2] # Get Dx column in comments
        diagnostics.append(get_dx(dx))
    y = [] # ACTUAL Y DATA I WANT TO SAVE
    for j in range(0, len(diagnostics)):
        if 427084000 in diagnostics[j]: # Sinus Tachycardia
            y.append(1)
        else:
            y.append(0)
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Split the training set into a validation set
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    
    # SAVE TO FILES
    np.save(RELPATH + 'XDATA/data-{}.npy'.format(i), np.asarray(X_train))
    np.save(RELPATH + 'YDATA/data-{}.npy'.format(i), np.asarray(y_train))
    np.save(RELPATH + 'XTEST/data-{}.npy'.format(i), np.asarray(X_test))
    np.save(RELPATH + 'YTEST/data-{}.npy'.format(i), np.asarray(y_test))
    np.save(RELPATH + 'XVALIDATE/data-{}.npy'.format(i), np.asarray(X_val))
    np.save(RELPATH + 'YVALIDATE/data-{}.npy'.format(i), np.asarray(y_val))
    #print("Data file saved. ({}/{})".format(i+1, 90))

print(f"All data saved in {time.time() - start} seconds")

Loading signals 0-1
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.


ValueError: With n_samples=1, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [4]:
# RESHAPE ALL DATA
ecgs = os.listdir("ECGS")

for i in range(0, len(ecgs)):
    tmp = np.load(f'ECGS/{ecgs[i]}')
    tmp = tmp.reshape(1, 5000, 12, 1)
    mask = ~np.isnan(tmp)
    tmp = np.where(mask, tmp, -10)
    np.save(f'ECGS/data-{i}.npy', tmp)

In [16]:
test = np.load("ECGS/data-0.npy")
test.shape

(1, 5000, 12, 1)

In [2]:
for i in range(0, 500):
    rd = load_data(i, 1)
    X = separate_data(rd)
    X = X.reshape(1, 5000, 12, 1)
    mask = ~np.isnan(X)
    X = np.where(mask, X, -10)
    np.save(f'ECGS/data-{i}.npy', X)

Loading signals 0-1
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 1-2
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 2-3
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 3-4
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 4-5
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 5-6
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 6-7
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 7-8
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 8-9
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 9-10
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 10-11
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 11-12
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 12-13
Done loading 

Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 105-106
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 106-107
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 107-108
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 108-109
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 109-110
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 110-111
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 111-112
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 112-113
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 113-114
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 114-115
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 115-116
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 

Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 209-210
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 210-211
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 211-212
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 212-213
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 213-214
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 214-215
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 215-216
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 216-217
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 217-218
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 218-219
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 219-220
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 

Loading signals 308-309
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 309-310
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 310-311
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 311-312
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 312-313
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 313-314
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 314-315
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 315-316
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 316-317
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 317-318
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 318-319
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 319-320
Done loading data.
Parsing signals...
Done parsing s

Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 409-410
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 410-411
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 411-412
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 412-413
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 413-414
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 414-415
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 415-416
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 416-417
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 417-418
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 418-419
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 419-420
Done loading data.
Parsing signals...
Done parsing signals.
Loading signals 

In [8]:
x = np.load('ECGS/data-99.npy')
x

array([[[-0.142,  0.039,  0.181, ...,  0.063,  0.044,  0.044],
        [-0.049,  0.01 ,  0.059, ...,  0.   , -0.039, -0.039],
        [-0.044, -0.068, -0.024, ..., -0.059, -0.102, -0.098],
        ...,
        [-0.059, -0.02 ,  0.039, ..., -0.039, -0.034, -0.029],
        [-0.137, -0.034,  0.102, ..., -0.01 , -0.005, -0.005],
        [-0.083,  0.01 ,  0.093, ..., -0.01 , -0.005,  0.   ]]])

In [6]:
# LOAD GOOD X AND Y DATA INTO FILES

start = 0
stop = 90

for i in range(start, stop):
    rd = load_data(i*500, 500) # Load first 10,000
    signals = separate_data(rd)
    if np.any(np.nan(signals)):
        print(f"Skipping bad file data-{i}.npy")
    else:
        np.save(RELPATH + 'XDATA/data-{}.npy'.format(i), np.asarray(signals))
        print("Data file saved. ({}/{})".format(i+1, stop))

Loading signals 0-500
Done loading data.
Parsing signals...
Done parsing signals.


TypeError: 'float' object is not callable

In [24]:
# LOAD X DATA SEPARATED BY SEX

def make_xdata():
    # Make paths if they don't exist
    malepath = RELPATH + 'XDATA/MALE'
    femalepath = RELPATH + 'XDATA/FEMALE'
    if not os.path.exists(malepath):
        os.makedirs(malepath)
    if not os.path.exists(femalepath):
        os.makedirs(femalepath)
        
    for i in range(0, 70):
        rd = load_data(i*500, 500) # Load first 10,000
        signals = separate_data(rd)
        metadata = separate_y_data(rd)
        sexes = []
        x_male = []
        x_female = []
        for m in metadata:
            sex = m.get('comments')[1] # Get sex column in comments
            sexes.append(parse_sex(sex))
        for j in range(0, len(signals)):
            if sexes[j] == 1:
                x_male.append(signals[j])
            else:
                x_female.append(signals[j])
        np.save(malepath + '/data-{}.npy'.format(i), np.asarray(x_male))
        np.save(femalepath + '/data-{}.npy'.format(i), np.asarray(x_female))
        print("Data file saved. ({}/{})".format(i+1, stop))

def make_xtest():
    # Make paths if they don't exist
    malepath = RELPATH + 'XTEST/MALE'
    femalepath = RELPATH + 'XTEST/FEMALE'
    if not os.path.exists(malepath):
        os.makedirs(malepath)
    if not os.path.exists(femalepath):
        os.makedirs(femalepath)
        
    for i in range(70, 80):
        rd = load_data(i*500, 500) # Load first 10,000
        signals = separate_data(rd)
        metadata = separate_y_data(rd)
        sexes = []
        x_male = []
        x_female = []
        for m in metadata:
            sex = m.get('comments')[1] # Get sex column in comments
            sexes.append(parse_sex(sex))
        for j in range(0, len(signals)):
            if sexes[j] == 1:
                x_male.append(signals[j])
            else:
                x_female.append(signals[j])
        np.save(malepath + '/data-{}.npy'.format(i), np.asarray(x_male))
        np.save(femalepath + '/data-{}.npy'.format(i), np.asarray(x_female))
        print("Data file saved. ({}/{})".format(i+1, stop))

def make_xval():
    # Make paths if they don't exist
    malepath = RELPATH + 'XVALIDATE/MALE'
    femalepath = RELPATH + 'XVALIDATE/FEMALE'
    if not os.path.exists(malepath):
        os.makedirs(malepath)
    if not os.path.exists(femalepath):
        os.makedirs(femalepath)
        
    for i in range(80, 90):
        rd = load_data(i*500, 500) # Load first 10,000
        signals = separate_data(rd)
        metadata = separate_y_data(rd)
        sexes = []
        x_male = []
        x_female = []
        for m in metadata:
            sex = m.get('comments')[1] # Get sex column in comments
            sexes.append(parse_sex(sex))
        for j in range(0, len(signals)):
            if sexes[j] == 1:
                x_male.append(signals[j])
            else:
                x_female.append(signals[j])
        np.save(malepath + '/data-{}.npy'.format(i), np.asarray(x_male))
        np.save(femalepath + '/data-{}.npy'.format(i), np.asarray(x_female))
        print("Data file saved. ({}/{})".format(i+1, stop))

start = time.time()
make_xdata()
make_xtest()
make_xval()
print("All X data exported in {} seconds".format(time.time() - start))

Loading signals 0-500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (1/90)
Loading signals 500-1000
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (2/90)
Loading signals 1000-1500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (3/90)
Loading signals 1500-2000
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (4/90)
Loading signals 2000-2500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (5/90)
Loading signals 2500-3000
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (6/90)
Loading signals 3000-3500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done pa

Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (55/90)
Loading signals 27500-28000
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (56/90)
Loading signals 28000-28500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (57/90)
Loading signals 28500-29000
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (58/90)
Loading signals 29000-29500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (59/90)
Loading signals 29500-30000
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (60/90)
Loading signals 30000-30500
Done loading data.
Parsing signals...
Done parsing signals.
Parsing labels...
Done parsing labels.
Data file saved. (61/90)
Lo

In [39]:
xtest = np.load("BIG_DATASET/XDATA/CRBBB/FEMALE/data-0.npy")
xtest.shape

(194, 5000, 12)

In [2]:
# LOAD ONE CONDITION Y DATA INTO FILES

start = 0
stop = 90
snomed_conds = [164889003, 713427006, 55827005, 164890007, 427084000, 426177001, 428750005]
#426783006,
#164890007,
#427084000,
#164934002
#1: [] Sinus Bradycardia (SB) - 16559 (36.80%)
#2: [426783006] Sinus Rhythm (SR) - 8125 (18.06%)
#3: [] Atrial Flutter (AF) - 8060 (17.91%)
#4: [] Sinus Tachycardia (ST) - 7255 (16.12%)
#5: [164934002] T wave Change (TWC) - 7043 (15.65%)
#6: [] left ventricle hypertrophy (LVH) - 5401 (12.00%)
#7: [55930002] Electrocardiograpic ST segment changes (STTC) - 4227 (9.39%)
#8: [59931005] T wave opposite (TWO) - 2876 (6.39%)
#9: [427393009] Sinus Irregularity (SA) - 2550 (5.67%)
#10: [] Atrial Fibrillation (AFIB) - 1780 (3.96%)
#11: [429622005] ST drop down (STDD) - 1665 (3.70%)
#12: [39732003] Axis left shift (ALS) - 1545 (3.43%)
#13: [284470004] Premature Atrial Complex (APB) - 1312 (2.92%)
#14: [10370003] Rhythm from artificial pacing (ARTRHYM) - 1181 (2.62%)
#15: [] ST-T Change (STTC) - 1158 (2.57%)
#16: [270492004] 1 degree atrioventricular block (1AVB) - 1140 (2.53%)
#17: [] Complete right bundle branch block (CRBBB) - 1096 (2.44%)
#18: [427172004] Premature ventricular contractions (PVC) - 1091 (2.42%)
#19: [164917005] abnormal Q wave (AQW) - 1063 (2.36%)
#20: [251146004] lower voltage QRS in all lead (LVQRSAL) - 1042 (2.32%)
#cond = 713427006 # Snomed Code

def make_ydata(cond):
    # Make paths if they don't exist
    condpath = RELPATH + 'YDATA/{}'.format(get_acronym(cond))
    if not os.path.exists(condpath):
        os.makedirs(condpath)
        
    for i in range(0, 70):
        rd = load_data(i*500, 500) # Load first 10,000
        metadata = separate_y_data(rd)
        diagnostics = []
        for m in metadata:
            dx = m.get('comments')[2] # Get Dx column in comments
            diagnostics.append(get_dx(dx))
        y = []
        for j in range(0, len(diagnostics)):
            if cond in diagnostics[j]:
                y.append(1)
            else:
                y.append(0)
                
        np.save(f'{condpath}/data-{i}.npy', np.asarray(y))
        print("Data file saved. ({}/{})".format(i+1, stop))
    print(f"{get_acronym(cond)} YDATA export finished.")
    
def make_ytest(cond):
    # Make paths if they don't exist
    condpath = RELPATH + 'YTEST/{}'.format(get_acronym(cond))
    if not os.path.exists(condpath):
        os.makedirs(condpath)
        
    for i in range(70, 80):
        rd = load_data(i*500, 500) # Load first 10,000
        metadata = separate_y_data(rd)
        diagnostics = []
        for m in metadata:
            dx = m.get('comments')[2] # Get Dx column in comments
            diagnostics.append(get_dx(dx))
        y = []
        for j in range(0, len(diagnostics)):
            if cond in diagnostics[j]:
                y.append(1)
            else:
                y.append(0)
                
        np.save(f'{condpath}/data-{i}.npy', np.asarray(y))
        print("Data file saved. ({}/{})".format(i+1, stop))
    print(f"{get_acronym(cond)} YTEST export finished.")
        
def make_yval(cond):
    # Make paths if they don't exist
    condpath = RELPATH + 'YVALIDATE/{}'.format(get_acronym(cond))
    if not os.path.exists(condpath):
        os.makedirs(condpath)
        
    for i in range(80, 90):
        rd = load_data(i*500, 500) # Load first 10,000
        metadata = separate_y_data(rd)
        diagnostics = []
        for m in metadata:
            dx = m.get('comments')[2] # Get Dx column in comments
            diagnostics.append(get_dx(dx))
        y = []
        for j in range(0, len(diagnostics)):
            if cond in diagnostics[j]:
                y.append(1)
            else:
                y.append(0)
                
        np.save(f'{condpath}/data-{i}.npy', np.asarray(y))
        print("Data file saved. ({}/{})".format(i+1, stop))
    print(f"{get_acronym(cond)} YVALIDATE export finished.")
    
start = time.time()
for c in snomed_conds:
    make_ydata(c)
    make_ytest(c)
    make_yval(c)
print(f'Done exporting {len(snomed_conds)} conditions in {time.time()-start} seconds.')

Loading signals 0-500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (1/90)
Loading signals 500-1000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (2/90)
Loading signals 1000-1500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (3/90)
Loading signals 1500-2000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (4/90)
Loading signals 2000-2500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (5/90)
Loading signals 2500-3000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (6/90)
Loading signals 3000-3500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (7/90)
Loading signals 3500-4000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (8/90)
Loading signals 4000-4500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (9/90)
Loading signals 4500-5000
Done lo

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (75/90)
Loading signals 37500-38000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (76/90)
Loading signals 38000-38500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (77/90)
Loading signals 38500-39000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (78/90)
Loading signals 39000-39500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (79/90)
Loading signals 39500-40000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (80/90)
AFIB YTEST export finished.
Loading signals 40000-40500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (81/90)
Loading signals 40500-41000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (82/90)
Loading signals 41000-41500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (83/90)
L

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (59/90)
Loading signals 29500-30000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (60/90)
Loading signals 30000-30500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (61/90)
Loading signals 30500-31000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (62/90)
Loading signals 31000-31500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (63/90)
Loading signals 31500-32000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (64/90)
Loading signals 32000-32500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (65/90)
Loading signals 32500-33000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (66/90)
Loading signals 33000-33500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (67/90)
Loading signals 33500-34000
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (43/90)
Loading signals 21500-22000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (44/90)
Loading signals 22000-22500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (45/90)
Loading signals 22500-23000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (46/90)
Loading signals 23000-23500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (47/90)
Loading signals 23500-24000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (48/90)
Loading signals 24000-24500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (49/90)
Loading signals 24500-25000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (50/90)
Loading signals 25000-25500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (51/90)
Loading signals 25500-26000
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (27/90)
Loading signals 13500-14000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (28/90)
Loading signals 14000-14500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (29/90)
Loading signals 14500-15000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (30/90)
Loading signals 15000-15500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (31/90)
Loading signals 15500-16000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (32/90)
Loading signals 16000-16500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (33/90)
Loading signals 16500-17000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (34/90)
Loading signals 17000-17500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (35/90)
Loading signals 17500-18000
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (11/90)
Loading signals 5500-6000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (12/90)
Loading signals 6000-6500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (13/90)
Loading signals 6500-7000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (14/90)
Loading signals 7000-7500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (15/90)
Loading signals 7500-8000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (16/90)
Loading signals 8000-8500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (17/90)
Loading signals 8500-9000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (18/90)
Loading signals 9000-9500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (19/90)
Loading signals 9500-10000
Done loading data.

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (85/90)
Loading signals 42500-43000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (86/90)
Loading signals 43000-43500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (87/90)
Loading signals 43500-44000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (88/90)
Loading signals 44000-44500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (89/90)
Loading signals 44500-45000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (90/90)
ST YVALIDATE export finished.
Loading signals 0-500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (1/90)
Loading signals 500-1000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (2/90)
Loading signals 1000-1500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (3/90)
Loading signa

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (70/90)
SB YDATA export finished.
Loading signals 35000-35500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (71/90)
Loading signals 35500-36000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (72/90)
Loading signals 36000-36500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (73/90)
Loading signals 36500-37000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (74/90)
Loading signals 37000-37500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (75/90)
Loading signals 37500-38000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (76/90)
Loading signals 38000-38500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (77/90)
Loading signals 38500-39000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (78/90)
Loa

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (54/90)
Loading signals 27000-27500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (55/90)
Loading signals 27500-28000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (56/90)
Loading signals 28000-28500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (57/90)
Loading signals 28500-29000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (58/90)
Loading signals 29000-29500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (59/90)
Loading signals 29500-30000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (60/90)
Loading signals 30000-30500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (61/90)
Loading signals 30500-31000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (62/90)
Loading signals 31000-31500
D

In [3]:
# LOAD Y DATA SEPARATED BY SEX

#cond = 713427006 # Snomed Code
# md[0] = Age
# md[1] = Sex
# md[2] = Dx

snomed_conds = [426177001,
426783006,
164890007,
427084000,
164934002]

# 55827005,
# 55930002,
# 59931005,
# 427393009,
# 164889003 


def make_ydata(cond):
    # Make paths if they don't exist
    malepath = RELPATH + 'YDATA/{}/MALE'.format(get_acronym(cond))
    femalepath = RELPATH + 'YDATA/{}/FEMALE'.format(get_acronym(cond))
    if not os.path.exists(malepath):
        os.makedirs(malepath)
    if not os.path.exists(femalepath):
        os.makedirs(femalepath)

    for i in range(0, 70):
        rd = load_data(i*500, 500) # Load first 10,000
        metadata = separate_y_data(rd)
        diagnostics = []
        sexes = []
        for m in metadata:
            sex = m.get('comments')[1] # Get sex column in comments
            sexes.append(parse_sex(sex))
            dx = m.get('comments')[2] # Get Dx column in comments
            diagnostics.append(get_dx(dx))
        y_male = []
        y_female = []
        for j in range(0, len(diagnostics)):
            if cond in diagnostics[j]:
                if sexes[j] == 1:
                    y_male.append(1)
                else:
                    y_female.append(1)
            else:
                if sexes[j] == 1:
                    y_male.append(0)
                else:
                    y_female.append(0)
                
        np.save(malepath + '/data-{}.npy'.format(i), np.asarray(y_male))
        np.save(femalepath + '/data-{}.npy'.format(i), np.asarray(y_female))
        print("Data file saved. ({}/{})".format(i+1, 90))
    
def make_ytest(cond):
    # Make paths if they don't exist
    malepath = RELPATH + 'YTEST/{}/MALE'.format(get_acronym(cond))
    femalepath = RELPATH + 'YTEST/{}/FEMALE'.format(get_acronym(cond))
    if not os.path.exists(malepath):
        os.makedirs(malepath)
    if not os.path.exists(femalepath):
        os.makedirs(femalepath)
    for i in range(70, 80):
        rd = load_data(i*500, 500) # Load first 10,000
        metadata = separate_y_data(rd)
        diagnostics = []
        sexes = []
        for m in metadata:
            sex = m.get('comments')[1] # Get sex column in comments
            sexes.append(parse_sex(sex))
            dx = m.get('comments')[2] # Get Dx column in comments
            diagnostics.append(get_dx(dx))
        y_male = []
        y_female = []
        for j in range(0, len(diagnostics)):
            if cond in diagnostics[j]:
                if sexes[j] == 1:
                    y_male.append(1)
                else:
                    y_female.append(1)
            else:
                if sexes[j] == 1:
                    y_male.append(0)
                else:
                    y_female.append(0)
                
        np.save(malepath + '/data-{}.npy'.format(i), np.asarray(y_male))
        np.save(femalepath + '/data-{}.npy'.format(i), np.asarray(y_female))
        print("Data file saved. ({}/{})".format(i+1, 90))
        
def make_yval(cond):
    # Make paths if they don't exist
    malepath = RELPATH + 'YVALIDATE/{}/MALE'.format(get_acronym(cond))
    femalepath = RELPATH + 'YVALIDATE/{}/FEMALE'.format(get_acronym(cond))
    if not os.path.exists(malepath):
        os.makedirs(malepath)
    if not os.path.exists(femalepath):
        os.makedirs(femalepath)
        
    for i in range(80, 90):
        rd = load_data(i*500, 500) # Load first 10,000
        metadata = separate_y_data(rd)
        diagnostics = []
        sexes = []
        for m in metadata:
            sex = m.get('comments')[1] # Get sex column in comments
            sexes.append(parse_sex(sex))
            dx = m.get('comments')[2] # Get Dx column in comments
            diagnostics.append(get_dx(dx))
        y_male = []
        y_female = []
        for j in range(0, len(diagnostics)):
            if cond in diagnostics[j]:
                if sexes[j] == 1:
                    y_male.append(1)
                else:
                    y_female.append(1)
            else:
                if sexes[j] == 1:
                    y_male.append(0)
                else:
                    y_female.append(0)
                
        np.save(malepath + '/data-{}.npy'.format(i), np.asarray(y_male))
        np.save(femalepath + '/data-{}.npy'.format(i), np.asarray(y_female))
        print("Data file saved. ({}/{})".format(i+1, 90))

start = time.time()
for c in snomed_conds:
    make_ydata(c)
    make_ytest(c)
    make_yval(c)
print(f"Done exporting 5 conditions in {time.time() - start} seconds.".format(time.time() - start))

Loading signals 0-500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (1/90)
Loading signals 500-1000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (2/90)
Loading signals 1000-1500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (3/90)
Loading signals 1500-2000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (4/90)
Loading signals 2000-2500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (5/90)
Loading signals 2500-3000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (6/90)
Loading signals 3000-3500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (7/90)
Loading signals 3500-4000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (8/90)
Loading signals 4000-4500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (9/90)
Loading signals 4500-5000
Done lo

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (76/90)
Loading signals 38000-38500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (77/90)
Loading signals 38500-39000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (78/90)
Loading signals 39000-39500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (79/90)
Loading signals 39500-40000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (80/90)
Loading signals 40000-40500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (81/90)
Loading signals 40500-41000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (82/90)
Loading signals 41000-41500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (83/90)
Loading signals 41500-42000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (84/90)
Loading signals 42000-42500
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (61/90)
Loading signals 30500-31000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (62/90)
Loading signals 31000-31500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (63/90)
Loading signals 31500-32000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (64/90)
Loading signals 32000-32500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (65/90)
Loading signals 32500-33000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (66/90)
Loading signals 33000-33500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (67/90)
Loading signals 33500-34000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (68/90)
Loading signals 34000-34500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (69/90)
Loading signals 34500-35000
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (46/90)
Loading signals 23000-23500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (47/90)
Loading signals 23500-24000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (48/90)
Loading signals 24000-24500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (49/90)
Loading signals 24500-25000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (50/90)
Loading signals 25000-25500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (51/90)
Loading signals 25500-26000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (52/90)
Loading signals 26000-26500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (53/90)
Loading signals 26500-27000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (54/90)
Loading signals 27000-27500
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (31/90)
Loading signals 15500-16000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (32/90)
Loading signals 16000-16500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (33/90)
Loading signals 16500-17000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (34/90)
Loading signals 17000-17500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (35/90)
Loading signals 17500-18000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (36/90)
Loading signals 18000-18500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (37/90)
Loading signals 18500-19000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (38/90)
Loading signals 19000-19500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (39/90)
Loading signals 19500-20000
D

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (16/90)
Loading signals 8000-8500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (17/90)
Loading signals 8500-9000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (18/90)
Loading signals 9000-9500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (19/90)
Loading signals 9500-10000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (20/90)
Loading signals 10000-10500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (21/90)
Loading signals 10500-11000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (22/90)
Loading signals 11000-11500
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (23/90)
Loading signals 11500-12000
Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (24/90)
Loading signals 12000-12500
Done loa

Done loading data.
Parsing labels...
Done parsing labels.
Data file saved. (90/90)
Done exporting 5 conditions in 856.0510852336884 seconds.


In [19]:
ytest = np.load("BIG_DATASET/YTEST/data-2.npy")

(100,)

In [11]:
unique, count

(array([0, 1]), array([93,  7], dtype=int64))

14270

In [134]:
761+19969

20730