### Imports

In [1]:
import tensorflow as tf # Models ran in venv python 3.9.16 with GPU computing support
from tensorflow.keras.utils import plot_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import wfdb
import ast
import ecg_plot
import xgboost as xgb 
import tensorflow as tf # Models ran in venv python 3.9.16 with GPU computing support
import os
import scipy.io as sio

### PTB-XL Loading

In [3]:
# Sampling rate handed to the PTB-XL loader: 100 selects the `filename_lr`
# records, any other value selects the `filename_hr` records.
sr = 100
# When True, X_raw.npy / Y_raw.csv / Y_rawPlus.csv are read from disk instead
# of calling load_data().
quick_load = False
# Model persistence switches; not referenced in the loading cells below
# (presumably consumed by later modelling cells -- confirm).
save_model = False
load_model = True

In [13]:
with tf.device('/CPU:0'):
    # NOTE(review): this device context is only active while load_data is being
    # *defined*. The function is called outside of it and uses pandas / wfdb /
    # numpy only, so the context has no effect on the loading itself.
    def load_data():
        """Load the PTB-XL signals and their annotation table.

        Reads ``ptbxl_database.csv`` indexed by ``ecg_id``, parses the
        ``scp_codes`` column from its string form with ``ast.literal_eval``,
        reads every record's waveform with ``wfdb.rdsamp`` and adds a
        ``diagnostic_superclass`` column derived from ``scp_statements.csv``.

        The sampling rate is taken from the notebook-level ``sr`` setting.

        Returns:
            tuple: ``(X, Y)`` where ``X`` is a NumPy array built from the
            per-record signals (in the row order of ``Y``) and ``Y`` is the
            annotation DataFrame.
        """
        def load_raw_data(df, sampling_rate, path):
            """Read the waveform of every row in ``df`` into one array.

            ``sampling_rate == 100`` selects the ``filename_lr`` column; any
            other value selects ``filename_hr``. Each file name is appended
            to ``path`` before being passed to ``wfdb.rdsamp``.
            """
            filenames = df.filename_lr if sampling_rate == 100 else df.filename_hr
            # rdsamp yields a (signal, metadata) pair; only the signal is kept.
            return np.array([wfdb.rdsamp(path + f)[0] for f in filenames])

        # Load and convert annotation data. The database file is read exactly
        # once; scp_codes is stored as a Python-literal string in the CSV.
        Y = pd.read_csv('../data/PTBXL/ptbxl_database.csv', index_col='ecg_id')
        Y.scp_codes = Y.scp_codes.apply(ast.literal_eval)

        # Load raw signal data
        X = load_raw_data(Y, sr, '../data/PTBXL/')

        # Load scp_statements.csv for diagnostic aggregation, keeping only the
        # rows flagged as diagnostic statements.
        agg_df = pd.read_csv('../data/PTBXL/scp_statements.csv', index_col=0)
        agg_df = agg_df[agg_df.diagnostic == 1]

        def aggregate_diagnostic(y_dic):
            """Return the unique ``diagnostic_class`` values for one record.

            Keys of ``y_dic`` that are not in ``agg_df``'s index are ignored.
            The order of the returned list is unspecified (set-derived).
            """
            return list({
                agg_df.loc[key].diagnostic_class
                for key in y_dic
                if key in agg_df.index
            })

        # Apply diagnostic superclass
        Y['diagnostic_superclass'] = Y.scp_codes.apply(aggregate_diagnostic)

        return X, Y


# Choose how the PTB-XL signals and annotation tables are obtained.
if not quick_load:
    # Full path: build everything from the raw PTB-XL records.
    X_xl, Y_xl = load_data()
else:
    # Shortcut: read the .npy / .csv files directly from disk.
    # NOTE(review): Y_xlPlus is only bound on this branch, and these CSVs are
    # read without index_col or literal parsing, unlike what load_data()
    # returns -- confirm that downstream cells cope with both forms.
    X_xl = np.load('../data/PTBXL/X_raw.npy')
    Y_xl = pd.read_csv('../data/PTBXL/Y_raw.csv')
    Y_xlPlus = pd.read_csv('../data/PTBXLPlus/Y_rawPlus.csv')

### PTB-XL+ Loading

In [17]:
with tf.device('/CPU:0'):
    def load_delifeatures():
        """Read ``ecgdeli_features.csv`` from the PTBXLPlus data folder.

        Returns:
            pandas.DataFrame: the file contents as parsed by ``pd.read_csv``
            with default options.
        """
        return pd.read_csv('../data/PTBXLPlus/ecgdeli_features.csv')

# Keep the feature table at notebook level for the cells that follow.
deli_csv = load_delifeatures()

In [20]:
# Attach the PTB-XL annotations to the feature rows: the 'ecg_id' column of
# deli_csv is matched against the index of Y_xl, and only rows present on both
# sides are kept (inner join).
Y = deli_csv.join(other=Y_xl, how='inner', on='ecg_id')

In [30]:
# Show every column name of the joined table as a plain Python list.
Y.columns.tolist()

['ecg_id',
 'PQ_Int_I',
 'PQ_Int_I_iqr',
 'PQ_Int_I_count',
 'PQ_Int_II',
 'PQ_Int_II_iqr',
 'PQ_Int_II_count',
 'PQ_Int_III',
 'PQ_Int_III_iqr',
 'PQ_Int_III_count',
 'PQ_Int_V1',
 'PQ_Int_V1_iqr',
 'PQ_Int_V1_count',
 'PQ_Int_V2',
 'PQ_Int_V2_iqr',
 'PQ_Int_V2_count',
 'PQ_Int_V3',
 'PQ_Int_V3_iqr',
 'PQ_Int_V3_count',
 'PQ_Int_V4',
 'PQ_Int_V4_iqr',
 'PQ_Int_V4_count',
 'PQ_Int_V5',
 'PQ_Int_V5_iqr',
 'PQ_Int_V5_count',
 'PQ_Int_V6',
 'PQ_Int_V6_iqr',
 'PQ_Int_V6_count',
 'PQ_Int_aVF',
 'PQ_Int_aVF_iqr',
 'PQ_Int_aVF_count',
 'PQ_Int_aVL',
 'PQ_Int_aVL_iqr',
 'PQ_Int_aVL_count',
 'PQ_Int_aVR',
 'PQ_Int_aVR_iqr',
 'PQ_Int_aVR_count',
 'PQ_Int_Global',
 'PQ_Int_Global_iqr',
 'PQ_Int_Global_count',
 'PR_Int_I',
 'PR_Int_I_iqr',
 'PR_Int_I_count',
 'PR_Int_II',
 'PR_Int_II_iqr',
 'PR_Int_II_count',
 'PR_Int_III',
 'PR_Int_III_iqr',
 'PR_Int_III_count',
 'PR_Int_V1',
 'PR_Int_V1_iqr',
 'PR_Int_V1_count',
 'PR_Int_V2',
 'PR_Int_V2_iqr',
 'PR_Int_V2_count',
 'PR_Int_V3',
 'PR_Int_V3_iqr',


In [26]:
# Write the joined table to disk, leaving the row index out of the file.
Y.to_csv(path_or_buf='../data/PTBXLPlus/Y_rawPlus.csv', index=False)

# Read the same file straight back so Y reflects what is stored on disk.
# NOTE(review): CSV keeps only text, so cells holding dicts or lists (e.g.
# scp_codes / diagnostic_superclass when Y_xl came from load_data()) come back
# as strings after this reload -- confirm later cells expect that.
Y = pd.read_csv(filepath_or_buffer='../data/PTBXLPlus/Y_rawPlus.csv')