In [2]:
import pandas as pd
import numpy as np
import os
import csv
import pickle
from scipy import signal

import tensorflow as tf
from tensorflow import keras

# Report how many GPUs TensorFlow can see on this machine.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# Open a TF1-compat session with device-placement logging switched on
# (presumably the source of the "Device mapping" listing in the cell output).
# NOTE(review): `sess` is not referenced by any other visible cell and is
# never closed - confirm whether it is still needed.
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

Num GPUs Available:  2


2022-05-04 21:55:55.265757: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:62:00.0, compute capability: 7.5
/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:63:00.0, compute capability: 7.5



2022-05-04 21:55:57.836552: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 161 MB memory:  -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:62:00.0, compute capability: 7.5
2022-05-04 21:55:57.841708: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 9329 MB memory:  -> device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:63:00.0, compute capability: 7.5


# Test Train Split

In [3]:
# Project root: every relative 'data/...' path in this notebook is resolved
# against it. Defaults to the original cluster location; set the
# ECG_PROJECT_DIR environment variable to run the notebook somewhere else.
path = os.environ.get('ECG_PROJECT_DIR', '/project/GutIntelligenceLab/bwl3xy/ecg')
os.chdir(path)

In [4]:
# Load data from cleaned pickles. The paths are relative, so they depend on
# the working directory set with os.chdir earlier in the notebook.
#   database: table used below through its 'subclass' and 'strat_fold' columns
#   ecg_data: the ECG signals, converted to a NumPy array in a later cell
# NOTE(review): pd.read_pickle can execute arbitrary code while loading -
# only read pickle files produced by a trusted pipeline.
database = pd.read_pickle('data/database.pkl')
ecg_data = pd.read_pickle('data/ecg_data.pkl')

In [5]:
# Integer-encode the subclass labels: each distinct label gets the index of
# its first appearance in the column (label -> code).
# The column is overwritten in place, so re-running this cell on the already
# encoded column produces an int -> int identity mapping instead.
mapping = {
    name: code
    for code, name in enumerate(database['subclass'].unique())
}
database['subclass'] = database['subclass'].replace(mapping)
mapping

{'NORM': 0, 'MI': 1, 'STTC': 2, 'HYP': 3, 'CD': 4}

In [7]:
# Features: the loaded ECG data as a NumPy array, so it can be indexed with
# the fold index arrays built in the next cell.
X = np.array(ecg_data)
# Labels: the 'subclass' column, integer-encoded by the mapping cell above.
Y = np.array(database['subclass'])

In [8]:
# Split the records using the fold ids stored in database['strat_fold']
# (the folds provided by PTB-XL): folds 8 and 9 are used for validation,
# fold 10 for testing and every remaining fold for training.
val_fold = [8, 9]
test_fold = [10]

# np.where on a boolean mask returns a 1-tuple of positions, which is used
# directly as the index into X and Y below.
train_idx = np.where(~np.isin(database['strat_fold'], val_fold + test_fold))
val_idx = np.where(np.isin(database['strat_fold'], val_fold))
test_idx = np.where(np.isin(database['strat_fold'], test_fold))

# Keep each split as a Python list with one entry per record.
X_train, X_val, X_test = (list(X[idx]) for idx in (train_idx, val_idx, test_idx))
Y_train, Y_val, Y_test = (list(Y[idx]) for idx in (train_idx, val_idx, test_idx))

## Sliding Window

In [9]:
def sliding_window(array, label, sequence_length=250, sequence_stride=50):
    '''
    Cuts one recording into fixed-length, possibly overlapping windows.

    Windows start at 0, sequence_stride, 2*sequence_stride, ... and every
    window that fits completely inside the recording is returned, including
    the last one that ends exactly on the final sample.

    INPUTS:
        array: sequential data exposing `.shape`, either 1d (time,) or
               2d (channels, time); windows are cut along the time axis
        label: label of the recording, repeated once per window
        sequence_length: length of resulting sequences (must be >= 1)
        sequence_stride: stride between initial index of sequences (must be >= 1)

    OUTPUTS:
        (seqs, labels): two lists of equal length. `seqs` holds the window
        slices of `array` (plain slices, nothing is copied here) and `labels`
        holds `label` once per window. Both lists are empty when the
        recording is shorter than sequence_length.

    RAISES:
        ValueError: if sequence_length or sequence_stride is smaller than 1
    '''
    if sequence_length < 1 or sequence_stride < 1:
        raise ValueError(
            "sequence_length and sequence_stride must both be >= 1, got "
            "{} and {}".format(sequence_length, sequence_stride)
        )

    # 1d input has time on axis 0; anything else keeps time on axis 1.
    is_1d = len(array.shape) == 1
    n_samples = array.shape[0] if is_1d else array.shape[1]

    # Not even a single full window fits.
    if n_samples < sequence_length:
        return [], []

    # The "+ 1" counts the window starting at index 0; without it the last
    # window that fits would be dropped (and an input exactly
    # sequence_length long would yield no window at all).
    n_sequences = (n_samples - sequence_length) // sequence_stride + 1
    starts = range(0, n_sequences * sequence_stride, sequence_stride)

    if is_1d:
        seqs = [array[start:start + sequence_length] for start in starts]
    else:
        seqs = [array[:, start:start + sequence_length] for start in starts]

    return seqs, [label] * n_sequences

def apply_sliding_window(X, Y, sequence_length=250, sequence_stride=50):
    '''
    Runs sliding_window on every recording and flattens the results.

    INPUTS:
        X: indexable sequence of recordings
        Y: indexable sequence of labels, Y[i] belonging to X[i]
        sequence_length: window length forwarded to sliding_window
        sequence_stride: window stride forwarded to sliding_window

    OUTPUTS:
        (X_windows, Y_windows): flat lists holding the windows of all
        recordings, in recording order, and the label of each window
    '''
    X_windows, Y_windows = [], []
    for idx in range(len(X)):
        record_windows, record_labels = sliding_window(
            X[idx], Y[idx], sequence_length, sequence_stride
        )
        X_windows.extend(record_windows)
        Y_windows.extend(record_labels)
    return X_windows, Y_windows

In [10]:
# Window geometry (in samples) shared by the three splits.
sequence_length, sequence_stride = 250, 50

# Cut every split into windows; each window inherits its recording's label.
X_train_windows, Y_train_windows = apply_sliding_window(
    X_train, Y_train,
    sequence_length=sequence_length, sequence_stride=sequence_stride,
)
X_val_windows, Y_val_windows = apply_sliding_window(
    X_val, Y_val,
    sequence_length=sequence_length, sequence_stride=sequence_stride,
)
X_test_windows, Y_test_windows = apply_sliding_window(
    X_test, Y_test,
    sequence_length=sequence_length, sequence_stride=sequence_stride,
)

## Write to TFRecord

In [17]:
# Function to serialize ECG data
def to_TFRecord(file_path, data):
    '''
    Writes every element of `data` to one TFRecord file.

    Each element is serialized with tf.io.serialize_tensor and stored as its
    own record, in the order of `data`.

    INPUTS:
        file_path: path of the TFRecord file to create
        data: sequence supporting len() and integer indexing whose elements
              can be serialized by tf.io.serialize_tensor
    '''
    with tf.io.TFRecordWriter(file_path) as record_writer:
        for idx in range(len(data)):
            payload = tf.io.serialize_tensor(data[idx]).numpy()
            record_writer.write(payload)

In [18]:
# Writing the windowed signals is currently disabled (commented out);
# only the per-window labels are written by this cell.
# to_TFRecord('data/X_train.tfrecord', X_train_windows)
# to_TFRecord('data/X_val.tfrecord', X_val_windows)
# to_TFRecord('data/X_test.tfrecord', X_test_windows)

# One record per window label, in the same order as the matching
# X_*_windows list built by apply_sliding_window.
to_TFRecord('data/Y_train.tfrecord', Y_train_windows)
to_TFRecord('data/Y_val.tfrecord', Y_val_windows)
to_TFRecord('data/Y_test.tfrecord', Y_test_windows)

2022-05-04 21:37:03.067872: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9644 MB memory:  -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:62:00.0, compute capability: 7.5
2022-05-04 21:37:03.069486: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 9644 MB memory:  -> device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:63:00.0, compute capability: 7.5


# Spectrograms to TFRecords

In [28]:
def ecg_to_spec(ecg, nperseg=250, noverlap=50):
    '''
    Computes one spectrogram per lead of a single ECG recording.

    INPUTS:
        ecg: 2d array indexed as ecg[lead, :]
        nperseg: segment length passed to scipy.signal.spectrogram
        noverlap: segment overlap passed to scipy.signal.spectrogram

    OUTPUTS:
        numpy array stacking the spectrogram (Sxx) of every lead along a new
        first axis; the frequency and time vectors returned by scipy are
        discarded
    '''
    lead_specs = [
        signal.spectrogram(ecg[lead, :], nperseg=nperseg, noverlap=noverlap)[2]
        for lead in range(ecg.shape[0])
    ]
    return np.array(lead_specs)

def apply_ecg_to_spec(ecg_array, nperseg=50, noverlap=25):
    '''
    Converts every recording in `ecg_array` to spectrograms with ecg_to_spec.

    nperseg and noverlap are always forwarded explicitly, so the defaults of
    this function (50 / 25) apply rather than ecg_to_spec's own (250 / 50).

    INPUTS:
        ecg_array: iterable of recordings accepted by ecg_to_spec
        nperseg: segment length forwarded to ecg_to_spec
        noverlap: segment overlap forwarded to ecg_to_spec

    OUTPUTS:
        list with one spectrogram array per recording, in input order
    '''
    return [ecg_to_spec(recording, nperseg, noverlap) for recording in ecg_array]

In [30]:
# X_train_spec = apply_ecg_to_spec(X_train)
# X_val_spec = apply_ecg_to_spec(X_val)
# X_test_spec = apply_ecg_to_spec(X_test)

# X_train_spec_windows = apply_ecg_to_spec(X_train_windows, nperseg=50, noverlap=25)
# X_val_spec_windows = apply_ecg_to_spec(X_val_windows, nperseg=50, noverlap=25)
# X_test_spec_windows = apply_ecg_to_spec(X_test_windows, nperseg=50, noverlap=25)

In [32]:
# Shape check on the first training example only.
# The two spectrograms are computed here on the fly (nperseg=50, noverlap=25,
# i.e. the apply_ecg_to_spec settings) because the cell that would create
# X_train_spec / X_train_spec_windows is commented out; reading those names
# would raise NameError on a fresh kernel.
print("Raw signal:", X_train[0].shape)
print("Raw spectrogram:", ecg_to_spec(X_train[0], nperseg=50, noverlap=25).shape)
print('\n')
print("Window signal:", X_train_windows[0].shape)
print("Window spectrogram:", ecg_to_spec(X_train_windows[0], nperseg=50, noverlap=25).shape)

Raw signal: (12, 1000)
Raw spectrogram: (12, 26, 39)


Window signal: (12, 250)
Window spectrogram: (12, 26, 9)


In [50]:
def to_multiple_TFrecords(data, name_prefix, n_parts=10, data_dir='data'):
    '''
    Writes `data` to `n_parts` TFRecord shard files and returns their paths.

    The items are split into contiguous, nearly equal chunks with
    np.array_split; chunk k is written with to_TFRecord to
    "<data_dir>/<name_prefix>_<kk>.tfrecord", kk being k zero-padded to two
    digits.

    INPUTS:
        data: sequence supporting len() and integer indexing, with items
              accepted by to_TFRecord
        name_prefix: file name prefix shared by all shards
        n_parts: number of shard files; when it exceeds len(data) the surplus
                 shards are still created but contain no records
        data_dir: output directory, created when missing. Defaults to 'data',
                  the location that used to be hard-coded.

    OUTPUTS:
        list of the shard file paths, in shard order
    '''
    # An empty data_dir means "current directory"; os.makedirs('') would raise.
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)
    path_format = os.path.join(data_dir, "{}_{:02d}.tfrecord")

    filepaths = []
    index_chunks = np.array_split(np.arange(len(data)), n_parts)
    for file_idx, row_indices in enumerate(index_chunks):
        partial_tfr = path_format.format(name_prefix, file_idx)
        filepaths.append(partial_tfr)
        to_TFRecord(partial_tfr, [data[i] for i in row_indices])

    return filepaths

In [1]:
# to_multiple_TFrecords(X_train_spec, 'X_train_spec', n_parts=10)
# to_TFRecord('data/X_val_spec.tfrecord', X_val_spec)
# to_TFRecord('data/X_test_spec.tfrecord', X_test_spec)

# to_TFRecord('data/X_train_window_spec.tfrecord', X_train_spec_windows)
# to_TFRecord('data/X_val_window_spec.tfrecord', X_val_spec_windows)
# to_TFRecord('data/X_test_window_spec.tfrecord', X_test_spec_windows)