In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to local .py files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Select TensorFlow's GPU allocator through the environment; this is set
# before `import tensorflow` below.
# NOTE(review): presumably TF must see this before it initialises the GPU — confirm.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

import tensorflow as tf
import sys
import numpy as np
import pandas as pd

2024-01-15 14:06:59.655761: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-15 14:06:59.655788: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-15 14:06:59.657187: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-15 14:06:59.663433: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Local MIMIC-IV download (v2.2, per the directory name) and the root of the
# multimodal project directory.
mimic_iv_path = "/cis/home/charr165/Documents/physionet.org/mimiciv/2.2"
mm_dir = "/cis/home/charr165/Documents/multimodal"

# Folder that receives this notebook's output file; created if it does not exist yet.
output_dir = os.path.join(mm_dir, "preprocessing")
os.makedirs(output_dir, exist_ok=True)

In [4]:
# Hospital admissions: parse admission/discharge timestamps right after reading.
f_path = os.path.join(mimic_iv_path, "hosp", "admissions.csv")
admissions_df = pd.read_csv(f_path, low_memory=False).assign(
    admittime=lambda frame: pd.to_datetime(frame['admittime']),
    dischtime=lambda frame: pd.to_datetime(frame['dischtime']),
)

# ICU stays: parse the ICU in/out timestamps the same way.
icustays_df = pd.read_csv(
    os.path.join(mimic_iv_path, "icu", "icustays.csv"),
    low_memory=False,
).assign(
    intime=lambda frame: pd.to_datetime(frame['intime']),
    outtime=lambda frame: pd.to_datetime(frame['outtime']),
)

In [5]:
# Root of the local MIMIC-IV-ECG download (v1.0, per the directory name).
ecg_folder = '/cis/home/charr165/Documents/physionet.org/files/mimic-iv-ecg/1.0'

# Index of ECG records; `ecg_time` is parsed to datetimes for the time-window
# comparisons done later in the notebook.
records_list_df = pd.read_csv(os.path.join(ecg_folder, 'record_list.csv')).assign(
    ecg_time=lambda frame: pd.to_datetime(frame['ecg_time'])
)

In [6]:
def calc_time_delta_hrs(icu_intime, charttime):
    """Return the time from `icu_intime` to `charttime`, in hours.

    Args:
        icu_intime: Reference timestamp (start of the interval).
        charttime: Timestamp being measured (end of the interval).

    Returns:
        Elapsed hours as a float; negative when `charttime` is earlier than
        `icu_intime`.
    """
    seconds_per_hour = 3600
    elapsed = charttime - icu_intime
    return elapsed.total_seconds() / seconds_per_hour


# Build `out_df`: one row per (ICU stay, ECG record) pair where the ECG of the
# same subject was recorded inside the stay window [intime, outtime].
#
# This is done with a single merge instead of scanning `records_list_df` once
# per ICU stay and growing the result with `pd.concat` row by row, which is
# quadratic in the number of matches.

# Only the columns that are actually used; the helper position columns record
# the original row order of both tables so it can be restored after the merge.
stays = icustays_df[['subject_id', 'hadm_id', 'stay_id', 'intime', 'outtime']].assign(
    _stay_pos=np.arange(len(icustays_df))
)
ecgs = records_list_df[['subject_id', 'ecg_time', 'path']].assign(
    _ecg_pos=np.arange(len(records_list_df))
)

# Every ECG of a subject paired with every ICU stay of that subject.
candidates = stays.merge(ecgs, on='subject_id', how='inner')

# Keep ECGs taken during the stay (both bounds inclusive).
in_stay = (
    (candidates['ecg_time'] >= candidates['intime'])
    & (candidates['ecg_time'] <= candidates['outtime'])
)

# Same ordering as iterating stays in table order and, within each stay, ECGs
# in record-list order.
matched = candidates[in_stay].sort_values(['_stay_pos', '_ecg_pos'])

# Hours between ICU admission and the ECG (vectorised form of
# calc_time_delta_hrs(intime, ecg_time)).
icu_time_delta = (matched['ecg_time'] - matched['intime']).dt.total_seconds() / 3600

out_df = (
    matched
    .assign(icu_time_delta=icu_time_delta)
    [['subject_id', 'hadm_id', 'stay_id', 'icu_time_delta', 'ecg_time', 'path']]
    .reset_index(drop=True)
)

In [None]:
import wfdb

# Load the saved Keras encoder that is used below to embed each ECG.
# NOTE(review): the file name suggests a 256-dimensional embedding — confirm.
f_path = '/cis/home/charr165/Documents/PCM/project_files/models/attia_encoder_256.keras'
encoder = tf.keras.models.load_model(f_path)

def load_ecg(path, stop_index=4096):
    """Read a WFDB record and return the leading part of its physical signal.

    Args:
        path: Record path, passed unchanged to `wfdb.rdrecord`.
        stop_index: Number of leading rows of `p_signal` to keep (default 4096).

    Returns:
        `p_signal[:stop_index, :]` of the record that was read; fewer than
        `stop_index` rows if the record is shorter.
    """
    record = wfdb.rdrecord(path)
    return record.p_signal[:stop_index, :]

from tqdm import tqdm

# Unbatched variant: one encoder call per ECG record. The batched cell further
# down recomputes the same `embeddings` column.
ecg_embeddings = []
for ecg_rel_path in tqdm(out_df['path'], total=out_df.shape[0]):
    wf = load_ecg(os.path.join(ecg_folder, ecg_rel_path))
    # The encoder is fed a batch of one; keep only that single output row so
    # each cell holds a 1-D vector, the same layout the batched cell stores.
    ecg_embeddings.append(encoder.predict(wf.reshape(1, -1, 12), verbose=0)[0])

# Assign the whole column at once instead of writing arrays cell by cell into
# a column pre-filled with None.
out_df['embeddings'] = ecg_embeddings

In [8]:
import wfdb
import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm

# Load the saved Keras encoder that is used below to embed each ECG.
# NOTE(review): the file name suggests a 256-dimensional embedding — confirm.
f_path = '/cis/home/charr165/Documents/PCM/project_files/models/attia_encoder_256.keras'
encoder = tf.keras.models.load_model(f_path)

# Function to load ECG
def load_ecg(path, stop_index=4096):
    """Read a WFDB record and return the leading part of its physical signal.

    Args:
        path: Record path, passed unchanged to `wfdb.rdrecord`.
        stop_index: Number of leading rows of `p_signal` to keep (default 4096).

    Returns:
        `p_signal[:stop_index, :]` of the record that was read; fewer than
        `stop_index` rows if the record is shorter.
    """
    record = wfdb.rdrecord(path)
    return record.p_signal[:stop_index, :]

# Prepare for batch processing
batch_size = 32  # You can adjust the batch size depending on your GPU memory
ecg_paths = out_df['path'].tolist()
embedding_rows = []

# Process in batches. Slicing the path list by `batch_size` also yields the
# final, shorter batch, so no separate "flush the remainder" step is needed.
with tqdm(total=len(ecg_paths)) as progress:
    for start in range(0, len(ecg_paths), batch_size):
        batch_paths = ecg_paths[start:start + batch_size]

        # Each record becomes (1, samples, 12); stacking gives (n, samples, 12).
        # NOTE(review): np.vstack needs every record in a batch to have the same
        # number of samples — a record shorter than load_ecg's stop_index would
        # raise here; confirm none exist.
        batch_ecgs = np.vstack([
            load_ecg(os.path.join(ecg_folder, rel_path)).reshape(1, -1, 12)
            for rel_path in batch_paths
        ])

        # One forward pass per batch. predict_on_batch avoids the per-call
        # setup of Model.predict, which is meant for large inputs rather than
        # repeated calls inside a Python loop.
        embedding_rows.extend(encoder.predict_on_batch(batch_ecgs))
        progress.update(len(batch_paths))

# One 1-D embedding per row of out_df, in row order; assigned as a whole column
# instead of cell-by-cell writes into a None-filled column.
out_df['embeddings'] = embedding_rows


  0%|          | 31/72167 [00:00<25:17, 47.53it/s] 2024-01-15 14:11:33.893174: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
100%|██████████| 72167/72167 [1:12:38<00:00, 16.56it/s]


In [9]:
# Re-derive the same output folder that the setup cell near the top configured
# and created.
mm_dir = "/cis/home/charr165/Documents/multimodal"
output_dir = os.path.join(mm_dir, "preprocessing")

# Persist the per-ECG table (stay identifiers, timing, record path and
# embedding) as a pickle file.
out_df.to_pickle(os.path.join(output_dir, "ecg_embeddings_icu.pkl"))