Per-Patient ECG Analysis with SBS Scores

In [2]:
# Import Modules
import os
import pandas as pd
import numpy as np
from scipy.io import loadmat, savemat
import matplotlib.pyplot as plt
import neurokit2 as nk

In [3]:
def load_from_excel(file_path):
    """Read an Excel workbook into a DataFrame and list its column names.

    Parameters
    ----------
    file_path : str or path-like
        Location of the workbook; forwarded unchanged to ``pd.read_excel``.

    Returns
    -------
    tuple
        ``(frame, column_names)`` where ``frame`` is the DataFrame returned by
        ``pd.read_excel`` and ``column_names`` is a plain Python list of its
        column labels, in sheet order.
    """
    frame = pd.read_excel(file_path)
    column_names = frame.columns.tolist()
    return frame, column_names

In [4]:
# Initialize Variables
#
# Root folder holding one sub-folder per patient. The default is the original
# author's local path; set the PEDACCEL_ECG_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
data_dir = os.environ.get(
    'PEDACCEL_ECG_DATA_DIR',
    'C:/Users/sidha/OneDrive/Sid Stuff/PROJECTS/iMEDS Design Team/Data Analysis/PedAccel/ECG/PICU_ECG_Extract',
)

# Sampling rate handed to NeuroKit2 as `sampling_rate` (samples per second).
original_sampling_rate = 250

# Analysis window around each SBS score, both in minutes: the window opens
# `lead_time` minutes before the score timestamp and spans `window_size`
# minutes in total (so it closes `window_size - lead_time` minutes after it).
lead_time = 10
window_size = 15

In [6]:
# Loop through patient data files and iteratively store SBS scores and ECG data in .mat file for each patient
#
# For every patient folder under `data_dir`:
#   1. Read `<patient>_SBS_Scores_ECG.xlsx` and drop rows without an SBS score.
#   2. Build an analysis window per score: it opens `lead_time` minutes before
#      the score timestamp and spans `window_size` minutes in total.
#   3. Search the patient's waveform chunks (.mat) for the first one whose
#      timestamps contain the window start, cut the three ECG leads to the
#      window, save NeuroKit2 ECG and HRV figures as PNG, and append the score,
#      window bounds and ECG segments to `<patient>_ECG.mat`.
#
# NOTE: the output .mat file is only initialised when it does not exist yet, so
# re-running this cell appends to (and therefore duplicates) earlier results.
# Delete `<patient>_ECG.mat` first when a clean rebuild is wanted.
# NOTE: every waveform chunk is re-read from disk for every SBS row until a
# match is found; this keeps only one chunk in memory at a time.

for patient in os.listdir(data_dir):
    patient_dir = os.path.join(data_dir, patient)

    # Stray files next to the patient folders (e.g. desktop.ini) are not
    # patients; skip them instead of failing on the missing-workbook check.
    if not os.path.isdir(patient_dir):
        print(f"Skipping {patient}: not a patient directory")
        continue

    print(f"Processing {patient}")
    print(patient_dir)

    sbs_file = os.path.join(patient_dir, f'{patient}_SBS_Scores_ECG.xlsx')
    if not os.path.isfile(sbs_file):
        raise FileNotFoundError(f'EPIC file not found: {sbs_file}')

    print(sbs_file)

    epic_data, epic_names = load_from_excel(sbs_file)

    # Keep scored rows only, then derive the analysis window for each score.
    epic_data.dropna(subset=['SBS'], inplace=True)
    epic_data['dts'] = pd.to_datetime(epic_data['Time_uniform'], format='mixed')
    epic_data['start_time'] = epic_data['dts'] - pd.Timedelta(lead_time, 'minutes')
    epic_data['end_time'] = epic_data['dts'] + pd.Timedelta(window_size - lead_time, 'minutes')
    print(len(epic_data))

    total_sbs_scores = len(epic_data)
    score_count = 0

    # Initialize the .mat file with empty arrays
    output_mat_name = f'{patient}_ECG.mat'
    mat_file_path = os.path.join(patient_dir, output_mat_name)
    if not os.path.exists(mat_file_path):
        initial_data = {
            'sbs_score': np.array([]),
            'start_time': np.array([], dtype='datetime64[ns]'),
            'end_time': np.array([], dtype='datetime64[ns]'),
            'ecg1': np.array([]),
            'ecg2': np.array([]),
            'ecg3': np.array([])
        }
        savemat(mat_file_path, initial_data)

    # The set of waveform chunks does not change while this patient is being
    # processed (only PNGs and the output .mat are written), so list them once.
    # Sorting makes the search order independent of the file system.
    chunk_files = sorted(
        name for name in os.listdir(patient_dir)
        if name.endswith('.mat') and name != output_mat_name
    )

    for index, row in epic_data.iterrows():
        start_time = row['start_time']
        end_time = row['end_time']
        sbs_score = row['SBS']

        for file in chunk_files:
            print(f"Processing {file}")
            raw_data = loadmat(os.path.join(patient_dir, file))

            time = raw_data['Time'].squeeze()
            relative_time = raw_data['Relative Time (sec)'].squeeze()
            ecg_1 = raw_data['GE_WAVE_ECG_1_ID'].squeeze()
            ecg_2 = raw_data['GE_WAVE_ECG_2_ID'].squeeze()
            ecg_3 = raw_data['GE_WAVE_ECG_3_ID'].squeeze()
            time_uniform = np.array(raw_data['Time_uniform'][0]).squeeze()

            # Each entry wraps its timestamp string in a one-element container;
            # unwrap before parsing.
            time_uniform_list = [item[0] for item in time_uniform.flatten()]
            time_uniform_f = pd.to_datetime(time_uniform_list, format='%m/%d/%Y %I:%M:%S %p')

            # A chunk is used only when it contains the exact window start.
            if start_time not in time_uniform_f:
                continue

            print("Found SBS Score")
            mask = (time_uniform_f >= start_time) & (time_uniform_f <= end_time)
            ecg_1_segment = ecg_1[mask]
            ecg_2_segment = ecg_2[mask]
            ecg_3_segment = ecg_3[mask]

            # Process ECG and create plots
            l1_signal, l1_info = nk.ecg_process(np.array(ecg_1_segment), sampling_rate=original_sampling_rate)
            l2_signal, l2_info = nk.ecg_process(np.array(ecg_2_segment), sampling_rate=original_sampling_rate)
            l3_signal, l3_info = nk.ecg_process(np.array(ecg_3_segment), sampling_rate=original_sampling_rate)

            # Plot and save ECG for leads 1, 2, 3
            for lead, signal, info in [(1, l1_signal, l1_info), (2, l2_signal, l2_info), (3, l3_signal, l3_info)]:
                nk.ecg_plot(signal, info)
                fig = plt.gcf()
                fig.suptitle(f'ECG Lead {lead}', fontsize=16)
                fig.set_size_inches(10, 12, forward=True)
                fig.savefig(os.path.join(patient_dir, f"ECG_Lead{lead}_{patient}_{score_count}_SBS{sbs_score}.png"))
                plt.close(fig)  # figures are only needed on disk; free the memory

            # HRV from lead 1. The cleaned signal is still sampled at
            # `original_sampling_rate` (it is never resampled), so peak detection
            # and HRV must use that same rate; a different value would rescale
            # every RR interval and distort all HRV indices.
            plt.rc('font', size=8)
            peaks, info = nk.ecg_peaks(l1_signal["ECG_Clean"], sampling_rate=original_sampling_rate, correct_artifacts=True)
            hrv_indices = nk.hrv(peaks, sampling_rate=original_sampling_rate, show=True)
            fig = plt.gcf()
            fig.savefig(os.path.join(patient_dir, f"HRV_plot_{patient}_{score_count}_SBS{sbs_score}.png"), dpi=300, bbox_inches='tight')
            plt.close(fig)

            score_count += 1

            # Load existing data
            existing_data = loadmat(mat_file_path)

            # Convert start_time and end_time to object dtype
            existing_data['start_time'] = existing_data['start_time'].astype('object')
            existing_data['end_time'] = existing_data['end_time'].astype('object')

            # Append new data
            existing_data['sbs_score'] = np.append(existing_data['sbs_score'], sbs_score)
            existing_data['start_time'] = np.append(existing_data['start_time'], start_time.to_datetime64())
            existing_data['end_time'] = np.append(existing_data['end_time'], end_time.to_datetime64())

            # Handle ECG data: one row per SBS score in each lead's matrix
            for ecg_key, ecg_segment in [('ecg1', ecg_1_segment), ('ecg2', ecg_2_segment), ('ecg3', ecg_3_segment)]:
                ecg_segment = ecg_segment.reshape(1, -1)  # Ensure 2D array
                if existing_data[ecg_key].size == 0:
                    existing_data[ecg_key] = ecg_segment
                else:
                    # Rows must share a length to be stacked, so both the stored
                    # matrix and the new segment are truncated to the shorter one.
                    min_cols = min(existing_data[ecg_key].shape[1], ecg_segment.shape[1])
                    existing_data[ecg_key] = np.vstack((existing_data[ecg_key][:, :min_cols], ecg_segment[:, :min_cols]))

            # Ensure each column in existing_data is np.array
            for key in existing_data:
                if not isinstance(existing_data[key], np.ndarray):
                    existing_data[key] = np.array(existing_data[key])

            # Save updated data
            savemat(mat_file_path, existing_data)
            print(f"Updated data for patient {patient}, SBS score {score_count}")

            break  # Stop searching chunks once this SBS score has been handled

        if score_count >= total_sbs_scores:
            print(f"All {total_sbs_scores} SBS scores processed for patient {patient}")
            break  # Break the outer loop if all SBS scores have been processed

print("Processing complete for all patients")

Processing Patient3
C:/Users/sidha/OneDrive/Sid Stuff/PROJECTS/iMEDS Design Team/Data Analysis/PedAccel/ECG/PICU_ECG_Extract\Patient3
C:/Users/sidha/OneDrive/Sid Stuff/PROJECTS/iMEDS Design Team/Data Analysis/PedAccel/ECG/PICU_ECG_Extract\Patient3\Patient3_SBS_Scores_ECG.xlsx
11
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Found SBS Score
Updated data for patient Patient3, SBS score 1
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Found SBS Score
Updated data for patient Patient3, SBS score 2
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Found SBS Score


  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2


Updated data for patient Patient3, SBS score 3
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Found SBS Score
Updated data for patient Patient3, SBS score 4
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Found SBS Score


  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2


Updated data for patient Patient3, SBS score 5
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk1.mat
Found SBS Score
Updated data for patient Patient3, SBS score 6
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk1.mat
Found SBS Score
Updated data for patient Patient3, SBS score 7
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk1.mat
Found SBS Score
Updated data for patient Patient3, SBS score 8
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk1.mat
Found SBS Score
Updated data for patient Patient3, SBS score 9
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Processing Patient3_Event_Row_1_Data_zero_or

  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2


Updated data for patient Patient3, SBS score 10
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk0.mat
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk1.mat
Processing Patient3_Event_Row_1_Data_zero_order_interpolation_chunk2.mat
Found SBS Score


  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2
  mrrs /= th2


Updated data for patient Patient3, SBS score 11
All 11 SBS scores processed for patient Patient3
Processing complete for all patients
