# MIMIC-IV ECG and Potassium Level Analysis

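This notebook matches MIMIC-IV-ECG waveform records to serum potassium measurements from MIMIC-IV: each potassium lab value is paired with the same patient's ECG that is closest in time, provided the two fall within a configurable time window.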

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import wfdb
from pathlib import Path
import re
from datetime import datetime
from tqdm import tqdm

## 2. Define MIMIC4CSVMatcher Class

In [None]:
class MIMIC4CSVMatcher:
    """Pair MIMIC-IV potassium lab results with MIMIC-IV-ECG records using CSV exports."""

    def __init__(self, mimic_path, ecg_path):
        """
        Initialize matcher for MIMIC-IV using CSV files
        Args:
            mimic_path (str): Path to MIMIC-IV base directory containing hosp, icu modules
            ecg_path (str): Path to MIMIC-IV-ECG directory
        """
        self.mimic_path = Path(mimic_path)
        self.ecg_path = Path(ecg_path)

    def _table_path(self, filename, modules):
        """Return the path of ``filename`` inside the first module directory that contains it."""
        candidates = [self.mimic_path / module / filename for module in modules]
        for candidate in candidates:
            if candidate.exists():
                return candidate
        # Nothing found: hand back the preferred location so that read_csv
        # raises its usual FileNotFoundError for that path.
        return candidates[0]

    def load_clinical_data(self):
        """Load the clinical tables and the ECG record list from CSV files.

        ``admissions.csv`` and ``transfers.csv`` are looked up under ``core/``
        first and then under ``hosp/``.
        NOTE(review): newer MIMIC-IV releases are understood to ship these
        tables in ``hosp``; confirm against the release in use.

        Returns:
            tuple: ``(labevents, admissions, transfers, ecg_records)`` as
            DataFrames. The timestamp columns listed below are converted to
            datetime when present; ``ecg_records`` is returned as read.

        Raises:
            FileNotFoundError: If one of the CSV files cannot be found.
        """
        print("Loading clinical data...")

        # Load required tables from different modules
        labevents = pd.read_csv(self.mimic_path / 'hosp' / 'labevents_potassium.csv')
        admissions = pd.read_csv(self._table_path('admissions.csv', ('core', 'hosp')))
        transfers = pd.read_csv(self._table_path('transfers.csv', ('core', 'hosp')))
        ecg_records = pd.read_csv(self.ecg_path / 'record_list.csv')

        # Pair each frame with its timestamp columns explicitly instead of
        # looking the frames up by name through locals().
        time_columns = (
            (labevents, ('charttime',)),
            (admissions, ('admittime', 'dischtime', 'deathtime')),
            (transfers, ('intime', 'outtime')),
        )

        for frame, columns in time_columns:
            for column in columns:
                # Columns missing from a given export are skipped silently.
                if column in frame.columns:
                    frame[column] = pd.to_datetime(frame[column])

        return labevents, admissions, transfers, ecg_records

## 3. Implement Data Processing Methods

In [None]:
    def get_potassium_data(self, labevents, admissions):
        """Return potassium results joined to their admission, with K+ level labels.

        Args:
            labevents (pd.DataFrame): Lab events; must provide ``subject_id``,
                ``hadm_id``, ``itemid`` and ``valuenum``.
            admissions (pd.DataFrame): Admissions; must provide ``subject_id``,
                ``hadm_id``, ``admittime``, ``dischtime`` and ``deathtime``.

        Returns:
            pd.DataFrame: One row per retained potassium result that has a
            matching admission (inner join), with these added columns:

            * ``k_level_label``: ``'hypokalemia'`` (< 3.5), ``'hyperkalemia'``
              (>= 5.5) or ``'normal'``.
            * ``k_level_numeric``: 0, 1 or 2 for hypokalemia, normal and
              hyperkalemia respectively.
            * ``mort_hosp``: 1 when ``deathtime`` is set, else 0. Added only
              if the input did not already carry a ``mort_hosp`` column.
        """
        print("Processing potassium measurements...")

        # MIMIC-IV Lab Item IDs for potassium
        potassium_itemids = [50971]  # POTASSIUM serum

        # Keep potassium rows with a numeric value in the range (0, 30];
        # anything outside that range is discarded.
        values = labevents['valuenum']
        keep = (
            labevents['itemid'].isin(potassium_itemids)
            & values.notna()
            & (values > 0)
            & (values <= 30)
        )
        potassium_labs = labevents[keep]

        # Inner join: results without a matching admission are dropped.
        merged_data = pd.merge(
            potassium_labs,
            admissions[['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime']],
            on=['subject_id', 'hadm_id'],
            how='inner'
        )

        # Label every value in one vectorized pass; the first matching
        # condition wins and everything else is 'normal'.
        k_values = merged_data['valuenum']
        merged_data['k_level_label'] = np.select(
            [k_values < 3.5, k_values >= 5.5],
            ['hypokalemia', 'hyperkalemia'],
            default='normal',
        )
        k_level_map = {'hypokalemia': 0, 'normal': 1, 'hyperkalemia': 2}
        merged_data['k_level_numeric'] = merged_data['k_level_label'].map(k_level_map)

        # find_matching_records reads 'mort_hosp' from every row, so make sure
        # the column exists. A value supplied by the input CSV is kept as is.
        # NOTE(review): assumes a non-null admissions.deathtime means the
        # patient died during that hospital stay -- confirm.
        if 'mort_hosp' not in merged_data.columns:
            merged_data['mort_hosp'] = merged_data['deathtime'].notna().astype(int)

        return merged_data

## 4. Implement Waveform Loading

In [None]:
    def load_waveform_data(self, record_path):
        """Load ECG waveform data"""
        try:
            record = wfdb.rdrecord(str(self.ecg_path / record_path))
            leads = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
            leads_upper = [lead.upper() for lead in record.sig_name]
            lead_indices = [leads_upper.index(lead.upper()) for lead in leads]
            signals = record.p_signal[:, lead_indices]
            return signals, record.fs, leads
        except Exception as e:
            print(f"Error loading record {record_path}: {str(e)}")
            return None, None, None

## 5. Implement Record Matching

In [None]:
    def find_matching_records(self, time_threshold_hours=3):
        """Pair each potassium measurement with the same subject's nearest ECG.

        For every potassium result, the ECG of the same ``subject_id`` whose
        ``ecg_time`` is closest to the lab ``charttime`` is selected. The pair
        is kept when the absolute time gap is at most ``time_threshold_hours``
        and the waveform loads successfully.

        Args:
            time_threshold_hours (float): Largest accepted gap, in hours,
                between the lab draw and the ECG.

        Returns:
            list[dict]: One dict per match with the keys ``subject_id``,
            ``record_path``, ``Potassium (serum)``, ``k_level_label``,
            ``k_level_numeric``, ``mort_hosp``, ``time_diff_hours``,
            ``signals``, ``fs``, ``leads``, ``lab_time`` and ``ecg_time``.
            ``mort_hosp`` is taken from the potassium data when that column
            exists; otherwise it is 1/0 depending on whether ``deathtime`` is
            set, or ``None`` when neither column is available.
        """
        # Load clinical data (the transfers table is loaded but not used here)
        labevents, admissions, _transfers, ecg_records = self.load_clinical_data()
        potassium_data = self.get_potassium_data(labevents, admissions)

        matches = []
        print("Finding matches...")

        # Index the ECG records by subject so each lab row only scans its own patient
        ecg_records['ecg_time'] = pd.to_datetime(ecg_records['ecg_time'])
        ecg_by_subject = dict(iter(ecg_records.groupby('subject_id')))

        # Decide once how the mortality flag can be obtained, so a missing
        # 'mort_hosp' column no longer raises KeyError inside the loop.
        has_mort_hosp = 'mort_hosp' in potassium_data.columns
        has_deathtime = 'deathtime' in potassium_data.columns

        # Find matches
        for _, row in tqdm(potassium_data.iterrows(), total=len(potassium_data)):
            subject_id = row['subject_id']
            subject_ecgs = ecg_by_subject.get(subject_id)
            if subject_ecgs is None:
                continue

            # Missing timestamps on either side yield NaT gaps; drop them so
            # idxmin never has to pick from an all-missing series.
            time_diffs = (subject_ecgs['ecg_time'] - row['charttime']).abs().dropna()
            if time_diffs.empty:
                continue

            closest_idx = time_diffs.idxmin()
            time_diff_hours = time_diffs[closest_idx].total_seconds() / 3600
            if time_diff_hours > time_threshold_hours:
                continue

            ecg_record = subject_ecgs.loc[closest_idx]
            signals, fs, leads = self.load_waveform_data(ecg_record['path'])
            if signals is None:
                # The loader already reported the failure; skip this pair.
                continue

            if has_mort_hosp:
                mort_hosp = row['mort_hosp']
            elif has_deathtime:
                # NOTE(review): assumes a non-null deathtime means death
                # during the hospital stay -- confirm.
                mort_hosp = int(pd.notna(row['deathtime']))
            else:
                mort_hosp = None

            matches.append({
                'subject_id': subject_id,
                'record_path': ecg_record['path'],
                'Potassium (serum)': row['valuenum'],
                'k_level_label': row['k_level_label'],
                'k_level_numeric': row['k_level_numeric'],
                'mort_hosp': mort_hosp,
                'time_diff_hours': time_diff_hours,
                'signals': signals,
                'fs': fs,
                'leads': leads,
                'lab_time': row['charttime'],
                'ecg_time': ecg_record['ecg_time']
            })

        return matches

## 6. Run Analysis

In [None]:
# Set paths
# Placeholder locations: replace with the local MIMIC-IV base directory and
# the MIMIC-IV-ECG directory before running this cell.
mimic_path = "/path/to/mimic"
ecg_path = "/path/to/ecg"

# Initialize matcher
matcher = MIMIC4CSVMatcher(mimic_path, ecg_path)

# Find matches
# Uses a 1-hour window rather than the method's default of 3 hours.
# `matches` is a list of dicts, one per matched potassium/ECG pair.
matches = matcher.find_matching_records(time_threshold_hours=1)

## 7. Analyze Results

In [None]:
# Report how many potassium/ECG pairs were found
print(f"\nFound {len(matches)} matching records")

if matches:
    # Gap between each lab draw and its matched ECG, in hours
    time_diffs = [match['time_diff_hours'] for match in matches]
    print(f"Average time difference: {np.mean(time_diffs):.2f} hours")

    # Tally every potassium category; a category with no cases reports 0
    k_labels = [match['k_level_label'] for match in matches]
    label_counts = {
        category: k_labels.count(category)
        for category in ('hypokalemia', 'normal', 'hyperkalemia')
    }

    print("\nPotassium Level Distribution:")
    for label in label_counts:
        count = label_counts[label]
        print(f"{label}: {count} cases ({count / len(k_labels) * 100:.1f}%)")

    # Persist the matches. The list of dicts is stored as a pickled object
    # array, so reading it back requires np.load(..., allow_pickle=True).
    np.save('mimic4_matched_data_with_labels.npy', matches)