In [12]:
from pathlib import Path, PurePosixPath
import wfdb
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
from collections import defaultdict
import posixpath

In [10]:
# each subject may be associated with multiple records
database_name = 'mimic3wdb'
subjects = wfdb.get_record_list(database_name)
print(f"The '{database_name}' database contains data from {len(subjects)} subjects")

The 'mimic3wdb' database contains data from 67830 subjects


In [13]:
# store all available records path. load database(wfdb.get_record_list), then load subjects, and load study

database_name = 'mimic3wdb'
all_records = wfdb.get_record_list(database_name)

max_records_to_load = int(0.005 * len(all_records))

random_records = random.sample(all_records, max_records_to_load)

records = [PurePosixPath(record) for record in random_records]

print(f"Loaded {len(records)} random records from the '{database_name}' database.")


Loaded 339 random records from the 'mimic3wdb' database.


In [15]:
# set all ECG leads, then calculate each leads time
required_sigs = {'AVF', 'AVL', 'AVR', 'I', 'II', 'III', 'MCL', 'MCL1', 'V', 'V1', 'V2'}

lead_duration = defaultdict(int)

for record in tqdm(records):
    # read signals
    record_name = record.name
    record_path = posixpath.join(database_name, record.parent, record_name)

    try:
        record_data = wfdb.rdheader(record_name, pn_dir=record_path, rd_segments=True)
    except FileNotFoundError:
        print(f"Record {record_name} not found, skipping...")
        continue  # Skip to the next record if this one is not found

    segments = record_data.seg_name

    for segment in segments:
        if segment == "~":
              continue
        segment_metadata = wfdb.rdheader(record_name=segment, pn_dir=record_path)
        sigs_present = segment_metadata.sig_name

        # calculate time of each record
        sigs_len = segment_metadata.sig_len
        fs = segment_metadata.fs
        sig_sec = int(sigs_len / fs )  # hours

        print(f"Record: {record_name}, Leads: {sigs_present}, Time: {sig_sec} seconds")

        # calculate each leads time
        for sig in sigs_present:
            sig_upper = sig.upper()
            if sig_upper in required_sigs:
                lead_duration[sig_upper] += sig_sec


# print statistic results
print("\nTotal duration per lead:")
for lead, duration in lead_duration.items():
    print(f"{lead}: {duration} seconds")


leads = list(lead_duration.keys())
durations = list(lead_duration.values())

plt.figure(figsize=(10, 5))
plt.bar(leads, durations, color='skyblue')

plt.xlabel("ECG Leads")
plt.ylabel("Total Duration (hours)")
plt.title("Total Duration per ECG Lead")
plt.xticks(rotation=45)

for i, duration in enumerate(durations):
    plt.text(i, duration + 0.1, f"{int(duration/3600)}", ha='center')

plt.legend(["Lead Duration"], loc="upper right")

plt.show()

  0%|          | 0/339 [00:00<?, ?it/s]

3564572_layout
mimic3wdb/35/3564572
Record: 3564572, Leads: ['II', 'V', 'PLETH', 'ICP', 'I', 'III', 'ECG'], Time: 0 seconds
~
mimic3wdb/35/3564572
3564572_0001
mimic3wdb/35/3564572
Record: 3564572, Leads: ['II', 'V'], Time: 1 seconds
3564572_0002
mimic3wdb/35/3564572


  0%|          | 0/339 [00:05<?, ?it/s]


KeyboardInterrupt: 