### This notebook shows data extraction and initial cleaning of the raw MIMIC-IV Waveform Database

#### Extract records from 52 subjects with valid PPG, ECG, and ABP signals and save them locally in .mat format

In [None]:
# All necessary libraries should be here

import matplotlib.pyplot as plt
import neurokit2 as nk
import numpy as np
import scipy.io as sio
import scipy.signal as sp
import wfdb
from scipy import signal
from scipy.io import savemat

In [None]:
# The 52 records listed below (one per subject, 52 subjects in total) were selected beforehand:
# each one contains all 3 signals (PPG, ECG, ABP), and every signal lasts at least 10 min.

# NOTE(review): database_name is not referenced by any of the cells shown here.
database_name = 'mimic4wdb/0.1.0' # The name of the MIMIC-IV Waveform Database on PhysioNet

# Segments for analysis: record names handed to wfdb.rdrecord(record_name=...) by load_all_records().
# Order matters: segment_names[i] is paired with segment_dirs[i]. The digits before the
# underscore match the last path component of the corresponding segment_dirs entry.
segment_names = ['83404654_0005', '82924339_0007', '84248019_0005', 
                 '82439920_0004', '82800131_0002', '84304393_0001', 
                 '89464742_0001', '88958796_0004', '88995377_0001', 
                 '85230771_0004', '86643930_0004', '81250824_0005', 
                 '87706224_0003', '83058614_0005', '82803505_0017', 
                 '88574629_0001', '87867111_0012', '84560969_0001', 
                 '87562386_0001', '88685937_0001', '86120311_0001', 
                 '89866183_0014', '89068160_0002', '86380383_0001', 
                 '85078610_0008', '87702634_0007', '84686667_0002', 
                 '84802706_0002', '81811182_0004', '84421559_0005', 
                 '88221516_0007', '80057524_0005', '84209926_0018', 
                 '83959636_0010', '89989722_0016', '89225487_0007', 
                 '84391267_0001', '80889556_0002', '85250558_0011', 
                 '84567505_0005', '85814172_0007', '88884866_0005', 
                 '80497954_0012', '80666640_0014', '84939605_0004', 
                 '82141753_0018', '86874920_0014', '84505262_0010', 
                 '86288257_0001', '89699401_0001', '88537698_0013', 
                 '83958172_0001']

# Directory of each segment, handed to wfdb.rdrecord(pn_dir=...) by load_all_records().
# Must stay in the same order (and have the same length) as segment_names:
# segment_dirs[i] is the directory that holds segment_names[i].
segment_dirs = ['mimic4wdb/0.1.0/waves/p100/p10020306/83404654', 'mimic4wdb/0.1.0/waves/p101/p10126957/82924339', 
                'mimic4wdb/0.1.0/waves/p102/p10209410/84248019', 'mimic4wdb/0.1.0/waves/p109/p10952189/82439920', 
                'mimic4wdb/0.1.0/waves/p111/p11109975/82800131', 'mimic4wdb/0.1.0/waves/p113/p11392990/84304393', 
                'mimic4wdb/0.1.0/waves/p121/p12168037/89464742', 'mimic4wdb/0.1.0/waves/p121/p12173569/88958796', 
                'mimic4wdb/0.1.0/waves/p121/p12188288/88995377', 'mimic4wdb/0.1.0/waves/p128/p12872596/85230771', 
                'mimic4wdb/0.1.0/waves/p129/p12933208/86643930', 'mimic4wdb/0.1.0/waves/p130/p13016481/81250824', 
                'mimic4wdb/0.1.0/waves/p132/p13240081/87706224', 'mimic4wdb/0.1.0/waves/p136/p13624686/83058614', 
                'mimic4wdb/0.1.0/waves/p137/p13791821/82803505', 'mimic4wdb/0.1.0/waves/p141/p14191565/88574629', 
                'mimic4wdb/0.1.0/waves/p142/p14285792/87867111', 'mimic4wdb/0.1.0/waves/p143/p14356077/84560969', 
                'mimic4wdb/0.1.0/waves/p143/p14363499/87562386', 'mimic4wdb/0.1.0/waves/p146/p14695840/88685937', 
                'mimic4wdb/0.1.0/waves/p149/p14931547/86120311', 'mimic4wdb/0.1.0/waves/p151/p15174162/89866183', 
                'mimic4wdb/0.1.0/waves/p153/p15312343/89068160', 'mimic4wdb/0.1.0/waves/p153/p15342703/86380383', 
                'mimic4wdb/0.1.0/waves/p155/p15552902/85078610', 'mimic4wdb/0.1.0/waves/p156/p15649186/87702634', 
                'mimic4wdb/0.1.0/waves/p158/p15857793/84686667', 'mimic4wdb/0.1.0/waves/p158/p15865327/84802706', 
                'mimic4wdb/0.1.0/waves/p158/p15896656/81811182', 'mimic4wdb/0.1.0/waves/p159/p15920699/84421559', 
                'mimic4wdb/0.1.0/waves/p160/p16034243/88221516', 'mimic4wdb/0.1.0/waves/p165/p16566444/80057524', 
                'mimic4wdb/0.1.0/waves/p166/p16644640/84209926', 'mimic4wdb/0.1.0/waves/p167/p16709726/83959636', 
                'mimic4wdb/0.1.0/waves/p167/p16715341/89989722', 'mimic4wdb/0.1.0/waves/p168/p16818396/89225487', 
                'mimic4wdb/0.1.0/waves/p170/p17032851/84391267', 'mimic4wdb/0.1.0/waves/p172/p17229504/80889556', 
                'mimic4wdb/0.1.0/waves/p173/p17301721/85250558', 'mimic4wdb/0.1.0/waves/p173/p17325001/84567505', 
                'mimic4wdb/0.1.0/waves/p174/p17490822/85814172', 'mimic4wdb/0.1.0/waves/p177/p17738824/88884866', 
                'mimic4wdb/0.1.0/waves/p177/p17744715/80497954', 'mimic4wdb/0.1.0/waves/p179/p17957832/80666640', 
                'mimic4wdb/0.1.0/waves/p180/p18080257/84939605', 'mimic4wdb/0.1.0/waves/p181/p18109577/82141753', 
                'mimic4wdb/0.1.0/waves/p183/p18324626/86874920', 'mimic4wdb/0.1.0/waves/p187/p18742074/84505262', 
                'mimic4wdb/0.1.0/waves/p188/p18824975/86288257', 'mimic4wdb/0.1.0/waves/p191/p19126489/89699401', 
                'mimic4wdb/0.1.0/waves/p193/p19313794/88537698', 'mimic4wdb/0.1.0/waves/p196/p19619764/83958172']

In [None]:
# Utility functions:
## load_all_records() extracts records from the MIMIC-IV Waveform Database 
def load_all_records(segment_names, segment_dirs):
    """Read every listed record with ``wfdb.rdrecord`` and return them keyed by segment name.

    Parameters
    ----------
    segment_names : iterable of str
        Record names, passed as ``record_name`` to ``wfdb.rdrecord``.
    segment_dirs : iterable of str
        Directory of each record, passed as ``pn_dir`` to ``wfdb.rdrecord``.
        ``segment_dirs[i]`` must belong to ``segment_names[i]``.

    Returns
    -------
    dict
        ``{segment_name: {'record': <object returned by wfdb.rdrecord>,
        'fs': <that object's fs attribute>}}``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of entries. The check runs
        before any record is read, so a mismatch is reported immediately instead
        of after part of the (slow) loading has already been done, and surplus
        directories are no longer silently ignored.
    """
    # Materialise the inputs so that generators are accepted and len() is available.
    segment_names = list(segment_names)
    segment_dirs = list(segment_dirs)
    if len(segment_names) != len(segment_dirs):
        raise ValueError(
            f"segment_names has {len(segment_names)} entries but "
            f"segment_dirs has {len(segment_dirs)}; they must be paired one-to-one"
        )

    records = {}
    for i, (segment_name, segment_dir) in enumerate(zip(segment_names, segment_dirs)):
        print(f"Loading Record {i}")
        record_data = wfdb.rdrecord(record_name=segment_name, pn_dir=segment_dir)
        records[segment_name] = {
            'record': record_data,
            'fs': record_data.fs
        }
    return records

# Extract signals (ABP, PPG, ECG) from the record
def extract_signals(record):
    """Return the ABP, PPG and ECG channels of ``record`` as ``(abp, ppg, ecg)``.

    Channels are located through ``record.sig_name``:

    * ABP -- a name containing ``"ABP"``
    * PPG -- a name containing ``"Pleth"``
    * ECG -- a name equal to ``"II"``; only if no such name exists, a name that
      merely contains ``"II"`` is used instead.

    A plain substring test for the ECG would also accept ``"III"``, so a record
    carrying both leads could return lead III in place of lead II. The exact
    match therefore always takes precedence.

    Parameters
    ----------
    record : object
        Must expose ``sig_name`` (iterable of channel names) and ``p_signal``
        (2-D array indexed as ``p_signal[:, channel]``).

    Returns
    -------
    tuple
        ``(abp, ppg, ecg)``; each element is the matching column of
        ``record.p_signal`` or ``None`` when no channel name matched.
    """
    abp_col = ppg_col = ecg_col = None
    ecg_is_exact = False  # becomes True once a channel named exactly "II" was seen
    for sig_no, sig_name in enumerate(record.sig_name):
        if "ABP" in sig_name:
            abp_col = sig_no
        elif "Pleth" in sig_name:
            ppg_col = sig_no
        elif sig_name.strip() == "II":
            if not ecg_is_exact:
                ecg_col = sig_no
                ecg_is_exact = True
        elif "II" in sig_name and not ecg_is_exact:
            # Fallback (e.g. "III"): only kept when no exact "II" channel exists.
            ecg_col = sig_no

    def _column(col):
        # p_signal is touched only when a channel was actually found.
        return record.p_signal[:, col] if col is not None else None

    return _column(abp_col), _column(ppg_col), _column(ecg_col)

# Split signals into 10-sec episodes
def split_signal(signal, duration=10, fs=62.5):
    """Cut ``signal`` into consecutive, non-overlapping fragments.

    Each fragment holds ``int(duration * fs)`` samples. When the length of
    ``signal`` is not a multiple of that number, the last fragment is shorter.

    Parameters
    ----------
    signal : sequence
        Anything that supports ``len()`` and slicing.
    duration : float, optional
        Fragment duration (default 10).
    fs : float, optional
        Sampling frequency (default 62.5).

    Returns
    -------
    list
        The fragments in their original order; empty when ``signal`` is empty.
    """
    samples_per_fragment = int(duration * fs)
    fragments = []
    for start in range(0, len(signal), samples_per_fragment):
        stop = start + samples_per_fragment
        fragments.append(signal[start:stop])
    return fragments

# Data Cleaning part: 
## --> remove NaN values from PPG, ECG, ABP signals
## --> establish initial range of ABP data "including" [60 180]
def initial_filter(ppg_fragments, ecg_fragments, abp_fragments, abp_min=60, abp_max=180):
    """Discard fragments that contain NaNs or ABP samples outside ``[abp_min, abp_max]``.

    The three inputs are walked in parallel; fragment ``i`` is removed from all
    three signals when any of its ECG, ABP or PPG samples is NaN, or when any
    of its ABP samples is below ``abp_min`` or above ``abp_max`` (both bounds
    are themselves accepted).

    Note the ordering: the arguments are ``(ppg, ecg, abp)`` while the result
    is ``(ecg, abp, ppg, removed_indices)``.

    Parameters
    ----------
    ppg_fragments, ecg_fragments, abp_fragments : sequence of array-like
        Fragments of each signal; index ``i`` of every sequence refers to the
        same fragment.
    abp_min, abp_max : float, optional
        Inclusive ABP range a fragment must stay within (defaults 60 and 180).

    Returns
    -------
    tuple
        ``(filtered_ecg, filtered_abp, filtered_ppg, removed_indices)`` -- three
        lists with the surviving fragments (the original objects, unmodified)
        and the list of indices that were dropped.
    """
    filtered_ecg, filtered_abp, filtered_ppg, removed_indices = [], [], [], []
    for i, (ecg, abp, ppg) in enumerate(zip(ecg_fragments, abp_fragments, ppg_fragments)):
        abp_values = np.asarray(abp)  # lets plain lists take part in the range comparison
        # An ECG amplitude check ([-6, 6]) used to sit between the NaN test and the ABP
        # test; it is disabled. Each remaining condition must be joined with `or`.
        if (np.isnan(ecg).any() or np.isnan(abp_values).any() or np.isnan(ppg).any()
                or (abp_values < abp_min).any() or (abp_values > abp_max).any()):
            removed_indices.append(i)
        else:
            filtered_ecg.append(ecg)
            filtered_abp.append(abp)
            filtered_ppg.append(ppg)
    return filtered_ecg, filtered_abp, filtered_ppg, removed_indices

In [None]:
# Load every selected record from the MIMIC-IV Waveform Database.
# extracted_records maps each segment name to {'record': <wfdb record>, 'fs': <record.fs>}.
# NOTE(review): the records are read through wfdb.rdrecord(pn_dir=...), which presumably
# fetches them from PhysioNet -- expect this cell to need network access and to be slow.
extracted_records = load_all_records(segment_names, segment_dirs)

In [None]:
# Pull the ABP, ECG and PPG channels out of every loaded record and store them in a single
# .mat file, so the raw data can be reloaded later without going back to the database.

M = {}  # '1', '2', ... -> {'abp', 'ecg', 'ppg'}, numbered in the iteration order of extracted_records
w = 1  # 1-based running number used as the key of the next record

# NOTE(review): the keys are digit strings, which are not valid MATLAB variable names --
# confirm the file only has to be read back with scipy.io.loadmat.
for i, entry in extracted_records.items():
    record = entry.get('record')

    # extract_signals returns the channels in the order ABP, PPG, ECG
    abp, ppg, ecg = extract_signals(record)

    M[str(w)] = dict(abp=abp, ecg=ecg, ppg=ppg)
    w += 1

M['comments'] = "This file contains raw ABP, ECG, and PPG signals for multiple records extracted from the dataset. No cleaning, no filtering."

savemat('MIMIC-IV_raw.mat', M)

print("Data saved to 'MIMIC-IV_raw.mat' with comments.")
