In [14]:
import glob
import os
import sys
import tempfile
from pathlib import Path

import matlab.engine
import pandas as pd
from pymatreader import read_mat

In [15]:
def load_matlab_table(engine, mat_file_path):
    """Load a MATLAB table stored in a .mat file as a pandas DataFrame.

    The file is first passed through the MATLAB function
    ``convert_table_to_struct`` (called on ``engine``), which writes an
    intermediate .mat file that ``pymatreader.read_mat`` can read. Keys
    starting with ``"__"`` are dropped before the DataFrame is built.

    The intermediate file is written into a private temporary directory
    rather than next to the source file, so the source directory is never
    modified and an interrupted run cannot leave a stray
    ``temp_processed_*.mat`` file where a later ``*.mat`` glob would find it.

    Args:
        engine: Object exposing ``convert_table_to_struct(src, dst, nargout=0)``
            (presumably a running MATLAB engine with the project's script
            directory on its path -- confirm against the caller).
        mat_file_path: Path (``str`` or path-like) of the source .mat file.

    Returns:
        A ``pandas.DataFrame`` on success, or ``None`` if any step raised;
        in that case the error is printed and not re-raised (best-effort
        loading, so one bad file does not abort a batch).
    """
    mat_file_path = str(mat_file_path)
    filename = os.path.basename(mat_file_path)

    try:
        # The scratch directory and everything in it is removed when the
        # `with` block exits, whether conversion succeeded or failed.
        with tempfile.TemporaryDirectory(prefix="matlab_table_") as scratch_dir:
            temp_file = os.path.join(scratch_dir, f"temp_processed_{filename}")

            # Convert table to struct
            engine.convert_table_to_struct(mat_file_path, temp_file, nargout=0)

            # Read with pymatreader
            data_dict = read_mat(temp_file)

            # Drop "__"-prefixed keys, keeping only the data entries
            clean_dict = {
                k: v for k, v in data_dict.items() if not k.startswith("__")
            }
            return pd.DataFrame(clean_dict)

    except Exception as e:
        print(f"Error processing {mat_file_path}: {e}")
        return None

In [None]:
# Load every .mat table found under the raw-data directory into `data`
# (one DataFrame per successfully converted file).
data = []

print("Starting MATLAB Engine...")
# NOTE(review): the engine is left running in case later cells use it;
# call eng.quit() once it is no longer needed.
eng = matlab.engine.start_matlab()

# Put the MATLAB script directory on the engine's search path
# (presumably where convert_table_to_struct lives -- confirm).
matlab_script_dir = "/home/alanh/projects/ecg/matlab"
eng.addpath(matlab_script_dir, nargout=0)

data_dir_path = Path("/home/alanh/projects/ecg/data/raw")

# rglob yields files in arbitrary filesystem order; sort so that data[i]
# refers to the same file on every run. Skip intermediate files that an
# interrupted earlier run may have left behind -- they are not raw data.
mat_files = sorted(
    path
    for path in data_dir_path.rglob("*.mat")
    if not path.name.startswith("temp_processed_")
)

for file_path in mat_files:
    print(f"Processing: {file_path.name}")
    df = load_matlab_table(eng, file_path)

    # load_matlab_table returns None when a file could not be converted.
    if df is not None:
        data.append(df)

print(f"Successfully loaded {len(data)} files.")

Starting MATLAB Engine...
Processing: sample_data.mat
Processed: /home/alanh/projects/ecg/data/raw/sample_data.mat -> /home/alanh/projects/ecg/data/raw/temp_processed_sample_data.mat
Successfully loaded 1 files.


In [35]:
# Quick look at the first loaded table: summary statistics, schema,
# the first rows, and the ECG_Data value of the first row.
first_table = data[0]

display(first_table.describe())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
first_table.info()
display(first_table.head())
display(first_table["ECG_Data"].loc[0])

Unnamed: 0,Device,Start_Time,ECG_Transition,CPR_Transition,Valid_ECG,ECG_SR,Valid_PCI,Valid
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,1.1,795.717326,0.15,0.05,1.0,168.75,1.0,1.0
std,0.307794,742.333737,0.366348,0.223607,0.0,61.170061,0.0,0.0
min,1.0,56.04,0.0,0.0,1.0,125.0,1.0,1.0
25%,1.0,93.39121,0.0,0.0,1.0,125.0,1.0,1.0
50%,1.0,659.54636,0.0,0.0,1.0,125.0,1.0,1.0
75%,1.0,1326.13475,0.0,0.0,1.0,250.0,1.0,1.0
max,2.0,2499.516,1.0,1.0,1.0,250.0,1.0,1.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Device             20 non-null     uint8  
 1   Start_Time         20 non-null     float64
 2   ECG_State          20 non-null     object 
 3   ECG_State_General  20 non-null     object 
 4   ECG_Transition     20 non-null     uint8  
 5   CPR_State          20 non-null     object 
 6   CPR_Transition     20 non-null     uint8  
 7   Valid_ECG          20 non-null     uint8  
 8   ECG_Data           20 non-null     object 
 9   ECG_SR             20 non-null     uint8  
 10  Valid_PCI          20 non-null     uint8  
 11  Valid              20 non-null     uint8  
dtypes: float64(1), object(4), uint8(7)
memory usage: 1.0+ KB


None

Unnamed: 0,Device,Start_Time,ECG_State,ECG_State_General,ECG_Transition,CPR_State,CPR_Transition,Valid_ECG,ECG_Data,ECG_SR,Valid_PCI,Valid
0,1,242.20772,VF,VF,0,CPR,0,1,"[-0.04250561064684294, -0.0160402504454218, -0...",125,1,1
1,1,287.20772,VF,VF,0,No CPR,0,1,"[-0.0461050700520934, -0.04934010102636918, -0...",125,1,1
2,1,942.20772,Organized 3,Organized,0,CPR,0,1,"[0.30100731318863555, 0.24830255414416078, 0.1...",125,1,1
3,1,984.70772,Organized 3,Organized,0,No CPR,0,1,"[-0.050434382056786166, -0.06309755212314758, ...",125,1,1
4,1,65.74511,Asystole,Asystole,0,CPR,0,1,"[-0.6570174535060926, -0.5292728498692529, -0....",125,1,1


array([-0.04250561, -0.01604025, -0.03249409, -0.08271824, -0.11622839,
       -0.10388394, -0.06571682, -0.04536908, -0.05133279, -0.06789087,
       -0.08573887, -0.10553779, -0.13361422, -0.14680492, -0.11369661,
       -0.05574702, -0.01132335,  0.01301202,  0.02734092,  0.04129231,
        0.0676628 ,  0.09493693,  0.10654147,  0.10901453,  0.09521038,
        0.05903113,  0.0113614 , -0.03494528, -0.06752006, -0.08237015,
       -0.0770562 , -0.03263251,  0.04215359,  0.10216883,  0.12694823,
        0.14133572,  0.16572329,  0.17960427,  0.17728989,  0.17263326,
        0.15496252,  0.11955643,  0.07384535,  0.01119277, -0.05226591,
       -0.10339109, -0.16339395, -0.21974352, -0.23545548, -0.19693688,
       -0.12102233, -0.05135179, -0.00777405,  0.01242344, -0.00142328,
       -0.03359833, -0.05503931, -0.07718633, -0.10756889, -0.10625387,
       -0.06275517, -0.02526909, -0.00985456, -0.01113846, -0.03086337,
       -0.04795411, -0.05154092, -0.05550672, -0.06318767, -0.06