In [69]:
import neurokit2 as nk
from PyEMD import EMD
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

In [None]:
def load_ecg_signals(folder_path: str = './ECG_BY_HEARTH_DISEASES/SR/',
                     csv_list=None,
                     lead_index: int = 0) -> list:
    """Load one lead from each ECG CSV file and return the signals as lists.

    Called without arguments it reads the five default recordings from
    ``./ECG_BY_HEARTH_DISEASES/SR/`` and returns their first lead.

    Parameters
    ----------
    folder_path : str, optional
        Directory that contains the CSV files.
    csv_list : sequence of str, optional
        File names (relative to ``folder_path``) to load. ``None`` selects the
        five default recordings listed below.
    lead_index : int, optional
        Zero-based position of the column (lead) to extract. Defaults to the
        first column.

    Returns
    -------
    list
        One list of samples per file, in the same order as ``csv_list``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when a file does not exist.
    """
    # Local import so this cell does not depend on an edit to the imports cell.
    import os

    if csv_list is None:
        csv_list = [
            "MUSE_20180111_160053_89000.csv",
            "MUSE_20180111_160140_78000.csv",
            "MUSE_20180111_160159_36000.csv",
            "MUSE_20180111_160355_88000.csv",
            "MUSE_20180111_160410_42000.csv",
        ]

    # lead_1 ... lead_12
    column_names = [f"lead_{lead_number}" for lead_number in range(1, 13)]

    ecg_all_signals = []

    for csv_file in csv_list:
        # os.path.join works whether or not folder_path ends with a separator.
        full_path = os.path.join(folder_path, csv_file)

        # header=0 together with names= discards the header row found in the
        # file and labels the columns with column_names instead.
        ecg_df = pd.read_csv(full_path, header=0, names=column_names, encoding='utf-8')

        ecg_all_signals.append(ecg_df.iloc[:, lead_index].tolist())

    return ecg_all_signals

### 1. Exploration to find the N IMF modes


In [None]:
def find_n_imf_modes(ecg_all_signals: list, sampling_frecuency: int) -> dict:
    """Decompose every ECG signal with EMD and report how many IMF rows each yields.

    Each signal is cleaned with ``nk.ecg_clean`` and decomposed with
    ``EMD.emd`` (at most 10 IMFs requested). The smallest row count over all
    signals is reported so callers can use the same number of modes for every
    signal.

    Parameters
    ----------
    ecg_all_signals : list
        One sequence of samples per ECG recording.
    sampling_frecuency : int
        Value forwarded to ``nk.ecg_clean`` as ``sampling_rate``.

    Returns
    -------
    dict
        ``emd_data``       -- list with one dict per signal holding
                              ``imf_data`` (array returned by ``EMD.emd``) and
                              ``imf_residue``.
        ``total_ecg``      -- number of signals processed.
        ``imf_max_count``, ``imf_min_count``, ``imf_mean_count``
                           -- statistics over ``imf_counts``.
        ``imf_counts``     -- number of rows in ``imf_data`` for each signal.

    Raises
    ------
    ValueError
        If ``ecg_all_signals`` is empty (no statistics can be computed).
    """
    # Fail early with a clear message instead of letting np.min([]) raise an
    # opaque "zero-size array" ValueError after the loop.
    if len(ecg_all_signals) == 0:
        raise ValueError("ecg_all_signals is empty: at least one ECG signal is required")

    MAX_IMF: int = 10  # upper bound on the number of IMFs requested from EMD

    emd = EMD()

    emd_report: dict = {
        "emd_data": [],  # All signals processed
        "total_ecg": 0,  # Total ECG processed
        "imf_max_count": 0,
        "imf_min_count": 0,
        "imf_mean_count": 0,
        "imf_counts": []
    }

    imf_counts: list = []

    # Processing each ECG signal
    for ecg_signal in ecg_all_signals:
        clean_signal_np = np.asarray(
            nk.ecg_clean(ecg_signal, sampling_rate=sampling_frecuency)
        )

        # Extract IMFs from ECG signal
        imfs = emd.emd(clean_signal_np, max_imf=MAX_IMF)

        # NOTE(review): per the PyEMD documentation, EMD.emd appends a non-zero
        # residue as the LAST row of the returned array, so
        # `signal - imfs.sum(axis=0)` is ~0 by construction. The real residue
        # of the most recent decomposition is obtained from the decomposer
        # itself. Confirm against the installed PyEMD version.
        _, residue = emd.get_imfs_and_residue()

        # `imf_data` and the row count are intentionally left as returned by
        # EMD.emd (i.e. possibly including that trailing residue row) so the
        # downstream feature layout does not change.
        num_imfs = imfs.shape[0] if imfs.ndim > 1 else 1
        imf_counts.append(num_imfs)

        # Store the EMD from ECG processed signal
        emd_report["emd_data"].append({"imf_data": imfs, "imf_residue": residue})

    # Plain Python numbers keep the report easy to serialise and safe to pass
    # to range() downstream.
    min_modes = int(min(imf_counts))
    max_modes = int(max(imf_counts))
    mean_modes = float(np.mean(imf_counts))

    emd_report["imf_max_count"] = max_modes
    emd_report["imf_min_count"] = min_modes
    emd_report["imf_mean_count"] = mean_modes
    emd_report["total_ecg"] = len(ecg_all_signals)
    emd_report["imf_counts"] = imf_counts

    print("Analizing the IMF in the Dataset")
    print(f"IMF min number: {min_modes}")
    print(f"IMF max number: {max_modes}")
    print(f"IMF mean number: {mean_modes}")

    # Every signal has at least `min_modes` rows, so that many modes can be
    # used uniformly for all of them.
    print(f"\nDesicion: It will use de first {min_modes} modes for all patients")

    return emd_report

### 2. Extracting the Feature Vector


In [None]:
def extract_features_from_imf(imf) -> dict:
    """Summarise a single IMF with four scalar statistics.

    Parameters
    ----------
    imf : array-like supporting ``**``
        Samples of one intrinsic mode function.

    Returns
    -------
    dict
        Keys, in this order: ``variance`` (``np.var``), ``energy`` (sum of
        squared samples), ``skewness`` and ``kurtosis`` (``scipy.stats`` with
        default arguments). The insertion order matters to callers that read
        ``.values()`` positionally.
    """
    imf_variance = np.var(imf)
    imf_energy = np.sum(imf ** 2)
    imf_skewness = skew(imf)
    imf_kurtosis = kurtosis(imf)

    return dict(
        variance=imf_variance,
        energy=imf_energy,
        skewness=imf_skewness,
        kurtosis=imf_kurtosis,
    )

def process_imfs(imf_collection: np.ndarray, n_modes: int) -> list:
    """Build the flat feature vector of one signal from its first ``n_modes`` IMFs.

    Parameters
    ----------
    imf_collection : np.ndarray
        IMFs of one signal, indexed as ``imf_collection[i, :]``.
    n_modes : int
        Number of leading IMFs to describe.

    Returns
    -------
    list
        The values produced by ``extract_features_from_imf`` for IMF 0, then
        IMF 1, and so on. An empty list (plus a printed warning) when the
        collection holds fewer than ``n_modes`` IMFs.
    """
    # Guard clause: not enough modes -> warn and hand back an empty vector.
    if len(imf_collection) < n_modes:
        print(f"Warning: A signal had less than {n_modes} IMF and it was omited")
        return []

    feature_row = []
    for mode_idx in range(n_modes):
        mode_stats = extract_features_from_imf(imf_collection[mode_idx, :])
        feature_row.extend(mode_stats.values())

    return feature_row

### 3. Transforming to a DataFrame


In [None]:
def to_Dataframe(all_features_vector, n_modes) -> pd.DataFrame:
    """Wrap the per-signal feature vectors in a labelled DataFrame.

    Parameters
    ----------
    all_features_vector : list of lists
        One flat feature vector per signal (four values per mode).
    n_modes : int
        Number of modes described by each vector.

    Returns
    -------
    pd.DataFrame
        One row per signal; columns are ``IMF<k>_var``, ``IMF<k>_enegy``,
        ``IMF<k>_skew`` and ``IMF<k>_kurt`` for ``k = 1 .. n_modes``.
    """
    # NOTE(review): "enegy" looks like a misspelling of "energy". It is kept
    # as-is because the label is part of the exported column schema.
    stat_suffixes = ("var", "enegy", "skew", "kurt")

    column_labels = [
        f"IMF{mode_number}_{suffix}"
        for mode_number in range(1, n_modes + 1)
        for suffix in stat_suffixes
    ]

    return pd.DataFrame(all_features_vector, columns=column_labels)

### Main


In [None]:
# Pipeline driver: load the ECG signals, decompose them with EMD, turn the
# leading IMFs of every signal into a feature vector and export the table.
if __name__ == "__main__":
    # Load the signals and run the EMD exploration; 500 is forwarded as the
    # sampling rate used for cleaning (presumably Hz -- confirm with the dataset).
    ecg_all_signals: list = load_ecg_signals()
    emd_content: dict = find_n_imf_modes(ecg_all_signals, sampling_frecuency=500)

    # Per-signal decompositions and the smallest IMF count found across signals.
    # Using the minimum means every signal can supply n_modes IMFs.
    emd_data = emd_content.get("emd_data", [])
    n_modes = emd_content.get("imf_min_count", 0)

    # One feature vector per signal, in the same order as emd_data.
    all_features_vector: list = []

    for emd in emd_data:
        imfs = emd.get("imf_data", [])

        # process_imfs returns an empty list when a signal has fewer than
        # n_modes IMFs; that empty vector is still appended here.
        # NOTE(review): such a row presumably ends up as missing values
        # (written as "N/A" below) -- confirm this is the intended handling.
        patient_features_vector : list= process_imfs(imf_collection=imfs, n_modes=n_modes)
        all_features_vector.append(patient_features_vector)

    df_imf_features: pd.DataFrame = to_Dataframe(all_features_vector, n_modes)

    # Export as a ';'-separated file without the index column; missing values
    # are written as "N/A".
    df_imf_features.to_csv('./emd_imf_features.csv', sep=";", na_rep="N/A", index=False)

Analizing the IMF in the Dataset
IMF min number: 6
IMF max number: 8
IMF mean number: 7.0

Desicion: It will use de first 6 modes for all patients
