In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from biosppy.signals.tools import filter_signal
from pyentrp import entropy as ent
from tqdm import tqdm
from scipy import stats
from tsfresh.feature_extraction.feature_calculators import (
    fourier_entropy, sample_entropy, approximate_entropy, binned_entropy,
    permutation_entropy
)
import neurokit2 as nk
from hrvanalysis.preprocessing import get_nn_intervals

import sys
sys.path.append('..')
from util import load_base_data, hist_statistic_per_class

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, so deprecation and pandas assignment/dtype warnings raised by
# later cells are hidden as well. Consider filtering specific categories.
warnings.simplefilter("ignore")

# Default matplotlib figure size as [width, height].
plt.rcParams['figure.figsize'] = [20, 5]

# Sampling rate handed to nk.ecg_process for every recording
# (presumably in Hz -- confirm against the dataset description).
SAMPLING_RATE = 300



In [2]:
# NOTE(review): DATA_PATH is not passed to load_base_data() and is not
# referenced by any other visible cell -- confirm whether it is still needed.
DATA_PATH = "../data/base"
# load_base_data is imported from the project's util module; it is called
# without arguments here, so it must locate the data on its own.
X_train, y_train, X_test = load_base_data()

In [3]:
def calculate_rri_derivatives(X):
    """Summarise first and second differences of the RR intervals per recording.

    For every row of ``X`` the NaN entries are dropped, R-peaks are located
    with ``nk.ecg_process`` and the differences between consecutive peak
    positions (RR intervals) are differenced once ("vel") and twice ("acc").
    Each of the two resulting series is reduced to seven statistics.

    Parameters
    ----------
    X : pandas.DataFrame
        One recording per row. NaN cells are removed before processing
        (presumably trailing padding -- confirm against ``load_base_data``).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``X`` with the columns ``rri_vel_<stat>`` followed by
        ``rri_acc_<stat>`` for ``<stat>`` in min, max, mean, median, std,
        skew, kurtosis. A statistic that pandas cannot compute (for example
        because too few R-peaks were found) is NaN.

    Raises
    ------
    Exception
        Whatever ``nk.ecg_process`` raises if it fails on both the recording
        and its ``1 - signal`` counterpart.
    """
    stat_names = ("min", "max", "mean", "median", "std", "skew", "kurtosis")
    columns = [
        f"rri_{kind}_{stat}" for kind in ("vel", "acc") for stat in stat_names
    ]

    rows = []
    for pos in tqdm(range(len(X))):
        # Positional read: keeps each result aligned with X.index even when
        # the index labels are not 0..n-1.
        signal = X.iloc[pos].dropna().to_numpy(dtype='float32')

        try:
            _, info = nk.ecg_process(signal, sampling_rate=SAMPLING_RATE)
        except Exception:
            # Best-effort retry on 1 - signal. Only Exception is caught so
            # that Ctrl-C / SystemExit still stop the long-running loop.
            signal = 1 - signal
            _, info = nk.ecg_process(signal, sampling_rate=SAMPLING_RATE)

        r_peaks = info["ECG_R_Peaks"]
        rri = r_peaks[1:] - r_peaks[:-1]
        rri_vel = rri[1:] - rri[:-1]
        rri_acc = rri_vel[1:] - rri_vel[:-1]

        row = {}
        for kind, values in (("vel", rri_vel), ("acc", rri_acc)):
            series = pd.Series(values)
            for stat in stat_names:
                row[f"rri_{kind}_{stat}"] = getattr(series, stat)()
        rows.append(row)

    # Build the frame once. Chained writes such as df[col].iloc[i] = value
    # do not reach the frame under pandas Copy-on-Write.
    return pd.DataFrame(rows, index=X.index, columns=columns)

def calculate_nni_derivatives(X):
    """Summarise first and second differences of the NN intervals per recording.

    For every row of ``X`` the NaN entries are dropped, R-peaks are located
    with ``nk.ecg_process``, the differences between consecutive peak
    positions (RR intervals) are passed through ``get_nn_intervals`` with
    ``low_rri=30`` and the result is differenced once ("vel") and twice
    ("acc"). Each of the two series is reduced to seven statistics.

    Parameters
    ----------
    X : pandas.DataFrame
        One recording per row. NaN cells are removed before processing
        (presumably trailing padding -- confirm against ``load_base_data``).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``X`` with the columns ``nni_vel_<stat>`` followed by
        ``nni_acc_<stat>`` for ``<stat>`` in min, max, mean, median, std,
        skew, kurtosis. A statistic that pandas cannot compute (for example
        because too few intervals remain) is NaN.

    Raises
    ------
    Exception
        Whatever ``nk.ecg_process`` raises if it fails on both the recording
        and its ``1 - signal`` counterpart.
    """
    stat_names = ("min", "max", "mean", "median", "std", "skew", "kurtosis")
    columns = [
        f"nni_{kind}_{stat}" for kind in ("vel", "acc") for stat in stat_names
    ]

    rows = []
    for pos in tqdm(range(len(X))):
        # Positional read: keeps each result aligned with X.index even when
        # the index labels are not 0..n-1.
        signal = X.iloc[pos].dropna().to_numpy(dtype='float32')

        try:
            _, info = nk.ecg_process(signal, sampling_rate=SAMPLING_RATE)
        except Exception:
            # Best-effort retry on 1 - signal. Only Exception is caught so
            # that Ctrl-C / SystemExit still stop the long-running loop.
            signal = 1 - signal
            _, info = nk.ecg_process(signal, sampling_rate=SAMPLING_RATE)

        r_peaks = info["ECG_R_Peaks"]
        rri = r_peaks[1:] - r_peaks[:-1]
        # NOTE(review): rri is a difference of peak positions while low_rri
        # is a threshold on the same scale -- confirm 30 is the intended unit.
        nni = np.array(get_nn_intervals(rri, low_rri=30, verbose=False))
        nni_vel = nni[1:] - nni[:-1]
        nni_acc = nni_vel[1:] - nni_vel[:-1]

        row = {}
        for kind, values in (("vel", nni_vel), ("acc", nni_acc)):
            series = pd.Series(values)
            for stat in stat_names:
                row[f"nni_{kind}_{stat}"] = getattr(series, stat)()
        rows.append(row)

    # Build the frame once. Chained writes such as df[col].iloc[i] = value
    # do not reach the frame under pandas Copy-on-Write.
    return pd.DataFrame(rows, index=X.index, columns=columns)

def calculate_interval_derivatives(X):
    """Return the RR- and NN-interval derivative features side by side.

    Runs ``calculate_rri_derivatives`` and then ``calculate_nni_derivatives``
    on ``X`` and joins the two resulting frames column-wise, RR columns first.
    """
    extractors = (calculate_rri_derivatives, calculate_nni_derivatives)
    feature_frames = [extract(X) for extract in extractors]
    return pd.concat(feature_frames, axis=1)

In [4]:
# Slow cell: the saved output below shows two passes over the 5117 training
# recordings at roughly 28 minutes each (one pass per feature function).
df_train = calculate_interval_derivatives(X_train)

100%|██████████| 5117/5117 [28:05<00:00,  3.04it/s]
100%|██████████| 5117/5117 [27:38<00:00,  3.09it/s]


In [5]:
# Slow cell: the saved output below shows two passes over the 3411 test
# recordings at roughly 18 minutes each (one pass per feature function).
df_test = calculate_interval_derivatives(X_test)

100%|██████████| 3411/3411 [18:22<00:00,  3.10it/s]
100%|██████████| 3411/3411 [18:18<00:00,  3.11it/s]


In [6]:
# Persist the training features; the frame's index is written as column "id".
# The target directory is not created here, so it has to exist already.
df_train.to_csv("../data/interval_derivatives/flipped/X_train.csv", index_label="id")

In [7]:
# Persist the test features; the frame's index is written as column "id".
# The target directory is not created here, so it has to exist already.
df_test.to_csv("../data/interval_derivatives/flipped/X_test.csv", index_label="id")