In [14]:
import csv
import os

import tqdm

import biosppy.signals.ecg as ecg
import biosppy
import neurokit2 as nk

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp

import hrvanalysis
import heartpy as hp

import sys

sys.path.append("..")
from util import load_base_data


SAMPLING_RATE = 300.0
DATAPATH = "/Users/ericschreiber/dev/ETH/AML/Project_1/aml-2023/task2/data"

In [9]:
X_train, y_train, X_test = load_base_data()

In [47]:
def compute_hrv_features(peaks):
    try:
        tdf = hrvanalysis.get_time_domain_features(peaks)
        gf = hrvanalysis.get_geometrical_features(peaks)
        fdf = hrvanalysis.get_frequency_domain_features(peaks)
        pcp = hrvanalysis.get_poincare_plot_features(peaks)
        samp = hrvanalysis.get_sampen(peaks)
    except:
        return [0] * 22

    return [
        tdf["mean_nni"],
        tdf["sdnn"],
        tdf["sdsd"],
        tdf["nni_50"],
        tdf["pnni_50"],
        tdf["nni_20"],
        tdf["pnni_20"],
        tdf["rmssd"],
        tdf["median_nni"],
        tdf["range_nni"],
        tdf["cvsd"],
        tdf["cvnni"],
        tdf["mean_hr"],
        tdf["max_hr"],
        tdf["min_hr"],
        fdf["lf"],
        fdf["hf"],
        fdf["lf_hf_ratio"],
        pcp["sd1"],
        pcp["sd2"],
        pcp["ratio_sd2_sd1"],
        samp["sampen"],
    ]

In [11]:
def compute_hp_features(ecg):
    try:
        _, measures = hp.process(ecg, SAMPLING_RATE)
    except:
        try:
            _, measures = hp.process(hp.flip_signal(ecg), SAMPLING_RATE)
        except:
            return [0] * 12

    return [
        measures["bpm"],
        measures["ibi"],
        measures["sdnn"],
        measures["sdsd"],
        measures["rmssd"],
        measures["pnn20"],
        measures["pnn50"],
        measures["hr_mad"],
        measures["sd1"],
        measures["sd2"],
        measures["s"],
        np.log10(measures["sd1/sd2"] ** 2),
    ]

In [12]:
def make_features(ecg):
    cleaned = nk.ecg_clean(ecg, sampling_rate=SAMPLING_RATE)
    _, info = nk.ecg_peaks(ecg_cleaned=cleaned, sampling_rate=SAMPLING_RATE)
    rpeaks = info["ECG_R_Peaks"]
    features = []
    features += compute_hp_features(cleaned)
    features += compute_hrv_features(rpeaks)
    features = np.array(features).flatten()
    return features


def make_features_from_df(df):
    features = []
    for i in tqdm.tqdm(range(len(df))):
        ecg = df.iloc[i].values
        features.append(make_features(ecg))
    numpy = np.array(features)
    df = pd.DataFrame(numpy).reset_index(drop=False)
    df.drop(columns=["index"], inplace=True)
    df.index.name = "id"

    return df

In [50]:
features_X_train = make_features_from_df(X_train)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing

In [51]:
features_X_train

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,24,25,26,27,28,29,30,31,32,33
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,148.323219,404.521964,138.731051,33.087106,267.346356,1.000000,0.992063,120.000000,189.042240,46.871917,...,27.321386,588.235294,3.690945,200.551149,0.223638,896.766297,13.869757,6761.671334,487.511877,0.005063
1,245.702730,244.197531,145.246376,126.019165,228.147397,0.944444,0.861111,76.666667,161.009174,135.155882,...,32.087007,310.880829,7.082989,613.311336,0.834230,735.182430,9.338994,3508.869917,375.722477,0.059423
2,141.176471,425.000000,163.952907,40.945613,237.993153,1.000000,1.000000,130.000000,163.446673,24.062675,...,33.602249,310.880829,7.472911,905.560707,1.186693,763.095814,7.619007,3366.650149,441.875213,-0.000000
3,304.061339,197.328605,88.688207,101.899254,140.948600,0.828571,0.507143,23.333333,99.665705,73.760091,...,34.480877,560.747664,6.960557,271.824481,1.355169,200.583470,15.518064,3529.170511,227.423374,0.021693
4,245.914198,243.987539,116.049045,97.557653,186.549013,0.971154,0.894231,70.000000,131.909634,93.435944,...,33.618730,377.358491,7.122507,333.185735,4.071303,81.837607,14.675273,3542.203067,241.372210,0.012121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5112,330.324910,181.639344,126.060706,104.365539,182.836589,0.907407,0.759259,96.666667,129.029829,120.681238,...,28.850907,108.892922,11.822660,774.784060,7.102584,109.084817,67.283219,1944.244673,28.896428,0.057158
5113,266.647490,225.016181,159.757687,130.676988,244.340757,0.948454,0.876289,116.666667,172.693781,143.700555,...,42.205843,540.540541,7.360157,625.990190,0.775245,807.473567,3.036870,3432.091437,1130.140969,-0.000000
5114,207.201178,289.573643,157.818458,122.964934,264.067442,0.964286,0.940476,116.666667,186.722106,118.835376,...,40.082699,468.750000,7.133516,703.109156,0.866982,810.984441,20.209886,3575.335004,176.910201,0.038466
5115,146.670413,409.080460,152.835014,53.769290,299.544883,1.000000,0.980769,31.666667,211.809980,57.463703,...,42.687403,582.524272,7.275373,630.753697,0.940980,670.315891,6.037517,3473.115641,575.255618,-0.000000


In [52]:
# How many infs
print(f"Number of infs: {np.sum(np.isinf(features_X_train), axis=0).sum()}")
# Which columns have infs
cols_with_infs = np.where(np.isinf(features_X_train))[1]
cols_with_infs_unique = np.unique(cols_with_infs)
print(f"Columns with infs: {len(cols_with_infs_unique)}")
print(
    f"biggest pos value except inf: {np.max(features_X_train[features_X_train != np.inf].max())}"
)
print(f"biggest neg value: {np.min(features_X_train).min()}")
biggest_pos = np.max(features_X_train[features_X_train != np.inf].max())
biggest_neg = np.min(features_X_train).min()
# Replace infs with biggest pos value
features_X_train[features_X_train == np.inf] = biggest_pos
# Replace -infs with biggest neg value
features_X_train[features_X_train == -np.inf] = biggest_neg

Number of infs: 246
Columns with infs: 1
biggest pos value except inf: 785327.1102283916
biggest neg value: -2.247994842224999


  return reduction(axis=axis, out=out, **passkwargs)


In [60]:
X_train_save_path = os.path.join(DATAPATH, "feature_extraction/hrv_X_train.csv")
features_X_train.to_csv(X_train_save_path, index=True)

# X Test

In [57]:
# test_features = make_features_from_df(X_test)

# How many infs
print(f"Number of infs: {np.sum(np.isinf(test_features), axis=0).sum()}")
# Which columns have infs
cols_with_infs = np.where(np.isinf(test_features))[1]
cols_with_infs_unique = np.unique(cols_with_infs)
print(f"Columns with infs: {len(cols_with_infs_unique)}")
print(
    f"biggest pos value except inf: {np.max(test_features[test_features != np.inf].max())}"
)
print(
    f"biggest neg value excpet inf : {np.min(test_features[test_features != -np.inf]).min()}"
)
biggest_pos = np.max(test_features[test_features != np.inf].max())
biggest_neg = np.min(test_features[test_features != -np.inf]).min()
# Replace infs with biggest pos value
test_features[test_features == np.inf] = biggest_pos
# Replace -infs with biggest neg value
test_features[test_features == -np.inf] = biggest_neg

Number of infs: 1
Columns with infs: 1
biggest pos value except inf: 1419764.0845090873
biggest neg value excpet inf : -2.235093400946307


In [59]:
X_test_save_path = os.path.join(DATAPATH, "feature_extraction/hrv_X_test.csv")
test_features.to_csv(X_test_save_path, index=True)