In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from hrvanalysis import get_time_domain_features, get_frequency_domain_features
from hrvanalysis.preprocessing import get_nn_intervals
import biosppy.signals.ecg as ecg
from tqdm import tqdm
import neurokit2 as nk
from sklearn.utils import class_weight
import json

import warnings
# Install a blanket "ignore" filter so warnings are not displayed in the notebook.
warnings.simplefilter("ignore")

# Default matplotlib figure size: 20 wide by 5 tall.
plt.rcParams['figure.figsize'] = [20, 5]

In [None]:
# Folder containing the base CSV files read below.
DATA_PATH = "../data/base"
# Sampling rate handed to neurokit2 (`sampling_rate=`) when processing each recording.
SAMPLING_RATE = 300
# X_test / X_train are read with the default integer index (no index_col);
# only y_train is indexed by its 'id' column.
X_test = pd.read_csv(f'{DATA_PATH}/X_test.csv')
X_train = pd.read_csv(f'{DATA_PATH}/X_train.csv')
y_train = pd.read_csv(f'{DATA_PATH}/y_train.csv', index_col='id')

In [None]:
def read_data(dataname):
    """Load the train and test tables stored under ``../data/<dataname>/``.

    Parameters
    ----------
    dataname : str
        Name of the sub-folder of ``../data`` that holds ``X_train.csv``
        and ``X_test.csv``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` -- note the train frame comes first even though
        the test file is read first.
    """
    test_frame = pd.read_csv(f'../data/{dataname}/X_test.csv')
    train_frame = pd.read_csv(f'../data/{dataname}/X_train.csv')
    return train_frame, test_frame
def concat_data(x1, x2):
    """Append the columns of ``x2`` (without its 'id' column) to the right of ``x1``.

    Parameters
    ----------
    x1 : pandas.DataFrame
        Left-hand table; kept unchanged, including any 'id' column it has.
    x2 : pandas.DataFrame
        Right-hand table; must contain an 'id' column, which is dropped
        before joining. ``x2`` itself is not modified.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of ``x1`` and the remaining ``x2`` columns.
    """
    x2_without_id = x2.drop(columns=['id'])
    return pd.concat([x1, x2_without_id], axis=1)

In [None]:
# Keys looked up in the dict returned by the extraction method; reference:
# https://aura-healthcare.github.io/hrv-analysis/hrvanalysis.html#hrvanalysis.extract_features.get_frequency_domain_features
feature_names = [
    "total_power",
    "vlf",
    "lf",
    "hf",
    "lf_hf_ratio",
    "lfnu",
    "hfnu",
]

def get_features(signal, extraction_method, sampling_rate=None):
    """Extract HRV features from one raw ECG recording.

    R-peaks are located with ``nk.ecg_process``; the gaps between consecutive
    peaks are converted from sample counts to milliseconds and handed to
    ``extraction_method``.

    Parameters
    ----------
    signal : array-like
        A single ECG recording.
    extraction_method : callable
        Called with the array of RR intervals in milliseconds (the unit the
        hrvanalysis feature functions expect); its result is returned as is.
    sampling_rate : int or float, optional
        Sampling frequency of ``signal`` in Hz. Defaults to the notebook-level
        ``SAMPLING_RATE``.

    Returns
    -------
    object
        Whatever ``extraction_method`` returns.

    Raises
    ------
    Exception
        Any error raised by ``nk.ecg_process`` or ``extraction_method`` is
        propagated to the caller.
    """
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE
    _, info = nk.ecg_process(signal, sampling_rate=sampling_rate)
    r_peaks = info["ECG_R_Peaks"]
    # R-peak positions are sample indices, so their differences are measured
    # in samples. Scale by 1000 / sampling_rate to obtain milliseconds;
    # passing raw sample counts would distort every time-based HRV feature.
    rr_intervals = (r_peaks[1:] - r_peaks[:-1]) * (1000.0 / sampling_rate)
    return extraction_method(rr_intervals)

def generate_features(data, extraction_method, feature_names):
    """Build a feature matrix with one row per recording in ``data``.

    Each row of ``data`` is treated as one recording: NaN entries are dropped
    and the remaining values are passed to ``get_features``. Recordings for
    which feature extraction fails yield a row of NaN and are counted.

    Parameters
    ----------
    data : pandas.DataFrame
        One recording per row. An 'id' column, if present, is excluded from
        the signal.
    extraction_method : callable
        Forwarded to ``get_features``.
    feature_names : list of str
        Keys to read from the extracted feature mapping, in output column order.

    Returns
    -------
    tuple
        ``(feature_names, matrix, error_counts)`` where ``matrix`` is an
        ndarray with one row per recording and one column per feature name,
        and ``error_counts`` is the number of recordings that failed.

    Raises
    ------
    KeyError
        If extraction succeeds but a requested feature name is missing.
    """
    feature_vecs = []
    error_counts = 0
    for i in tqdm(range(len(data))):
        # Positional access: works whatever the frame's index labels are.
        row = data.iloc[i]
        # A row identifier is not an ECG sample; keep it out of the signal.
        # errors='ignore' makes this a no-op when there is no 'id' column.
        row = row.drop(labels='id', errors='ignore')
        signal = row.dropna().to_numpy(dtype='float32')
        try:
            features = get_features(signal, extraction_method)
        except Exception:
            # Best effort: a recording that cannot be processed becomes a NaN
            # row. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still stop the loop.
            features = None
        if features is None:
            error_counts += 1
            feature_vecs.append([np.nan] * len(feature_names))
        else:
            feature_vecs.append([features[fn] for fn in feature_names])
    if feature_vecs:
        matrix = np.array(feature_vecs)
    else:
        # Keep a 2-D shape for empty input so the result still fits a
        # DataFrame with ``feature_names`` as columns.
        matrix = np.empty((0, len(feature_names)))
    return feature_names, matrix, error_counts

In [None]:
# Result is the tuple (feature names, feature matrix, number of failed recordings).
fd_X_train = generate_features(
    data=X_train,
    extraction_method=get_frequency_domain_features,
    feature_names=feature_names,
)

In [None]:
# fd_X_train[0] holds the column names and fd_X_train[1] the feature matrix.
fd_X_train_df = pd.DataFrame(data=fd_X_train[1], columns=fd_X_train[0])
# The frame's row index is written out as the "id" column.
fd_X_train_df.to_csv("X_train.csv", index_label="id")

In [None]:
# Result is the tuple (feature names, feature matrix, number of failed recordings).
fd_X_test = generate_features(
    data=X_test,
    extraction_method=get_frequency_domain_features,
    feature_names=feature_names,
)

In [None]:
# fd_X_test[0] holds the column names and fd_X_test[1] the feature matrix.
fd_X_test_df = pd.DataFrame(data=fd_X_test[1], columns=fd_X_test[0])
# The frame's row index is written out as the "id" column.
fd_X_test_df.to_csv("X_test.csv", index_label="id")