In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from hrvanalysis import get_time_domain_features, get_frequency_domain_features
from hrvanalysis.preprocessing import get_nn_intervals
import biosppy.signals.ecg as ecg
from tqdm import tqdm
import neurokit2 as nk
from sklearn.utils import class_weight
import json

import warnings

# Suppress every warning for the remainder of the session.
warnings.simplefilter(action="ignore")

# Default size of every figure: 20 wide by 5 tall.
plt.rcParams.update({'figure.figsize': [20, 5]})

In [2]:
# Directory holding the base CSV files, relative to the working directory.
DATA_PATH = "../data/base"
# Passed as `sampling_rate` to the ECG processing (presumably Hz -- TODO confirm).
SAMPLING_RATE = 300

# Labels are indexed by their `id` column; the feature tables keep the default index.
y_train = pd.read_csv(f'{DATA_PATH}/y_train.csv', index_col='id')
X_train = pd.read_csv(f'{DATA_PATH}/X_train.csv')
X_test = pd.read_csv(f'{DATA_PATH}/X_test.csv')

In [3]:
def read_data(dataname):
    """Load the train and test feature tables stored under ``../data/<dataname>``.

    Parameters
    ----------
    dataname : str
        Name of the sub-directory of ``../data`` that contains ``X_train.csv``
        and ``X_test.csv``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_train, X_test)``, in that order.
    """
    # The test table is read first, then the train table.
    test_frame = pd.read_csv(f'../data/{dataname}/X_test.csv')
    train_frame = pd.read_csv(f'../data/{dataname}/X_train.csv')
    return train_frame, test_frame
def concat_data(x1, x2):
    """Place the columns of ``x2`` to the right of ``x1``, minus ``x2``'s ``id`` column.

    Parameters
    ----------
    x1 : pandas.DataFrame
        Left-hand table, kept unchanged.
    x2 : pandas.DataFrame
        Right-hand table; its ``id`` column is removed before joining.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of ``x1`` and the remaining columns of ``x2``.
    """
    x2_without_id = x2.drop(columns=['id'])
    return pd.concat([x1, x2_without_id], axis=1)

In [4]:
# Reference for the time-domain HRV features computed below (hrvanalysis documentation):
# https://aura-healthcare.github.io/hrv-analysis/hrvanalysis.html#hrvanalysis.extract_features.get_time_domain_features

In [5]:
def get_td_features(signal, sampling_rate=None):
    """Compute time-domain HRV features for a single ECG recording.

    R peaks are located with ``nk.ecg_process``; the distances between
    consecutive peaks are converted from sample counts to milliseconds and
    handed to ``get_time_domain_features``.

    Parameters
    ----------
    signal : array-like
        ECG samples of one recording.
    sampling_rate : int or float, optional
        Sampling frequency of ``signal`` in Hz. When omitted, the
        notebook-level ``SAMPLING_RATE`` is used.

    Returns
    -------
    dict
        The result of ``get_time_domain_features`` for the RR-interval series.
    """
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE
    _, info = nk.ecg_process(signal, sampling_rate=sampling_rate)
    r_peaks = np.asarray(info["ECG_R_Peaks"])
    # The R-peak positions are sample indices, so their differences are sample
    # counts. hrvanalysis works in milliseconds (e.g. the 50 ms / 20 ms
    # thresholds behind nni_50 / nni_20 and heart rate as 60000 / interval),
    # so the intervals must be converted before feature extraction.
    rr_intervals_ms = np.diff(r_peaks) * 1000.0 / sampling_rate
    return get_time_domain_features(rr_intervals_ms)

def generate_td_features(data):
    """Build a time-domain HRV feature matrix, one row per recording in ``data``.

    Each row of ``data`` is treated as one ECG recording: missing values are
    dropped, the remaining samples are cast to ``float32`` and passed to
    ``get_td_features``. Rows for which extraction raises are filled with NaN
    and counted.

    Parameters
    ----------
    data : pandas.DataFrame
        One recording per row. A column named ``id``, if present, is treated
        as an identifier and excluded from the signal.

    Returns
    -------
    feature_names : list of str
        Names of the extracted features, in column order.
    features : numpy.ndarray
        Array of shape ``(len(data), len(feature_names))``.
    error_counts : int
        Number of rows whose feature extraction failed.
    """
    feature_names = [
        "mean_nni", "sdnn", "sdsd", "rmssd", "median_nni",
        "nni_50", "pnni_50", "nni_20", "pnni_20", "range_nni",
        "cvsd", "cvnni", "mean_hr", "max_hr", "min_hr", "std_hr"]

    # The identifier column is not an ECG sample; leaving it in would inject
    # a spurious first value into every signal.
    if 'id' in data.columns:
        data = data.drop(columns=['id'])

    feature_vecs = []
    error_counts = 0
    for i in tqdm(range(len(data))):
        # Positional access so that any index works, not only 0..n-1 labels.
        signal = data.iloc[i].dropna().to_numpy(dtype='float32')
        try:
            time_domain_features = get_td_features(signal)
        except Exception:
            # Best effort: a recording that cannot be processed yields a NaN
            # row. KeyboardInterrupt / SystemExit are deliberately not caught
            # so a long run can still be aborted.
            error_counts += 1
            feature_vector = [np.nan] * len(feature_names)
        else:
            feature_vector = [time_domain_features[fn] for fn in feature_names]
        feature_vecs.append(feature_vector)

    # The reshape keeps the result 2-D even when `data` has no rows.
    features = np.array(feature_vecs).reshape(-1, len(feature_names))
    return feature_names, features, error_counts

In [1]:
# Extract the time-domain HRV features for every row of X_train.
# `error_counts` is the number of rows for which extraction failed (NaN rows).
td_feature_names, td_features, error_counts = generate_td_features(X_train)

NameError: name 'generate_td_features' is not defined

In [None]:
# Write the train features to X_train.csv in the working directory:
# a header line, then one line per row prefixed with its 0-based position as `id`.
df = pd.DataFrame(data=td_features, columns=td_feature_names)
with open("X_train.csv", "w") as out_file:
    header_line = "id," + ",".join(td_feature_names) + "\n"
    out_file.write(header_line)
    for row_id, row_values in enumerate(df.to_numpy()):
        cells = ",".join(map(str, row_values))
        out_file.write(f"{str(row_id)}," + cells + "\n")

In [None]:
# Extract the time-domain HRV features for every row of X_test.
# NOTE: this rebinds td_feature_names / td_features / error_counts, replacing
# the values computed for X_train.
td_feature_names, td_features, error_counts = generate_td_features(X_test)

In [None]:
# Write the test features to X_test.csv in the working directory:
# a header line, then one line per row prefixed with its 0-based position as `id`.
df = pd.DataFrame(data=td_features, columns=td_feature_names)
with open("X_test.csv", "w") as out_file:
    header_line = "id," + ",".join(td_feature_names) + "\n"
    out_file.write(header_line)
    for row_id, row_values in enumerate(df.to_numpy()):
        cells = ",".join(map(str, row_values))
        out_file.write(f"{str(row_id)}," + cells + "\n")