In [1]:
import numpy as np
import pandas as pd
from biosppy.signals import ecg

* wavelet (+++++)
    * cD_list,cA_list (sym6)
* U_list,U_value_list (++++)
* fft (++++)
* wave indices list (++++)
    * T_list,S_list,R_list,Q_list,P_list,ST_list,QRS_list,PR_list,QRS_T_list,QRS_P_list
* correlation (+++??)
* ptp (largest - smallest) (+++)
* energy list (++)

In [2]:
def mean_squared_distance(rpeaks):
    """Return the mean of the squared differences between consecutive entries of ``rpeaks``."""
    gaps = np.diff(rpeaks)
    return np.mean(np.square(gaps))

def get_feature_list(feature, axis=-1):
    """Summarise ``feature`` as mean, median, min, max and std, in that order.

    Parameters
    ----------
    feature : array-like
        Values to summarise.
    axis : int, default -1
        ``-1`` reduces over every element, giving five scalars.
        ``0`` reduces along the first axis; the five per-column result
        arrays are flattened and laid out one statistic after another.
        Any other value yields an empty array.

    Returns
    -------
    numpy.ndarray
        1-D float array of the statistics.
    """
    if axis == -1:
        reduce_kwargs = {}
    elif axis == 0:
        reduce_kwargs = {"axis": 0}
    else:
        # Unsupported axis values produce no features.
        return np.array([])

    stats = np.array([])
    for reducer in (np.mean, np.median, np.min, np.max, np.std):
        stats = np.append(stats, reducer(feature, **reduce_kwargs))
    return stats

def extract_feature(X_signal, sampling_rate):
    """Build a 1-D feature vector of summary statistics for one ECG recording.

    The signal is processed with ``biosppy.signals.ecg.ecg`` and the R-peak
    positions are then re-aligned on the raw signal with
    ``ecg.correct_rpeaks``.  The result concatenates, in this order:

    1. mean/median/min/max/std of the raw signal sampled at the R-peaks,
    2. the same five statistics of the R-peak indices,
    3. the root-mean-square of consecutive R-peak index differences,
    4. the five statistics of those differences,
    5. the five statistics taken along axis 0 of the templates,
    6. the five statistics of ``heart_rate``, then of ``heart_rate_ts``,
    7. the five statistics of the first differences of ``heart_rate``,
       then of ``heart_rate_ts``.

    Parameters
    ----------
    X_signal : numpy.ndarray
        Raw ECG samples; indexed with the R-peak index array.
    sampling_rate : number
        Sampling frequency, forwarded to biosppy.

    Returns
    -------
    numpy.ndarray
        1-D float array of features.
    """
    # Only the R-peaks, templates and heart-rate outputs are used below.
    _, _, rpeaks, _, templates, heart_rate_ts, heart_rate = ecg.ecg(
        X_signal, sampling_rate, show=False
    )
    # correct_rpeaks returns a one-field result tuple; unpack it so that
    # ``rpeaks`` is the index array itself rather than a tuple wrapping it.
    (rpeaks,) = ecg.correct_rpeaks(
        signal=X_signal, rpeaks=rpeaks, sampling_rate=sampling_rate, tol=0.01
    )
    rpeak_gaps = np.diff(rpeaks)

    # Placeholder whose statistics are all NaN; used where an empty input
    # would otherwise make the min/max reductions raise.
    nan_pair = np.array([np.nan, np.nan])

    if len(heart_rate) == 0:
        heart_rate = nan_pair
    if len(heart_rate_ts) == 0:
        heart_rate_ts = nan_pair
    heart_rate_stats = get_feature_list(heart_rate)
    heart_rate_ts_stats = get_feature_list(heart_rate_ts)

    # A single sample has an empty first difference, so pad that case too.
    if len(heart_rate) == 1:
        heart_rate = nan_pair
    if len(heart_rate_ts) == 1:
        heart_rate_ts = nan_pair

    return np.concatenate([
        get_feature_list(X_signal[rpeaks]),
        get_feature_list(rpeaks),
        [np.sqrt(mean_squared_distance(rpeaks))],
        get_feature_list(rpeak_gaps),
        get_feature_list(templates, axis=0),
        heart_rate_stats,
        heart_rate_ts_stats,
        get_feature_list(np.diff(heart_rate)),
        get_feature_list(np.diff(heart_rate_ts)),
    ])

def extract_feature_batch(X_signal_batch, sampling_rate=300):
    """Extract one feature vector per row of a DataFrame of recordings.

    Parameters
    ----------
    X_signal_batch : pandas.DataFrame
        One recording per row.  Missing (NaN) cells are dropped from each
        row before it is passed to ``extract_feature``.
    sampling_rate : number, default 300
        Sampling frequency forwarded to ``extract_feature``.  The default
        keeps the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        The per-row feature vectors stacked with ``np.array``.
    """
    X_feat = []
    for row_idx in range(X_signal_batch.shape[0]):
        samples = X_signal_batch.iloc[row_idx].dropna().to_numpy()
        X_feat.append(extract_feature(samples, sampling_rate))
    return np.array(X_feat)

In [3]:
def _load_signal_csv(csv_path):
    """Read a signal CSV and split off its ``id`` column.

    Returns ``(ids, signals)``: the ``id`` column as a NumPy array and the
    remaining columns as a new DataFrame.
    """
    table = pd.read_csv(csv_path)
    # Read the ids by name (the same column that is dropped below) instead of
    # converting the whole frame to a dense array just to slice column 0.
    ids = table["id"].to_numpy()
    return ids, table.drop(columns="id")


indices_train, X_train_data = _load_signal_csv("X_train.csv")
indices_test, X_test_data = _load_signal_csv("X_test.csv")


In [4]:
# Build the feature matrices: one feature vector per recording.
X_train, X_test = (
    extract_feature_batch(signals) for signals in (X_train_data, X_test_data)
)

In [5]:
# Prepend the sample ids as column 0 of each feature matrix.
X_train_feat = np.insert(X_train, 0, indices_train, axis=1)
X_test_feat = np.insert(X_test, 0, indices_test, axis=1)

In [6]:
# Training features: wrap in a DataFrame and write without the row index.
df_X_train = pd.DataFrame(X_train_feat)
df_X_train.to_csv("X_train_feature_rm.csv", index=False)

# Test features: same treatment.
df_X_test = pd.DataFrame(X_test_feat)
df_X_test.to_csv("X_test_feature_rm.csv", index=False)