In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from biosppy.signals.tools import filter_signal
from pyentrp import entropy as ent
from tqdm import tqdm
from scipy import stats
from tsfresh.feature_extraction.feature_calculators import fourier_entropy

import sys
sys.path.append('..')
from util import load_base_data

import warnings
warnings.simplefilter("ignore")

plt.rcParams['figure.figsize'] = [20, 5]



In [2]:
DATA_PATH = "../data/base"
SAMPLING_RATE = 300
X_train, y_train, X_test = load_base_data(standard=True)

In [3]:
def calculate_raw_global_features(data, slices):
    global_features = pd.DataFrame()
    global_features["raw_global_min"] = data.min(axis=1)
    global_features["raw_global_max"] = data.max(axis=1)
    global_features["raw_global_mean"] = data.mean(axis=1)
    global_features["raw_global_median"] = data.median(axis=1)
    global_features["raw_global_std"] = data.std(axis=1)
    
    global_features["raw_global_skew"] = 0
    for l, r in tqdm(zip(slices[:-1], slices[1:])):
        global_features["raw_global_skew"].iloc[l:r] = data.iloc[l:r].skew(axis=1)
    
    global_features["raw_global_kurtosis"] = 0
    for l, r in tqdm(zip(slices[:-1], slices[1:])):
        global_features["raw_global_kurtosis"].iloc[l:r] = data.iloc[l:r].skew(axis=1)
    
    return global_features

def calculate_filtered_global_features(data, slices):
    data_f = pd.DataFrame(index = data.index)
    for l, r in zip(slices[:-1], slices[1:]):
        data.iloc[l:r] = filter_signal(signal=data.values[l:r], ftype='FIR', band='bandpass',
                order=40, frequency=[3,45],
                sampling_rate=SAMPLING_RATE)[0]
    
    global_features = pd.DataFrame()
    global_features["filtered_global_min"] = data.min(axis=1)
    global_features["filtered_global_max"] = data.max(axis=1)
    global_features["filtered_global_mean"] = data.mean(axis=1)
    global_features["filtered_global_median"] = data.median(axis=1)
    global_features["filtered_global_std"] = data.std(axis=1)
    
    global_features["filtered_global_skew"] = 0
    for l, r in tqdm(zip(slices[:-1], slices[1:])):
        global_features["filtered_global_skew"].iloc[l:r] = data.iloc[l:r].skew(axis=1)
    
    global_features["filtered_global_kurtosis"] = 0
    for l, r in tqdm(zip(slices[:-1], slices[1:])):
        global_features["filtered_global_kurtosis"].iloc[l:r] = data.iloc[l:r].skew(axis=1)
    
    return global_features

def calculate_global_features(data, slices):
    raw_df = calculate_raw_global_features(data, slices)
    filtered_df = calculate_filtered_global_features(data, slices)
    return pd.concat([raw_df, filtered_df], axis=1)

In [4]:
X_train.shape

(5117, 17807)

In [5]:
X_train_global_features = calculate_global_features(X_train, [0, 1000, 2000, 3000, 4000, 5117])
X_train_global_features.to_csv("../data/global/standardized/X_train.csv", index_label="id")

5it [00:02,  2.08it/s]
5it [00:02,  2.08it/s]
5it [00:02,  2.10it/s]
5it [00:02,  2.06it/s]


In [6]:
X_test.shape

(3411, 17807)

In [7]:
X_test_global_features = calculate_global_features(X_test, [0, 1000, 2000, 3411])
X_test_global_features.to_csv("../data/global/standardized/X_test.csv", index_label="id")

3it [00:01,  2.49it/s]
3it [00:01,  2.46it/s]
3it [00:01,  2.47it/s]
3it [00:01,  2.43it/s]
