In [79]:
import pandas as pd
import numpy as np
from pyeeg import (
    bin_power,
    hfd,
    hjorth,
    spectral_entropy,
    svd_entropy,
    fisher_info,
    ap_entropy,
    dfa,
    hurst,
    permutation_entropy,
    samp_entropy,
    pfd,
    LLE
)
from tsfresh.feature_extraction import feature_calculators

In [80]:
# Load the input table from a CSV file located in the working directory.
df = pd.read_csv('timeseries_NEW.csv')

In [81]:
# Distinct per-column NaN counts; a result containing only 0 means no column has missing values.
df.isna().sum().unique()

array([0], dtype=int64)

In [82]:
# Remove the first column in place (presumably a saved index column — confirm against the CSV).
# NOTE: this cell is not idempotent; every re-run drops one more column from df.
df.drop(df.columns[0], axis=1, inplace=True)

In [83]:
# Preview the first rows of the table; each remaining column is one series.
df.head()

Unnamed: 0,s0209110,s0310211,s0310411,s0310703,s0310706,s0310710,s0311303,s0320307,s0408108,s0410108,...,s8010707,s8108205,s8420304,s8508108,s8608101,s8710205,s9309101,s9509011,s9709110,s9809003
0,8.32,0.0,26.75,0.0,72.66,74.72,0.0,0.13,12.36,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5.57,0.0,30.88,0.0,33.76,120.7,0.0,0.06,13.88,0.0,...,0.0,0.2,0.0,0.0,4.75,0.0,5.35,0.0,0.0,0.0
2,13.6,0.0,17.23,0.0,38.43,199.51,0.0,29.35,10.79,0.1,...,6.34,0.0,0.03,0.0,0.0,0.0,0.47,0.0,0.0,0.0
3,5.06,0.0,1.75,0.0,21.8,60.94,0.0,72.06,27.75,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,11.72,0.0,26.64,35.22,0.0,39.49,50.18,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
# Distinct column dtypes, as a quick check that every series is numeric.
df.dtypes.unique()

array([dtype('float64')], dtype=object)

In [85]:
# Feature table with one row per column (series) of df; feature columns are added afterwards.
df_features = pd.DataFrame(index=df.columns)

In [86]:
# Whole-series summary statistics, one value per column of df.
for stat_name, stat_values in (
    ('mean', df.mean()),
    ('max', df.max()),
    ('var', df.var()),
):
    df_features[stat_name] = stat_values

# Quartiles: after transposing, rows are series and columns are 0.25 / 0.5 / 0.75;
# those float labels are also used as the new column names in df_features.
quantile = df.quantile([0.25, 0.5, 0.75]).T
df_features[quantile.columns] = quantile

In [87]:
# Bucket rows into consecutive blocks of 60*24 index values (one block per day
# when the index counts minutes — TODO confirm sampling rate) and aggregate each block.
rows_by_day = df.groupby(df.index // (60 * 24))
daily_mean = rows_by_day.mean()
daily_var = rows_by_day.var()
daily_max = rows_by_day.max()

In [88]:
# Column labels use 1-based day numbers derived from the group keys of each daily table.
cols_day_max = [f'max_day_{day+1}' for day in daily_max.index]
cols_day_var = [f'var_day_{day+1}' for day in daily_var.index]
cols_day_mean = [f'mean_day_{day+1}' for day in daily_mean.index]

# Transposing puts the series on the rows so they line up with df_features' index.
for day_columns, day_table in (
    (cols_day_max, daily_max),
    (cols_day_mean, daily_mean),
    (cols_day_var, daily_var),
):
    df_features[day_columns] = day_table.T

In [89]:
# Variance, across days, of each series' daily mean.
variance_of_daily_means = daily_mean.var()
df_features['mean_day_var'] = variance_of_daily_means

In [90]:
# One row per series; column 0 is stored as mobility and column 1 as complexity.
# The result is kept under its own name so the imported `hjorth` function is not
# overwritten — rebinding it to an array makes this cell fail on a second run.
hjorth_params = np.array([hjorth(df[column]) for column in df])
df_features['hjorth_mobility'] = hjorth_params[:, 0]
df_features['hjorth_complexity'] = hjorth_params[:, 1]

In [91]:
# pyeeg `dfa` evaluated once per series, in column order.
dfa_values = [dfa(df[name]) for name in df.columns]
df_features['dfa'] = dfa_values



In [92]:
# pyeeg `pfd` evaluated once per series, in column order.
pfd_values = [pfd(df[name]) for name in df.columns]
df_features['pfd'] = pfd_values

In [93]:
# Permutation entropy per series for two values of the second argument (3 and 4),
# with the third argument fixed at 1.
for feature_name, order in (
    ('permutation_entropy_n_3', 3),
    ('permutation_entropy_n_4', 4),
):
    df_features[feature_name] = [
        permutation_entropy(df[name], order, 1) for name in df.columns
    ]

In [94]:
def heaviside_filter(X, f, filter_value):
    """
    Count the entries of ``X`` that are greater than or equal to ``f * filter_value``.

    The count is obtained by summing a Heaviside step of ``X - f * filter_value``
    whose value at exactly zero is 1, so entries equal to the threshold are counted.
    The reduction is whatever ``.sum()`` does for the type of ``X``.
    """
    threshold = f * filter_value
    step = np.heaviside(X - threshold, 1)
    return step.sum()

In [95]:
# Per-series counts of samples at or above a fraction of the series' max / mean.
column_max = df.max()
column_mean = df.mean()
threshold_features = [
    ('above_0.7*max', 0.7, column_max),
    ('above_0.8*max', 0.8, column_max),
    ('above_0.9*max', 0.9, column_max),
    ('above_0.7*mean', 0.7, column_mean),
    ('above_0.8*mean', 0.8, column_mean),
    ('above_0.9*mean', 0.9, column_mean),
    ('above_mean', 1, column_mean),
]
for feature_name, fraction, reference in threshold_features:
    df_features[feature_name] = heaviside_filter(df, fraction, reference)

In [96]:
# pyeeg `hfd` per series; 60 is passed as its second argument.
hfd_values = [hfd(df[name], 60) for name in df.columns]
df_features['hfd'] = hfd_values



In [97]:
# tsfresh `abs_energy` evaluated once per series, in column order.
abs_energy_values = [
    feature_calculators.abs_energy(df[name]) for name in df.columns
]
df_features['abs_energy'] = abs_energy_values

In [98]:
# A single request (the "pvalue" attribute with BIC lag selection) is passed per series.
param_adf = [{"attr": "pvalue", "autolag": "BIC"}]
adf_values = []
for name in df.columns:
    adf_result = feature_calculators.augmented_dickey_fuller(df[name], param=param_adf)
    # First (and only requested) entry, second item — presumably a (name, value) pair.
    adf_values.append(adf_result[0][1])
df_features['augmented_dickey_fuller'] = adf_values

In [99]:
# tsfresh `cid_ce` per series; False is passed as its second argument.
cid_ce_values = [
    feature_calculators.cid_ce(df[name], False) for name in df.columns
]
df_features['cid_ce'] = cid_ce_values

In [100]:
# tsfresh `autocorrelation` per series; 100 is passed as its second argument.
autocorrelation_values = [
    feature_calculators.autocorrelation(df[name], 100) for name in df.columns
]
df_features['autocorrelation'] = autocorrelation_values

In [101]:
# Spectral aggregates of each series. All four aggregates are requested in a
# single fft_aggregated call per series instead of four separate calls, so the
# transform is computed once per column rather than four times.
fft_features = [
    ('fft_centroid', 'centroid'),
    ('fft_variance', 'variance'),
    ('fft_skew', 'skew'),
    ('fft_kurtosis', 'kurtosis'),
]
fft_param = [{'aggtype': aggtype} for _, aggtype in fft_features]

# fft_aggregated yields one (name, value) pair per requested aggtype, in the
# order of `fft_param`; only the values are kept.
fft_rows = [
    [value for _, value in feature_calculators.fft_aggregated(df[column], fft_param)]
    for column in df
]
for position, (feature_name, _) in enumerate(fft_features):
    df_features[feature_name] = [row[position] for row in fft_rows]

In [105]:
# tsfresh `kurtosis` evaluated once per series, in column order.
kurtosis_values = [
    feature_calculators.kurtosis(df[name]) for name in df.columns
]
df_features['kurtosis'] = kurtosis_values

In [106]:
# tsfresh `skewness` evaluated once per series, in column order.
skewness_values = [
    feature_calculators.skewness(df[name]) for name in df.columns
]
df_features['skewness'] = skewness_values

In [107]:
# tsfresh `mean_change` evaluated once per series, in column order.
mean_change_values = [
    feature_calculators.mean_change(df[name]) for name in df.columns
]
df_features['mean_change'] = mean_change_values

In [108]:
# tsfresh `mean_second_derivative_central` evaluated once per series, in column order.
second_derivative_values = [
    feature_calculators.mean_second_derivative_central(df[name])
    for name in df.columns
]
df_features['mean_2_deriv_central'] = second_derivative_values

In [109]:
def interdaily_stability(X: pd.DataFrame, h_freq: int) -> pd.Series:
    """
    Compute the interdaily stability of each time series in ``X``.

    For every column, the rows are split into consecutive blocks of ``h_freq``
    index values; the result is the mean squared deviation of the block means
    from the column's overall mean, divided by the column's sample variance.

    NOTE(review): every block of the series gets its own mean here. The usual
    definition of interdaily stability averages each hour of the day across
    days before comparing with the overall mean — confirm which is intended.

    Parameters
    ----------

    X: pandas.DataFrame
        dataframe with the series for each object in the columns; its index
        must support integer floor division (e.g. the default RangeIndex)
    h_freq: int
        number of data per hour

    Returns
    -------

    pandas.Series
        one value per column of ``X``
    """
    X_mean = X.mean()
    # Group the frame that was passed in, not a notebook-level global.
    hourly_means = X.groupby(X.index // h_freq).mean()
    p = len(hourly_means)
    # Explicit column-wise reduction: np.sum on a DataFrame forwards axis=None,
    # whose meaning differs between pandas versions.
    squared_deviation = (hourly_means - X_mean) ** 2
    numerator = (1 / p) * squared_deviation.sum(axis=0)
    denominator = X.var()

    return numerator / denominator

In [110]:
# 60 samples per hour are assumed for the hourly blocks.
df_features['interdaily_stability'] = interdaily_stability(X=df, h_freq=60)

In [111]:
def intradaily_variability(X: pd.DataFrame) -> pd.Series:
    """
    Compute the intradaily variability of each time series in ``X``.

    For every column, the result is the sum of squared differences between
    consecutive samples divided by ``(len(X) - 1)`` times the column's sample
    variance.

    Parameters
    ----------

    X: pandas.DataFrame
        dataframe with the series for each object in the columns

    Returns
    -------

    pandas.Series
        one value per column of ``X``
    """
    # Positional first differences of the frame that was passed in (not a
    # notebook-level global); the leading NaN row is skipped by sum(), and the
    # result does not depend on X having a 0-based integer index.
    numerator = (X.diff() ** 2).sum()
    denominator = (len(X) - 1) * X.var()

    return numerator / denominator

In [112]:
# One intradaily-variability value per series.
df_features['intradaily_variability'] = intradaily_variability(X=df)

In [114]:
# Write the feature table (indexed by the column names of df) to a CSV file in the working directory.
df_features.to_csv('extracted_features.csv')