In [96]:
import numpy as np
import pandas as pd

--------------------------------------

### CLASSES

In [97]:
# takes the columns of a dataframe as the rows
def getFeatureRows(features, i, group, extractor):
    """Reshape a wide feature table into long format, one row per feature value.

    Parameters
    ----------
    features : pandas.DataFrame
        Wide table whose column labels are the feature names.
    i : scalar
        Identifier written to the ``id`` column of every output row.
    group : scalar
        Label written to the ``group`` column of every output row.
    extractor : str
        Name written to the ``method`` column of every output row.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``group``, ``names``, ``values``, ``method``; rows are
        sorted by ``names`` (descending) and then ``id`` (ascending) and carry
        a fresh 0..n-1 index.  An input without columns yields an empty frame
        that still has these five columns.
    """
    out_columns = ['id', 'group', 'names', 'values', 'method']

    # One small frame per feature column; the scalar entries are broadcast
    # against the length of that column's value array.
    pieces = [
        pd.DataFrame({'id': i,
                      'group': group,
                      'names': column,
                      'values': features[column].values,
                      'method': extractor})
        for column in features
    ]
    if not pieces:
        return pd.DataFrame(columns=out_columns)

    # Concatenate once: growing a frame inside the loop copies all previous
    # rows on every iteration, and seeding it with an empty object-typed frame
    # turns every result column into dtype ``object``.
    long_rows = pd.concat(pieces, ignore_index=True)
    return long_rows.sort_values(['names', 'id'], ascending=[False, True], ignore_index=True)

In [98]:
def computeCatch22(d):
    """Compute the catch22 features of one series and return them as a one-row frame.

    ``d`` must provide ``tolist()``; the resulting list is passed to
    ``pycatch22.catch22_all``.  Each returned feature name becomes a column
    holding the matching value.
    """
    result = pycatch22.catch22_all(d.tolist())

    # feature name -> single-element column
    columns = {name: [value] for name, value in zip(result['names'], result['values'])}
    return pd.DataFrame(columns)
    
    
def computeKats(d):
    """Compute the Kats features of one series and return them as a one-row frame.

    The values in ``d`` are wrapped in a single-column frame named ``value``
    and handed to a freshly created ``TsFeatures`` extractor; the mapping it
    returns becomes the only row of the result.
    """
    frame = pd.DataFrame(d, columns=['value'])
    extractor = TsFeatures()
    return pd.DataFrame([extractor.transform(frame)])
    
    
def computeTsfeatures(d):
    """Compute the ``tsfeatures`` features of one series.

    Builds the panel passed to ``tsfeatures``: column ``unique_id`` (always 1),
    column ``ds`` (consecutive daily timestamps starting 2020-12-01) and
    column ``y`` (the values of ``d``), in that order.
    """
    observations = pd.DataFrame(d, columns=['y'])
    n_obs = len(observations)

    panel = observations.assign(
        ds=pd.date_range(start='2020/12/01', periods=n_obs),
        unique_id=n_obs * [1],
    )
    # put the columns into the order unique_id, ds, y
    return tsfeatures(panel[['unique_id', 'ds', 'y']])

    
def computeTsfel(d):
    """Compute the TSFEL features of one series.

    The values in ``d`` are wrapped in a single-column frame named ``value``
    and passed, together with the module-level configuration ``cfg``, to
    ``tsfel.time_series_features_extractor``; its result is returned as is.
    """
    signal = pd.DataFrame(d, columns=['value'])
    extracted = tsfel.time_series_features_extractor(cfg, signal)
    return extracted
    
    
def computeTsfresh(d):
    """Compute the tsfresh features of one series.

    Builds the frame passed to ``extract_features`` with the columns
    ``value`` (the values of ``d``), ``id`` (always 1) and ``time``
    (1.0, 2.0, ... as floats), then extracts with ``id`` as the id column,
    ``value`` as the value column and ``time`` as the sort column.
    """
    long_frame = pd.DataFrame(d, columns=['value'])
    n_points = len(long_frame)

    long_frame['id'] = [1] * n_points
    long_frame['time'] = np.arange(1, n_points + 1, 1.0)

    return extract_features(long_frame, column_id='id', column_value='value', column_sort='time')


# create a switch which chooses the correct function depending on the chosen extractor
# Dispatch table: extractor name -> function computing the features of a
# single series (used by getFeats and by the calcFeats methods of the
# wrapper classes).
switch = dict(
    catch22=computeCatch22,
    kats=computeKats,
    tsfeatures=computeTsfeatures,
    tsfel=computeTsfel,
    tsfresh=computeTsfresh,
)

In [99]:
def getFeats(df, method, max_nan_ratio=0.1):
    """Compute one feature row per time series and clean the resulting table.

    Parameters
    ----------
    df : indexable collection of time series
        Series ``k`` is read as ``df[k, 0]`` (univariate series only), so
        ``df`` must support ``len()`` and two-part indexing.
        NOTE(review): presumably a NumPy array shaped
        (n_series, 1, series_length) - confirm against the callers.
    method : str
        Key into the module-level ``switch`` table selecting the extractor.
    max_nan_ratio : float, default 0.1
        A feature column is dropped when its number of missing values divided
        by ``len(df)`` exceeds this ratio.

    Returns
    -------
    pandas.DataFrame
        One row per series.  Infinite values are treated as missing, columns
        with too many missing values are removed and the remaining missing
        values are replaced by 0.  An empty ``df`` yields an empty frame.

    Raises
    ------
    KeyError
        If ``method`` is not a key of ``switch``.
    """
    extractor = switch[method]

    # only for univariate time series
    per_series = [extractor(df[ts, 0]) for ts in range(len(df))]
    if not per_series:
        # nothing to extract; also avoids a division by zero further down
        return pd.DataFrame()

    # concatenate once instead of re-copying the growing frame per series
    frame = pd.concat(per_series, axis=0, ignore_index=True)

    # replace infinite values by NaN
    frame = frame.replace([np.inf, -np.inf], np.nan)

    # remove features with too many NaN (positional mask, so no label alignment)
    nan_ratio = frame.isnull().sum() / len(df)
    keep = (nan_ratio <= max_nan_ratio).to_numpy()
    transformedFrame = frame.loc[:, keep]

    # replace nan values by 0
    return transformedFrame.fillna(0)

--------------------------------------

Calculates all features of a chosen feature set for every time series in the dataset (`df`).

In [93]:
# Third-party feature-extraction back ends used by the helper functions below.
import pycatch22
from kats.tsfeatures.tsfeatures import TsFeatures
from kats.consts import TimeSeriesData
# Module-level Kats extractor instance.
# NOTE(review): the Kats helpers visible in this notebook create their own
# TsFeatures() per call and do not read this name - confirm it is still needed.
tsFeatures = TsFeatures()
from tsfeatures import tsfeatures
import tsfel
# Feature configuration passed to tsfel.time_series_features_extractor by the
# tsfel helpers.
cfg = tsfel.get_features_by_domain()
from tsfresh import extract_features


def compCatch22(d, i, time_var, values_var, group):
    """Compute the catch22 features of one series in long format.

    The column ``values_var`` of ``d`` is converted with ``dataFrameToList``
    and passed to ``pycatch22.catch22_all``.  The resulting name/value pairs
    form a one-row wide table that ``getFeatureRows`` reshapes, tagged with
    ``i``, ``group`` and the method name ``'catch22'``.  ``time_var`` is
    accepted for a uniform signature but not used.
    """
    series_values = dataFrameToList(d[values_var].to_frame())
    result = pycatch22.catch22_all(series_values)

    # one single-element column per feature name
    wide = pd.DataFrame(
        {name: [value] for name, value in zip(result['names'], result['values'])}
    )
    return getFeatureRows(wide, i, group, 'catch22')
    
    
def compKats(d, i, time_var, values_var, group):
    """Compute the Kats features of one series in long format.

    ``d`` is passed unchanged to a freshly created ``TsFeatures`` extractor.
    The mapping it returns becomes a one-row wide table that
    ``getFeatureRows`` reshapes, tagged with ``i``, ``group`` and the method
    name ``'kats'``.  ``time_var`` and ``values_var`` are accepted for a
    uniform signature but not used.
    """
    extracted = TsFeatures().transform(d)

    # single-row wide table, then one long-format row per feature
    wide = pd.DataFrame([extracted])
    return getFeatureRows(wide, i, group, 'kats')
    

def compTsfeatures(d, i, time_var, values_var, group):
    """Compute the ``tsfeatures`` features of one series in long format.

    Builds the panel passed to ``tsfeatures``: column ``unique_id`` (``i``
    repeated), column ``ds`` (consecutive daily timestamps starting
    2020-12-01) and column ``y`` (the ``values_var`` column of ``d``).  The
    result is reshaped by ``getFeatureRows`` with the method name
    ``'tsfeatures'``.  ``time_var`` is accepted for a uniform signature but
    not used.
    """
    panel = d[values_var].to_frame(name='y')
    n_obs = len(panel)

    panel.insert(0, 'ds', pd.date_range(start='2020/12/01', periods=n_obs))
    panel.insert(0, 'unique_id', [i] * n_obs)

    wide = tsfeatures(panel)
    return getFeatureRows(wide, i, group, 'tsfeatures')
    
    
def compTsfel(d, i, time_var, values_var, group):
    """Compute the TSFEL features of one series in long format.

    The ``values_var`` column of ``d`` is passed as a one-column frame,
    together with the module-level configuration ``cfg``, to
    ``tsfel.time_series_features_extractor``.  The result is reshaped by
    ``getFeatureRows`` with the method name ``'tsfel'``.  ``time_var`` is
    accepted for a uniform signature but not used.
    """
    signal = d[values_var].to_frame()
    wide = tsfel.time_series_features_extractor(cfg, signal)
    return getFeatureRows(wide, i, group, 'tsfel')
    
    
def compTsfresh(d, i, time_var, values_var, group, id_var='id'):
    """Compute the tsfresh features of one series in long format.

    Parameters
    ----------
    d : pandas.DataFrame
        Rows of a single time series.
    i : scalar
        Identifier of the series, forwarded to ``getFeatureRows``.
    time_var : str
        Column passed to ``extract_features`` as ``column_sort``.
    values_var : str
        Column passed to ``extract_features`` as ``column_value``.
    group : scalar
        Group label of the series, forwarded to ``getFeatureRows``.
    id_var : str, default 'id'
        Column passed to ``extract_features`` as ``column_id``.  The default
        keeps the previously hard-coded column name, so existing
        five-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Output of ``getFeatureRows`` for the extracted features, tagged with
        the method name ``'tsfresh'``.
    """
    features = extract_features(d, column_id=id_var, column_value=values_var, column_sort=time_var)
    return getFeatureRows(features, i, group, 'tsfresh')


# create a switch which chooses the correct function depending on the chosen extractor
# Dispatch table: extractor name -> function returning the long-format
# features of one series (used by calculate_features).
switch2 = dict(
    catch22=compCatch22,
    kats=compKats,
    tsfeatures=compTsfeatures,
    tsfel=compTsfel,
    tsfresh=compTsfresh,
)



# calculates all features for all time series' of the df
def calculate_features(df, id_var, time_var, values_var, group_var, feature_set):
    """Compute the features of every time series contained in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data holding all series.
    id_var : str
        Column identifying the series each row belongs to.
    time_var : str
        Time column, forwarded to the extractor.
    values_var : str
        Value column, forwarded to the extractor.
    group_var : str
        Column holding the group label; the first distinct value found for a
        series is used as that series' group.
    feature_set : str
        Key into the module-level ``switch2`` table selecting the extractor.

    Returns
    -------
    pandas.DataFrame
        The per-series extractor outputs stacked in order of first appearance
        of each id, with a fresh 0..n-1 index.  The rows are not re-sorted
        across series.  An empty frame is returned when ``df`` has no rows.

    Raises
    ------
    KeyError
        If ``feature_set`` is not a key of ``switch2``.
    """
    compute = switch2[feature_set]
    per_series = []

    # Iterate over the ids found in ``id_var``.  The literal column 'id' was
    # used here before, which broke for any other id column name although the
    # row filter below already honoured ``id_var``.
    for i in df[id_var].unique():

        print("Computing features for ", i)
        # d as all the data available for the current time series
        d = df.loc[df[id_var] == i]
        group = d[group_var].unique()[0]
        per_series.append(compute(d, i, time_var, values_var, group))

    if not per_series:
        return pd.DataFrame()

    # concatenate once instead of re-copying the growing frame per series
    return pd.concat(per_series, ignore_index=True)
        

--------------------------------------

### Classes

#### CATCH22

In [79]:
from sklearn.base import BaseEstimator
import pycatch22

class Catch22(BaseEstimator):
    """scikit-learn style wrapper that turns time series into catch22 features.

    The estimator has no hyper-parameters and learns nothing from the data:
    ``fit`` is a no-op, and ``fit_transform`` / ``transform`` compute the
    feature table with ``getFeats(X, 'catch22')``.
    """

    #-----------------------------------
    # used for comparison

    @staticmethod
    def calcFeats(X):
        """Return the catch22 features of the single time series ``X``.

        Declared static so that both ``Catch22.calcFeats(X)`` and
        ``Catch22().calcFeats(X)`` work.
        """
        return switch['catch22'](X)

    #-----------------------------------
    # use for classification

    def __init__(self):
        self.feats  = None
        self.labels = None

    def fit(self, X, y=None):
        """Do nothing and return this estimator (scikit-learn convention)."""
        return self

    def fit_transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'catch22')

    def transform(self, X, y=None):
        """Public transform entry point; delegates to ``_transform``."""
        return self._transform(X, y)

    def _transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'catch22')
    
    

#### KATS

In [80]:
from sklearn.base import BaseEstimator
from kats.tsfeatures.tsfeatures import TsFeatures
from kats.consts import TimeSeriesData
tsFeatures = TsFeatures()

class Kats(BaseEstimator):
    """scikit-learn style wrapper that turns time series into Kats features.

    The estimator has no hyper-parameters and learns nothing from the data:
    ``fit`` is a no-op, and ``fit_transform`` / ``transform`` compute the
    feature table with ``getFeats(X, 'kats')``.
    """

    #-----------------------------------
    # used for comparison

    @staticmethod
    def calcFeats(X):
        """Return the Kats features of the single time series ``X``.

        Declared static so that both ``Kats.calcFeats(X)`` and
        ``Kats().calcFeats(X)`` work.
        """
        return switch['kats'](X)

    #-----------------------------------
    # use for classification

    def __init__(self):
        self.feats  = None
        self.labels = None

    def fit(self, X, y=None):
        """Do nothing and return this estimator (scikit-learn convention)."""
        return self

    def fit_transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'kats')

    def transform(self, X, y=None):
        """Public transform entry point; delegates to ``_transform``."""
        return self._transform(X, y)

    def _transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'kats')
    
    

#### TSFEATURES

In [81]:
from sklearn.base import BaseEstimator
from tsfeatures import tsfeatures

class TSFeatures(BaseEstimator):
    """scikit-learn style wrapper that turns time series into ``tsfeatures`` features.

    The estimator has no hyper-parameters and learns nothing from the data:
    ``fit`` is a no-op, and ``fit_transform`` / ``transform`` compute the
    feature table with ``getFeats(X, 'tsfeatures')``.
    """

    #-----------------------------------
    # used for comparison

    @staticmethod
    def calcFeats(X):
        """Return the ``tsfeatures`` features of the single time series ``X``.

        Declared static so that both ``TSFeatures.calcFeats(X)`` and
        ``TSFeatures().calcFeats(X)`` work.
        """
        return switch['tsfeatures'](X)

    #-----------------------------------
    # use for classification

    def __init__(self):
        self.feats  = None
        self.labels = None

    def fit(self, X, y=None):
        """Do nothing and return this estimator (scikit-learn convention)."""
        return self

    def fit_transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'tsfeatures')

    def transform(self, X, y=None):
        """Public transform entry point; delegates to ``_transform``."""
        return self._transform(X, y)

    def _transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'tsfeatures')
    
    

#### TSFEL

In [82]:
from sklearn.base import BaseEstimator
import tsfel
# Retrieves a pre-defined feature configuration file to extract all available features
cfg = tsfel.get_features_by_domain()

class TSFel(BaseEstimator):
    """scikit-learn style wrapper that turns time series into TSFEL features.

    The estimator has no hyper-parameters and learns nothing from the data:
    ``fit`` is a no-op, and ``fit_transform`` / ``transform`` compute the
    feature table with ``getFeats(X, 'tsfel')``.
    """

    #-----------------------------------
    # used for comparison

    @staticmethod
    def calcFeats(X):
        """Return the TSFEL features of the single time series ``X``.

        Declared static so that both ``TSFel.calcFeats(X)`` and
        ``TSFel().calcFeats(X)`` work.
        """
        return switch['tsfel'](X)

    #-----------------------------------
    # use for classification

    def __init__(self):
        self.feats  = None
        self.labels = None

    def fit(self, X, y=None):
        """Do nothing and return this estimator (scikit-learn convention)."""
        return self

    def fit_transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'tsfel')

    def transform(self, X, y=None):
        """Public transform entry point; delegates to ``_transform``."""
        return self._transform(X, y)

    def _transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'tsfel')
    
    

#### TSFRESH

In [83]:
from sklearn.base import BaseEstimator
from tsfresh import extract_features

class TSFresh(BaseEstimator):
    """scikit-learn style wrapper that turns time series into tsfresh features.

    The estimator has no hyper-parameters and learns nothing from the data:
    ``fit`` is a no-op, and ``fit_transform`` / ``transform`` compute the
    feature table with ``getFeats(X, 'tsfresh')``.
    """

    #-----------------------------------
    # used for comparison

    @staticmethod
    def calcFeats(X):
        """Return the tsfresh features of the single time series ``X``.

        Declared static so that both ``TSFresh.calcFeats(X)`` and
        ``TSFresh().calcFeats(X)`` work.
        """
        return switch['tsfresh'](X)

    #-----------------------------------
    # use for classification

    def __init__(self):
        self.feats  = None
        self.labels = None

    def fit(self, X, y=None):
        """Do nothing and return this estimator (scikit-learn convention)."""
        return self

    def fit_transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'tsfresh')

    def transform(self, X, y=None):
        """Public transform entry point; delegates to ``_transform``."""
        return self._transform(X, y)

    def _transform(self, X, y=None):
        """Compute the features for all time series in ``X``; ``y`` is ignored."""
        return getFeats(X, 'tsfresh')
    
    

--------------------------------------