### Import libraries

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import random
import statistics
import time
import glob
import os
import feather
from sklearn.model_selection import train_test_split
sns.set(style="whitegrid")

In [None]:
# feature extractors

import pycatch22
from kats.tsfeatures.tsfeatures import TsFeatures
from kats.consts import TimeSeriesData
tsFeatures = TsFeatures()

from tsfeatures import tsfeatures
from tsfresh import extract_features
import tsfel

--------------------------------------

### Functions to calculate the features for _catch22_, _Kats_, _tsfeatures_, _tsfel_ and _tsfresh_

In [None]:
# takes the columns of a dataframe as the rows
def getFeatureRows(features, i, group, extractor):
    """Reshape a wide feature table into long format.

    Every column of ``features`` becomes one or more rows (one per value in
    that column) carrying the feature name, the value and constant labels.

    Parameters
    ----------
    features : pandas.DataFrame
        Wide table whose column labels are feature names.
    i : scalar
        Identifier of the time series, copied into the ``id`` column.
    group : scalar
        Group label, copied into the ``group`` column.
    extractor : str
        Name of the feature extractor, copied into the ``method`` column.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``id``, ``group``, ``names``, ``values`` and
        ``method`` and a fresh 0..n-1 index. Empty (but with those columns)
        when ``features`` has no columns.
    """
    out_columns = ['id', 'group', 'names', 'values', 'method']

    # Build all per-feature frames first and concatenate once: calling
    # pd.concat inside the loop re-copies the accumulated rows on every
    # iteration, which gets slow for extractors with hundreds of features.
    frames = [
        pd.DataFrame({
            'id': i,
            'group': group,
            'names': column,
            'values': features[column].values,
            'method': extractor,
        })
        for column in features
    ]

    if not frames:
        # pd.concat refuses an empty list; keep the documented column layout
        return pd.DataFrame(columns=out_columns)

    return pd.concat(frames, ignore_index=True)


In [None]:
#########################################################
# functions to compute according to the feature extractor


def computeCatch22(d, i, time_var, values_var, group):
    """Compute the catch22 feature set for one time series.

    d          -- data frame holding the series; only column ``values_var`` is read
    i, group   -- identifier and group label copied onto every output row
    time_var   -- accepted so all extractors share one signature; unused here
    values_var -- name of the column with the observations

    Returns the long-format frame built by getFeatureRows, tagged 'catch22'.
    """
    # dataFrameToList is defined outside the visible part of this file
    series_values = dataFrameToList(d[values_var].to_frame())
    result = pycatch22.catch22_all(series_values)

    # wrap each value in a one-element list so the mapping becomes a
    # single-row frame with one column per feature
    by_name = {
        name: [value]
        for name, value in zip(result['names'], result['values'])
    }

    wide = pd.DataFrame.from_dict(by_name)
    return getFeatureRows(wide, i, group, 'catch22')
    
    
def computeKats(d, i, time_var, values_var, group):
    """Compute the Kats feature set for one time series.

    d        -- data for the series, handed to ``TsFeatures().transform`` as-is
    i, group -- identifier and group label copied onto every output row
    time_var, values_var -- accepted for a uniform extractor signature; unused here

    Returns the long-format frame built by getFeatureRows, tagged 'kats'.
    """
    # NOTE(review): the frame is passed to Kats unchanged (no TimeSeriesData
    # wrapper, no column selection) -- confirm Kats accepts this input.
    feature_map = TsFeatures().transform(d)

    # a one-element list of the result yields a single-row wide frame
    wide = pd.DataFrame.from_dict([feature_map])
    return getFeatureRows(wide, i, group, 'kats')
    

def computeTsfeatures(d, i, time_var, values_var, group):
    """Compute the tsfeatures feature set for one time series.

    d          -- data frame holding the series; only column ``values_var`` is read
    i, group   -- identifier and group label copied onto every output row
    time_var   -- accepted for a uniform extractor signature; unused here
    values_var -- name of the column with the observations

    Returns the long-format frame built by getFeatureRows, tagged 'tsfeatures'.
    """
    # the panel handed to tsfeatures has the columns unique_id, ds, y
    panel = d[values_var].to_frame(name="y")
    n_obs = len(panel)

    # the real time axis is not used: a daily range with a fixed start date
    # stands in for the timestamps
    panel.insert(0, 'ds', pd.date_range(start='2020/12/01', periods=n_obs))
    panel.insert(0, 'unique_id', [i] * n_obs)

    wide = tsfeatures(panel)
    return getFeatureRows(wide, i, group, 'tsfeatures')
    
    
def computeTsfel(d, i, time_var, values_var, group):
    """Compute the tsfel feature set for one time series.

    d          -- data frame holding the series; only column ``values_var`` is read
    i, group   -- identifier and group label copied onto every output row
    time_var   -- accepted for a uniform extractor signature; unused here
    values_var -- name of the column with the observations

    Returns the long-format frame built by getFeatureRows, tagged 'tsfel'.
    """
    signal = d[values_var].to_frame()

    # called without arguments so that all available features are retrieved
    feature_config = tsfel.get_features_by_domain()

    wide = tsfel.time_series_features_extractor(feature_config, signal)
    return getFeatureRows(wide, i, group, 'tsfel')
    
    
def computeTsfresh(d, i, time_var, values_var, group):
    """Compute the tsfresh feature set for one time series.

    d          -- data for the series; passed to tsfresh in full
    i, group   -- identifier and group label copied onto every output row
    time_var   -- column used to order the observations
    values_var -- column with the observations

    The id column name is hard-coded to 'id'.
    Returns the long-format frame built by getFeatureRows, tagged 'tsfresh'.
    """
    wide = extract_features(
        d,
        column_id='id',
        column_value=values_var,
        column_sort=time_var,
    )
    return getFeatureRows(wide, i, group, 'tsfresh')


# dispatch table: extractor name -> function computing that extractor's
# features for a single time series (all share the same signature)
switch = {
    'catch22': computeCatch22,
    'kats': computeKats,
    'tsfeatures': computeTsfeatures,
    'tsfel': computeTsfel,
    'tsfresh': computeTsfresh,
}


In [None]:
def calculate_features(df, id_var, time_var, values_var, group_var, feature_set):
    """Compute one extractor's features for every time series in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data containing all time series.
    id_var : str
        Column identifying which series a row belongs to.
    time_var : str
        Column with the time index, forwarded to the extractor function.
    values_var : str
        Column with the observations, forwarded to the extractor function.
    group_var : str
        Column with the group label; the first unique value found for a
        series is used as that series' label.
    feature_set : str
        Key into ``switch`` selecting the extractor.

    Returns
    -------
    pandas.DataFrame
        The per-series results concatenated in order of first appearance of
        each id, with a fresh index. Empty when ``df`` contains no series.

    Raises
    ------
    KeyError
        If ``feature_set`` is not a key of ``switch``.
    """
    # the extractor does not change between series, so look it up once
    compute = switch[feature_set]

    per_series = []

    # Enumerate ids from the column the caller named. The original read
    # df['id'] here, which ignored id_var and only worked when the id column
    # happened to be called 'id'.
    for i in df[id_var].unique():

        print("Computing features for ", i)
        # d as all the data available for the current time series
        d = df.loc[df[id_var] == i]
        group = d[group_var].unique()[0]
        per_series.append(compute(d, i, time_var, values_var, group))

    if not per_series:
        # pd.concat refuses an empty list
        return pd.DataFrame()

    # One concat at the end instead of growing a frame inside the loop.
    # TODO: the result is not sorted; a sort by ['names', 'id'] was planned
    # ("needs to be sorted") but has never been enabled.
    return pd.concat(per_series, ignore_index=True)
        

--------------------------------------

### Load the whole data set and calculate features

In [None]:
# load the time series
emp604 = feather.read_dataframe('data/emp604.feather')

# one long-format feature table per extractor, computed over all series
outs_catch22 = calculate_features(
    emp604, id_var='id', time_var='timepoint', values_var='value',
    group_var='Keywords', feature_set='catch22')
outs_kats = calculate_features(
    emp604, id_var='id', time_var='timepoint', values_var='value',
    group_var='Keywords', feature_set='kats')
outs_tsfel = calculate_features(
    emp604, id_var='id', time_var='timepoint', values_var='value',
    group_var='Keywords', feature_set='tsfel')
outs_tsfresh = calculate_features(
    emp604, id_var='id', time_var='timepoint', values_var='value',
    group_var='Keywords', feature_set='tsfresh')
outs_tsfeatures = calculate_features(
    emp604, id_var='id', time_var='timepoint', values_var='value',
    group_var='Keywords', feature_set='tsfeatures')
        

In [None]:
# saved in files so no need to recalculate the features
# (one feather file per extractor)

outs_catch22.to_feather('data/outs_catch22_exp.feather')
outs_kats.to_feather('data/outs_kats_exp.feather')
outs_tsfel.to_feather('data/outs_tsfel_exp.feather')
outs_tsfresh.to_feather('data/outs_tsfresh_exp.feather')
# tsfeatures gets its own file: this line used to write to the tsfresh path,
# which silently overwrote the tsfresh results saved just above
outs_tsfeatures.to_feather('data/outs_tsfeatures_exp.feather')

In [None]:
# merge all the feature dataframes from the python extractors and save in file
# NOTE: the statements below are wrapped in a string literal, so they are
# currently NOT executed and pythonFeatures is not created by this cell.
# The list merges catch22, kats, tsfel and tsfresh; outs_tsfeatures is not part of it.
'''
pythonFeatures = pd.concat([outs_catch22, outs_kats, outs_tsfel, outs_tsfresh], ignore_index=True)
pythonFeatures.to_feather('data/pythonFeatures.feather')
'''

--------------------------------------

In [None]:
# write the frame `f` to a feather file
# NOTE(review): `f` is not assigned in any of the visible cells -- confirm
# which frame is meant before running this cell.
f.to_feather('data/EmpFeatMat.feather')

In [None]:
# merge all features (hctsa still missing)
# Stacks the python-extractor features and rFeatures into one frame with a
# fresh index, then stores it as both CSV and feather.
# NOTE(review): pythonFeatures is only assigned inside the disabled (string
# literal) cell above, and rFeatures is not assigned in the visible cells --
# both presumably have to be created or loaded first; confirm.
Emp1000FeatMat = pd.concat([pythonFeatures, rFeatures], ignore_index=True)
Emp1000FeatMat.to_csv('data/Emp1000FeatMatOhneHCTSA.csv')
Emp1000FeatMat.to_feather('data/Emp1000FeatMatOhneHCTSA.feather')

--------------------------------------