In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import glob
import os
import feather
from sklearn.pipeline import make_pipeline
import sklearn
# Use seaborn's "whitegrid" style for the figures drawn in this notebook.
sns.set(style="whitegrid")

In [None]:
# feature extractors

import pycatch22
from kats.tsfeatures.tsfeatures import TsFeatures
from kats.consts import TimeSeriesData
# Module-level Kats feature extractor.
# NOTE(review): nothing in the visible cells reads this instance (MyKats.transform
# builds its own TsFeatures()) - confirm whether it is still needed.
tsFeatures = TsFeatures()
from tsfresh import extract_features
import tsfel

--------------------------------------

In [None]:
from sktime.datasets import load_unit_test

# Load the train and test splits of sktime's "unit test" dataset as
# (features, labels) pairs.
X_train, y_train = load_unit_test(split="train", return_X_y=True)
X_test, y_test = load_unit_test(split="test", return_X_y=True)

# Preview only the first rows instead of dumping the whole nested frame
# (every cell holds a complete series, so the full repr is very noisy).
X_train.head()

### CREATE CLASSES

#### CATCH22

In [None]:
class MyCatch22:
    """catch22 feature extractor with a minimal ``fit`` / ``transform`` interface.

    Both methods are static, so they can be called on the class itself
    (``MyCatch22.transform(df)``, as the rest of this notebook does) as well
    as on an instance.
    """

    @staticmethod
    def fit(X, y=None):
        """No-op fit: nothing is learned from the data.

        Parameters
        ----------
        X, y : ignored
            Accepted only for interface compatibility.

        Returns
        -------
        type
            The ``MyCatch22`` class itself.
        """
        return MyCatch22

    @staticmethod
    def transform(df):
        """Compute the catch22 features of every series in a nested frame.

        Parameters
        ----------
        df : pandas.DataFrame
            One row per instance and one column per dimension; every cell
            holds one series exposing ``.tolist()``.

        Returns
        -------
        pandas.DataFrame
            One row per instance, one column per (dimension, feature) pair
            named ``<dimension>_<feature>``. Feature columns that contain
            any NaN are dropped.
        """
        perDimension = []

        for col, dim in enumerate(df.columns):
            records = []

            for row in range(len(df)):
                # Read the cell of *this* dimension. Indexing position 0 here
                # would silently reuse the first dimension for every column.
                ts = df.iloc[row, col].tolist()
                rawFeat = pycatch22.catch22_all(ts)

                # one record per series: feature name -> feature value
                records.append(dict(zip(rawFeat['names'], rawFeat['values'])))

            # Build the per-dimension frame once instead of growing it row by row.
            currentDim = pd.DataFrame(records)
            # f-string so non-string column labels (e.g. integers) also work
            currentDim.columns = [f'{dim}_{originalName}' for originalName in currentDim.columns]
            perDimension.append(currentDim)

        # pd.concat refuses an empty list; a frame without columns yields no features.
        if not perDimension:
            return pd.DataFrame()

        return pd.concat(perDimension, axis=1).dropna(axis=1)
    
    

In [None]:
#MyCatch22.transform(X_test)

#### KATS

In [None]:
class MyKats:
    """Kats ``TsFeatures`` extractor with a minimal ``fit`` / ``transform`` interface.

    Both methods are static, so they can be called on the class itself
    (``MyKats.transform(df)``) as well as on an instance.
    """

    @staticmethod
    def fit(X, y=None):
        """No-op fit: nothing is learned from the data.

        Parameters
        ----------
        X, y : ignored
            Accepted only for interface compatibility.

        Returns
        -------
        type
            The ``MyKats`` class itself.
        """
        return MyKats

    @staticmethod
    def transform(df):
        """Compute the Kats features of every series in a nested frame.

        Parameters
        ----------
        df : pandas.DataFrame
            One row per instance and one column per dimension; every cell
            holds one series exposing ``.to_frame()``.

        Returns
        -------
        pandas.DataFrame
            One row per instance, one column per (dimension, feature) pair
            named ``<dimension>_<feature>``. Feature columns that contain
            any NaN are dropped.
        """
        perDimension = []

        for col, dim in enumerate(df.columns):
            records = []

            for row in range(len(df)):
                # Read the cell of *this* dimension. Indexing position 0 here
                # would silently reuse the first dimension for every column.
                ts = df.iloc[row, col].to_frame()
                ts.columns = ['value']

                # returns a dictionary with features as keys and values as values
                # NOTE(review): a plain one-column frame is handed to Kats here,
                # exactly as before - confirm it accepts this input type.
                records.append(TsFeatures().transform(ts))

            # Build the per-dimension frame once instead of growing it row by row.
            currentDim = pd.DataFrame(records)
            # f-string so non-string column labels (e.g. integers) also work
            currentDim.columns = [f'{dim}_{originalName}' for originalName in currentDim.columns]
            perDimension.append(currentDim)

        # pd.concat refuses an empty list; a frame without columns yields no features.
        if not perDimension:
            return pd.DataFrame()

        return pd.concat(perDimension, axis=1).dropna(axis=1)
    

In [None]:
#MyKats.transform(X_train)

#### TSFRESH

In [None]:
class MyTsfresh:
    """tsfresh feature extractor with a minimal ``fit`` / ``transform`` interface.

    Both methods are static, so they can be called on the class itself
    (``MyTsfresh.transform(df)``) as well as on an instance.
    """

    @staticmethod
    def fit(X, y=None):
        """No-op fit: nothing is learned from the data.

        Parameters
        ----------
        X, y : ignored
            Accepted only for interface compatibility.

        Returns
        -------
        type
            The ``MyTsfresh`` class itself.
        """
        return MyTsfresh

    @staticmethod
    def transform(df):
        """Compute the tsfresh features of every series in a nested frame.

        Parameters
        ----------
        df : pandas.DataFrame
            One row per instance and one column per dimension; every cell
            holds one series exposing ``.to_frame()``.

        Returns
        -------
        pandas.DataFrame
            One row per instance, one column per (dimension, feature) pair
            named ``<dimension>_<feature>``. Feature columns that contain
            any NaN are dropped.
        """
        perDimension = []

        for col, dim in enumerate(df.columns):
            perSeries = []

            for row in range(len(df)):
                # Read the cell of *this* dimension. Indexing position 0 here
                # would silently reuse the first dimension for every column.
                ts = df.iloc[row, col].to_frame()

                # Long format expected by extract_features: value, id, time.
                # Columns are added by name so a series that happens to be
                # named 1 or 2 cannot collide with the helper columns.
                ts.columns = ['value']
                ts['id'] = 1
                ts['time'] = np.arange(1, len(ts) + 1, 1.0)

                # get features (one row per call, because there is a single id)
                features = extract_features(ts, column_id='id', column_value = 'value', column_sort = 'time')
                perSeries.append(features)

            # Stack the per-series rows once instead of growing the frame row by row.
            if perSeries:
                currentDim = pd.concat(perSeries, axis=0, ignore_index=True)
            else:
                currentDim = pd.DataFrame()

            # f-string so non-string column labels (e.g. integers) also work
            currentDim.columns = [f'{dim}_{originalName}' for originalName in currentDim.columns]
            perDimension.append(currentDim)

        # pd.concat refuses an empty list; a frame without columns yields no features.
        if not perDimension:
            return pd.DataFrame()

        return pd.concat(perDimension, axis=1).dropna(axis=1)

In [None]:
#MyTsfresh.transform(X_train)

#### TSFEL

In [None]:
class MyTsfel:
    """TSFEL feature extractor with a minimal ``fit`` / ``transform`` interface.

    Both methods are static, so they can be called on the class itself
    (``MyTsfel.transform(df)``) as well as on an instance.
    """

    @staticmethod
    def fit(X, y=None):
        """No-op fit: nothing is learned from the data.

        Parameters
        ----------
        X, y : ignored
            Accepted only for interface compatibility.

        Returns
        -------
        type
            The ``MyTsfel`` class itself.
        """
        return MyTsfel

    @staticmethod
    def transform(df):
        """Compute the TSFEL features of every series in a nested frame.

        Parameters
        ----------
        df : pandas.DataFrame
            One row per instance and one column per dimension; every cell
            holds one series exposing ``.to_frame()``.

        Returns
        -------
        pandas.DataFrame
            One row per instance, one column per (dimension, feature) pair
            named ``<dimension>_<feature>``. Feature columns that contain
            any NaN are dropped.
        """
        # Retrieves the pre-defined configuration that selects all available
        # features. It does not depend on the series, so fetch it once per
        # call rather than once per series.
        cfg = tsfel.get_features_by_domain()

        perDimension = []

        for col, dim in enumerate(df.columns):
            perSeries = []

            for row in range(len(df)):
                # Read the cell of *this* dimension. Indexing position 0 here
                # would silently reuse the first dimension for every column.
                ts = df.iloc[row, col].to_frame()

                # get features
                perSeries.append(tsfel.time_series_features_extractor(cfg, ts))

            # Stack the per-series rows once instead of growing the frame row by row.
            if perSeries:
                currentDim = pd.concat(perSeries, axis=0, ignore_index=True)
            else:
                currentDim = pd.DataFrame()

            # f-string so non-string column labels (e.g. integers) also work
            currentDim.columns = [f'{dim}_{originalName}' for originalName in currentDim.columns]
            perDimension.append(currentDim)

        # pd.concat refuses an empty list; a frame without columns yields no features.
        if not perDimension:
            return pd.DataFrame()

        return pd.concat(perDimension, axis=1).dropna(axis=1)

In [None]:
#MyTsfel.transform(X_train)

In [None]:
# SOME TESTING
# Disabled smoke test: the pipeline code below is wrapped in a string literal,
# so none of it is executed.
# NOTE(review): it passes the MyTsfresh class itself (not an instance) to
# make_pipeline - confirm that is intended before re-enabling.

'''
from sklearn.ensemble import RandomForestClassifier

classifier = make_pipeline(
    MyTsfresh, RandomForestClassifier(n_estimators=5)
)

classifier.fit(X_train, y_train)
classifier.score(X_test, y_test)
'''

--------------------------------------

In [None]:
# needed 
from sklearn.metrics import accuracy_score

# Feature-extraction libraries under comparison; the variant that also lists
# 'feasts' is kept commented out.
#methods = ['catch22', 'kats', 'tsfel', 'tsfresh', 'feasts']
methods = ['catch22', 'kats', 'tsfel', 'tsfresh']
# Wrapper classes listed in the same order as `methods`, so methods[i] names classes[i].
classes = [MyCatch22, MyKats, MyTsfel, MyTsfresh]

def plotAccuracy(l1, l2, name):
    """Show a navy bar chart with one bar per score.

    l1   -- bar heights (the scores), one per bar
    l2   -- tick labels, written vertically under the bars
    name -- figure title
    """
    # one x position per score: 0, 1, 2, ...
    barSlots = np.arange(len(l1))

    # bars first, then their labels on the x-axis
    plt.bar(barSlots, l1, color='navy')
    plt.xticks(barSlots, l2, rotation='vertical')

    # title and axis captions
    plt.title(name)
    plt.ylabel('Accuracy')
    plt.xlabel('Method')

    # render the finished figure
    plt.show()

In [None]:
from sktime.classification.feature_based import TSFreshClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.inspection import DecisionBoundaryDisplay
from sktime.classification.sklearn import RotationForest
from sklearn.dummy import DummyClassifier

# Shared seed for every estimator that draws random numbers, so the scores
# are identical after "Restart Kernel -> Run All".
RANDOM_STATE = 42

# Display names, positionally paired with `classifiers` below; keep both
# lists in the same order when adding or removing an entry.
names = ["Nearest Neighbors",
         "Linear SVM",
         "RBF SVM",
         "Gaussian Process",
         "Decision Tree",
         "Random Forest",
         "Neural Net",
         "AdaBoost",
         "Naive Bayes",
         "QDA",
         #"RotationForest",
         "DummyClassifier"]

classifiers = [KNeighborsClassifier(3),
               SVC(kernel="linear", C=0.025),
               SVC(gamma=2, C=1),
               GaussianProcessClassifier(1.0 * RBF(1.0)),
               DecisionTreeClassifier(max_depth=5, random_state=RANDOM_STATE),
               RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1,
                                      random_state=RANDOM_STATE),
               MLPClassifier(alpha=1, max_iter=1000, random_state=RANDOM_STATE),
               AdaBoostClassifier(random_state=RANDOM_STATE),
               GaussianNB(),
               QuadraticDiscriminantAnalysis(),
               #RotationForest(),
               DummyClassifier()]

# The two lists are consumed in parallel (scores vs. tick labels), so a
# length mismatch would mislabel the plot.
assert len(names) == len(classifiers), "names and classifiers must stay in sync"

In [None]:
# Wrap each candidate estimator in a TSFreshClassifier, fit it on the training
# split and record its score on the test split, in the order of `classifiers`.
nbrClassifiers = len(classifiers)
scores = []

for baseEstimator in classifiers:
    classifier = TSFreshClassifier(estimator=baseEstimator)
    classifier.fit(X_train, y_train)
    testScore = classifier.score(X_test, y_test)
    scores.append(testScore)
    


In [None]:
# Bar chart of the scores collected in `scores`, labelled with the entries of `names`.
plotAccuracy(scores, names, 'TSFreshClassifier')

--------------------------------------