In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

from sktime.classification.compose import ComposableTimeSeriesForestClassifier
from sktime.datasets import load_arrow_head
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
from sktime.utils.slope_and_trend import _slope


In [2]:
# Load the ArrowHead dataset as a feature frame X and a label vector y, then
# hold out a test split using train_test_split's default proportions.
# random_state pins the shuffle so the split -- and every result derived from
# it further down -- is reproducible under Restart & Run All.
X, y = load_arrow_head(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(158, 1) (158,) (53, 1) (53,)


In [3]:
# Preview the first rows of the training panel.
# The data is univariate: there is a single column, dim_0.
# NOTE(review): each dim_0 cell appears to hold a whole time series rather
# than a scalar (see the rendered output) -- confirm against the sktime docs.
X_train.head()

Unnamed: 0,dim_0
114,0 -2.1830 1 -2.1270 2 -2.1029 3 ...
19,0 -1.8822 1 -1.8806 2 -1.8658 3 ...
1,0 -1.7746 1 -1.7740 2 -1.7766 3 ...
106,0 -2.0000 1 -2.0029 2 -1.9696 3 ...
89,0 -1.7560 1 -1.7418 2 -1.7000 3 ...


In [4]:
# Summarise the training frame: index, columns, non-null counts, dtypes and
# memory usage. dim_0 is reported with dtype `object`.
# NOTE(review): presumably because each cell stores a series object -- confirm.
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 158 entries, 114 to 31
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   dim_0   158 non-null    object
dtypes: object(1)
memory usage: 2.5+ KB


# TSFresh Feature Extraction

In [5]:
# Run tsfresh feature extraction over the training panel using the
# "efficient" feature-calculator preset, with tsfresh warnings suppressed.
# The result is a flat feature table whose first rows are displayed below.
# TSFreshFeatureExtractor is imported with the other imports in the first
# cell of the notebook.
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
Xt = t.fit_transform(X_train)
Xt.head()

  warn(
Feature Extraction: 100%|██████████| 5/5 [00:17<00:00,  3.46s/it]


Unnamed: 0,dim_0__variance_larger_than_standard_deviation,dim_0__has_duplicate_max,dim_0__has_duplicate_min,dim_0__has_duplicate,dim_0__sum_values,dim_0__abs_energy,dim_0__mean_abs_change,dim_0__mean_change,dim_0__mean_second_derivative_central,dim_0__median,...,dim_0__fourier_entropy__bins_2,dim_0__fourier_entropy__bins_3,dim_0__fourier_entropy__bins_5,dim_0__fourier_entropy__bins_10,dim_0__fourier_entropy__bins_100,dim_0__permutation_entropy__dimension_3__tau_1,dim_0__permutation_entropy__dimension_4__tau_1,dim_0__permutation_entropy__dimension_5__tau_1,dim_0__permutation_entropy__dimension_6__tau_1,dim_0__permutation_entropy__dimension_7__tau_1
0,0.0,0.0,0.0,1.0,-0.000119,250.000476,0.055274,2e-06,-0.000151,0.33856,...,0.046288,0.046288,0.127671,0.127671,0.138673,1.321074,1.972178,2.540679,2.988064,3.340854
1,0.0,0.0,0.0,1.0,8.6e-05,249.999359,0.047289,-8e-06,2e-05,0.14572,...,0.046288,0.046288,0.092513,0.138673,0.138673,1.253832,1.693199,2.062162,2.350862,2.539538
2,0.0,0.0,0.0,1.0,1.6e-05,250.000139,0.047257,-5.9e-05,-2.9e-05,0.14465,...,0.046288,0.046288,0.092513,0.138673,0.138673,1.266562,1.774184,2.1592,2.400791,2.575144
3,0.0,0.0,0.0,1.0,-0.000107,249.999639,0.048889,6.8e-05,-3.7e-05,0.024956,...,0.046288,0.092513,0.092513,0.092513,0.184769,1.279366,1.808891,2.243079,2.592453,2.796296
4,0.0,0.0,0.0,1.0,0.000362,250.000289,0.048722,8e-06,-6e-06,0.12024,...,0.046288,0.046288,0.092513,0.138673,0.184769,1.115318,1.475115,1.742737,1.961991,2.119608


In [13]:
# List the names of the extracted feature columns. Each name is prefixed
# with the source column it was computed from (here `dim_0__`).
Xt.columns

Index(['dim_0__variance_larger_than_standard_deviation',
       'dim_0__has_duplicate_max', 'dim_0__has_duplicate_min',
       'dim_0__has_duplicate', 'dim_0__sum_values', 'dim_0__abs_energy',
       'dim_0__mean_abs_change', 'dim_0__mean_change',
       'dim_0__mean_second_derivative_central', 'dim_0__median',
       ...
       'dim_0__fourier_entropy__bins_2', 'dim_0__fourier_entropy__bins_3',
       'dim_0__fourier_entropy__bins_5', 'dim_0__fourier_entropy__bins_10',
       'dim_0__fourier_entropy__bins_100',
       'dim_0__permutation_entropy__dimension_3__tau_1',
       'dim_0__permutation_entropy__dimension_4__tau_1',
       'dim_0__permutation_entropy__dimension_5__tau_1',
       'dim_0__permutation_entropy__dimension_6__tau_1',
       'dim_0__permutation_entropy__dimension_7__tau_1'],
      dtype='object', length=773)

# TSFresh within a scikit-learn Pipeline

In [11]:
# Chain tsfresh feature extraction and a random forest into a single
# estimator, so the raw panel can be passed straight to fit/score and the
# features are extracted inside the pipeline rather than by hand.
# make_pipeline, RandomForestClassifier and TSFreshFeatureExtractor are
# imported with the other imports in the first cell of the notebook.
classifier = make_pipeline(
    TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False),
    # Seed the forest so the reported score is repeatable between runs.
    RandomForestClassifier(random_state=42),
)
classifier.fit(X_train, y_train)
# Score the fitted pipeline on the held-out split (accuracy for a classifier).
classifier.score(X_test, y_test)

  warn(
Feature Extraction: 100%|██████████| 5/5 [00:51<00:00, 10.40s/it]
  warn(
Feature Extraction: 100%|██████████| 5/5 [00:19<00:00,  3.95s/it]


0.8113207547169812