In [5]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

from sktime.classification.compose import ComposableTimeSeriesForestClassifier
from sktime.datasets import load_arrow_head
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
from sktime.utils.slope_and_trend import _slope


In [2]:
# Load the ArrowHead data as a feature frame X and a label vector y.
X, y = load_arrow_head(return_X_y=True)
# Seed the shuffle so the train/test partition -- and every number derived
# from it further down -- is identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(158, 1) (158,) (53, 1) (53,)


In [3]:
# Preview the training panel: a single column (dim_0) in which each cell
# holds one complete univariate series.
X_train.head(n=5)

Unnamed: 0,dim_0
34,0 -2.0417 1 -2.0572 2 -2.0522 3 ...
132,0 -1.8902 1 -1.9055 2 -1.8857 3 ...
112,0 -1.8515 1 -1.8436 2 -1.8209 3 ...
79,0 -2.0399 1 -2.0382 2 -2.0384 3 ...
29,0 -1.8293 1 -1.8324 2 -1.8226 3 ...


# TSFresh Feature Extraction

In [6]:
# Configure the extractor with the "efficient" feature set and
# show_warnings=False to keep the cell output readable.
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
# Fit on the training panel and convert it into a tabular feature frame
# (one row per input series).
Xt = t.fit_transform(X_train)
Xt.head()

  warn(
Feature Extraction: 100%|██████████| 5/5 [00:55<00:00, 11.20s/it]


Unnamed: 0,dim_0__variance_larger_than_standard_deviation,dim_0__has_duplicate_max,dim_0__has_duplicate_min,dim_0__has_duplicate,dim_0__sum_values,dim_0__abs_energy,dim_0__mean_abs_change,dim_0__mean_change,dim_0__mean_second_derivative_central,dim_0__median,...,dim_0__fourier_entropy__bins_2,dim_0__fourier_entropy__bins_3,dim_0__fourier_entropy__bins_5,dim_0__fourier_entropy__bins_10,dim_0__fourier_entropy__bins_100,dim_0__permutation_entropy__dimension_3__tau_1,dim_0__permutation_entropy__dimension_4__tau_1,dim_0__permutation_entropy__dimension_5__tau_1,dim_0__permutation_entropy__dimension_6__tau_1,dim_0__permutation_entropy__dimension_7__tau_1
0,0.0,0.0,0.0,1.0,-3.4e-05,249.998998,0.069971,8.4e-05,2.5e-05,0.01888,...,0.08151,0.092513,0.092513,0.138673,0.311663,1.116706,1.545256,1.889777,2.155644,2.374722
1,0.0,0.0,0.0,1.0,0.000449,250.0001,0.049964,-6.9e-05,2.6e-05,0.27091,...,0.046288,0.046288,0.092513,0.138673,0.138673,1.23878,1.737438,2.165529,2.462872,2.697619
2,0.0,0.0,0.0,0.0,0.00025,249.998715,0.050502,3e-06,-1.6e-05,-0.019897,...,0.046288,0.092513,0.092513,0.092513,0.230801,1.086513,1.366554,1.607919,1.815004,2.004975
3,0.0,0.0,0.0,0.0,-0.000137,250.000586,0.056978,2.7e-05,-1.3e-05,0.005708,...,0.046288,0.046288,0.046288,0.186791,0.34234,1.519592,2.338211,2.986158,3.424491,3.643719
4,0.0,0.0,0.0,1.0,0.000289,250.00088,0.046721,6.8e-05,-2.3e-05,-0.030424,...,0.046288,0.092513,0.092513,0.092513,0.173767,1.246112,1.712048,2.070686,2.30621,2.470692


In [8]:
# Rows: one per training series; columns: one per extracted feature.
Xt.shape

(158, 773)

# TSFresh within a scikit-learn Pipeline

In [11]:
# Chain feature extraction and a random forest into one estimator, so the
# extractor is fitted on the training data only and then re-applied to the
# test data when scoring.
classifier = make_pipeline(
    TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False),
    # Seed the forest so that, for a given train/test split, the reported
    # accuracy is the same on every run.
    RandomForestClassifier(random_state=42),
)
classifier.fit(X_train, y_train)
# Mean accuracy on the held-out test set.
classifier.score(X_test, y_test)

  warn(
Feature Extraction: 100%|██████████| 5/5 [00:51<00:00, 10.40s/it]
  warn(
Feature Extraction: 100%|██████████| 5/5 [00:19<00:00,  3.95s/it]


0.8113207547169812