# Feature extraction with tsfresh transformer

In this tutorial, we show how to use aeon with [tsfresh](https://tsfresh.readthedocs.io) to extract features from time series, so that any scikit-learn estimator can then be trained on the extracted features.

## Preliminaries
tsfresh must be installed to run this notebook. If you have not installed it yet, uncomment and run the cell below:

In [1]:
# Optional setup: uncomment the line below to install or upgrade tsfresh.
# The %pip magic installs into the environment of the running kernel.
# %pip install --upgrade tsfresh

In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from aeon.datasets import load_arrow_head, load_basic_motions
from aeon.transformations.collection.feature_based import TSFreshFeatureExtractor

## Univariate time series classification data

For more details on the data set, see the [univariate time series classification notebook](https://github.com/aeon-toolkit/aeon/blob/main/examples/02_classification_univariate.ipynb).

In [4]:
# Load the ArrowHead problem as (X, y) and hold out part of it for testing.
# random_state pins the shuffle, so the split -- and every output below that
# depends on it -- is identical on each "Restart Kernel -> Run All".
X, y = load_arrow_head(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(158, 1, 251) (158,) (53, 1, 251) (53,)


In [5]:
# Inspect the first training case. Per the shapes printed above, each case
# holds a single channel of 251 time points.
X_train[0]

array([[-1.8289016e+00, -1.8414675e+00, -1.8063030e+00, -1.8032227e+00,
        -1.7809696e+00, -1.7556218e+00, -1.7272368e+00, -1.6952102e+00,
        -1.6687684e+00, -1.6126463e+00, -1.5336377e+00, -1.4651511e+00,
        -1.4064236e+00, -1.3661493e+00, -1.2959210e+00, -1.2184957e+00,
        -1.1503546e+00, -1.0955135e+00, -9.5094029e-01, -8.3878234e-01,
        -6.9800914e-01, -5.6061853e-01, -5.0668426e-01, -3.8732931e-01,
        -2.5157962e-01, -1.1146854e-01,  1.2804341e-04,  7.4199503e-02,
         1.9386536e-01,  2.7104016e-01,  3.8894419e-01,  4.9771380e-01,
         5.4054023e-01,  6.3830131e-01,  7.2890283e-01,  7.4029711e-01,
         8.0125997e-01,  8.0969628e-01,  8.6046100e-01,  8.8470623e-01,
         8.8200448e-01,  8.6337722e-01,  8.6697572e-01,  8.3599266e-01,
         8.6452874e-01,  9.5284881e-01,  9.9656623e-01,  1.0399742e+00,
         1.1242956e+00,  1.2033834e+00,  1.2849656e+00,  1.3243663e+00,
         1.3866750e+00,  1.4440451e+00,  1.4040946e+00,  1.38161

In [5]:
# Multiclass classification task: list the distinct class labels
# (three classes for this data set, not two).
np.unique(y_train)

array(['0', '1', '2'], dtype=object)

## Using tsfresh to extract features

In [6]:
# Fit the tsfresh feature extractor on the training series and transform them
# into a feature table, using the "efficient" feature-calculation preset and
# with tsfresh warnings silenced.
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
Xt = t.fit_transform(X_train)
# Preview the first rows; feature columns are prefixed with the dimension name.
Xt.head()

Unnamed: 0,dim_0__variance_larger_than_standard_deviation,dim_0__has_duplicate_max,dim_0__has_duplicate_min,dim_0__has_duplicate,dim_0__sum_values,dim_0__abs_energy,dim_0__mean_abs_change,dim_0__mean_change,dim_0__mean_second_derivative_central,dim_0__median,...,dim_0__fourier_entropy__bins_5,dim_0__fourier_entropy__bins_10,dim_0__fourier_entropy__bins_100,dim_0__permutation_entropy__dimension_3__tau_1,dim_0__permutation_entropy__dimension_4__tau_1,dim_0__permutation_entropy__dimension_5__tau_1,dim_0__permutation_entropy__dimension_6__tau_1,dim_0__permutation_entropy__dimension_7__tau_1,dim_0__query_similarity_count__query_None__threshold_0.0,dim_0__mean_n_absolute_max__number_of_maxima_7
0,0.0,0.0,0.0,1.0,2.8381e-07,250.000001,0.055389,1.5e-05,5.269116e-05,0.193865,...,0.092513,0.138673,0.219798,1.174119,1.64027,2.0598,2.414733,2.701776,0.0,1.831721
1,0.0,0.0,0.0,1.0,2.58e-07,249.999999,0.045311,-5.7e-05,1.038153e-07,0.061629,...,0.092513,0.092513,0.184769,1.137505,1.468234,1.721676,1.902331,2.054811,0.0,1.804313
2,0.0,0.0,0.0,1.0,-6.07e-08,250.000001,0.047934,3e-06,-6.866426e-05,0.387035,...,0.127671,0.138673,0.184769,1.293223,1.911246,2.447956,2.801935,3.039187,0.0,2.032458
3,0.0,0.0,0.0,1.0,2.28e-07,250.0,0.051441,6.6e-05,-4.210402e-05,-0.166196,...,0.092513,0.138673,0.184769,1.09068,1.494293,1.797282,2.033512,2.261121,0.0,1.892806
4,0.0,1.0,1.0,1.0,-3.65e-07,250.000001,0.051974,0.000161,-4.272932e-05,0.019023,...,0.092513,0.138673,0.219798,1.129314,1.506024,1.855356,2.165993,2.417581,0.0,1.834574


## Using tsfresh with aeon

In [7]:
# Chain the tsfresh feature extractor with a scikit-learn classifier, so the
# pipeline can be fitted and scored directly on the raw time series.
# random_state fixes the forest's bootstrap/feature sampling, so the reported
# score is reproducible for a given train/test split.
classifier = make_pipeline(
    TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False),
    RandomForestClassifier(random_state=42),
)
classifier.fit(X_train, y_train)
# Mean accuracy on the held-out test set.
classifier.score(X_test, y_test)

  "tsfresh requires a unique index, but found "
Feature Extraction: 100%|██████████| 5/5 [00:11<00:00,  2.21s/it]
  "tsfresh requires a unique index, but found "
Feature Extraction: 100%|██████████| 5/5 [00:03<00:00,  1.45it/s]


0.8490566037735849

## Multivariate time series classification data

In [8]:
# Load the multivariate BasicMotions problem as (X, y) and hold out part of it
# for testing. random_state pins the shuffle so the split is identical on each
# "Restart Kernel -> Run All".
X, y = load_basic_motions(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(60, 6) (60,) (20, 6) (20,)


In [9]:
# Multivariate input data: inspect the first entry of the training set.
X_train[0]

Unnamed: 0,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5
20,0 -0.294498 1 -0.294498 2 -0.050044 3...,0 0.540218 1 0.540218 2 -0.515245 3...,0 0.218114 1 0.218114 2 -0.301108 3...,0 -0.045277 1 -0.045277 2 0.103872 3...,0 -0.002663 1 -0.002663 2 -0.183773 3...,0 0.031960 1 0.031960 2 0.037287 3...
26,0 -0.761604 1 -0.761604 2 0.121078 3...,0 0.260125 1 0.260125 2 -1.423255 3...,0 -0.064487 1 -0.064487 2 0.075600 3...,0 0.069248 1 0.069248 2 -0.282318 3...,0 0.242367 1 0.242367 2 -0.332922 3...,0 -0.007990 1 -0.007990 2 0.239704 3...
7,0 -0.352746 1 -0.352746 2 -1.354561 3...,0 0.316845 1 0.316845 2 0.490525 3...,0 -0.473779 1 -0.473779 2 1.454261 3...,0 -0.327595 1 -0.327595 2 -0.269001 3...,0 0.106535 1 0.106535 2 0.021307 3...,0 0.197090 1 0.197090 2 0.460763 3...
8,0 -0.342233 1 -0.342233 2 -0.298542 3...,0 0.327415 1 0.327415 2 -0.527154 3...,0 0.157229 1 0.157229 2 0.248585 3...,0 0.394179 1 0.394179 2 -0.037287 3...,0 0.074574 1 0.074574 2 -0.087891 3...,0 -0.037287 1 -0.037287 2 -0.050604 3...
10,0 0.206148 1 0.206148 2 6.53436...,0 -0.658294 1 -0.658294 2 4.597327 3...,0 0.469612 1 0.469612 2 -2.723661 3...,0 -0.106535 1 -0.106535 2 -0.439456 3...,0 0.306288 1 0.306288 2 1.717875 3...,0 0.950824 1 0.950824 2 -1.041379 3...


In [10]:
# Same extractor settings as for the univariate data, now fitted on the
# multivariate training series. In the output, features appear once per
# dimension (column prefixes dim_0__ through dim_5__).
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
Xt = t.fit_transform(X_train)
# Preview the first rows of the extracted feature table.
Xt.head()

  "tsfresh requires a unique index, but found "
Feature Extraction: 100%|██████████| 5/5 [00:18<00:00,  3.69s/it]


Unnamed: 0,dim_0__variance_larger_than_standard_deviation,dim_0__has_duplicate_max,dim_0__has_duplicate_min,dim_0__has_duplicate,dim_0__sum_values,dim_0__abs_energy,dim_0__mean_abs_change,dim_0__mean_change,dim_0__mean_second_derivative_central,dim_0__median,...,dim_5__fourier_entropy__bins_2,dim_5__fourier_entropy__bins_3,dim_5__fourier_entropy__bins_5,dim_5__fourier_entropy__bins_10,dim_5__fourier_entropy__bins_100,dim_5__permutation_entropy__dimension_3__tau_1,dim_5__permutation_entropy__dimension_4__tau_1,dim_5__permutation_entropy__dimension_5__tau_1,dim_5__permutation_entropy__dimension_6__tau_1,dim_5__permutation_entropy__dimension_7__tau_1
0,0.0,0.0,0.0,1.0,33.334188,110.735119,0.822452,0.000639,0.001751,0.164096,...,0.165443,0.165443,0.165443,0.192626,0.545824,1.279774,1.910772,2.565051,3.096812,3.567632
1,1.0,0.0,0.0,1.0,73.88848,220.949429,0.964075,-0.002087,-0.003908,0.613719,...,0.096509,0.096509,0.26116,0.26116,0.451359,1.313299,1.987599,2.593635,3.17389,3.696247
2,0.0,0.0,0.0,1.0,-17.42876,7.940863,0.170422,0.002326,-0.000244,-0.152038,...,0.223718,0.26116,0.356468,0.545824,1.82169,1.438857,2.291659,3.14044,3.819994,4.20771
3,0.0,0.0,0.0,1.0,-18.154841,5.56889,0.135705,0.001051,0.000688,-0.196623,...,0.399949,0.705356,1.127853,1.74282,3.274497,1.68301,2.766048,3.748502,4.303872,4.449241
4,1.0,0.0,0.0,1.0,395.985445,11192.65897,6.5837,0.099344,0.0,8.60897,...,0.165443,0.165443,0.165443,0.165443,0.706253,1.483926,2.279149,3.01413,3.525453,3.919983
