# Feature extraction with tsfresh transformer

In this tutorial, we show how you can use aeon with [tsfresh](https://tsfresh.readthedocs.io) to first extract features from time series, so that we can then use any scikit-learn estimator.

## Preliminaries
You have to install tsfresh if you haven't already. To install it, uncomment the cell below:

In [11]:
# !pip install --upgrade tsfresh

In [12]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from aeon.datasets import load_arrow_head, load_basic_motions
from aeon.transformations.collection.feature_based import TSFreshFeatureExtractor

## Univariate time series classification data

For more details on the data set, see the [univariate time series classification
notebook](https://github.com/aeon-toolkit/aeon/blob/main/examples/datasets/provided_data.ipynb).

In [13]:
X, y = load_arrow_head()
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(158, 1, 251) (158,) (53, 1, 251) (53,)


In [14]:
X_train[0]

array([[-2.0000055 , -2.0028617 , -1.9696021 , -1.9355154 , -1.8781111 ,
        -1.8344882 , -1.792426  , -1.7608833 , -1.7244086 , -1.6822684 ,
        -1.6168887 , -1.5276295 , -1.4674783 , -1.3656082 , -1.2734461 ,
        -1.1766173 , -1.142312  , -1.0500718 , -0.95069897, -0.8368424 ,
        -0.69139629, -0.56364906, -0.4466866 , -0.33268956, -0.22067468,
        -0.06692539,  0.06797007,  0.14719364,  0.2445016 ,  0.35250536,
         0.46878109,  0.5603892 ,  0.59047574,  0.63178718,  0.66661683,
         0.74868565,  0.84285494,  0.87039746,  0.90287039,  0.93092796,
         0.99071397,  1.0535122 ,  1.1475941 ,  1.1638321 ,  1.1692308 ,
         1.1668725 ,  1.164625  ,  1.1624899 ,  1.1431085 ,  1.1547923 ,
         1.1956449 ,  1.2304043 ,  1.2479846 ,  1.2285376 ,  1.2267756 ,
         1.2251212 ,  1.2056196 ,  1.2235791 ,  1.2221495 ,  1.2403447 ,
         1.2391429 ,  1.2575582 ,  1.2001508 ,  1.2001508 ,  1.218566  ,
         1.2176124 ,  1.2362306 ,  1.1787482 ,  1.1

In [15]:
#  binary classification task
np.unique(y_train)

array(['0', '1', '2'], dtype='<U1')

## Using tsfresh to extract features

In [16]:
# tf = TsFreshTransformer()
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
Xt = t.fit_transform(X_train)
Xt.head()

Unnamed: 0,dim_0__variance_larger_than_standard_deviation,dim_0__has_duplicate_max,dim_0__has_duplicate_min,dim_0__has_duplicate,dim_0__sum_values,dim_0__abs_energy,dim_0__mean_abs_change,dim_0__mean_change,dim_0__mean_second_derivative_central,dim_0__median,...,dim_0__fourier_entropy__bins_5,dim_0__fourier_entropy__bins_10,dim_0__fourier_entropy__bins_100,dim_0__permutation_entropy__dimension_3__tau_1,dim_0__permutation_entropy__dimension_4__tau_1,dim_0__permutation_entropy__dimension_5__tau_1,dim_0__permutation_entropy__dimension_6__tau_1,dim_0__permutation_entropy__dimension_7__tau_1,dim_0__query_similarity_count__query_None__threshold_0.0,dim_0__mean_n_absolute_max__number_of_maxima_7
0,0.0,0.0,0.0,1.0,-2.488e-07,250.000002,0.048888,6.74868e-05,-3.7e-05,0.024956,...,0.092513,0.092513,0.184769,1.279366,1.808891,2.243079,2.592453,2.796296,0.0,1.964976
1,0.0,0.0,0.0,0.0,1.976e-07,250.0,0.060783,-7.52468e-05,-4.7e-05,-0.16881,...,0.127671,0.184769,0.311663,1.081358,1.509147,1.881491,2.217191,2.492175,0.0,1.994372
2,0.0,0.0,0.0,1.0,-4.26e-08,250.0,0.063979,7.556e-07,2.8e-05,0.124552,...,0.173767,0.173767,0.357496,1.156286,1.572548,1.949349,2.284694,2.576462,0.0,2.003842
3,0.0,0.0,0.0,1.0,-1.64e-07,250.000001,0.048317,1.14328e-05,-4.9e-05,0.032475,...,0.092513,0.138673,0.184769,1.17263,1.58127,1.902762,2.128414,2.328659,0.0,1.742132
4,0.0,0.0,0.0,1.0,1.233e-07,250.0,0.050355,-3.9032e-06,-4.6e-05,-0.045353,...,0.092513,0.092513,0.230801,1.173933,1.628543,2.003443,2.303091,2.559695,0.0,1.93693


## Using tsfresh with aeon

In [17]:
classifier = make_pipeline(
    TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False),
    RandomForestClassifier(),
)
classifier.fit(X_train, y_train)
classifier.score(X_test, y_test)

0.7735849056603774

## Multivariate time series classification data

In [18]:
X, y = load_basic_motions()
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(60, 6, 100) (60,) (20, 6, 100) (20,)


In [19]:
#  multivariate input data
X_train[0]

array([[-1.088052e+00, -1.088052e+00, -6.836200e-01, -6.836200e-01,
         1.732735e+00, -3.606030e-01, -1.078340e-01, -1.078340e-01,
        -2.353600e-01,  9.832000e-02,  1.981450e-01,  2.754690e-01,
        -1.713190e-01, -2.036300e-01, -7.130000e-03,  1.647850e-01,
        -9.274400e-02, -2.564580e-01,  4.896200e-02, -1.463250e-01,
         2.093100e-02, -5.958200e-02, -4.558400e-01,  1.189300e-02,
        -2.628800e-01, -4.755600e-02,  8.333500e-02, -5.470400e-02,
        -1.982560e-01, -9.925700e-02, -2.043750e-01, -2.306840e-01,
        -2.306840e-01, -2.338050e-01, -9.477400e-02, -4.888900e-02,
        -2.203990e-01, -7.340000e-02, -1.524500e-02, -2.884580e-01,
        -3.128810e-01, -2.810570e-01, -2.802950e-01, -1.695040e-01,
        -1.150830e-01, -6.590400e-02, -1.720640e-01, -9.110700e-02,
        -1.116130e-01, -1.789600e-01, -4.327960e-01, -1.822240e-01,
        -1.436930e-01, -1.119100e-02, -3.135700e-02, -1.384120e-01,
        -2.509120e-01, -6.461700e-02,  5.924200e

In [None]:
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
Xt = t.fit_transform(X_train)
Xt.head()