In [1]:
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction.settings import ComprehensiveFCParameters, MinimalFCParameters, EfficientFCParameters
from tsfresh.feature_extraction.settings import from_columns

import numpy as np
import pandas as pd

  from pandas.core import datetools


This notebook shows how the fc_parameters and kind_to_fc_parameters dictionaries work.

For a detailed explanation, see also http://tsfresh.readthedocs.io/en/latest/text/feature_extraction_settings.html

## Construct a time series container

We construct a time series container that includes two sensor time series, "temperature" and "pressure", for two devices, "a" and "b".

In [2]:
# Time series container: one row per reading. The "id" column names the
# device ("a" or "b"); "temperature" and "pressure" hold the sensor values.
df = pd.DataFrame(
    {
        "id": ["a", "a", "b", "b"],
        "temperature": [1, 2, 3, 1],
        "pressure": [-1, 2, -1, 7],
    }
)
df

Unnamed: 0,id,pressure,temperature
0,a,-1,1
1,a,2,2
2,b,-1,3
3,b,7,1


## The default_fc_parameters

The calculated features are controlled by a dictionary that maps each feature calculator name to its parameters. The names are always the same as those used in the tsfresh.feature_extraction.feature_calculators module.

In the following, we load an example dictionary:

In [3]:
# Load the minimal preset, which contains only a few basic features.
# Ending the cell with the bare name displays the settings as a mapping from
# feature calculator name to its parameters.
settings_minimal = MinimalFCParameters()
settings_minimal

{'length': None,
 'maximum': None,
 'mean': None,
 'median': None,
 'minimum': None,
 'standard_deviation': None,
 'sum_values': None,
 'variance': None}

In [4]:
# Passing the settings as default_fc_parameters applies the same feature
# calculators to every value column of df; rows are grouped by the "id" column.
X_tsfresh = extract_features(
    df,
    column_id="id",
    default_fc_parameters=settings_minimal,
)
X_tsfresh.head()

Feature Extraction: 100%|██████████| 2/2 [00:00<00:00, 117.89it/s]


Unnamed: 0_level_0,pressure__sum_values,pressure__length,pressure__maximum,pressure__mean,pressure__median,pressure__minimum,pressure__standard_deviation,pressure__variance,temperature__minimum,temperature__standard_deviation,temperature__length,temperature__mean,temperature__sum_values,temperature__variance,temperature__median,temperature__maximum
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
a,1.0,2.0,2.0,0.5,0.5,-1.0,1.5,2.25,1.0,0.5,2.0,1.5,3.0,0.25,1.5,2.0
b,6.0,2.0,7.0,3.0,3.0,-1.0,4.0,16.0,1.0,1.0,2.0,2.0,4.0,1.0,2.0,3.0


By using settings_minimal as the value of the default_fc_parameters parameter, those settings are used for all types of time series, in this case temperature and pressure.

Now, let's say we want to remove the length feature. We just delete it from the dictionary.

In [5]:
# Remove the "length" calculator from the settings in place.
# pop() with a default is used instead of a bare `del` so the cell stays
# re-runnable: `del settings_minimal["length"]` raises KeyError on a second
# execution because the key is already gone.
settings_minimal.pop("length", None)
settings_minimal

{'maximum': None,
 'mean': None,
 'median': None,
 'minimum': None,
 'standard_deviation': None,
 'sum_values': None,
 'variance': None}

In [6]:
# Re-run the extraction with the modified settings; the "length" columns
# should no longer appear in the result.
X_tsfresh = extract_features(
    df,
    column_id="id",
    default_fc_parameters=settings_minimal,
)
X_tsfresh.head()

Feature Extraction: 100%|██████████| 2/2 [00:00<00:00, 65.42it/s]


Unnamed: 0_level_0,pressure__sum_values,pressure__maximum,pressure__mean,pressure__median,pressure__minimum,pressure__standard_deviation,pressure__variance,temperature__minimum,temperature__variance,temperature__mean,temperature__sum_values,temperature__median,temperature__standard_deviation,temperature__maximum
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
a,1.0,2.0,0.5,0.5,-1.0,1.5,2.25,1.0,0.25,1.5,3.0,1.5,0.5,2.0
b,6.0,7.0,3.0,3.0,-1.0,4.0,16.0,1.0,1.0,2.0,4.0,2.0,1.0,3.0


## The kind_to_fc_parameters

Now, let's say we do not want to calculate the same features for both types of time series.

In [7]:
# Per-kind settings: each outer key is a time series kind (a value column of
# df) and each inner dictionary maps feature calculator name -> parameters.
kind_to_fc_parameters = {
    "temperature": {
        "length": None,
        "sum_values": None,
    },
    "pressure": {
        "maximum": None,
        "minimum": None,
    },
}
kind_to_fc_parameters

{'pressure': {'maximum': None, 'minimum': None},
 'temperature': {'length': None, 'sum_values': None}}

So, in this case, both the maximum and the minimum are calculated for the pressure sensor. For the temperature sensor, the length and sum_values features are extracted.

In [8]:
# Use kind_to_fc_parameters instead of default_fc_parameters so that each
# kind of time series gets its own set of feature calculators.
X_tsfresh = extract_features(
    df,
    column_id="id",
    kind_to_fc_parameters=kind_to_fc_parameters,
)
X_tsfresh.head()

Feature Extraction: 100%|██████████| 2/2 [00:00<00:00, 103.52it/s]


Unnamed: 0_level_0,pressure__minimum,pressure__maximum,temperature__sum_values,temperature__length
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a,-1.0,2.0,3.0,2.0
b,-1.0,7.0,4.0,2.0


## More complex dictionaries

We provide dictionaries with larger sets of features.

The `EfficientFCParameters` contain features and parameters that should be calculated quite quickly:

In [9]:
# Load the "efficient" preset and display it. In the displayed mapping, a
# value of None appears for calculators listed without parameters, and a list
# of dicts gives one parameter combination per entry (e.g. one per lag).
settings_efficient = EfficientFCParameters()
settings_efficient

{'abs_energy': None,
 'absolute_sum_of_changes': None,
 'ar_coefficient': [{'coeff': 0, 'k': 10},
  {'coeff': 1, 'k': 10},
  {'coeff': 2, 'k': 10},
  {'coeff': 3, 'k': 10},
  {'coeff': 4, 'k': 10}],
 'augmented_dickey_fuller': None,
 'autocorrelation': [{'lag': 0},
  {'lag': 1},
  {'lag': 2},
  {'lag': 3},
  {'lag': 4},
  {'lag': 5},
  {'lag': 6},
  {'lag': 7},
  {'lag': 8},
  {'lag': 9}],
 'binned_entropy': [{'max_bins': 10}],
 'count_above_mean': None,
 'count_below_mean': None,
 'cwt_coefficients': [{'coeff': 0, 'w': 2, 'widths': (2, 5, 10, 20)},
  {'coeff': 0, 'w': 5, 'widths': (2, 5, 10, 20)},
  {'coeff': 0, 'w': 10, 'widths': (2, 5, 10, 20)},
  {'coeff': 0, 'w': 20, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 2, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 5, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 10, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 20, 'widths': (2, 5, 10, 20)},
  {'coeff': 2, 'w': 2, 'widths': (2, 5, 10, 20)},
  {'coeff': 2, 'w': 5, 'widths': (2, 5, 

The `ComprehensiveFCParameters` are the biggest set of features. They will take the longest to calculate.

In [10]:
# Load the "comprehensive" preset (the largest feature set) and display it.
# The displayed mapping has the same form as the other presets:
# feature calculator name -> None or a list of parameter dicts.
settings_comprehensive = ComprehensiveFCParameters()
settings_comprehensive

{'abs_energy': None,
 'absolute_sum_of_changes': None,
 'approximate_entropy': [{'m': 2, 'r': 0.1},
  {'m': 2, 'r': 0.3},
  {'m': 2, 'r': 0.5},
  {'m': 2, 'r': 0.7},
  {'m': 2, 'r': 0.9}],
 'ar_coefficient': [{'coeff': 0, 'k': 10},
  {'coeff': 1, 'k': 10},
  {'coeff': 2, 'k': 10},
  {'coeff': 3, 'k': 10},
  {'coeff': 4, 'k': 10}],
 'augmented_dickey_fuller': None,
 'autocorrelation': [{'lag': 0},
  {'lag': 1},
  {'lag': 2},
  {'lag': 3},
  {'lag': 4},
  {'lag': 5},
  {'lag': 6},
  {'lag': 7},
  {'lag': 8},
  {'lag': 9}],
 'binned_entropy': [{'max_bins': 10}],
 'count_above_mean': None,
 'count_below_mean': None,
 'cwt_coefficients': [{'coeff': 0, 'w': 2, 'widths': (2, 5, 10, 20)},
  {'coeff': 0, 'w': 5, 'widths': (2, 5, 10, 20)},
  {'coeff': 0, 'w': 10, 'widths': (2, 5, 10, 20)},
  {'coeff': 0, 'w': 20, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 2, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 5, 'widths': (2, 5, 10, 20)},
  {'coeff': 1, 'w': 10, 'widths': (2, 5, 10, 20)},
  {'coe