In [13]:
import pandas as pd
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
from tsfresh.feature_extraction import extract_features, extract_feature_dynamics
from tsfresh.feature_selection import select_features
from tsfresh.feature_extraction.settings import MinimalFCParameters

from tsfresh.feature_extraction.gen_example_timeseries_data import gen_example_timeseries_data ##TODO: move func
from tsfresh.feature_extraction.gen_input_timeseries_function import engineer_input_timeseries
from tsfresh.feature_extraction.gen_features_dicts_function import derive_features_dictionaries ## TODO move this to different chunk

from IPython.display import display
from json import dumps

# Introduction

## Topics

rough guide
- tsfresh intro
- our changes


## Load example Data
In this workbook we will consider two datasets:
1. The [Robot Execution Failures Dataset](https://archive.ics.uci.edu/ml/datasets/Robot+Execution+Failures), to demonstrate how the extraction of feature dynamics works.
2. Later, we will use another example dataset which was manually generated.

**copied from niels workbook**
The data set documents 88 robot executions (each has a unique id between 1 and 88), which is a subset of the Robot Execution Failures Data Set. 
For the purpose of simplicity we are only differentiating between successful and failed executions (`y`).

For each execution 15 force (`F`) and torque (`T`) samples are given, which were measured at regular time intervals for the spatial dimensions x, y, and z. 
Therefore each row of the data frame references a specific execution (`id`), a time index (`time`) and documents the respective measurements of 6 sensors (`F_x, F_y, F_z, T_x, T_y, T_z`).

The Robot dataset is unbalanced, but for the purposes of this tutorial we will ignore this issue (e.g. no undersampling is applied), as this is merely a toy example to demonstrate how the new code works.

In [14]:
# Download the Robot Execution Failures data set, then load the sensor
# readings (`timeseries`) together with the success/failure labels (`y`).
download_robot_execution_failures()
timeseries, y = load_robot_execution_failures()

# Preview the first five rows of the raw sensor readings.
display(timeseries.head(n=5))

Unnamed: 0,id,time,F_x,F_y,F_z,T_x,T_y,T_z
0,1,0,-1,-1,63,-3,-1,0
1,1,1,0,0,62,-3,-1,0
2,1,2,-1,-1,61,-3,0,0
3,1,3,-1,-1,63,-2,-1,0
4,1,4,-1,-1,63,-3,-1,0


# Extract features from the Time Series
Let us start by demonstrating how a simple set of time series features (mean, median, max, variance, ...) is calculated from an example time series using tsfresh.

In [15]:
# Baseline extraction: compute the minimal tsfresh feature set for every
# sensor column, grouped by execution `id` and ordered by `time`.
extracted_features = extract_features(
    timeseries,
    column_id="id",
    column_sort="time",
    default_fc_parameters=MinimalFCParameters(),
)
display(extracted_features.head())

Feature Extraction: 100%|██████████| 10/10 [00:04<00:00,  2.27it/s]


Unnamed: 0,T_z__sum_values,T_z__median,T_z__mean,T_z__length,T_z__standard_deviation,T_z__variance,T_z__maximum,T_z__minimum,F_x__sum_values,F_x__median,...,T_x__maximum,T_x__minimum,T_y__sum_values,T_y__median,T_y__mean,T_y__length,T_y__standard_deviation,T_y__variance,T_y__maximum,T_y__minimum
1,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,-14.0,-1.0,...,-2.0,-3.0,-10.0,-1.0,-0.666667,15.0,0.471405,0.222222,0.0,-1.0
2,-4.0,0.0,-0.266667,15.0,0.442217,0.195556,0.0,-1.0,-13.0,-1.0,...,1.0,-10.0,-20.0,-1.0,-1.333333,15.0,2.054805,4.222222,4.0,-5.0
3,-4.0,0.0,-0.266667,15.0,0.442217,0.195556,0.0,-1.0,-10.0,-1.0,...,3.0,-7.0,-29.0,-2.0,-1.933333,15.0,1.768867,3.128889,1.0,-5.0
4,-5.0,0.0,-0.333333,15.0,0.596285,0.355556,1.0,-1.0,-6.0,0.0,...,-1.0,-15.0,-16.0,-1.0,-1.066667,15.0,2.669998,7.128889,4.0,-6.0
5,-2.0,0.0,-0.133333,15.0,0.618241,0.382222,1.0,-1.0,-9.0,-1.0,...,-2.0,-12.0,-42.0,-3.0,-2.8,15.0,2.039608,4.16,3.0,-5.0


# How does the extraction of feature dynamics work?
Should we find that these features themselves are not sufficiently informative we can try an alternative approach: the extraction of feature dynamics! 
This is accomplished by the function `extract_feature_dynamics` in `tsfresh.feature_extraction.extraction`.

In principle this works as such:

1. The input time series *X* is split into windows within each id (e.g. "measurement_id") and the chosen set of *N* features is extracted from each window. This returns a new matrix *M* where each column represents a particular **feature time series**.

2. For a given feature time series (column) in the resulting output *M*, the extraction of step 1 is repeated on that feature time series. Each new column generated can be referred to as a **feature dynamic**.
    
3. Repeat for each column in *M*.

## Differences between `extract_features` & `extract_feature_dynamics`
`extract_feature_dynamics` shares most of the same parameters as `extract_features`, but the key differences are:
* `window_length` - this specifies the length of the time series window from which the first set of features is extracted.
* `feature_timeseries_fc_parameters` - this specifies the feature calculator dictionary object that will be used to calculate the **feature timeseries** **from our input**.
* `feature_dynamics_fc_parameters` - this specifies the feature calculator dictionary object that will be used to calculate the **feature dynamics** from our **feature time series**.
* `feature_timeseries_kind_to_fc_parameters` - this specifies the custom feature calculators used to calculate the **feature timeseries**.
* `feature_dynamics_kind_to_fc_parameters` - this specifies the custom feature calculators used to calculate the **feature dynamics**.

## Computational challenges ##
A major caveat associated with this approach is that extracting feature dynamics *can* lead to a very large number of columns being generated, because the number of columns is multiplied at each extraction step.

#### For instance: 

If the input has just 1 time series (1 column) and we extract *N* feature time series from it, and then *N* feature dynamics from each of those, this will result in *1* x *N* x *N* columns in total!

This *can* be highly computationally intensive, and users should be aware of this. It is also worth noting that computational time/effort is affected by: the window length, the number of processors/parallelisation, and which features are computed, amongst other factors.

For this reason a high performance computer with multiple processors was used to develop a proof of concept implementation of this feature-engineering algorithm.

#### Recommendation
Therefore, when testing code we *strongly* recommend using the feature set specified by `MinimalFCParameters()` located in `tsfresh.feature_extraction.settings`.

Below, the function is demonstrated on the same robot execution failures dataset.

In [16]:
# TODO: fix window size bug
# TODO: check whether, when only one of the two *_fc_parameters arguments is
#       specified, the other one should fall back to it.
extracted_sub_features = extract_feature_dynamics(
    timeseries_container=timeseries,
    window_length=11,  # window size
    column_id="id",
    column_sort="time",
    feature_timeseries_fc_parameters=MinimalFCParameters(),
    feature_dynamics_fc_parameters=MinimalFCParameters(),
)
display(extracted_sub_features.head())

Feature Extraction: 100%|██████████| 10/10 [00:04<00:00,  2.48it/s]
Feature Extraction: 100%|██████████| 10/10 [00:06<00:00,  1.50it/s]


Unnamed: 0,F_x||length__sum_values,F_x||length__median,F_x||length__mean,F_x||length__length,F_x||length__standard_deviation,F_x||length__variance,F_x||length__maximum,F_x||length__minimum,F_x||maximum__sum_values,F_x||maximum__median,...,T_z||sum_values__maximum,T_z||sum_values__minimum,T_z||variance__sum_values,T_z||variance__median,T_z||variance__mean,T_z||variance__length,T_z||variance__standard_deviation,T_z||variance__variance,T_z||variance__maximum,T_z||variance__minimum
1,15.0,7.5,7.5,2.0,3.5,12.25,11.0,4.0,-1.0,-0.5,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
2,15.0,7.5,7.5,2.0,3.5,12.25,11.0,4.0,0.0,0.0,...,-1.0,-3.0,0.385847,0.192924,0.192924,2.0,0.005424,2.9e-05,0.198347,0.1875
3,15.0,7.5,7.5,2.0,3.5,12.25,11.0,4.0,0.0,0.0,...,-2.0,-2.0,0.39876,0.19938,0.19938,2.0,0.05062,0.002562,0.25,0.14876
4,15.0,7.5,7.5,2.0,3.5,12.25,11.0,4.0,1.0,0.5,...,-2.0,-3.0,0.630165,0.315083,0.315083,2.0,0.065083,0.004236,0.380165,0.25
5,15.0,7.5,7.5,2.0,3.5,12.25,11.0,4.0,1.0,0.5,...,1.0,-3.0,0.451963,0.225981,0.225981,2.0,0.038481,0.001481,0.264463,0.1875


The full feature engineering pipeline (including the selection of relevant features) is outlined in the following diagram:

<img src="./features_on_features_diagram.png"/> 

More detail on how this approach was used can be found in the IEEE paper: ["Data Mining on Extremely Long Time Series"](https://ieeexplore.ieee.org/document/9679945).

# Interpreting the results

As can be seen, running `extract_feature_dynamics` results in significantly more columns than running `extract_features`.

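## Decomposing the column names
Each column name encodes how its values were computed, in the form `<input time series>||<feature time series calculator>__<feature dynamics calculator>`. For example, `"F_x||length__sum_values"` is the `sum_values` of the `length` feature time series extracted from the input time series `F_x`.

The function `derive_features_dictionaries` performs this decomposition for a list of column names. This is demonstrated below.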

In [17]:
# Decompose only the first 120 column names, to keep the printed output small.
sub_feature_names = extracted_sub_features.columns[:120].tolist()
f, ff = derive_features_dictionaries(sub_feature_names)

print("The set f features calculated on the original time series:\n")
print(dumps(f, sort_keys=True, indent=4))


The set f features calculated on the original time series:

{
    "F_x": {
        "length": null,
        "maximum": null,
        "mean": null,
        "median": null,
        "minimum": null,
        "standard_deviation": null,
        "sum_values": null,
        "variance": null
    },
    "F_y": {
        "length": null,
        "maximum": null,
        "mean": null,
        "median": null,
        "minimum": null,
        "standard_deviation": null,
        "sum_values": null
    }
}


**talk about how to interpret this**
.........

TODO write

In [18]:
print("\nThe set of feature-dynamics/sub-features generate on the feature time-series:\n")

# Pretty-print the dictionary, truncated to its first 479 characters so that
# only the leading entries are shown.
ff_as_json = dumps(ff, sort_keys=True, indent=4)
print(ff_as_json[:479])


The set of feature-dynamics/sub-features generate on the feature time-series:

{
    "F_x||length": {
        "length": null,
        "maximum": null,
        "mean": null,
        "median": null,
        "minimum": null,
        "standard_deviation": null,
        "sum_values": null,
        "variance": null
    },
    "F_x||maximum": {
        "length": null,
        "maximum": null,
        "mean": null,
        "median": null,
        "minimum": null,
        "standard_deviation": null,
        "sum_values": null,
        "variance": null
    },
  


## Select the most relevant time Series features from both of these datasets

In [19]:
# Feature selection applied to the conventionally extracted features.
selected_features = select_features(extracted_features, y)
display(selected_features.head())

Unnamed: 0,T_y__variance,T_y__standard_deviation,F_z__standard_deviation,F_z__variance,F_x__variance,F_x__standard_deviation,T_x__variance,T_x__standard_deviation,F_y__standard_deviation,F_y__variance,...,F_z__sum_values,F_z__median,F_y__maximum,F_x__minimum,T_x__minimum,F_x__maximum,T_y__minimum,T_z__maximum,T_z__minimum,F_z__maximum
1,0.222222,0.471405,1.203698,1.448889,0.062222,0.249444,0.115556,0.339935,0.339935,0.115556,...,938.0,63.0,0.0,-1.0,-3.0,0.0,-1.0,0.0,0.0,64.0
2,4.222222,2.054805,4.333846,18.782222,0.915556,0.956847,11.715556,3.422799,2.149935,4.622222,...,932.0,63.0,3.0,-3.0,-10.0,0.0,-5.0,0.0,-1.0,70.0
3,3.128889,1.768867,4.616877,21.315556,0.355556,0.596285,6.933333,2.633122,1.543445,2.382222,...,917.0,61.0,2.0,-1.0,-7.0,1.0,-5.0,0.0,-1.0,68.0
4,7.128889,2.669998,3.833188,14.693333,0.906667,0.95219,12.426667,3.525148,1.995551,3.982222,...,933.0,63.0,5.0,-2.0,-15.0,1.0,-6.0,1.0,-1.0,70.0
5,4.16,2.039608,4.841487,23.44,0.773333,0.879394,7.6,2.75681,1.730767,2.995556,...,909.0,59.0,3.0,-2.0,-12.0,2.0,-5.0,1.0,-1.0,73.0


In [20]:
# Same selection step, this time applied to the extracted feature dynamics.
selected_sub_features = select_features(extracted_sub_features, y)
display(selected_sub_features.head())

Unnamed: 0,F_z||variance__maximum,F_z||standard_deviation__maximum,T_y||variance__maximum,T_y||standard_deviation__maximum,F_z||variance__median,F_z||variance__sum_values,F_z||variance__mean,F_x||standard_deviation__maximum,F_x||variance__maximum,F_x||variance__sum_values,...,T_x||minimum__median,T_x||minimum__sum_values,T_x||minimum__mean,F_z||maximum__maximum,F_x||maximum__mean,F_x||maximum__median,F_x||maximum__sum_values,T_z||maximum__mean,T_z||maximum__median,T_z||maximum__sum_values
1,3.0,1.732051,0.231405,0.481046,1.88843,3.77686,1.88843,0.28748,0.082645,0.082645,...,-3.0,-6.0,-3.0,64.0,-0.5,-0.5,-1.0,0.0,0.0,0.0
2,19.107438,4.371206,4.628099,2.151302,14.678719,29.357438,14.678719,1.083307,1.173554,1.361054,...,-8.0,-16.0,-8.0,70.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22.25,4.716991,2.975207,1.724879,21.579545,43.159091,21.579545,0.655555,0.429752,0.429752,...,-7.0,-14.0,-7.0,68.0,0.0,0.0,0.0,0.0,0.0,0.0
4,16.975207,4.120098,7.107438,2.665978,9.831353,19.662707,9.831353,1.06794,1.140496,1.390496,...,-11.5,-23.0,-11.5,70.0,0.5,0.5,1.0,0.5,0.5,1.0
5,44.75,6.689544,4.561983,2.13588,30.019628,60.039256,30.019628,0.987525,0.975207,0.975207,...,-10.5,-21.0,-10.5,73.0,0.5,0.5,1.0,0.5,0.5,1.0


# Given this new set of subfeatures - we can decompose this into the useful features...


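The convenience of this is that once we have selected the relevant feature dynamics, we can generate a set of dictionaries that describe them and
pass them into the `feature_timeseries_kind_to_fc_parameters` and `feature_dynamics_kind_to_fc_parameters` arguments (not the `feature_timeseries_fc_parameters` and `feature_dynamics_fc_parameters` arguments).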

In [21]:
# Given the chosen feature dynamics, generate dictionary representations of them.
# `derive_features_dictionaries` is given the column *names* as a list of
# strings (the same input form used earlier in this notebook) instead of the
# whole DataFrame, so the call does not rely on implicit iteration over a frame.
selected_sub_feature_names = selected_sub_features.columns.tolist()
fc1, fc2 = derive_features_dictionaries(selected_sub_feature_names)

# Now calculate only the relevant features. The window length is the same
# value that was used for the full extraction of feature dynamics.
extracted_v2 = extract_feature_dynamics(
    timeseries,
    window_length=11,
    column_id="id",
    column_sort="time",
    feature_timeseries_kind_to_fc_parameters=fc1,
    feature_dynamics_kind_to_fc_parameters=fc2,
)

# Compare the number of columns computed now with the full extraction.
print(f"{extracted_v2.shape[1]} are calculated here vs {extracted_sub_features.shape[1]} calculated originally.")

display(extracted_v2.head())


Feature Extraction: 100%|██████████| 10/10 [00:05<00:00,  1.74it/s]
Feature Extraction: 100%|██████████| 10/10 [00:13<00:00,  1.36s/it]

184 are calculated here vs 384 calculated originally.





Unnamed: 0,F_x||maximum__standard_deviation,F_x||maximum__variance,F_x||maximum__maximum,F_x||maximum__mean,F_x||maximum__median,F_x||maximum__sum_values,F_x||mean__variance,F_x||mean__standard_deviation,F_x||median__variance,F_x||median__standard_deviation,...,T_z||standard_deviation__standard_deviation,T_z||standard_deviation__variance,T_z||sum_values__variance,T_z||sum_values__standard_deviation,T_z||variance__maximum,T_z||variance__mean,T_z||variance__sum_values,T_z||variance__median,T_z||variance__variance,T_z||variance__standard_deviation
1,0.5,0.25,0.0,-0.5,-0.5,-1.0,0.002066,0.045455,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.006327,0.079545,0.0,0.0,...,0.006175,3.8e-05,1.0,1.0,0.198347,0.192924,0.385847,0.192924,2.9e-05,0.005424
3,1.0,1.0,1.0,0.0,0.0,0.0,0.051653,0.227273,0.0,0.0,...,0.057153,0.003266,0.0,0.0,0.25,0.19938,0.39876,0.19938,0.002562,0.05062
4,0.5,0.25,1.0,0.5,0.5,1.0,0.004649,0.068182,0.0625,0.25,...,0.058288,0.003397,0.25,0.5,0.380165,0.315083,0.630165,0.315083,0.004236,0.065083
5,1.5,2.25,2.0,0.5,0.5,1.0,0.07438,0.272727,0.0,0.0,...,0.040623,0.00165,4.0,2.0,0.264463,0.225981,0.451963,0.225981,0.001481,0.038481


# Generating new time series
In addition to extracting features or sub_features from a time series, it may be of interest to generate new time series in the original dataset, from which we will then extract features.

The function `engineer_input_timeseries` helps to automate this process by generating new time series from the original data. It has the option to compute first-order differences within each time series and/or the (phase) differences between each pair of time series in the original dataset.
See the function's docstring for more details.

In [22]:
# Load an arbitrary example data set; the second return value is not used here.
ts_example, _ = gen_example_timeseries_data(container_type="pandas")
display(ts_example.head())


Unnamed: 0,t,y1,y2,y3,measurement_id
0,1,0.0,457.0,3454.0,1
1,1,0.0,352.0,13452.0,1
2,1,0.0,3524.0,23534.0,1
3,1,345346.0,124532.0,12432.0,1
4,1,1356.0,24.0,412432.0,1


In [23]:
# First-order differences for each time series of the original dataframe;
# differences between series are switched off for this call.
new_input_ts = engineer_input_timeseries(
    ts_example,
    column_id="measurement_id",
    column_sort="t",
    compute_differences_within_series=True,
    compute_differences_between_series=False,
)
display(new_input_ts.head())

Unnamed: 0,y1,y2,y3,dt_y1,dt_y2,dt_y3,measurement_id,t
0,0.0,457.0,3454.0,0.0,0.0,0.0,1,1
1,0.0,352.0,13452.0,0.0,-105.0,9998.0,1,1
2,0.0,3524.0,23534.0,0.0,3172.0,10082.0,1,1
3,345346.0,124532.0,12432.0,345346.0,121008.0,-11102.0,1,1
4,1356.0,24.0,412432.0,-343990.0,-124508.0,400000.0,1,1


Explanation

In [24]:
# Now additionally compute the differences between all pairs of time series
# in our data (both difference options enabled).
ts_diff_phase_diff = engineer_input_timeseries(
    ts_example,
    column_id="measurement_id",
    column_sort="t",
    compute_differences_within_series=True,
    compute_differences_between_series=True,
)
display(ts_diff_phase_diff.head())

Unnamed: 0,y1,y2,y3,dt_y1,dt_y2,dt_y3,D_y1y2,D_y1y3,D_y2y3,measurement_id,t
0,0.0,457.0,3454.0,0.0,0.0,0.0,-457.0,-3454.0,-2997.0,1,1
1,0.0,352.0,13452.0,0.0,-105.0,9998.0,-352.0,-13452.0,-13100.0,1,1
2,0.0,3524.0,23534.0,0.0,3172.0,10082.0,-3524.0,-23534.0,-20010.0,1,1
3,345346.0,124532.0,12432.0,345346.0,121008.0,-11102.0,220814.0,332914.0,112100.0,1,1
4,1356.0,24.0,412432.0,-343990.0,-124508.0,400000.0,1332.0,-411076.0,-412408.0,1,1


In [25]:
# Show only the columns that exist in `ts_diff_phase_diff` but not in `new_input_ts`.
new_cols = ts_diff_phase_diff.columns.difference(new_input_ts.columns).tolist()
display(ts_diff_phase_diff[new_cols])

Unnamed: 0,D_y1y2,D_y1y3,D_y2y3
0,-457.0,-3454.0,-2997.0
1,-352.0,-13452.0,-13100.0
2,-3524.0,-23534.0,-20010.0
3,220814.0,332914.0,112100.0
4,1332.0,-411076.0,-412408.0
5,111.0,-189.0,-300.0
6,-213.0,-42.0,171.0
7,-20.0,-4.0,16.0
8,-45.0,-63.0,-18.0
9,-42520.0,-355.0,42165.0


# Generate a pdf that describes Features