In [2]:
%matplotlib inline

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from os.path import join

from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [4]:
data = pd.read_csv(join("./data/accelerometer_data_section_1.csv"), columns = ['Cars', 'LW', 'Clifton', ])

training_set, test_set, training_labels, test_labels = train_test_split(data[:,3:], data[:,0], test_size=0.25, random_state=42)
df = training_set
y = training_labels # number of cars, not direction
df

TypeError: '(slice(None, None, None), slice(3, None, None))' is an invalid key

In [23]:
# df[df.id == 3][['time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z']].plot(x='time', title='Success example (id 3)', figsize=(12, 6));
# df[df.id == 20][['time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z']].plot(x='time', title='Failure example (id 20)', figsize=(12, 6));

In [3]:
extraction_settings = ComprehensiveFCParameters()

X = extract_features(df, column_id='id', column_sort='time',
                     default_fc_parameters=extraction_settings,
                     # we impute = remove all NaN features automatically
                     impute_function=impute)

Feature Extraction: 100%|██████████| 20/20 [00:10<00:00,  1.96it/s]


In [4]:
X.head()

Unnamed: 0,F_x__variance_larger_than_standard_deviation,F_x__has_duplicate_max,F_x__has_duplicate_min,F_x__has_duplicate,F_x__sum_values,F_x__abs_energy,F_x__mean_abs_change,F_x__mean_change,F_x__mean_second_derivative_central,F_x__median,...,T_z__permutation_entropy__dimension_6__tau_1,T_z__permutation_entropy__dimension_7__tau_1,T_z__query_similarity_count__query_None__threshold_0.0,"T_z__matrix_profile__feature_""min""__threshold_0.98","T_z__matrix_profile__feature_""max""__threshold_0.98","T_z__matrix_profile__feature_""mean""__threshold_0.98","T_z__matrix_profile__feature_""median""__threshold_0.98","T_z__matrix_profile__feature_""25""__threshold_0.98","T_z__matrix_profile__feature_""75""__threshold_0.98",T_z__mean_n_absolute_max__number_of_maxima_7
1,0.0,0.0,1.0,1.0,-14.0,14.0,0.142857,0.0,-0.038462,-1.0,...,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,1.0,1.0,-13.0,25.0,1.0,0.0,-0.038462,-1.0,...,1.748067,1.83102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.571429
3,0.0,0.0,1.0,1.0,-10.0,12.0,0.714286,0.0,-0.038462,-1.0,...,2.163956,2.197225,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.571429
4,0.0,1.0,1.0,1.0,-6.0,16.0,1.214286,-0.071429,-0.038462,0.0,...,2.302585,2.197225,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,0.0,0.0,0.0,1.0,-9.0,17.0,0.928571,-0.071429,0.038462,-1.0,...,2.302585,2.197225,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.857143


In [5]:
X_filtered = select_features(X, y)

In [6]:
X_filtered.head()

Unnamed: 0,F_x__value_count__value_-1,F_x__abs_energy,F_x__root_mean_square,T_y__absolute_maximum,F_x__mean_n_absolute_max__number_of_maxima_7,F_x__range_count__max_1__min_-1,F_y__abs_energy,F_y__root_mean_square,F_y__mean_n_absolute_max__number_of_maxima_7,T_y__variance,...,"F_y__cwt_coefficients__coeff_14__w_5__widths_(2, 5, 10, 20)","F_y__cwt_coefficients__coeff_13__w_2__widths_(2, 5, 10, 20)",T_y__lempel_ziv_complexity__bins_3,T_y__quantile__q_0.1,F_z__time_reversal_asymmetry_statistic__lag_1,F_x__quantile__q_0.2,F_y__quantile__q_0.7,"T_x__change_quantiles__f_agg_""var""__isabs_False__qh_0.2__ql_0.0",T_z__large_standard_deviation__r_0.35000000000000003,T_z__quantile__q_0.9
1,14.0,14.0,0.966092,1.0,1.0,15.0,13.0,0.930949,1.0,0.222222,...,-0.751682,-0.310265,0.4,-1.0,-596.0,-1.0,-1.0,0.0,0.0,0.0
2,7.0,25.0,1.290994,5.0,1.571429,13.0,76.0,2.250926,3.0,4.222222,...,0.057818,-0.202951,0.533333,-3.6,-680.384615,-1.0,-1.0,0.0,1.0,0.0
3,11.0,12.0,0.894427,5.0,1.0,14.0,40.0,1.632993,2.142857,3.128889,...,0.912474,0.539121,0.533333,-4.0,-617.0,-1.0,0.0,0.0,1.0,0.0
4,5.0,16.0,1.032796,6.0,1.285714,10.0,60.0,2.0,2.428571,7.128889,...,-0.609735,-2.64139,0.533333,-4.6,3426.307692,-1.0,1.0,0.0,0.0,0.0
5,9.0,17.0,1.064581,5.0,1.285714,13.0,46.0,1.75119,2.285714,4.16,...,0.072771,0.591927,0.466667,-5.0,-2609.0,-1.0,0.8,0.0,0.0,0.6


In [8]:
X_full_train, X_full_test, y_train, y_test = train_test_split(X, y, test_size=.4)
X_filtered_train, X_filtered_test = X_full_train[X_filtered.columns], X_full_test[X_filtered.columns]

In [9]:
classifier_full = DecisionTreeClassifier()
classifier_full.fit(X_full_train, y_train)
print(classification_report(y_test, classifier_full.predict(X_full_test)))

              precision    recall  f1-score   support

       False       1.00      0.96      0.98        26
        True       0.91      1.00      0.95        10

    accuracy                           0.97        36
   macro avg       0.95      0.98      0.97        36
weighted avg       0.97      0.97      0.97        36



In [10]:
classifier_filtered = DecisionTreeClassifier()
classifier_filtered.fit(X_filtered_train, y_train)
print(classification_report(y_test, classifier_filtered.predict(X_filtered_test)))

              precision    recall  f1-score   support

       False       1.00      0.96      0.98        26
        True       0.91      1.00      0.95        10

    accuracy                           0.97        36
   macro avg       0.95      0.98      0.97        36
weighted avg       0.97      0.97      0.97        36

