In [55]:
import os

import numpy as np
import pandas as pd


In [56]:
data_path = './movement/' # the root dataset path, change if necessary

target_df = pd.read_csv(os.path.join(data_path, 'dataset', 'MovementAAL_target.csv')) # read the labels CSV file

In [58]:
labels = target_df[' class_label']
labels.rename('class_label', inplace=True) # remove the leading whitespace


0      1
1      1
2      1
3      1
4      1
      ..
309   -1
310   -1
311   -1
312   -1
313   -1
Name: class_label, Length: 314, dtype: int64

In [59]:
sequence_ids = target_df['#sequence_ID']


In [60]:
from sklearn.model_selection import train_test_split
train_ids, test_ids, train_labels, test_labels = train_test_split(sequence_ids, labels, test_size=0.2)

In [61]:
X_train = pd.DataFrame()
X_test = pd.DataFrame()


In [62]:
for i, sequence in enumerate(train_ids):
    df = pd.read_csv(os.path.join(data_path, 'dataset', f'MovementAAL_RSS_{sequence}.csv'))
    df.insert(0, 'sequence', i)
    df['step'] = np.arange(df.shape[0]) # creates a range of integers starting from 0 to the number of the measurements.
    X_train = pd.concat([X_train, df])

for i, sequence in enumerate(test_ids):
    df = pd.read_csv(os.path.join(data_path, 'dataset', f'MovementAAL_RSS_{sequence}.csv'))
    df.insert(0, 'sequence', i)
    df['step'] = np.arange(df.shape[0])
    X_test = pd.concat([X_test, df])


In [64]:
from tsfresh import extract_features

extracted_features = extract_features(X_train, column_id='sequence', column_sort='step')


Feature Extraction: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s]


In [65]:
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute

impute(extracted_features)
train_labels = train_labels.reset_index() # reset the index 
features_filtered = select_features(extracted_features, train_labels['class_label'])


 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_66'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_67'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_68'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_69'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_70'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_71'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_72'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_73'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_74'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_75'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_76'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_77'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_78'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_79'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_80'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_81'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_82'
 '#RSS_anchor1__fft_coefficient__attr_"real"__co

In [66]:
test_features =  extract_features(X_test, column_id='sequence', column_sort='step')
impute(test_features)


Feature Extraction: 100%|██████████| 28/28 [00:05<00:00,  5.21it/s]
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_35'
 '#RSS_anchor1__fft_coefficient__attr_"real"__coeff_36' ...
 ' RSS_anchor4__friedrich_coefficients__coeff_3__m_3__r_30'
 ' RSS_anchor4__max_langevin_fixed_point__m_3__r_30'
 ' RSS_anchor4__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


Unnamed: 0,#RSS_anchor1__variance_larger_than_standard_deviation,#RSS_anchor1__has_duplicate_max,#RSS_anchor1__has_duplicate_min,#RSS_anchor1__has_duplicate,#RSS_anchor1__sum_values,#RSS_anchor1__abs_energy,#RSS_anchor1__mean_abs_change,#RSS_anchor1__mean_change,#RSS_anchor1__mean_second_derivative_central,#RSS_anchor1__median,...,RSS_anchor4__fourier_entropy__bins_5,RSS_anchor4__fourier_entropy__bins_10,RSS_anchor4__fourier_entropy__bins_100,RSS_anchor4__permutation_entropy__dimension_3__tau_1,RSS_anchor4__permutation_entropy__dimension_4__tau_1,RSS_anchor4__permutation_entropy__dimension_5__tau_1,RSS_anchor4__permutation_entropy__dimension_6__tau_1,RSS_anchor4__permutation_entropy__dimension_7__tau_1,RSS_anchor4__query_similarity_count__query_None__threshold_0.0,RSS_anchor4__mean_n_absolute_max__number_of_maxima_7
0,0.0,1.0,0.0,1.0,-13.714280,8.204077,0.108571,0.028571,-2.083333e-07,-0.500000,...,1.574097,2.143952,2.639057,1.524288,2.404234,2.965016,3.044522,2.995732,0.0,0.435714
1,0.0,1.0,1.0,1.0,-14.045428,9.774778,0.107708,-0.018775,5.051111e-04,-0.181820,...,1.076810,1.628194,2.831480,1.128018,1.678730,2.183865,2.605047,3.058381,0.0,0.404254
2,0.0,1.0,0.0,1.0,-6.772720,2.088852,0.068182,0.000988,0.000000e+00,-0.136360,...,1.498935,2.084201,3.004767,1.093491,1.760326,2.362737,2.834448,3.301665,0.0,0.598786
3,0.0,1.0,1.0,1.0,-2.863639,8.911176,0.107504,0.016595,3.665323e-04,-0.022728,...,0.404510,0.700040,2.007420,1.212501,1.931980,2.629458,3.086895,3.445168,0.0,0.720364
4,0.0,0.0,0.0,1.0,-4.933319,1.540734,0.090780,-0.003783,9.661739e-04,-0.066667,...,0.711386,1.305248,2.754332,1.262518,1.952538,2.400833,2.805254,3.123421,0.0,0.312921
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,0.0,1.0,0.0,1.0,13.333337,8.648531,0.228174,0.001984,1.035217e-03,0.571430,...,1.311431,1.732659,2.351673,1.591907,2.602381,2.978508,2.995732,2.944439,0.0,0.992857
59,0.0,0.0,0.0,1.0,8.761880,3.918355,0.162339,0.019480,0.000000e+00,0.380950,...,0.721464,1.098612,2.253858,1.608395,2.454743,2.770974,2.890372,2.833213,0.0,0.964286
60,0.0,1.0,1.0,1.0,4.380956,1.705216,0.102041,0.029479,-3.571425e-03,0.142860,...,0.450561,1.011404,2.138333,1.428551,2.081599,2.443306,2.639341,2.685945,0.0,0.871429
61,0.0,1.0,1.0,1.0,11.590914,7.258259,0.086364,0.028788,-3.134741e-03,0.363640,...,0.688567,1.037392,2.273966,1.058248,1.574737,2.230193,2.768507,3.087042,0.0,0.617021


In [67]:
test_features_filtered = test_features[features_filtered.columns]


In [74]:
features_filtered['class_label'] = train_labels['class_label']
test_features_filtered['class_label'] = test_labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_features_filtered['class_label'] = test_labels


In [75]:
features_filtered.to_csv("movement_deduction_train.csv",index=False)
test_features_filtered.to_csv("movement_deduction_test.csv",index=False)