In [10]:
# %load classifier.py
# get_ipython().magic('matplotlib inline')
from sklearn.model_selection import KFold
from sklearn.neighbors import  KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy as np
import input_parser
import features as ft
import pandas as pd

def main(folds=5, n_neighbors=3, random_state=0):
    """Cross-validate a k-nearest-neighbours classifier on the extracted features.

    Parses the input files, extracts the feature matrix and runs a k-fold
    cross-validation, printing the confusion matrix and classification report
    of every fold followed by the accuracy averaged over all folds.

    Args:
        folds: Number of cross-validation folds.
        n_neighbors: Number of neighbours used by the classifier.
        random_state: Seed for the fold shuffling, so runs are reproducible.
    """
    input_data, labels = input_parser.parse_input()
    X, Y = ft.get_features(input_data, labels)

    print("X.columns:")
    print(X.columns)

    print("Selecting rows for " + str(folds) + "-fold validation")
    # Shuffle before splitting: without it the folds are contiguous slices of
    # the input, so records that are stored grouped by class end up in test
    # folds that contain a single class only.
    kf = KFold(n_splits=folds, shuffle=True, random_state=random_state)

    fold_accuracies = []
    for fold_cnt, (train_index, test_index) in enumerate(kf.split(X), start=1):

        print('Fold: ' + str(fold_cnt))
        # kf.split yields positional indices, so select by position (.iloc)
        # instead of by index label.
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        Y_train, Y_test = Y.iloc[train_index], Y.iloc[test_index]

        model = KNeighborsClassifier(n_neighbors=n_neighbors)
        model.fit(X_train, Y_train)
        predictions = model.predict(X_test)

        fold_accuracies.append(accuracy_score(Y_test, predictions))
        print(confusion_matrix(Y_test, predictions))
        print(classification_report(Y_test, predictions))

    # Mean of the per-fold accuracies.
    print("Total accuracy: " + str(sum(fold_accuracies) / folds))

main()


Parse input files...
binary_test/binary_test.txt
{' target_class_1': 1, ' target_class_2': 2}
     id  time  value
0     0     1    760
1     0    11    761
2     0   466    761
3     0   473    765
4     0   481    763
5     0   488    761
6     0   516    763
7     0   532    763
8     0   542    756
9     1     1    760
10    1    11    761
11    1   466    761
12    1   473    765
13    1   481    763
14    1   488    761
15    1   516    763
16    1   532    763
17    1   542    756
18    2     1    760
19    2    11    761
20    2   466    761
21    2   473    765
22    2   481    763
23    2   488    761
24    2   516    763
25    2   532    763
26    2   542    756
27    3     1    760
28    3    11    761
29    3   466    761
..   ..   ...    ...
303  33   516    763
304  33   532    763
305  33   542    756
306  34     1    760
307  34    11    761
308  34   466    761
309  34   473    765
310  34   481    763
311  34   488    761
312  34   516    763
313  34   532    763
314

Feature Extraction: 100%|██████████| 1/1 [00:04<00:00,  4.48s/it]
 'value__max_langevin_fixed_point__m_3__r_30'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_9__w_2'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_10__w_2'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_11__w_2'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_2'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_2'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_14__w_2'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_9__w_5'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_10__w_5'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_11__w_5'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_12__w_5'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_13__w_5'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_14__w_5'
 'value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_9__w_10'
 'value__cwt_coefficients__widths_(2, 5, 10, 

    value__index_mass_quantile__q_0.7  value__index_mass_quantile__q_0.3  \
id                                                                         
0                            0.777778                           0.333333   
1                            1.777778                           1.333333   
2                            2.777778                           2.333333   
3                            3.777778                           3.333333   
4                            4.777778                           4.333333   
5                            5.777778                           5.333333   
6                            6.777778                           6.333333   
7                            7.777778                           7.333333   
8                            8.777778                           8.333333   
9                            9.777778                           9.333333   
10                          10.777778                          10.333333   
11          

In [11]:
# %load input_parser.py
import os, glob

def parse_input(session_dir='binary_test', file_endings='*.txt'):
    """Read every trace file of a session and return the traces and their labels.

    Every non-blank line of a file is one record: comma separated
    ``value|time_stamp`` entries followed by the label name. A trailing comma
    after the label is accepted but not required.

    Args:
        session_dir: Directory that is searched for input files.
        file_endings: Glob pattern selecting the files inside ``session_dir``.

    Returns:
        A tuple ``(input_data, label_names)``. ``input_data`` holds one
        ``[path, records]`` item per file, where ``records`` contains, per
        line, a list of ``[value, time_stamp]`` integer pairs. ``label_names``
        holds the label of every record of the first file, with surrounding
        whitespace removed.

    Raises:
        ValueError: If a later file has a label that differs from the first
            file's label on the same record position, if it has more records
            than the first file, or if a value/time stamp is not an integer.
    """
    print('Parse input files...')

    # Top level cells (containing all files)
    input_data = []
    label_names = []

    # glob returns matches in no guaranteed order; sort so that the file
    # treated as the reference ("first") file is the same on every run.
    paths = sorted(glob.glob(os.path.join(session_dir, file_endings)))
    for file_index, path in enumerate(paths):
        print(path)
        first_file = file_index == 0

        # Mid level cells (containing all the content of a file)
        values = []

        with open(path) as f:
            for line in f:
                tokens = [token.strip() for token in line.split(',')]
                # Drop the empty fields left by a trailing comma / newline.
                while tokens and not tokens[-1]:
                    tokens.pop()
                if not tokens:
                    continue  # blank line carries no record

                # The label name sits at the last position of the line.
                current_label = tokens[-1]

                # Low level cells (contain recorded traces)
                record_values = []
                for token in tokens[:-1]:
                    entry = token.split('|')
                    value = int(entry[0])  # actual value
                    time_stamp = int(entry[1])  # relative time stamp
                    record_values.append([value, time_stamp])

                record_index = len(values)
                if first_file:
                    label_names.append(current_label)
                elif (record_index >= len(label_names)
                      or current_label != label_names[record_index]):
                    error_message = ('label name line mismatch - input files do not fit together')
                    raise ValueError(error_message)

                values.append(record_values)

        input_data.append([str(path), values])

    return input_data, label_names


In [12]:
# %load features.py
from tsfresh.examples import load_robot_execution_failures
from tsfresh.examples import download_robot_execution_failures
from tsfresh import select_features
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
import pandas as pd
import numpy as np

def get_features(input_data, labels):
    """Extract and select tsfresh features for the records of the first file.

    Only ``input_data[0]`` is used; records of any further files are ignored.

    Args:
        input_data: List of ``[path, records]`` items as produced by the
            input parser.
        labels: One label name per record of the first file.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is the frame returned by
        ``select_features`` and ``Y`` is a ``pandas.Series`` of the label
        names.

    Raises:
        ValueError: If ``input_data`` is empty or the number of records does
            not match the number of labels.
    """
    if not input_data:
        raise ValueError('no input files were parsed - cannot extract features')

    used_records = input_data[0][1]
    if len(used_records) != len(labels):
        raise ValueError('number of records (' + str(len(used_records))
                         + ') does not match number of labels ('
                         + str(len(labels)) + ')')

    time_series, target_classes = construct_tkfresh_input(used_records, labels)

    print(time_series)

    extracted_features = extract_features(time_series, column_id='id', column_sort='time', column_value='value')
    # The return value is not used: this relies on impute() modifying the
    # frame in place.
    impute(extracted_features)
    # NOTE(review): the selection is fitted on every record and its label, so
    # a cross-validation run on X afterwards has already seen the test labels
    # - confirm this is acceptable.
    X = select_features(extracted_features, target_classes)
    print(X)

    Y = pd.Series(labels)
    return X, Y

def construct_tkfresh_input(file_record, labels):
    """Flatten traces into a long-format frame and encode labels as integers.

    Args:
        file_record: Sequence of traces; every trace is a sequence of
            ``[value, time_stamp]`` pairs.
        labels: Label names, indexed by the position of the trace.

    Returns:
        A tuple ``(df, y)``. ``df`` is a ``pandas.DataFrame`` with the columns
        ``id`` (position of the trace), ``time`` and ``value``, one row per
        data point. ``y`` is a ``numpy`` array with the integer class of
        every trace.
    """
    # Number the distinct label names 1, 2, ... in order of first appearance.
    label_mapping = {}
    for name in labels:
        if name not in label_mapping:
            label_mapping[name] = len(label_mapping) + 1
    print(label_mapping)

    id_to_target = []
    df_rows = []
    for trace_id, trace in enumerate(file_record):
        id_to_target.append(label_mapping[labels[trace_id]])
        # Points are stored as [value, time_stamp]; the frame wants time first.
        df_rows.extend([trace_id, point[1], point[0]] for point in trace)

    df = pd.DataFrame(df_rows, columns=['id', 'time', 'value'])
    return df, np.array(id_to_target)

In [13]:
# %load binary_test/binary_test.txt
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_1,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,
760|1, 761|11, 761|466, 765|473, 763|481, 761|488, 763|516, 763|532, 756|542, target_class_2,

NameError: name 'target_class_1' is not defined