In [14]:
import sys, os

In [15]:
from tqdm.notebook import tqdm

In [16]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters

In [17]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [18]:
import sys, os
import numpy as np
import pandas as pd
import pickle

In [19]:
sys.path.append(os.path.abspath('../../'))
from utils import utils
from utils.utils import evaluate_experiment
sys.path.append(os.path.abspath('../'))
from timeseries_utils import *

In [20]:
from sklearn.model_selection import train_test_split

In [21]:
path_to_data='../../../'

In [22]:
data=np.load(os.path.abspath(path_to_data+'data-002.npy'),allow_pickle=True)
Y=np.load(path_to_data+'Y.npy',allow_pickle=True)
labels =pd.read_csv(path_to_data+'labels.csv')

In [23]:
train_fold=8
val_fold=9
test_fold=10

# 10th fold for testing (9th for now)
X_test = data[labels.strat_fold == test_fold]
y_test = Y[labels.strat_fold == test_fold]
# 9th fold for validation (8th for now)
X_val = data[labels.strat_fold == val_fold]
y_val = Y[labels.strat_fold == val_fold]
# rest for training
X_train = data[labels.strat_fold <= train_fold]
y_train = Y[labels.strat_fold <= train_fold]

In [24]:
X_train.shape ,X_val.shape

((17111, 1000, 12), (2156, 1000, 12))

In [25]:
# Preprocess signal data
X_train, X_val, X_test = utils.preprocess_signals(X_train, X_val, X_test,'/content/')
n_classes = y_train.shape[1]
X_train = np.reshape(X_train,[X_train.shape[0],X_train.shape[2],X_train.shape[1]])
X_val = np.reshape(X_val,[X_val.shape[0],X_val.shape[2],X_val.shape[1]])
X_test = np.reshape(X_test,[X_test.shape[0],X_test.shape[2],X_test.shape[1]])

In [26]:
X_train.shape, X_val.shape

((17111, 12, 1000), (2156, 12, 1000))

In [27]:
y_train.shape, y_val.shape

((17111, 5), (2156, 5))

## Converting data to tsfresh format

In [28]:
list_of_sample_df = []
for sample_number, sample in tqdm(enumerate(X_train)):
    df = pd.DataFrame(sample).T
    df = df.reset_index().rename(columns={"index": "time"})
    df['id'] = sample_number
    list_of_sample_df.append(df)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [29]:
fresh_train = pd.concat(list_of_sample_df,axis=0)

In [30]:
fresh_train.columns.to_list()

['time', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 'id']

In [31]:
fresh_train = fresh_train[["id", "time", 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]

In [32]:
fresh_train.head()

Unnamed: 0,id,time,0,1,2,3,4,5,6,7,8,9,10,11
0,0,0,-0.506445,-0.442161,0.110678,-0.292166,-0.270738,0.277815,-0.31788,2.004902,0.119249,0.020681,0.05068,-0.099315
1,0,1,-0.232168,-0.112172,0.003538,-0.05646,0.316385,0.187818,-0.249311,-0.000747,0.114963,-0.275024,-0.073602,-0.1336
2,0,2,0.277815,-0.103601,-0.125029,0.243531,0.106392,0.179247,0.072108,-1.749262,0.080679,-0.292166,-0.013604,-0.116458
3,0,3,0.372098,-0.000747,0.05068,0.183533,0.342099,0.132106,0.290672,-1.50927,-0.335022,0.136391,-0.09503,0.153534
4,0,4,-0.386449,-0.129314,-0.309309,-0.262167,0.27353,0.123535,-0.193598,0.667803,0.509236,0.162105,-0.035032,0.226388


In [33]:
fresh_train.shape

(17111000, 14)

### Multilabel y to just multiclass y

percorre todos os labels, transforma em string

In [34]:
y_train_tsfresh = pd.DataFrame(y_train).astype(str)

e converte binario pra inteiro, python tem uma funcao de conversao, "0,0,0,0,0" vai vira 0 no caso

int('11111111', 2),
, 2 é base 2
'111111' é o numero bnario, 
entende?

In [35]:
def multilabel_to_int(line):
    line = ''.join(line)
    return int(line, 2)

y_train_tsfresh = y_train_tsfresh.apply(multilabel_to_int,axis=1)

### The robot example

## Extrating features

In [None]:
extraction_settings = ComprehensiveFCParameters()

X = extract_features(fresh_train[1000:], column_id='id', column_sort='time',
                     default_fc_parameters=extraction_settings,
                     # we impute = remove all NaN features automatically
                     impute_function=impute)


In [None]:
X.head()

In [None]:
with open('tsfresh_comprehensive_features.pickle', 'wb') as handle:
    pickle.dump(X, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
X.shape

## Feature selection

In [None]:
X_filtered = select_features(X, y)

In [None]:
X_filtered.head()

Save the selected features

In [None]:
with open('tsfresh_comprehensive_selected_features.pickle', 'wb') as handle:
    pickle.dump(X_filtered, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Train and evaluate classifier

In [73]:
X_full_train, X_full_test, y_train, y_test = train_test_split(X, y, test_size=.4)
X_filtered_train, X_filtered_test = X_full_train[X_filtered.columns], X_full_test[X_filtered.columns]

In [74]:
classifier_full = DecisionTreeClassifier()
classifier_full.fit(X_full_train, y_train)
print(classification_report(y_test, classifier_full.predict(X_full_test)))

              precision    recall  f1-score   support

       False       0.97      1.00      0.98        29
        True       1.00      0.86      0.92         7

    accuracy                           0.97        36
   macro avg       0.98      0.93      0.95        36
weighted avg       0.97      0.97      0.97        36



In [75]:
classifier_filtered = DecisionTreeClassifier()
classifier_filtered.fit(X_filtered_train, y_train)
print(classification_report(y_test, classifier_filtered.predict(X_filtered_test)))

              precision    recall  f1-score   support

       False       0.97      0.97      0.97        29
        True       0.86      0.86      0.86         7

    accuracy                           0.94        36
   macro avg       0.91      0.91      0.91        36
weighted avg       0.94      0.94      0.94        36

