# Libraries

In [17]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
import pandas as pd

from tsfresh import extract_relevant_features
from tsfresh import extract_features
from tsfresh import select_features


from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression


In [18]:
with h5py.File('./human-activity/train.h5', 'r') as f:
  total_acc_x = f['total_acc_x'][()]
  total_acc_y = f['total_acc_y'][()]
  total_acc_z = f['total_acc_z'][()]

  body_acc_x = f['body_acc_x'][()]
  body_acc_y = f['body_acc_y'][()]
  body_acc_z = f['body_acc_z'][()]

  body_gyro_x = f['body_gyro_x'][()]
  body_gyro_y = f['body_gyro_y'][()]
  body_gyro_z = f['body_gyro_z'][()]


  y = f['y'][()]

# Formateando datos para la librería TSFresh 

In [19]:
num_time_points = total_acc_x.shape[1]
num_series = total_acc_x.shape[0]

df = pd.DataFrame()
time_index = np.tile(np.arange(num_time_points), num_series)

def flatten_and_extend(name, data):
    return pd.DataFrame({
        name: data.flatten(), 
        'id': np.repeat(np.arange(num_series), num_time_points),  
        'time': time_index  
    })

data_frames = [
    flatten_and_extend('total_acc_x', total_acc_x),
    flatten_and_extend('total_acc_y', total_acc_y),
    flatten_and_extend('total_acc_z', total_acc_z),
    flatten_and_extend('body_acc_x', body_acc_x),
    flatten_and_extend('body_acc_y', body_acc_y),
    flatten_and_extend('body_acc_z', body_acc_z),
    flatten_and_extend('body_gyro_x', body_gyro_x),
    flatten_and_extend('body_gyro_y', body_gyro_y),
    flatten_and_extend('body_gyro_z', body_gyro_z)
]

df = pd.concat(data_frames, axis=1)
df = df.loc[:,~df.columns.duplicated()]

df.head()

Unnamed: 0,body_gyro_x,id,time,body_gyro_y,body_gyro_z
0,0.030191,0,0,0.066014,0.022859
1,0.043711,0,1,0.042699,0.010316
2,0.035688,0,2,0.07485,0.01325
3,0.040402,0,3,0.05732,0.017751
4,0.047097,0,4,0.052343,0.002553


In [20]:
extracted_features = extract_features(df, column_id="id", column_sort="time")

Feature Extraction: 100%|██████████| 15/15 [15:11<00:00, 60.74s/it]


# Eliminando NA

In [21]:
nans_per_column = extracted_features.isna().sum()
columns_to_drop = nans_per_column[nans_per_column > 0].index
extracted_features = extracted_features.drop(columns=columns_to_drop)

In [23]:
target = pd.DataFrame(y, columns=['target'])
target  = target["target"]

In [24]:
selected_features = select_features(extracted_features, target)

In [25]:
selected_features.to_csv('./processed-data/TSFresh/selected_features_all.csv', index=True)

# Test Usando Librerías

In [26]:
X_train, X_test, y_train, y_test = train_test_split(selected_features, target, test_size=.4)

In [27]:
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)
print(classification_report(y_test, classifier.predict(X_test)))

              precision    recall  f1-score   support

         1.0       0.90      0.88      0.89       498
         2.0       0.89      0.87      0.88       446
         3.0       0.80      0.84      0.82       403
         4.0       0.64      0.63      0.63       532
         5.0       0.66      0.63      0.65       541
         6.0       0.57      0.60      0.59       521

    accuracy                           0.73      2941
   macro avg       0.74      0.74      0.74      2941
weighted avg       0.74      0.73      0.73      2941



In [28]:
SVCclassifier = SVC()
SVCclassifier.fit(X_train, y_train)
print(classification_report(y_test, SVCclassifier.predict(X_test)))

              precision    recall  f1-score   support

         1.0       0.59      0.92      0.72       498
         2.0       0.75      0.58      0.65       446
         3.0       0.74      0.40      0.52       403
         4.0       0.71      0.25      0.37       532
         5.0       0.48      0.66      0.55       541
         6.0       0.48      0.60      0.53       521

    accuracy                           0.57      2941
   macro avg       0.62      0.57      0.56      2941
weighted avg       0.61      0.57      0.55      2941



In [29]:
KNNclassifier = KNeighborsClassifier()
KNNclassifier.fit(X_train, y_train)

print(classification_report(y_test, KNNclassifier.predict(X_test)))

              precision    recall  f1-score   support

         1.0       0.32      0.56      0.41       498
         2.0       0.34      0.37      0.35       446
         3.0       0.31      0.24      0.27       403
         4.0       0.59      0.48      0.53       532
         5.0       0.44      0.28      0.34       541
         6.0       0.53      0.49      0.51       521

    accuracy                           0.41      2941
   macro avg       0.42      0.40      0.40      2941
weighted avg       0.43      0.41      0.41      2941



In [30]:
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
print(classification_report(y_test, classifier.predict(X_test)))

              precision    recall  f1-score   support

         1.0       0.90      0.90      0.90       498
         2.0       0.85      0.87      0.86       446
         3.0       0.83      0.81      0.82       403
         4.0       0.62      0.55      0.58       532
         5.0       0.54      0.57      0.55       541
         6.0       0.54      0.57      0.56       521

    accuracy                           0.70      2941
   macro avg       0.71      0.71      0.71      2941
weighted avg       0.70      0.70      0.70      2941



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
