In [49]:
mypath = "data"

In [50]:
import numpy as np
import pandas as pd
import pickle

from os import listdir
from os.path import join
from sklearn import model_selection, svm
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier

In [51]:
activities = listdir(mypath)
activities

['idle', 'running', 'stairs', 'walking']

In [52]:
for act in activities:
    path = join(mypath, act)
    frames = listdir(path)
    print(f"{act}: {len(frames)}")

idle: 1039
running: 3408
stairs: 165
walking: 1850


In [53]:
frms = listdir(join(mypath, "stairs"))
frame = pd.read_csv(join(join(mypath, "stairs"), frms[2]))
frame.head(3)

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z
0,-2.269703,-2.782062,2.39899
1,1.479617,-9.251195,-0.857124
2,5.152323,-11.702858,-2.27928


## Data preparating:

In [55]:
def get_stat_features(frame):
    features = np.array([])
    features = np.concatenate((features, frame.skew(axis=0).values), axis=0)
    features = np.concatenate((features, frame.kurt(axis=0).values), axis=0)
    features = np.concatenate((features, frame.max(axis=0).values), axis=0)
    features = np.concatenate((features, frame.min(axis=0).values), axis=0)
    features = np.concatenate((features, frame.mean(axis=0).values), axis=0)
    features = np.concatenate((features, frame.std(axis=0).values), axis=0)
    features = np.concatenate((features, frame.var(axis=0).values), axis=0)
    features = np.concatenate((features, frame.median(axis=0).values), axis=0)
    features = np.concatenate((features, frame.idxmax(axis=0).values), axis=0)
    features = np.concatenate((features, frame.idxmin(axis=0).values), axis=0)
    correlations = frame.corr()
    corr = np.array([correlations['accelerometer_X']['accelerometer_Y'], correlations['accelerometer_X']['accelerometer_Z'], correlations['accelerometer_Y']['accelerometer_Z']])
    features = np.concatenate((features, corr), axis=0)

    frame['mean_X'] = frame.mean(axis=0)['accelerometer_X']
    frame['mean_Y'] = frame.mean(axis=0)['accelerometer_Y']
    frame['mean_Z'] = frame.mean(axis=0)['accelerometer_Z']
    
    mae_X = mean_absolute_error(frame['accelerometer_X'], frame['mean_X'])
    mae_Y = mean_absolute_error(frame['accelerometer_Y'], frame['mean_Y'])
    mae_Z = mean_absolute_error(frame['accelerometer_Z'], frame['mean_Z'])
    
    rmse_x = np.sqrt(mean_squared_error(frame['accelerometer_X'], frame['mean_X']))
    rmse_y = np.sqrt(mean_squared_error(frame['accelerometer_Y'], frame['mean_Y']))
    rmse_z = np.sqrt(mean_squared_error(frame['accelerometer_Z'], frame['mean_Z']))

    metrics = np.array([mae_X, mae_Y, mae_Z, rmse_x, rmse_y, rmse_z])
    features = np.concatenate((features, metrics), axis=0)

    return features

In [56]:
len(get_stat_features(frame))

39

## Discover calculation of features:

In [57]:
new_frame = frame

In [58]:
new_frame['mean_X'] = frame.mean(axis=0)['accelerometer_X']
new_frame['mean_Y'] = frame.mean(axis=0)['accelerometer_Y']
new_frame['mean_Z'] = frame.mean(axis=0)['accelerometer_Z']

new_frame.head(3)

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,mean_X,mean_Y,mean_Z
0,-2.269703,-2.782062,2.39899,0.864945,-9.36899,-3.801513
1,1.479617,-9.251195,-0.857124,0.864945,-9.36899,-3.801513
2,5.152323,-11.702858,-2.27928,0.864945,-9.36899,-3.801513


In [59]:
mean_absolute_error(frame['accelerometer_X'], new_frame['mean_X'])

2.1644329377777782

In [60]:
np.sqrt(mean_squared_error(frame['accelerometer_X'], new_frame['mean_X']))

2.7998626036369325

## Prepare one class:

In [61]:
def class_data_stat_prepare(class_name, class_number):
    path = join(mypath, class_name)
    X = []
    for item in listdir(path):
        frame = pd.read_csv(join(path, item))
        features = get_stat_features(frame)
        X.append(features)

    y = [class_number]*len(X)

    X = np.array(X)
    y = np.array(y)

    return X, y

## Create DataSet:

In [62]:
def create_dataset(class_prepare):
    X_idle, y_idle = class_prepare('idle', 0)
    X_walking, y_walking = class_prepare('walking', 1)
    X_stairs, y_stairs = class_prepare('stairs', 2)
    X_running, y_running = class_prepare('running', 3)

    X = np.concatenate((X_idle, X_walking), axis=0)
    X = np.concatenate((X, X_stairs), axis=0)
    X = np.concatenate((X, X_running), axis=0)

    Y = np.concatenate((y_idle, y_walking), axis=0)
    Y = np.concatenate((Y, y_stairs), axis=0)
    Y = np.concatenate((Y, y_running), axis=0)

    return X, Y

In [63]:
X, y = create_dataset(class_data_stat_prepare)

In [64]:
def save_data(prefix, X, y):
    with open(f'{prefix}_X.pickle', 'wb') as f:
        pickle.dump(X, f)
    with open(f'{prefix}_y.pickle', 'wb') as f:
        pickle.dump(y, f)

In [65]:
save_data('data', X, y)

In [66]:
def load_data(prefix):
    with open(f'{prefix}_X.pickle', 'rb') as f:
        X = pickle.load(f)
    with open(f'{prefix}_y.pickle', 'rb') as f:
        y = pickle.load(f)
    return X, y

In [67]:
X, y = load_data('data')

In [68]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.3)

## SVM Classifier VS RandomForest Classifier:

In [69]:
cls_ovo = svm.SVC(decision_function_shape='ovo', kernel='rbf', gamma=0.005, probability=True).fit(X_train, y_train)
cls_ovr = svm.SVC(decision_function_shape='ovr', kernel='rbf', gamma=0.005, probability=True).fit(X_train, y_train)

cls_forest = RandomForestClassifier().fit(X_train, y_train)


# test_ovo = cls_ovo.score(X_test, y_test)
# test_ovr = cls_ovr.score(X_test, y_test)

# test_r_forest = cls_forest.score(X_test, y_test)

In [70]:
svm_ovo_pred = cls_ovo.predict(X_test)
svm_ovr_pred = cls_ovr.predict(X_test)

forest_pred = cls_forest.predict(X_test)

In [71]:
svm_ovo_accuracy = accuracy_score(y_test, svm_ovo_pred)
svm_ovr_accuracy = accuracy_score(y_test, svm_ovr_pred)

forest_ovr_accuracy = accuracy_score(y_test, forest_pred)

print("accuracy SVM ovo: ", svm_ovo_accuracy)
print("accuracy SVM ovr: ", svm_ovr_accuracy)
print("accuracy RandomForest: ", forest_ovr_accuracy)

accuracy SVM ovo:  0.9060565870910698
accuracy SVM ovr:  0.9060565870910698
accuracy RandomForest:  0.9971264367816092


In [72]:
svm_ovo_report = classification_report(y_test, svm_ovo_pred)
print("SVM ovo report: ")
print(svm_ovo_report)

svm_ovr_report = classification_report(y_test, svm_ovr_pred)
print("SVM ovr report: ")
print(svm_ovr_report)

forest_report = classification_report(y_test, forest_pred)
print("RandomForest report: ")
print(forest_report)

SVM ovo report: 
              precision    recall  f1-score   support

           0       1.00      0.88      0.94       710
           1       0.98      0.79      0.87      1306
           2       1.00      0.38      0.55       109
           3       0.86      1.00      0.92      2399

    accuracy                           0.91      4524
   macro avg       0.96      0.76      0.82      4524
weighted avg       0.92      0.91      0.90      4524

SVM ovr report: 
              precision    recall  f1-score   support

           0       1.00      0.88      0.94       710
           1       0.98      0.79      0.87      1306
           2       1.00      0.38      0.55       109
           3       0.86      1.00      0.92      2399

    accuracy                           0.91      4524
   macro avg       0.96      0.76      0.82      4524
weighted avg       0.92      0.91      0.90      4524

RandomForest report: 
              precision    recall  f1-score   support

           0       