In [1]:
import pandas as pd
import numpy as np

In [2]:
sld_df = pd.read_pickle("sliding_window_df.pkl")

In [3]:
sld_df.head(5)

Unnamed: 0,attitude.roll_sld_mean,attitude.pitch_sld_mean,attitude.yaw_sld_mean,gravity.x_sld_mean,gravity.y_sld_mean,gravity.z_sld_mean,rotationRate.x_sld_mean,rotationRate.y_sld_mean,rotationRate.z_sld_mean,userAcceleration.x_sld_mean,...,gravity.x_sld_std,gravity.y_sld_std,gravity.z_sld_std,rotationRate.x_sld_std,rotationRate.y_sld_std,rotationRate.z_sld_std,userAcceleration.x_sld_std,userAcceleration.y_sld_std,userAcceleration.z_sld_std,action
0,1.476032,-0.699698,0.659227,0.761074,0.643965,-0.072516,0.327435,-0.23759,0.125294,0.089179,...,0.003243,0.006475,0.029224,0.346436,0.590791,0.249107,0.083854,0.128267,0.114783,dws
1,1.464487,-0.697192,0.650675,0.761804,0.642056,-0.081426,0.344311,-0.346253,0.059212,0.058162,...,0.001706,0.004752,0.029167,0.377046,0.554298,0.172086,0.087612,0.140744,0.099833,dws
2,1.448353,-0.695176,0.63986,0.761559,0.64051,-0.093848,0.481461,-0.525592,0.033799,0.054865,...,0.002168,0.004552,0.032387,0.428049,0.712118,0.14147,0.090179,0.146393,0.097535,dws
3,1.4265,-0.692378,0.625654,0.760575,0.638354,-0.110722,0.602284,-0.699763,0.062317,0.055195,...,0.004032,0.005687,0.043814,0.439572,1.00645,0.168158,0.089927,0.148507,0.10431,dws
4,1.399383,-0.688014,0.609652,0.758815,0.634966,-0.131806,0.70538,-0.951931,0.111215,0.041147,...,0.007015,0.008968,0.062607,0.433246,1.32976,0.224856,0.074548,0.091135,0.131904,dws


Shuffle the dataset and divide features and label

In [4]:
sld_df = sld_df.sample(frac=1).reset_index(drop=True)
labels_dict = {'wlk': 0, 'sit': 1, "std": 2, "ups": 3, "jog": 4, "dws": 5}
X, y = sld_df.drop(["action"], axis=1), sld_df["action"]
y = y.replace(labels_dict)

Divide to training, validation and test set 70%, 10%, 20%

In [5]:
num_training = int(sld_df.shape[0] * 0.7)
num_validation = int(sld_df.shape[0] * 0.1)
X_train, y_train = X[:num_training], y[:num_training]
X_vald, y_vald = X[num_training:num_training + num_validation], y[num_training:num_training + num_validation]
X_test, y_test = X[num_training + num_validation:], y[num_training + num_validation:]

Helper functions for evaluating the results

In [13]:
from sklearn.metrics import classification_report, confusion_matrix

def create_classes(labels_dict):
    classes_indexs = labels_dict.items()
    classes_indexs.sort(key=lambda x: x[1])
    classes_names = [label for label, index in classes_indexs]
    return classes_names

def evaluate_results(y_true, y_pred, classes):
        print "---- Printing classification report ----"
        print classification_report(y_true, y_pred, target_names=classes)

Random Forest classifier

In [15]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_jobs=-1, verbose=1)
rf.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.5min finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=None, verbose=1,
            warm_start=False)

In [16]:
prediction = rf.predict(X_vald)

[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.2s finished


In [20]:
classes_names = create_classes(labels_dict)
evaluate_results(y_vald, prediction, classes_names)

---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.97      0.99      0.98     34190
        sit       1.00      1.00      1.00     33767
        std       1.00      1.00      1.00     30563
        ups       0.96      0.94      0.95     15691
        jog       0.99      0.98      0.98     13420
        dws       0.97      0.93      0.95     13295

avg / total       0.98      0.98      0.98    140926



In [21]:
prediction_test = rf.predict(X_test)
evaluate_results(y_test, prediction_test, classes_names)

---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.96      0.99      0.98     68981
        sit       1.00      1.00      1.00     67904
        std       1.00      1.00      1.00     61082
        ups       0.96      0.94      0.95     31109
        jog       0.99      0.98      0.98     26571
        dws       0.96      0.92      0.94     26207

avg / total       0.98      0.98      0.98    281854



[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.4s finished


## Perform the same analysis over the history data set

In [23]:
hist_df = pd.read_pickle("history_10_encoded.pkl")

In [24]:
hist_df.head(5)

Unnamed: 0,attitude.roll,attitude.pitch,attitude.yaw,gravity.x,gravity.y,gravity.z,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,...,prev_10_attitude.yaw,prev_10_gravity.x,prev_10_gravity.y,prev_10_gravity.z,prev_10_rotationRate.x,prev_10_rotationRate.y,prev_10_rotationRate.z,prev_10_userAcceleration.x,prev_10_userAcceleration.y,prev_10_userAcceleration.z
0,1.434186,-0.693527,0.616938,0.761832,0.639253,-0.104726,0.865711,0.179274,-0.130432,-0.0176,...,0.696372,0.741895,0.669768,-0.031672,0.316738,0.77818,1.082764,0.294894,-0.184493,0.377542
1,1.412539,-0.69193,0.59224,0.760394,0.638024,-0.121353,1.010791,-0.662185,-0.017245,-0.090762,...,0.677762,0.753099,0.657116,-0.032255,0.842032,0.424446,0.643574,0.219405,0.035846,0.114866
2,1.366432,-0.686835,0.562806,0.757165,0.634093,-0.156928,1.233366,-1.834122,0.089429,-0.022256,...,0.670951,0.759611,0.649555,-0.032707,-0.138143,-0.040741,0.343563,0.010714,0.134701,-0.167808
3,1.298239,-0.676703,0.533675,0.750862,0.626226,-0.209876,1.183222,-2.790427,0.32104,-0.005088,...,0.675735,0.760709,0.647788,-0.04114,-0.025005,-1.048717,0.03586,-0.008389,0.136788,0.094958
4,1.222763,-0.660279,0.51297,0.742468,0.613337,-0.269368,1.145214,-3.434572,0.536324,0.058954,...,0.672994,0.760062,0.64721,-0.05853,0.114253,-0.91289,0.047341,0.199441,0.353996,-0.044299


In [26]:
hist_df = hist_df.sample(frac=1).reset_index(drop=True)
labels_dict = {'wlk': 0, 'sit': 1, "std": 2, "ups": 3, "jog": 4, "dws": 5}
X, y = hist_df.drop(["action"], axis=1), hist_df["action"]
y = y.replace(labels_dict)

In [29]:
num_training = int(hist_df.shape[0] * 0.7)
num_validation = int(hist_df.shape[0] * 0.1)
X_train, y_train = X[:num_training], y[:num_training]
X_vald, y_vald = X[num_training:num_training + num_validation], y[num_training:num_training + num_validation]
X_test, y_test = X[num_training + num_validation:], y[num_training + num_validation:]

In [31]:
from sklearn.ensemble import RandomForestClassifier
hist_rf = RandomForestClassifier(n_jobs=-1, verbose=1)
hist_rf.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.6min finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=None, verbose=1,
            warm_start=False)

In [32]:
prediction = hist_rf.predict(X_vald)
classes_names = create_classes(labels_dict)
evaluate_results(y_vald, prediction, classes_names)

---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.93      0.98      0.96     34429
        sit       1.00      1.00      1.00     33690
        std       1.00      1.00      1.00     30665
        ups       0.91      0.89      0.90     15585
        jog       0.98      0.97      0.97     13339
        dws       0.93      0.85      0.88     13218

avg / total       0.96      0.96      0.96    140926



[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.2s finished


In [33]:
prediction_test = hist_rf.predict(X_test)
evaluate_results(y_test, prediction_test, classes_names)

---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.93      0.98      0.96     68732
        sit       1.00      1.00      1.00     67799
        std       1.00      1.00      1.00     60990
        ups       0.92      0.88      0.90     31339
        jog       0.98      0.97      0.97     26829
        dws       0.93      0.85      0.88     26165

avg / total       0.96      0.96      0.96    281854



[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.5s finished
