# Imports

In [1]:
import pandas as pd
import numpy as np
from sliding_window import SlidingWindow

# Globals

In [2]:
# Load the gzip-compressed CSV, discard the 'Unnamed: 0' column and
# index the rows by the 'time' column.
df = (
    pd.read_csv('full_data.gz', compression='gzip')
    .drop(columns=['Unnamed: 0'])
    .set_index('time')
)

In [3]:
# Positional arguments forwarded to SlidingWindow further down the notebook.
num_experiments = 16
num_participants = 24
exclude = 10  # NOTE(review): meaning is defined inside SlidingWindow -- confirm

# Names of the aggregate functions handed to SlidingWindow.
analytic_functions_list = [
    'mean',
    'sum',
    'median',
    'min',
    'max',
    'std',
]

# Raw 'action' label -> integer class index used as the classification target.
labels_dict = {
    'wlk': 0,
    'sit': 1,
    'std': 2,
    'ups': 3,
    'jog': 4,
    'dws': 5,
}

# Helper Functions

In [4]:
def data_allocation(df, labels=None, train_frac=0.7, vald_frac=0.1, random_state=None):
    """Shuffle ``df`` and split it into training, validation and test sets.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``"action"`` column with the class label; every
        other column is used as a feature.
    labels : dict, optional
        Mapping from raw ``action`` value to integer class index. When
        omitted, the notebook-level ``labels_dict`` is used.
    train_frac, vald_frac : float, optional
        Fraction of rows assigned to the training and validation sets
        (defaults 0.7 and 0.1). The remaining rows form the test set.
    random_state : int, optional
        Seed forwarded to ``DataFrame.sample`` so the shuffle can be
        reproduced. ``None`` (default) gives an unseeded shuffle.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_vald, y_vald, X_test, y_test)``.

    Raises
    ------
    ValueError
        If the fractions are negative or sum to more than 1.
    """
    if labels is None:
        labels = labels_dict
    if train_frac < 0 or vald_frac < 0 or train_frac + vald_frac > 1:
        raise ValueError("train_frac and vald_frac must be non-negative and sum to at most 1")

    # NOTE(review): rows are shuffled individually. If consecutive rows come
    # from overlapping windows, near-identical rows can end up on both sides
    # of the split -- confirm this is intended.
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    X = shuffled.drop(columns=["action"])
    # Map known labels to their index and leave unknown values untouched.
    # Unlike Series.replace, this does not depend on the implicit object->int
    # downcasting that newer pandas versions deprecate.
    y = shuffled["action"].map(lambda label: labels.get(label, label))

    # Split boundaries; with the defaults this is 70% / 10% / 20%.
    num_rows = shuffled.shape[0]
    num_training = int(num_rows * train_frac)
    num_validation = int(num_rows * vald_frac)
    vald_end = num_training + num_validation

    X_train, y_train = X.iloc[:num_training], y.iloc[:num_training]
    X_vald, y_vald = X.iloc[num_training:vald_end], y.iloc[num_training:vald_end]
    X_test, y_test = X.iloc[vald_end:], y.iloc[vald_end:]

    return X_train, y_train, X_vald, y_vald, X_test, y_test

In [12]:
from sklearn.metrics import classification_report, confusion_matrix

def create_classes(labels_dict):
    """Return the label names of ``labels_dict`` ordered by their class index.

    ``labels_dict`` maps a label name to its integer index; the result is the
    list of names sorted ascending by that index.
    """
    # Iterating a dict yields its keys; sort them by their mapped index.
    return sorted(labels_dict, key=labels_dict.get)

def evaluate_results(y_true, y_pred, classes):
    """Print a banner line followed by scikit-learn's classification report.

    ``y_true`` and ``y_pred`` are passed straight to ``classification_report``;
    ``classes`` is used as its ``target_names``. Nothing is returned.
    """
    print("---- Printing classification report ----")
    report = classification_report(y_true, y_pred, target_names=classes)
    print(report)

# Model Evaluation

## Prepare Dataframe to Classify

In [6]:
# Build the windowed frame from the raw data. The second argument (10) appears
# to be the window size -- the "Window Size Performance" section varies it.
window = SlidingWindow(df, 10, num_experiments, num_participants, exclude, analytic_functions_list)
# The frame to classify is exposed on the SlidingWindow instance as `.df`.
sld_df = window.df
# Shuffle and split into training / validation / test sets.
X_train, y_train, X_vald, y_vald, X_test, y_test = data_allocation(sld_df)
# Class names ordered by class index, used as report row labels.
classes_names = create_classes(labels_dict)

## Random Forest Classifier

In [7]:
from sklearn.ensemble import RandomForestClassifier
# n_jobs=-1 requests all available processors; verbose=1 produces the
# [Parallel(...)] progress lines shown in the output.
# No random_state is passed, so the fitted forest can differ between runs.
rf = RandomForestClassifier(n_jobs=-1, verbose=1)
# Last expression of the cell: the fitted estimator's repr is displayed.
rf.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Done   6 out of  10 | elapsed:   32.1s remaining:   21.4s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:   56.2s finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=None, verbose=1,
            warm_start=False)

### Evaluate Results

In [9]:
# Predict on the validation split and print the per-class report.
prediction = rf.predict(X_vald)
evaluate_results(y_vald, prediction, classes_names)

---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.97      0.99      0.98     34357
        sit       1.00      1.00      1.00     33864
        std       1.00      1.00      1.00     30314
        ups       0.96      0.94      0.95     15779
        jog       0.99      0.98      0.98     13431
        dws       0.97      0.93      0.95     13181

avg / total       0.98      0.98      0.98    140926



[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    0.2s finished


In [13]:
# Predict on the held-out test split and print the per-class report.
prediction_test = rf.predict(X_test)
evaluate_results(y_test, prediction_test, classes_names)

[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    0.2s remaining:    0.2s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    0.3s finished


---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.97      0.99      0.98     68665
        sit       1.00      1.00      1.00     67768
        std       1.00      1.00      1.00     61216
        ups       0.96      0.94      0.95     31422
        jog       0.98      0.98      0.98     26620
        dws       0.97      0.93      0.95     26163

avg / total       0.98      0.98      0.98    281854



## Logistic Regression Classifier

In [16]:
from sklearn.linear_model import LogisticRegression
# Multinomial logistic regression with the lbfgs solver, capped at 300 iterations.
# NOTE(review): no feature scaling is visible before this fit -- confirm
# whether the inputs should be standardized first.
lr = LogisticRegression(multi_class='multinomial', solver='lbfgs', verbose=1, max_iter=300)
# Last expression of the cell: the fitted estimator's repr is displayed.
lr.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.4min finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=300, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=1, warm_start=False)

### Evaluate Results

In [17]:
# Predict on the validation split with the logistic-regression model and print the report.
lr_prediction = lr.predict(X_vald)
evaluate_results(y_vald, lr_prediction, classes_names)

---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.62      0.81      0.70     34360
        sit       0.99      0.98      0.98     33658
        std       0.97      0.98      0.97     30847
        ups       0.58      0.47      0.52     15720
        jog       0.85      0.83      0.84     13326
        dws       0.52      0.21      0.30     13015

avg / total       0.79      0.80      0.78    140926



## SVM Classifier

In [None]:
from sklearn.svm import SVC
# This cell has no recorded execution count or output.
# NOTE(review): SVC with default settings is expected to be very slow on a
# training set of this size -- confirm before running, or try a linear SVM
# or a subsample.
svm = SVC()
svm.fit(X_train, y_train)

## Perform the same analysis over the history data set

In [18]:
# Load the pre-built history frame.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
hist_df = pd.read_pickle("history_10_encoded.pkl")

In [19]:
# Preview the first five rows of the history frame.
hist_df.head(5)

Unnamed: 0,attitude.roll,attitude.pitch,attitude.yaw,gravity.x,gravity.y,gravity.z,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,...,prev_10_attitude.yaw,prev_10_gravity.x,prev_10_gravity.y,prev_10_gravity.z,prev_10_rotationRate.x,prev_10_rotationRate.y,prev_10_rotationRate.z,prev_10_userAcceleration.x,prev_10_userAcceleration.y,prev_10_userAcceleration.z
0,1.434186,-0.693527,0.616938,0.761832,0.639253,-0.104726,0.865711,0.179274,-0.130432,-0.0176,...,0.696372,0.741895,0.669768,-0.031672,0.316738,0.77818,1.082764,0.294894,-0.184493,0.377542
1,1.412539,-0.69193,0.59224,0.760394,0.638024,-0.121353,1.010791,-0.662185,-0.017245,-0.090762,...,0.677762,0.753099,0.657116,-0.032255,0.842032,0.424446,0.643574,0.219405,0.035846,0.114866
2,1.366432,-0.686835,0.562806,0.757165,0.634093,-0.156928,1.233366,-1.834122,0.089429,-0.022256,...,0.670951,0.759611,0.649555,-0.032707,-0.138143,-0.040741,0.343563,0.010714,0.134701,-0.167808
3,1.298239,-0.676703,0.533675,0.750862,0.626226,-0.209876,1.183222,-2.790427,0.32104,-0.005088,...,0.675735,0.760709,0.647788,-0.04114,-0.025005,-1.048717,0.03586,-0.008389,0.136788,0.094958
4,1.222763,-0.660279,0.51297,0.742468,0.613337,-0.269368,1.145214,-3.434572,0.536324,0.058954,...,0.672994,0.760062,0.64721,-0.05853,0.114253,-0.91289,0.047341,0.199441,0.353996,-0.044299


In [20]:
# Shuffle all rows and renumber the index from zero.
hist_df = hist_df.sample(frac=1).reset_index(drop=True)

# Raw 'action' label -> integer class index.
labels_dict = {'wlk': 0, 'sit': 1, "std": 2, "ups": 3, "jog": 4, "dws": 5}

# Features are every column except 'action'; the target is the mapped label.
X = hist_df.drop(columns=["action"])
y = hist_df["action"].replace(labels_dict)

In [21]:
# Positional 70% / 10% / remainder split of the already shuffled rows.
num_rows = hist_df.shape[0]
num_training = int(num_rows * 0.7)
num_validation = int(num_rows * 0.1)
vald_end = num_training + num_validation

X_train = X.iloc[:num_training]
y_train = y.iloc[:num_training]
X_vald = X.iloc[num_training:vald_end]
y_vald = y.iloc[num_training:vald_end]
X_test = X.iloc[vald_end:]
y_test = y.iloc[vald_end:]

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Same random-forest configuration as above, fitted on the history split.
# No random_state is passed, so the fitted forest can differ between runs.
hist_rf = RandomForestClassifier(n_jobs=-1, verbose=1)
hist_rf.fit(X_train, y_train)

In [None]:
# Predict on the history validation split.
prediction = hist_rf.predict(X_vald)
# Rebuild the index-ordered class names from the current labels_dict.
classes_names = create_classes(labels_dict)
evaluate_results(y_vald, prediction, classes_names)

In [None]:
# Predict on the history test split and print the per-class report.
prediction_test = hist_rf.predict(X_test)
evaluate_results(y_test, prediction_test, classes_names)

Also try a linear model here.

In [None]:
from sklearn.linear_model import LogisticRegression
# Rebinds `lr`: the logistic-regression model fitted earlier in the notebook
# is replaced by this one, trained on the history split.
lr = LogisticRegression(multi_class='multinomial', solver='lbfgs', verbose=1, max_iter=300)
lr.fit(X_train, y_train)

In [None]:
# Predict on the history validation split with the logistic-regression model.
lr_prediction = lr.predict(X_vald)
# Rebuild the index-ordered class names from the current labels_dict.
classes_names = create_classes(labels_dict)
evaluate_results(y_vald, lr_prediction, classes_names)

# Window Size Performance
### Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier
# Rebinds `rf` to a fresh, unfitted forest; the loop in the next cell refits it
# once per window size.
rf = RandomForestClassifier(n_jobs=-1, verbose=1)

In [15]:
# Window sizes to compare: 2, 4, ..., 18.
sizes = list(range(2, 20, 2))
for size in sizes:
    # Rebuild the windowed frame for this size and draw a fresh shuffled split.
    # This overwrites the notebook-level X_train / y_train / ... variables.
    window = SlidingWindow(df, size, num_experiments, num_participants, exclude, analytic_functions_list)
    sld_df = window.df
    X_train, y_train, X_vald, y_vald, X_test, y_test = data_allocation(sld_df)

    # The same estimator object is refitted on every iteration.
    rf.fit(X_train, y_train)

    prediction = rf.predict(X_vald)
    # Label each report so the outputs can be matched to their window size.
    print("==== Window size:", size, "====")
    evaluate_results(y_vald, prediction, classes_names)

[Parallel(n_jobs=-1)]: Done   6 out of  10 | elapsed:   41.2s remaining:   27.5s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.1min finished
[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    0.2s finished


---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.85      0.95      0.90     34473
        sit       1.00      1.00      1.00     33922
        std       0.99      0.99      0.99     30587
        ups       0.83      0.75      0.79     15756
        jog       0.90      0.89      0.90     13507
        dws       0.82      0.67      0.74     12969

avg / total       0.92      0.92      0.92    141214



[Parallel(n_jobs=-1)]: Done   6 out of  10 | elapsed:   38.4s remaining:   25.6s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.0min finished
[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    0.2s finished


---- Printing classification report ----
             precision    recall  f1-score   support

        wlk       0.91      0.97      0.94     34411
        sit       1.00      1.00      1.00     34130
        std       1.00      0.99      1.00     30635
        ups       0.89      0.84      0.86     15435
        jog       0.95      0.94      0.94     13471
        dws       0.89      0.79      0.83     13060

avg / total       0.95      0.95      0.95    141142



KeyboardInterrupt: 