# Modelling - Vanilla Algorithms Decision Trees
Author: P.C.O. Maseland <br>
Date: 03 december 2021 <br>

In [14]:
# !!! RUN !!!
#Import packages/libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import time

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

#Decision Tree packages
from sklearn.tree import DecisionTreeClassifier

%matplotlib inline

### Load Correct datafile

**prepared accelerometer, gyroscope and magnetometer csv**

In [2]:
# !!! RUN !!!
#local jupyter notebook path
path ="Totalset+slopes+mean+sd.csv"
df = pd.read_csv(path)

In [3]:
# !!! RUN !!!
#reorder such that labels are at the back
df = df[[c for c in df if c not in ['Subject ID','Activity']] 
       + ['Subject ID','Activity']]

**Activity coding**
- 'climbingdown' {0} 
- 'climbingup' {1} 
- 'jumping' {2} 
- 'lying' {3} 
- 'running' {4} 
- 'sitting' {5}
- 'standing' {6} 
- 'walking' {7}]

### Remove/Convert statistical NaN values
Since we calculated our rolled statistics over a nr of timesteps. The first nr of timesteps have a NaN, these are therefore useless. On the whole set we removed thereby 24336 rows, which is just a small fraction of 0.4% of the whole.

In [4]:
# !!! RUN !!!
df = df. dropna()

### Full set copy
such that the original dataset will not be adjusted

In [5]:
# !!! RUN !!!
#create copy full set
fullset = df
print(fullset.shape)

#drop subject ID
fullset_without_subject = fullset.drop(columns=['Subject ID'])

(6059664, 86)


### Make new subset
With only time-base statistical values

In [6]:
# !!! RUN !!!
#only consider mean and sd features
statistical_cols = [col for col in fullset_without_subject.columns if ('mean' in col 
                                                                      or 'sd' in col
                                                                      or 'Activity' in col)]
new_subset = fullset_without_subject[statistical_cols]
new_subset.shape

(6059664, 43)

In [13]:
# !!! RUN !!! --> REAL SPLITT DATA
X = new_subset.iloc[:,:-1]
y = new_subset['Activity']
X_sub1, X_sub2, y_sub1, y_sub2 = train_test_split(X, y, test_size=0.3, random_state=40, stratify=y)

In [14]:
print(X_sub1.shape, X_sub2.shape, y_sub1.shape, y_sub2.shape)

(4241764, 42) (1817900, 42) (4241764,) (1817900,)


### Gridsearch for Decision Tree 
on subset for computational advantage

In [18]:
#define train, val, test sets -> on smaller subset sub2
X = X_sub2
y = y_sub2
print(X.shape, y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=40, stratify=y)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1817900, 42) (1817900,)
(1636110, 42) (181790, 42) (1636110,) (181790,)


In [19]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=40, stratify=y_train)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(1472499, 42) (163611, 42) (1472499,) (163611,)


In [22]:
classifier = DecisionTreeClassifier(random_state=42)

In [21]:
parameters=[{'max_features': ['auto', 'sqrt', 'log2'],
              'ccp_alpha': [0.1, .01, .001],
              'max_depth' : [5, 6, 7, 8, 9],
              'criterion' :['gini', 'entropy']
             }] 

In [27]:
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters, cv=3, verbose=4)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 90 candidates, totalling 270 fits
[CV 1/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=auto; total time=   5.1s
[CV 2/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=auto; total time=   5.3s
[CV 3/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=auto; total time=   5.3s
[CV 1/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=sqrt; total time=   5.1s
[CV 2/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=sqrt; total time=   5.2s
[CV 3/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=sqrt; total time=   5.2s
[CV 1/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=log2; total time=   4.6s
[CV 2/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=log2; total time=   4.6s
[CV 3/3] END ccp_alpha=0.1, criterion=gini, max_depth=5, max_features=log2; total time=   4.5s
[CV 1/3] END ccp_alpha=0.1, criterion=gini, max_depth=6, max_features=auto; total t

[CV 2/3] END ccp_alpha=0.1, criterion=entropy, max_depth=9, max_features=sqrt; total time=  16.4s
[CV 3/3] END ccp_alpha=0.1, criterion=entropy, max_depth=9, max_features=sqrt; total time=  16.6s
[CV 1/3] END ccp_alpha=0.1, criterion=entropy, max_depth=9, max_features=log2; total time=  14.1s
[CV 2/3] END ccp_alpha=0.1, criterion=entropy, max_depth=9, max_features=log2; total time=  14.3s
[CV 3/3] END ccp_alpha=0.1, criterion=entropy, max_depth=9, max_features=log2; total time=  13.8s
[CV 1/3] END ccp_alpha=0.01, criterion=gini, max_depth=5, max_features=auto; total time=   5.1s
[CV 2/3] END ccp_alpha=0.01, criterion=gini, max_depth=5, max_features=auto; total time=   5.1s
[CV 3/3] END ccp_alpha=0.01, criterion=gini, max_depth=5, max_features=auto; total time=   5.1s
[CV 1/3] END ccp_alpha=0.01, criterion=gini, max_depth=5, max_features=sqrt; total time=   5.1s
[CV 2/3] END ccp_alpha=0.01, criterion=gini, max_depth=5, max_features=sqrt; total time=   5.2s
[CV 3/3] END ccp_alpha=0.01, c

[CV 3/3] END ccp_alpha=0.01, criterion=entropy, max_depth=8, max_features=log2; total time=  12.7s
[CV 1/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=auto; total time=  16.5s
[CV 2/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=auto; total time=  16.3s
[CV 3/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=auto; total time=  16.5s
[CV 1/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=sqrt; total time=  16.6s
[CV 2/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=sqrt; total time=  16.2s
[CV 3/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=sqrt; total time=  16.6s
[CV 1/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=log2; total time=  14.2s
[CV 2/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=log2; total time=  14.3s
[CV 3/3] END ccp_alpha=0.01, criterion=entropy, max_depth=9, max_features=log2; total time=  14.1s
[CV 1/3] E

[CV 3/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=auto; total time=  15.2s
[CV 1/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=sqrt; total time=  15.6s
[CV 2/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=sqrt; total time=  15.4s
[CV 3/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=sqrt; total time=  14.8s
[CV 1/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=log2; total time=  12.8s
[CV 2/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=log2; total time=  12.7s
[CV 3/3] END ccp_alpha=0.001, criterion=entropy, max_depth=8, max_features=log2; total time=  13.2s
[CV 1/3] END ccp_alpha=0.001, criterion=entropy, max_depth=9, max_features=auto; total time=  16.6s
[CV 2/3] END ccp_alpha=0.001, criterion=entropy, max_depth=9, max_features=auto; total time=  16.6s
[CV 3/3] END ccp_alpha=0.001, criterion=entropy, max_depth=9, max_features=auto; total time=  16.6s


GridSearchCV(cv=3, estimator=DecisionTreeClassifier(random_state=42),
             param_grid=[{'ccp_alpha': [0.1, 0.01, 0.001],
                          'criterion': ['gini', 'entropy'],
                          'max_depth': [5, 6, 7, 8, 9],
                          'max_features': ['auto', 'sqrt', 'log2']}],
             verbose=4)

In [40]:
best_grid = grid_search.best_estimator_
best_grid

DecisionTreeClassifier(ccp_alpha=0.001, criterion='entropy', max_depth=9,
                       max_features='auto', random_state=42)

In [46]:
#checking how this best_grid predicts on sub2 testset
y_pred = best_grid.predict(X_test)

In [43]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 3911   535   140     8  1660    42    91 11690]
 [ 1064  4802    10   140   270   185   547 13659]
 [  341    31  1033     0  1437     1     3   262]
 [    6    62     0 25769    51   683   898   140]
 [  778   153   366    49 23926   380  2869  1150]
 [    7   138     0   419    40 22813  4062    70]
 [    5   298     0   466   223  1731 24374   184]
 [ 1903   269    33     3   463    12   133 25002]]
              precision    recall  f1-score   support

           0       0.49      0.22      0.30     18077
           1       0.76      0.23      0.36     20677
           2       0.65      0.33      0.44      3108
           3       0.96      0.93      0.95     27609
           4       0.85      0.81      0.83     29671
           5       0.88      0.83      0.85     27549
           6       0.74      0.89      0.81     27281
           7       0.48      0.90      0.63     27818

    accuracy                           0.72    181790
   macro avg       0.73      0.64      0.65    18

# Model set up - Decision tree modelling

#### best grid

In [29]:
#!!! RUN !!!
#best model
best_grid = DecisionTreeClassifier(ccp_alpha=0.001, 
                                   criterion='entropy', 
                                   max_depth=9,
                                   max_features='auto',
                                   class_weight='balanced',
                                   random_state=42)

### Automated Functions

In [7]:
# !!! RUN !!!
#functions that generates a subset given the filter input
def dataset_prep(dataset, filters):
    filterscolls = [col for col in dataset.columns if (filters in col or 'Activity' in col)]
    outputname = dataset[filterscolls]
    
    return outputname

In [30]:
# !!! RUN !!!
#function decisiontree model, prints confusionmatrix
def decisiontree(gridmodel, dataset):
    start = time.time()
    X = dataset.iloc[:,:-1]
    y = dataset['Activity']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40, stratify=y)
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    
    #model
    model = gridmodel
    
    #fit model
    model.fit(X_train, y_train)
    
    y_pred_test = model.predict(X_test)
    #y_pred_rnd = np.around(y_pred_test, 0)
    
    # Calculate the absolute errors
    errors = abs(y_pred_test - y_test)

    # Print out the mean absolute error (MAE)
    print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')
    
    #Evaluating Matrix
    print(confusion_matrix(y_test, y_pred_test))
    print(classification_report(y_test, y_pred_test, zero_division=0))
    end = time.time()
    print ("Time elapsed:", end - start)
    
    return model, X_train
    

In [9]:
#function to derive importance of features ()
def decisiontree_importance(model, X_train):
    # get importance
    importance = model.coef_

    from matplotlib import pyplot
    avg_importance = sum(importance)/len(importance)
    pyplot.bar([x for x in range(len(avg_importance))], avg_importance)
    pyplot.show()

    print(X_train.columns)

## Single Sensor position - Subq 1
Slope score low on feature importance score

Positions: <br>
Head <br>
Upperarm <br>
Forearm <br>
Chest <br>
Waist <br>
Thigh <br>
Shin <br>

Different Sensors: <br>
acc (mean, sd) <br>
gyr (mean, sd) <br>
mag (mean, sd) <br>
acc + mag (mean, sd) <br>
acc + gyr (mean, sd) <br>
gyr + mag (mean, sd) <br>
acc + gyr + mag (mean, sd) <br>

#### Single sensor HEAD

In [37]:
#generate dataset samples
head_acc_subset = dataset_prep(new_subset, 'head_acc')
head_gyr_subset = dataset_prep(new_subset, 'head_gyr')
head_mag_subset = dataset_prep(new_subset, 'head_mag')
filterscolls1 = [col for col in new_subset.columns if ('head_acc' in col 
                                                   or 'head_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('head_acc' in col 
                                                   or 'head_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('head_mag' in col 
                                                   or 'head_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('head_mag' in col
                                                      or 'head_gyr' in col
                                                      or 'head_acc' in col
                                                      or 'Activity' in col)]

head_accgyr_subset = new_subset[filterscolls1] 
head_accmag_subset = new_subset[filterscolls2]  
head_maggyr_subset = new_subset[filterscolls3]
head_accmaggyr_subset = new_subset[filterscolls4]

In [52]:
#Single Head - acc
head_acc, head_acc_X = decisiontree(best_grid, head_acc_subset)

#Single Head - gyr
head_gyr, head_gyr_X = decisiontree(best_grid, head_gyr_subset)

#Single Head - mag
head_mag, head_mag_X = decisiontree(best_grid, head_mag_subset)

#Single Head - acc + gyr
head_accgyr, head_accgyr_X = decisiontree(best_grid, head_accgyr_subset)

#Single Head - acc + mag
head_accmag, head_accmag_X = decisiontree(best_grid, head_accmag_subset)

#Single Head - gyr + mag
head_maggyr, head_maggyr_X = decisiontree(best_grid, head_maggyr_subset)

#Single Head - acc + gyr + mag
head_accmaggyr, head_accmaggyr_X = decisiontree(best_grid, head_accmaggyr_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.77 degrees.
[[ 13896   4006    107     81  15694    581   3219  82932]
 [  7439   9253     88   2128   3048   3965   9943 101982]
 [    61      1   2172      0  18169      2     25    287]
 [    37    309      0 138038     36  25455  18353   1833]
 [  1921    925   1651   3834 149290  11815  13629  14741]
 [     4   1573      0  58856      6  73322  45544   4357]
 [     0   2726      0  39145      3  73126  60675   6195]
 [  9050   5397    329     95  11065    782   5185 153552]]
              precision    recall  f1-score   support

           0       0.43      0.12      0.18    120516
           1       0.38      0.07      0.11    137846
           2       0.50      0.10      0.17     20717
           3       0.57      0.75      0.65    184061
           4       0.76      0.75      0.76    197806
           5       0.39      0.40      0.39    183662
           6       0.39      0.33      0.36    181870
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00    120516
           1       0.30      0.22      0.25    137846
           2       0.00      0.00      0.00     20717
           3       0.69      0.72      0.71    184061
           4       0.48      0.63      0.54    197806
           5       0.46      0.37      0.41    183662
           6       0.34      0.31      0.33    181870
           7       0.33      0.61      0.43    185455

    accuracy                           0.43   1211933
   macro avg       0.32      0.36      0.33   1211933
weighted avg       0.39      0.43      0.40   1211933

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.84 degrees.
[[  7225   7899      0   8471  52412   7910   8161  28438]
 [  3110  19661      0  12613  62734   7041   5260  27427]
 [   569    205      0   3148   9143   1431   3052   3169]
 [  2844   3763      0 135647  20897   5411   6732   8767]
 [   879   1260      0  15507 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.30      0.06      0.10    120516
           1       0.45      0.14      0.22    137846
           2       0.00      0.00      0.00     20717
           3       0.50      0.74      0.59    184061
           4       0.26      0.58      0.36    197806
           5       0.49      0.29      0.37    183662
           6       0.36      0.17      0.23    181870
           7       0.26      0.34      0.30    185455

    accuracy                           0.35   1211933
   macro avg       0.33      0.29      0.27   1211933
weighted avg       0.37      0.35      0.32   1211933

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.67 degrees.
[[ 21107  18182      0     26  14247    584   2917  63453]
 [ 11995  33831      0    874   1986   3186   9627  76347]
 [    99     11    242      0  19472     23     10    860]
 [   144   2011      0 134904     35  12040  33844   1083]
 [  7877   5419     13   1351 

#### Single sensor Forearm

In [61]:
#create subsets for forearm
forearm_acc_subset = dataset_prep(new_subset, 'forearm_acc')
forearm_gyr_subset = dataset_prep(new_subset, 'forearm_gyr')
forearm_mag_subset = dataset_prep(new_subset, 'forearm_mag')
filterscolls1 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                   or 'forearm_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                   or 'forearm_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_mag' in col 
                                                   or 'forearm_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('forearm_mag' in col
                                                      or 'forearm_gyr' in col
                                                      or 'forearm_acc' in col
                                                      or 'Activity' in col)]
forearm_accgyr_subset = new_subset[filterscolls1] 
forearm_accmag_subset = new_subset[filterscolls2]
forearm_maggyr_subset = new_subset[filterscolls3]
forearm_accmaggyr_subset = new_subset[filterscolls4] 

In [62]:
#run all models for single sensor
forearm_acc, forearm_acc_X = decisiontree(best_grid, forearm_acc_subset)
forearm_gyr, forearm_gyr_X = decisiontree(best_grid, forearm_gyr_subset)
forearm_mag, forearm_mag_X = decisiontree(best_grid, forearm_mag_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.89 degrees.
[[ 24784   3324   1011    320  19514    140   1934  69489]
 [ 12491   9530    118   3241   4775   2340   7544  97807]
 [   994    313   3891      0  14953      0      1    565]
 [   183   1298      0 124635    133  16278  38722   2812]
 [ 16861   3401   1751   5247 116874   3429  15899  34344]
 [   520   4496     14  54823    619  73476  41017   8697]
 [   570   7370     39  49851    611  22938  90123  10368]
 [ 17980   8113    358    167   7768    248   3673 147148]]
              precision    recall  f1-score   support

           0       0.33      0.21      0.25    120516
           1       0.25      0.07      0.11    137846
           2       0.54      0.19      0.28     20717
           3       0.52      0.68      0.59    184061
           4       0.71      0.59      0.64    197806
           5       0.62      0.40      0.49    183662
           6       0.45      0.50      0.47    181870
           

In [63]:
#run all models for combination sensor
forearm_accgyr, forearm_accgyr_X = decisiontree(best_grid, forearm_accgyr_subset)
forearm_accmag, forearm_accmag_X = decisiontree(best_grid, forearm_accmag_subset)
forearm_maggyr, forearm_maggyr_X = decisiontree(best_grid, forearm_maggyr_subset)
forearm_accmaggyr, forearm_accmaggyr_X = decisiontree(best_grid, forearm_accmaggyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.71 degrees.
[[ 19479   5377   1201    178  24879     90   1711  67601]
 [  7018  15443    108   1416   8259   2448   7952  95202]
 [  1287      1   5625      0  13037      0    159    608]
 [    64    828      0 136847    145  29410  15520   1247]
 [ 12152   2972   3025   2133 127022   3980  21664  24858]
 [   134   3730      2  47415    618  83475  45170   3118]
 [   188   6082     14  25823    714  30395 114482   4172]
 [  9341  13430    204     84  11083     46   2934 148333]]
              precision    recall  f1-score   support

           0       0.39      0.16      0.23    120516
           1       0.32      0.11      0.17    137846
           2       0.55      0.27      0.36     20717
           3       0.64      0.74      0.69    184061
           4       0.68      0.64      0.66    197806
           5       0.56      0.45      0.50    183662
           6       0.55      0.63      0.58    181870
           

#### Single sensor Waist

In [64]:
#create subsets for Waist
Waist_acc_subset = dataset_prep(new_subset, 'waist_acc')
Waist_gyr_subset = dataset_prep(new_subset, 'waist_gyr')
Waist_mag_subset = dataset_prep(new_subset, 'waist_mag')
filterscolls1 = [col for col in new_subset.columns if ('waist_acc' in col 
                                                   or 'waist_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('waist_acc' in col 
                                                   or 'waist_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('waist_mag' in col 
                                                   or 'waist_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('waist_mag' in col
                                                      or 'waist_gyr' in col
                                                      or 'waist_acc' in col
                                                      or 'Activity' in col)]
Waist_accgyr_subset = new_subset[filterscolls1] 
Waist_accmag_subset = new_subset[filterscolls2]
Waist_maggyr_subset = new_subset[filterscolls3]
Waist_accmaggyr_subset = new_subset[filterscolls4] 

In [65]:
#run all models for single sensor
Waist_acc, Waist_acc_X = decisiontree(best_grid, Waist_acc_subset)
Waist_gyr, Waist_gyr_X = decisiontree(best_grid, Waist_gyr_subset)
Waist_mag, Waist_mag_X = decisiontree(best_grid, Waist_mag_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.57 degrees.
[[     0  29899      0    210  32254     83   1916  56154]
 [     0  52980      0   2320   8009     95   8183  66259]
 [     0    271    363      2  18046      0    265   1770]
 [     0   2011      0 145347     16  21543  14386    758]
 [     0   4425     53   1960 131800   2293  25075  32200]
 [     0   1044      0  28190      6  66068  87885    469]
 [     0   4352      0   9468     15  19743 146984   1308]
 [     0  45556      0    236  21872     28   3706 114057]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00    120516
           1       0.38      0.38      0.38    137846
           2       0.87      0.02      0.03     20717
           3       0.77      0.79      0.78    184061
           4       0.62      0.67      0.64    197806
           5       0.60      0.36      0.45    183662
           6       0.51      0.81      0.63    181870
           

In [66]:
#run all models for combination sensor
Waist_accgyr, Waist_accgyr_X = decisiontree(best_grid, Waist_accgyr_subset)
Waist_accmag, Waist_accmag_X = decisiontree(best_grid, Waist_accmag_subset)
Waist_maggyr, Waist_maggyr_X = decisiontree(best_grid, Waist_maggyr_subset)
Waist_accmaggyr, Waist_accmaggyr_X = decisiontree(best_grid, Waist_accmaggyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.5 degrees.
[[  2240  15694    269     69  30329     44    806  71065]
 [   646  34624    112   1131  10648    197   3795  86693]
 [   797    407    492      0  14165      0     21   4835]
 [     0   2126      0 155618    121   8806  16033   1357]
 [  1636   3442    202   2300 153610   1298  24671  10647]
 [     0   1526      0  33886     32  67468  79827    923]
 [     2   3356      0  10833     13  16346 147310   4010]
 [  1395  16403    132     50  12242     29   2564 152640]]
              precision    recall  f1-score   support

           0       0.33      0.02      0.04    120516
           1       0.45      0.25      0.32    137846
           2       0.41      0.02      0.04     20717
           3       0.76      0.85      0.80    184061
           4       0.69      0.78      0.73    197806
           5       0.72      0.37      0.49    183662
           6       0.54      0.81      0.64    181870
           7

#### Single sensor Thigh

In [67]:
#create subsets for Thigh
Thigh_acc_subset = dataset_prep(new_subset, 'thigh_acc')
Thigh_gyr_subset = dataset_prep(new_subset, 'thigh_gyr')
Thigh_mag_subset = dataset_prep(new_subset, 'thigh_mag')
filterscolls1 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                   or 'thigh_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                   or 'thigh_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('thigh_mag' in col 
                                                   or 'thigh_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('thigh_mag' in col
                                                      or 'thigh_gyr' in col
                                                      or 'thigh_acc' in col
                                                      or 'Activity' in col)]
Thigh_accgyr_subset = new_subset[filterscolls1] 
Thigh_accmag_subset = new_subset[filterscolls2]
Thigh_maggyr_subset = new_subset[filterscolls3]
Thigh_accmaggyr_subset = new_subset[filterscolls4] 

In [68]:
# run all models for single sensor
Thigh_acc, Thigh_acc_X = decisiontree(best_grid, Thigh_acc_subset)
Thigh_gyr, Thigh_gyr_X = decisiontree(best_grid, Thigh_gyr_subset)
Thigh_mag, Thigh_mag_X = decisiontree(best_grid, Thigh_mag_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.79 degrees.
[[  5646  14326    167    129  40516    174   1293  58265]
 [  3288  26203     71   4072  19935    711   8636  74930]
 [   164    331    827      3  13733     66     24   5569]
 [    10    818      0 114051     52  28423  39907    800]
 [  4477   2999    153   1922 134564   1216  26045  26430]
 [     7   1611      0  52571     49  88818  39590   1016]
 [     8   2882      0  36717     69   1725 138835   1634]
 [  5361  19988    355     84  54246    358   2084 102979]]
              precision    recall  f1-score   support

           0       0.30      0.05      0.08    120516
           1       0.38      0.19      0.25    137846
           2       0.53      0.04      0.07     20717
           3       0.54      0.62      0.58    184061
           4       0.51      0.68      0.58    197806
           5       0.73      0.48      0.58    183662
           6       0.54      0.76      0.63    181870
           

In [69]:
#run all models for combination sensor
Thigh_accgyr, Thigh_accgyr_X = decisiontree(best_grid, Thigh_accgyr_subset)
Thigh_accmag, Thigh_accmag_X = decisiontree(best_grid, Thigh_accmag_subset)
Thigh_maggyr, Thigh_maggyr_X = decisiontree(best_grid, Thigh_maggyr_subset)
Thigh_accmaggyr, Thigh_accmaggyr_X = decisiontree(best_grid, Thigh_accmaggyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.53 degrees.
[[  4490  27017   1085     95  29236    176    849  57568]
 [  3225  54972    612   3679  10870    964   5228  58296]
 [    72    336   4457      3   8037     59     48   7705]
 [    33   1290      1 143425     33  22150  16916    213]
 [   218   3850    519    217 131553   3680  25164  32605]
 [    30   1685      0  34892     12 120547  26297    199]
 [    65   4771      1   3671     38  16031 156613    680]
 [  2535  43833    861      8  37272     97   1001  99848]]
              precision    recall  f1-score   support

           0       0.42      0.04      0.07    120516
           1       0.40      0.40      0.40    137846
           2       0.59      0.22      0.32     20717
           3       0.77      0.78      0.78    184061
           4       0.61      0.67      0.63    197806
           5       0.74      0.66      0.69    183662
           6       0.67      0.86      0.76    181870
           

#### Single sensor Upperarm

In [70]:
#create subsets for upperarm
upperarm_acc_subset = dataset_prep(new_subset, 'upperarm_acc')
upperarm_gyr_subset = dataset_prep(new_subset, 'upperarm_gyr')
upperarm_mag_subset = dataset_prep(new_subset, 'upperarm_mag')
filterscolls1 = [col for col in new_subset.columns if ('upperarm_acc' in col 
                                                   or 'upperarm_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('upperarm_acc' in col 
                                                   or 'upperarm_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('upperarm_mag' in col 
                                                   or 'upperarm_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('upperarm_mag' in col
                                                      or 'upperarm_gyr' in col
                                                      or 'upperarm_acc' in col
                                                      or 'Activity' in col)]
upperarm_accgyr_subset = new_subset[filterscolls1] 
upperarm_accmag_subset = new_subset[filterscolls2]
upperarm_maggyr_subset = new_subset[filterscolls3]
upperarm_accmaggyr_subset = new_subset[filterscolls4] 

In [71]:
#run all models for single sensor
upperarm_acc, upperarm_acc_X = decisiontree(best_grid, upperarm_acc_subset)
upperarm_gyr, upperarm_gyr_X = decisiontree(best_grid, upperarm_gyr_subset)
upperarm_mag, upperarm_mag_X = decisiontree(best_grid, upperarm_mag_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.63 degrees.
[[ 23730   4247     62    238  25024    233   1609  65373]
 [ 13594  13656     27   1826   6228   2808   8713  90994]
 [   206    222   3855      0  14995      0     49   1390]
 [    51   1233      0 168296    100   7983   4036   2362]
 [ 10004   3796    288   1313 126150   2098  23404  30753]
 [    56   6569      0  24548    106  37360 110834   4189]
 [    42   7151      0   1341     63   7390 161072   4811]
 [ 18878  11473     91    502  13915    441   2919 137236]]
              precision    recall  f1-score   support

           0       0.36      0.20      0.25    120516
           1       0.28      0.10      0.15    137846
           2       0.89      0.19      0.31     20717
           3       0.85      0.91      0.88    184061
           4       0.68      0.64      0.66    197806
           5       0.64      0.20      0.31    183662
           6       0.52      0.89      0.65    181870
           

In [72]:
#run all models for combination sensor
upperarm_accgyr, upperarm_accgyr_X = decisiontree(best_grid, upperarm_accgyr_subset)
upperarm_accmag, upperarm_accmag_X = decisiontree(best_grid, upperarm_accmag_subset)
upperarm_maggyr, upperarm_maggyr_X = decisiontree(best_grid, upperarm_maggyr_subset)
upperarm_accmaggyr, upperarm_accmaggyr_X = decisiontree(best_grid, upperarm_accmaggyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.48 degrees.
[[ 10818   7992     47    176  32938    182   1952  66411]
 [  4797  19480      5   1101  13701   3318   9575  85869]
 [  2353    430   1527      0  15532      0    105    770]
 [    90    795      0 170094    245  10277   1424   1136]
 [  3446   2498    139   1362 148200   5061  21722  15378]
 [    89   1937      0  16284    315  85416  75696   3925]
 [    75   3106      0   1661    353  25834 145774   5067]
 [  4096  14527     39    231  22436    278   3770 140078]]
              precision    recall  f1-score   support

           0       0.42      0.09      0.15    120516
           1       0.38      0.14      0.21    137846
           2       0.87      0.07      0.14     20717
           3       0.89      0.92      0.91    184061
           4       0.63      0.75      0.69    197806
           5       0.66      0.47      0.54    183662
           6       0.56      0.80      0.66    181870
           

#### Single sensor Chest

In [73]:
#create subsets for Chest
Chest_acc_subset = dataset_prep(new_subset, 'chest_acc')
Chest_gyr_subset = dataset_prep(new_subset, 'chest_gyr')
Chest_mag_subset = dataset_prep(new_subset, 'chest_mag')
filterscolls1 = [col for col in new_subset.columns if ('chest_acc' in col 
                                                   or 'chest_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('chest_acc' in col 
                                                   or 'chest_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('chest_mag' in col 
                                                   or 'chest_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('chest_mag' in col
                                                      or 'chest_gyr' in col
                                                      or 'chest_acc' in col
                                                      or 'Activity' in col)]
Chest_accgyr_subset = new_subset[filterscolls1] 
Chest_accmag_subset = new_subset[filterscolls2]
Chest_maggyr_subset = new_subset[filterscolls3]
Chest_accmaggyr_subset = new_subset[filterscolls4] 

In [74]:
#run all models for single sensor
Chest_acc, Chest_acc_X = decisiontree(best_grid, Chest_acc_subset)
Chest_gyr, Chest_gyr_X = decisiontree(best_grid, Chest_gyr_subset)
Chest_mag, Chest_mag_X = decisiontree(best_grid, Chest_mag_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.58 degrees.
[[ 21394  14640      0    269  24340    199   1198  58476]
 [ 17149  29954      0   4235   7470   2748   3640  72650]
 [   173    389      0      2  19552      1      7    593]
 [    36   1302      0 161360     31  15476   4969    887]
 [  8033   3783      0   2489 139422   6629  16907  20543]
 [    15   2825      0  23833      8  96862  58462   1657]
 [    12   5767      0  21220     10  65890  85785   3186]
 [ 19800  25208      0    510  17925    247   2120 119645]]
              precision    recall  f1-score   support

           0       0.32      0.18      0.23    120516
           1       0.36      0.22      0.27    137846
           2       0.00      0.00      0.00     20717
           3       0.75      0.88      0.81    184061
           4       0.67      0.70      0.69    197806
           5       0.52      0.53      0.52    183662
           6       0.50      0.47      0.48    181870
           

In [75]:
### run all models for combination sensor
Chest_accgyr, Chest_accgyr_X = decisiontree(best_grid, Chest_accgyr_subset)
Chest_accmag, Chest_accmag_X = decisiontree(best_grid, Chest_accmag_subset)
Chest_maggyr, Chest_maggyr_X = decisiontree(best_grid, Chest_maggyr_subset)
Chest_accmaggyr, Chest_accmaggyr_X = decisiontree(best_grid, Chest_accmaggyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.32 degrees.
[[ 14518  20774   1880     27  14140    121   1848  67208]
 [  3604  64702    233     78   7338    221   3896  57774]
 [  1405    871   5558      1  10667      0    229   1986]
 [     4    703      0 156051    391  15433  10781    698]
 [  2303   6922   1595   1256 160574   4197  16542   4417]
 [     3   1789      0   6330    282  91411  82022   1825]
 [    10   5579      0   5363    252  40310 126768   3588]
 [  9847  26383   1624     52   7829    171   3988 135561]]
              precision    recall  f1-score   support

           0       0.46      0.12      0.19    120516
           1       0.51      0.47      0.49    137846
           2       0.51      0.27      0.35     20717
           3       0.92      0.85      0.88    184061
           4       0.80      0.81      0.80    197806
           5       0.60      0.50      0.54    183662
           6       0.52      0.70      0.59    181870
           

#### Single sensor Shin

In [76]:
#create subsets for Shin
Shin_acc_subset = dataset_prep(new_subset, 'shin_acc')
Shin_gyr_subset = dataset_prep(new_subset, 'shin_gyr')
Shin_mag_subset = dataset_prep(new_subset, 'shin_mag')
filterscolls1 = [col for col in new_subset.columns if ('shin_acc' in col 
                                                   or 'shin_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('shin_acc' in col 
                                                   or 'shin_mag' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('shin_mag' in col 
                                                   or 'shin_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('shin_mag' in col
                                                      or 'shin_gyr' in col
                                                      or 'shin_acc' in col
                                                      or 'Activity' in col)]
Shin_accgyr_subset = new_subset[filterscolls1] 
Shin_accmag_subset = new_subset[filterscolls2]
Shin_maggyr_subset = new_subset[filterscolls3]
Shin_accmaggyr_subset = new_subset[filterscolls4] 

In [77]:
#run all models for single sensor
Shin_acc, Shin_acc_X = decisiontree(best_grid, Shin_acc_subset)
Shin_gyr, Shin_gyr_X = decisiontree(best_grid, Shin_gyr_subset)
Shin_mag, Shin_mag_X = decisiontree(best_grid, Shin_mag_subset)

(4847731, 2)
(4847731,)
(1211933, 2)
(1211933,)
Mean Absolute Error: 1.69 degrees.
[[ 17580   3537    266    374  47877    227   1706  48949]
 [ 10371  12783    388   3182  35558   1521  11439  62604]
 [  1219   1474    830      5  10363      0     16   6810]
 [   863    908      1 134068    133  29762  16337   1989]
 [  8945   2112    808   3755 127391   9215  16219  29361]
 [  1679   1884      0   4349    346  98542  68020   8842]
 [   597    933      0   3551    120  72198 100487   3984]
 [ 13633   7561    156    442  73051    267   5686  84659]]
              precision    recall  f1-score   support

           0       0.32      0.15      0.20    120516
           1       0.41      0.09      0.15    137846
           2       0.34      0.04      0.07     20717
           3       0.90      0.73      0.80    184061
           4       0.43      0.64      0.52    197806
           5       0.47      0.54      0.50    183662
           6       0.46      0.55      0.50    181870
           

In [78]:
#run all models for combination sensor
Shin_accgyr, Shin_accgyr_X = decisiontree(best_grid, Shin_accgyr_subset)
Shin_accmag, Shin_accmag_X = decisiontree(best_grid, Shin_accmag_subset)
Shin_maggyr, Shin_maggyr_X = decisiontree(best_grid, Shin_maggyr_subset)
Shin_accmaggyr, Shin_accmaggyr_X = decisiontree(best_grid, Shin_accmaggyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.51 degrees.
[[ 18099  14151    214     21  20117    485    309  67120]
 [  9464  47778    441   1967   8369   3479   2910  63438]
 [  1920   4716    503      2   3743     11      9   9813]
 [   311   2109      1 137716     16  37752   5355    801]
 [  3837  10483     27   3301 114584  11972  13221  40381]
 [   571   6710      7   5523     19 129471  38600   2761]
 [   355   5555      0   4860     24  61730 107768   1578]
 [ 10481  12893     52     41  31036    291    199 130462]]
              precision    recall  f1-score   support

           0       0.40      0.15      0.22    120516
           1       0.46      0.35      0.39    137846
           2       0.40      0.02      0.05     20717
           3       0.90      0.75      0.82    184061
           4       0.64      0.58      0.61    197806
           5       0.53      0.70      0.60    183662
           6       0.64      0.59      0.62    181870
           

## Combination (two sensor positions) - Subq 2
Logistic combination of sensor-positions
(head, waist, shin (scheenbeen), upperarm, forearm (onderarm), chest, thigh (pocket)) <br>

Head = smartglasses <br>
forearm = smartwatch <br>
thigh = pocket smartwatch <br>

Combinations: <br>
Head - Forearm <br>
Head - Thigh <br>
Forearm - Thigh <br>

Different Sensors: <br>
acc (mean, sd) <br>
gyr (mean, sd) <br>
mag (mean, sd) <br>
acc + mag (mean, sd) <br>
acc + gyr (mean, sd) <br>
gyr + mag (mean, sd) <br>
acc + gyr + mag (mean, sd) <br>

### Each combo - only acc

In [79]:
#create subsets for first combinations with only acc
filterscolls1 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                   or 'head_acc' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                   or 'head_acc' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                   or 'thigh_acc' in col 
                                                   or 'Activity' in col)]
headforearm_acc_subset = new_subset[filterscolls1] 
headthigh_acc_subset = new_subset[filterscolls2]  
thighforearm_acc_subset = new_subset[filterscolls3] 

In [80]:
#run all models for combination sensor
headforearm_acc, headforearm_acc_X = decisiontree(best_grid, headforearm_acc_subset)
headthigh_acc, headthigh_acc_X = decisiontree(best_grid, headthigh_acc_subset)
thighforearm_acc, thighforearm_acc_X = decisiontree(best_grid, thighforearm_acc_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.68 degrees.
[[ 25401   5150    167     58  12463    378   1655  75244]
 [  9859  12553    150   1207   1426   4200   7017 101434]
 [   548    359   2497      0  17061      2     13    237]
 [   133    515      7 122701     13  24158  35348   1186]
 [  7151   1570    609   1994 147647   6343  20763  11729]
 [   383   2867     14  24029     20  98796  55645   1908]
 [   343   3689     35  25877     67  40916 107482   3461]
 [ 13314  10145    151     49   7122    387   1998 152289]]
              precision    recall  f1-score   support

           0       0.44      0.21      0.29    120516
           1       0.34      0.09      0.14    137846
           2       0.69      0.12      0.21     20717
           3       0.70      0.67      0.68    184061
           4       0.79      0.75      0.77    197806
           5       0.56      0.54      0.55    183662
           6       0.47      0.59      0.52    181870
           

### Each combo - only gyr

In [81]:
#create subsets for first combinations with only gyr
filterscolls1 = [col for col in new_subset.columns if ('thigh_gyr' in col 
                                                   or 'head_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('forearm_gyr' in col 
                                                   or 'thigh_gyr' in col 
                                                   or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_gyr' in col 
                                                   or 'head_gyr' in col 
                                                   or 'Activity' in col)]
 
headthigh_gyr_subset = new_subset[filterscolls1]  
thighforearm_gyr_subset = new_subset[filterscolls2] 
headforearm_gyr_subset = new_subset[filterscolls3] 

In [82]:
#run all models for combination sensor
headthigh_gyr, headthigh_gyr_X = decisiontree(best_grid, headthigh_gyr_subset)
thighforearm_gyr, thighforearm_gyr_X = decisiontree(best_grid, thighforearm_gyr_subset)
headforearm__gyr, headforearm__gyr_X = decisiontree(best_grid, headforearm_gyr_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.5 degrees.
[[  9519  14628      0     26  34711    221   2115  59296]
 [  7389  26827      0   1550  20902   4163   7577  69438]
 [  3926    617      0      2   9873     68    181   6050]
 [   220    471      0 153545    153  13503  15746    423]
 [  2204   4116      0   1674 140255   4526  23010  22021]
 [   113    423      0  24324     45 113159  44983    615]
 [   642   2193      0  17831    128  30008 127485   3583]
 [  5750  13421      0     39  31526     68   2493 132158]]
              precision    recall  f1-score   support

           0       0.32      0.08      0.13    120516
           1       0.43      0.19      0.27    137846
           2       0.00      0.00      0.00     20717
           3       0.77      0.83      0.80    184061
           4       0.59      0.71      0.64    197806
           5       0.68      0.62      0.65    183662
           6       0.57      0.70      0.63    181870
           7

### Each combo - only mag

In [83]:
#create subsets for first combinations with only mag
filterscolls2 = [col for col in new_subset.columns if ('thigh_mag' in col 
                                                   or 'head_mag' in col 
                                                   or 'Activity' in col)]
filterscolls4 = [col for col in new_subset.columns if ('forearm_mag' in col 
                                                   or 'thigh_mag' in col 
                                                   or 'Activity' in col)]
filterscolls6 = [col for col in new_subset.columns if ('forearm_mag' in col 
                                                   or 'head_mag' in col 
                                                   or 'Activity' in col)]

headthigh_mag_subset = new_subset[filterscolls2] 
thighforearm_mag_subset = new_subset[filterscolls4] 
headforearm_mag_subset = new_subset[filterscolls6] 

In [84]:
#run all models for combination sensor
headthigh_mag, headthigh_mag_X = decisiontree(best_grid, headthigh_mag_subset)
thighforearm_mag, thighforearm_mag_X = decisiontree(best_grid, thighforearm_mag_subset)
headforearm__mag, headforearm__mag_X = decisiontree(best_grid, headforearm_mag_subset)

(4847731, 4)
(4847731,)
(1211933, 4)
(1211933,)
Mean Absolute Error: 1.31 degrees.
[[ 20845  15087      0   1076  59675   1956   3102  18775]
 [  5906  65677     49   1808  34213   1984   2977  25232]
 [   367   1272    937    393  13242    824   1317   2365]
 [   987   1000    121 147867   2889  13593  16727    877]
 [  3384   1712      3   5594 138375   9143  13088  26507]
 [    53   2575      0  17776   4561 123604  33697   1396]
 [  1676   1553      0  16718  20334  28077 111326   2186]
 [ 12315  10943     63   1893  69726   2926   3837  83752]]
              precision    recall  f1-score   support

           0       0.46      0.17      0.25    120516
           1       0.66      0.48      0.55    137846
           2       0.80      0.05      0.09     20717
           3       0.77      0.80      0.78    184061
           4       0.40      0.70      0.51    197806
           5       0.68      0.67      0.68    183662
           6       0.60      0.61      0.61    181870
           

### Each combo - only acc + gyr

In [85]:
#create subsets for first combinations with only acc+gyr
filterscolls1 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                       or 'thigh_gyr' in col
                                                       or 'head_acc' in col
                                                       or 'head_gyr' in col
                                                       or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                       or 'thigh_gyr' in col
                                                       or 'thigh_acc' in col
                                                       or 'forearm_gyr' in col 
                                                       or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                       or 'head_acc' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'Activity' in col)]
 
headthigh_accgyr_subset = new_subset[filterscolls1] 
thighforearm_accgyr_subset = new_subset[filterscolls2] 
headforearm_accgyr_subset = new_subset[filterscolls3] 

In [86]:
#run all models for combination sensor
headthigh_accgyr, headthigh_accgyr_X = decisiontree(best_grid, headthigh_accgyr_subset)
thighforearm_accgyr, thighforearm_accgyr_X = decisiontree(best_grid, thighforearm_accgyr_subset)
headforearm__accgyr, headforearm__accgyr_X = decisiontree(best_grid, headforearm_accgyr_subset)

(4847731, 8)
(4847731,)
(1211933, 8)
(1211933,)
Mean Absolute Error: 1.38 degrees.
[[  8538  27027   2016     39  16285    386   1166  65059]
 [  3045  45732   1786   1433   4644   4281   5686  71239]
 [  1342   1378   6185      1  10272     57     38   1444]
 [    23   1004      0 154467     18  16408  11815    326]
 [  3859   4631   1147    537 150522   4791  24136   8183]
 [     6    685      0  26395      4 122272  33836    464]
 [    14   2596      0  10809      0  24860 141558   2033]
 [  2756  26122   1934     29  16990    164   1933 135527]]
              precision    recall  f1-score   support

           0       0.44      0.07      0.12    120516
           1       0.42      0.33      0.37    137846
           2       0.47      0.30      0.37     20717
           3       0.80      0.84      0.82    184061
           4       0.76      0.76      0.76    197806
           5       0.71      0.67      0.69    183662
           6       0.64      0.78      0.70    181870
           

### Each combo - only acc + mag

In [87]:
#create subsets for first combinations with only acc+mag
filterscolls1 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                       or 'thigh_mag' in col
                                                       or 'head_acc' in col
                                                       or 'head_mag' in col
                                                       or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                       or 'thigh_mag' in col
                                                       or 'thigh_acc' in col
                                                       or 'forearm_mag' in col 
                                                       or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                       or 'head_acc' in col
                                                       or 'head_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
 
headthigh_accmag_subset = new_subset[filterscolls1] 
thighforearm_accmag_subset = new_subset[filterscolls2] 
headforearm_accmag_subset = new_subset[filterscolls3] 

In [88]:
#run all models for combination sensor
headthigh_accmag, headthigh_accmag_X = decisiontree(best_grid, headthigh_accmag_subset)
thighforearm_accmag, thighforearm_accmag_X = decisiontree(best_grid, thighforearm_accmag_subset)
headforearm__accmag, headforearm__accmag_X = decisiontree(best_grid, headforearm_accmag_subset)

(4847731, 8)
(4847731,)
(1211933, 8)
(1211933,)
Mean Absolute Error: 1.33 degrees.
[[ 27012  18811    116    330  18772    428   1559  53488]
 [ 10250  57923     21    722   5478    667   3666  59119]
 [   254     58   4025      1  14750     57     75   1497]
 [    72    701      0 144117    827  22049  15610    685]
 [  3306   2257   1365   2912 150995   2731  22138  12102]
 [    51    568      0   9129     11 151051  22187    665]
 [    31   1295      0  22170     48  12537 144738   1051]
 [ 19810  25624     75    679  17235    464   2003 119565]]
              precision    recall  f1-score   support

           0       0.44      0.22      0.30    120516
           1       0.54      0.42      0.47    137846
           2       0.72      0.19      0.31     20717
           3       0.80      0.78      0.79    184061
           4       0.73      0.76      0.74    197806
           5       0.80      0.82      0.81    183662
           6       0.68      0.80      0.73    181870
           

### Each combo - only gyr + mag

In [89]:
#create subsets for first combinations with only gyr+mag
filterscolls1 = [col for col in new_subset.columns if ('thigh_mag' in col 
                                                       or 'thigh_gyr' in col
                                                       or 'head_mag' in col
                                                       or 'head_gyr' in col
                                                       or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('forearm_mag' in col 
                                                       or 'thigh_gyr' in col
                                                       or 'thigh_mag' in col
                                                       or 'forearm_gyr' in col 
                                                       or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_mag' in col 
                                                       or 'head_mag' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'Activity' in col)]
 
headthigh_gyrmag_subset = new_subset[filterscolls1] 
thighforearm_gyrmag_subset = new_subset[filterscolls2] 
headforearm_gyrmag_subset = new_subset[filterscolls3] 

In [90]:
#run all models for combination sensor
headthigh_gyrmag, headthigh_gyrmag_X = decisiontree(best_grid, headthigh_gyrmag_subset)
thighforearm_gyrmag, thighforearm_gyrmag_X = decisiontree(best_grid, thighforearm_gyrmag_subset)
headforearm__gyrmag, headforearm__gyrmag_X = decisiontree(best_grid, headforearm_gyrmag_subset)

(4847731, 8)
(4847731,)
(1211933, 8)
(1211933,)
Mean Absolute Error: 1.36 degrees.
[[ 16178  10353      0    103  30292    251   1689  61650]
 [  6044  48950      0   1426  10121    778   4799  65728]
 [  4232    472      0      1   9730     13    361   5908]
 [   173   1600      0 154773     82  14375  12346    712]
 [  6144   1403      0    509 138238   3416  24511  23585]
 [   113    522      0   8217     33 135040  39022    715]
 [   644   3059      0   5985    184  24614 143522   3862]
 [  9519   9083      0     54  27162    229   1788 137620]]
              precision    recall  f1-score   support

           0       0.38      0.13      0.20    120516
           1       0.65      0.36      0.46    137846
           2       0.00      0.00      0.00     20717
           3       0.90      0.84      0.87    184061
           4       0.64      0.70      0.67    197806
           5       0.76      0.74      0.75    183662
           6       0.63      0.79      0.70    181870
           

### Each combo - only acc + gyr + mag

In [12]:
#create subsets for first combinations with only acc+gyr+mag
filterscolls1 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                       or 'thigh_gyr' in col
                                                       or 'head_acc' in col
                                                       or 'head_gyr' in col
                                                       or 'thigh_mag' in col
                                                       or 'head_mag' in col
                                                       or 'Activity' in col)]
filterscolls2 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                       or 'thigh_gyr' in col
                                                       or 'thigh_acc' in col
                                                       or 'forearm_gyr' in col
                                                       or 'thigh_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
filterscolls3 = [col for col in new_subset.columns if ('forearm_acc' in col 
                                                       or 'head_acc' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'head_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
 
headthigh_accgyrmag_subset = new_subset[filterscolls1] 
thighforearm_accgyrmag_subset = new_subset[filterscolls2] 
headforearm_accgyrmag_subset = new_subset[filterscolls3] 

In [15]:
#run all models for combination sensor
headthigh_accgyrmag, headthigh_accgyrmag_X = decisiontree(best_grid, headthigh_accgyrmag_subset)
thighforearm_accgyrmag, thighforearm_accgyrmag_X = decisiontree(best_grid, thighforearm_accgyrmag_subset)
headforearm__accgyrmag, headforearm__accgyrmag_X = decisiontree(best_grid, headforearm_accgyrmag_subset)

(4847731, 12)
(4847731,)
(1211933, 12)
(1211933,)
Mean Absolute Error: 1.3 degrees.
[[ 31885  22948    535     81  16312    195   1061  47499]
 [ 15414  61159     75   2554   2451    809   1938  53446]
 [  1158    555   5297      0  12725      4    132    846]
 [   297   1383      0 148338     19  15748  17899    377]
 [  6581   4463   1894    631 149727   5693  22048   6769]
 [   267    695      0  22071      2 109831  50461    335]
 [   903   3802      0   8629      6  17912 148962   1656]
 [ 23224  28754    508     72  12670    159    919 119149]]
              precision    recall  f1-score   support

           0       0.40      0.26      0.32    120516
           1       0.49      0.44      0.47    137846
           2       0.64      0.26      0.36     20717
           3       0.81      0.81      0.81    184061
           4       0.77      0.76      0.76    197806
           5       0.73      0.60      0.66    183662
           6       0.61      0.82      0.70    181870
          

## Difference between Nr positions - Subq 3
We have looked into single and double positions, what about third positions <br>

Only combine top 3 positions from SQ1 that show promising results. or reflect on SQ combinations. 

Head = smartglasses <br>
forearm = smartwatch <br>
thigh = pocket smartwatch <br>

Combinations: <br>
Head - Forearm - Thigh <br>


Different Sensors: <br>
acc (mean, sd) <br>
gyr (mean, sd) <br>
mag (mean, sd) <br>
acc + mag (mean, sd) <br>
acc + gyr (mean, sd) <br>
gyr + mag (mean, sd) <br>
acc + gyr + mag (mean, sd) <br>

In [23]:
#create subsets for combinations with only head-forearm-thigh
#only acc
filterscolls1 = [col for col in new_subset.columns if ('thigh_acc' in col 
                                                       or 'head_acc' in col
                                                       or 'forearm_acc' in col
                                                       or 'Activity' in col)]
#only gyr
filterscolls2 = [col for col in new_subset.columns if ('thigh_gyr' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'Activity' in col)]
#only mag
filterscolls3 = [col for col in new_subset.columns if ('thigh_mag' in col
                                                       or 'head_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
#acc+gyr
filterscolls4 = [col for col in new_subset.columns if ('thigh_acc' in col
                                                       or 'head_acc' in col
                                                       or 'forearm_acc' in col
                                                       or 'thigh_gyr' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'Activity' in col)]
#acc+mag
filterscolls5 = [col for col in new_subset.columns if ('thigh_acc' in col
                                                       or 'head_acc' in col
                                                       or 'forearm_acc' in col
                                                       or 'thigh_mag' in col
                                                       or 'head_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
#gyr+mag
filterscolls6 = [col for col in new_subset.columns if ('thigh_gyr' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'thigh_mag' in col
                                                       or 'head_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
#acc+gyr+mag
filterscolls7 = [col for col in new_subset.columns if ('thigh_acc' in col
                                                       or 'head_acc' in col
                                                       or 'forearm_acc' in col
                                                       or 'thigh_gyr' in col
                                                       or 'head_gyr' in col
                                                       or 'forearm_gyr' in col
                                                       or 'thigh_mag' in col
                                                       or 'head_mag' in col
                                                       or 'forearm_mag' in col
                                                       or 'Activity' in col)]
 
thighheadforearm_acc_subset = new_subset[filterscolls1] 
thighheadforearm_gyr_subset = new_subset[filterscolls2] 
thighheadforearm_mag_subset = new_subset[filterscolls3] 
thighheadforearm_accgyr_subset = new_subset[filterscolls4] 
thighheadforearm_accmag_subset = new_subset[filterscolls5] 
thighheadforearm_gyrmag_subset = new_subset[filterscolls6]
thighheadforearm_accgyrmag_subset = new_subset[filterscolls7]

In [24]:
#run all models for combination sensor only acc, gyr, mag
thighheadforearm_acc, thighheadforearm_acc_X = decisiontree(best_grid, thighheadforearm_acc_subset)
thighheadforearm_gyr, thighheadforearm_gyr_X = decisiontree(best_grid, thighheadforearm_gyr_subset)
thighheadforearm_mag, thighheadforearm_mag_X = decisiontree(best_grid, thighheadforearm_mag_subset)

(4847731, 6)
(4847731,)
(1211933, 6)
(1211933,)
Mean Absolute Error: 1.58 degrees.
[[ 13774   9609     77     53  17153    729   1490  77631]
 [  4459  26070     29   1321   2415   5097   6429  92026]
 [    47    410   4752      1  14652    146    282    427]
 [    83    573      0 132478     20  24849  25270    788]
 [  2309   1712   2214   1388 154678   4004  24858   6643]
 [   144    924      0  27526      5 118343  36021    699]
 [   182   1542      0  18324     13  20097 140290   1422]
 [  5456  22710     35     40  12838    835   1848 141693]]
              precision    recall  f1-score   support

           0       0.52      0.11      0.19    120516
           1       0.41      0.19      0.26    137846
           2       0.67      0.23      0.34     20717
           3       0.73      0.72      0.73    184061
           4       0.77      0.78      0.77    197806
           5       0.68      0.64      0.66    183662
           6       0.59      0.77      0.67    181870
           

In [25]:
#run all models for combination sensor only acc+gyr, acc+mag, gyr+mag
thighheadforearm_accgyr, thighheadforearm_accgyr_X = decisiontree(best_grid, thighheadforearm_accgyr_subset)
thighheadforearm_accmag, thighheadforearm_accmag_X = decisiontree(best_grid, thighheadforearm_accmag_subset)
thighheadforearm_gyrmag, thighheadforearm_gyrmag_X = decisiontree(best_grid, thighheadforearm_gyrmag_subset)

(4847731, 12)
(4847731,)
(1211933, 12)
(1211933,)
Mean Absolute Error: 1.62 degrees.
[[ 15319   3368   2126     50  18368    172    943  80170]
 [  9733  11043    616    979   4550   2541   6140 102244]
 [   930    172   8015      0  10695      0     63    842]
 [   220    624      4 133309     50  28631  20498    725]
 [  7927   2314   4016    440 138132   2822  24782  17373]
 [   345   1105     24  13064     36 123413  44945    730]
 [   454   4461     55  13186     43  16073 145745   1853]
 [ 13549   7893    815     34   7204    139    997 154824]]
              precision    recall  f1-score   support

           0       0.32      0.13      0.18    120516
           1       0.36      0.08      0.13    137846
           2       0.51      0.39      0.44     20717
           3       0.83      0.72      0.77    184061
           4       0.77      0.70      0.73    197806
           5       0.71      0.67      0.69    183662
           6       0.60      0.80      0.68    181870
         

In [26]:
#run all models for combination sensor only acc+gyr, acc+mag, gyr+mag
thighheadforearm_accgyrmag, thighheadforearm_accgyrmag_X = decisiontree(best_grid, thighheadforearm_accgyrmag_subset)

(4847731, 18)
(4847731,)
(1211933, 18)
(1211933,)
Mean Absolute Error: 1.37 degrees.
[[ 16775  11447    136    233  12448    141   1057  78279]
 [  6119  49043     17   1196   2107    556   5757  73051]
 [  2793    540   1928     34  12190     63    181   2988]
 [    58    782      0 155585     44   9124  17495    973]
 [  6970    437   1180   1691 136624   5040  22755  23109]
 [    83    439      0   9042     32 149393  23875    798]
 [    80    681      0  10183     33  24944 142825   3124]
 [  8911  14833     46     43   5487    159   1379 154597]]
              precision    recall  f1-score   support

           0       0.40      0.14      0.21    120516
           1       0.63      0.36      0.45    137846
           2       0.58      0.09      0.16     20717
           3       0.87      0.85      0.86    184061
           4       0.81      0.69      0.75    197806
           5       0.79      0.81      0.80    183662
           6       0.66      0.79      0.72    181870
         