In [1]:
import os
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import svm, metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
%matplotlib inline 

In [2]:
# Base data directory. NOTE(review): the backslash separators make this path Windows-specific.
filedir = ".\\data\\"
# Activity names; they match the class labels printed in the classification reports.
# NOTE(review): not referenced by any of the visible cells.
activities = ['cycling', 'driving', 'jogging', 'sleeping', 'walking']
# NOTE(review): not referenced by any of the visible cells; unit (samples vs. seconds) unknown - confirm or remove.
epoch_length = 360

In [3]:
# Load the pre-built dataset dictionary and take the feature matrix from it.
# The context manager closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
# NOTE(review): the path resolves to .\data\data\data.pickle; confirm the nested
# "data\data" directory is intended.
with open(filedir+"data\\data.pickle","rb") as pickle_in:
    data_dict = pickle.load(pickle_in)
X = data_dict["svm_array"]

In [4]:
# Class labels, read from the same dictionary as the feature matrix X.
y = data_dict["label"]

---

## Data Split 80/20

In [5]:
# Aim for ~20% of the rows as test data, rounded down to a multiple of the number
# of distinct labels so a stratified split can draw the same count from each class.
test_size = (int(X.shape[0] * 0.2) // len(set(y))) * len(set(y))
test_size

1625

In [6]:
# Stratified hold-out split. test_size is an absolute row count, stratify=y keeps
# the class proportions in both partitions, and the fixed seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=19011954,
    stratify=y,
)

## SVM 1

In [7]:
# RBF-kernel support vector classifier; gamma="auto" means 1 / n_features.
# NOTE(review): per the scikit-learn docs random_state only matters for probability
# estimates, so with probability=False it presumably has no effect - confirm.
SVM_classifier = svm.SVC(
    kernel='rbf',
    C=1.0,
    gamma="auto",
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
SVM_classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False)

In [8]:
# Predict labels for the held-out test split with the fitted SVM.
y_pred_SVM = SVM_classifier.predict(X_test)

In [9]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_SVM1 = metrics.accuracy_score(y_test, y_pred_SVM)
accuracy_SVM1

0.8664615384615385

In [10]:
# Persist the SVM accuracy as text. The context manager guarantees the file is
# closed even if the write raises.
with open("accuracy_svm1.txt","w") as results:
    results.write(str(accuracy_SVM1))

In [11]:
# Per-class precision, recall and F1 (4 decimals) for the SVM on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        SVM_classifier,
        metrics.classification_report(y_test, y_pred_SVM, digits=4),
    )
)

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

     cycling     0.7706    0.8062    0.7880       325
     driving     0.7446    0.7446    0.7446       325
     jogging     0.9586    0.9969    0.9774       325
    sleeping     0.9663    0.8831    0.9228       325
     walking     0.9015    0.9015    0.9015       325

    accuracy                         0.8665      1625
   macro avg     0.8683    0.8665    0.8669      1625
weighted avg     0.8683    0.8665    0.8669      1625




In [12]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_SVM)
)

Confusion matrix:
[[262  46   6   1  10]
 [ 52 242   3   8  20]
 [  0   0 324   0   1]
 [ 18  15   4 287   1]
 [  8  22   1   1 293]]


## Random Forest 1

In [13]:
# Random forest with 1000 trees; the fixed seed makes the fitted forest repeatable.
RF_classifier = RandomForestClassifier(
    n_estimators=1000,
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
RF_classifier.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False)

In [14]:
# Predict labels for the held-out test split with the fitted random forest.
y_pred_RF = RF_classifier.predict(X_test)

In [15]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_RF1 = metrics.accuracy_score(y_test, y_pred_RF)
accuracy_RF1

0.947076923076923

In [16]:
# Persist the random-forest accuracy as text. The context manager guarantees the
# file is closed even if the write raises.
with open("accuracy_rf1.txt","w") as results:
    results.write(str(accuracy_RF1))

In [17]:
# Per-class precision, recall and F1 (4 decimals) for the random forest on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        RF_classifier,
        metrics.classification_report(y_test, y_pred_RF, digits=4),
    )
)

Classification report for classifier RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False):
              precision    recall  f1-score   support

     cycling     0.9279    0.9108    0.9193       325
     driving     0.8757    0.9108    0.8929       325
     jogging     0.9969    0.9969    0.9969       325
    sleeping     0.9969    0.9754    0.9860       325
     walking     0.9415    0.9415    0.9415       325

    accuracy                         0.9471      1625
   macro avg     0.9478    0.9471    0.9473      1625
weighted avg     0.9478    0.9471    0.9473      1625

In [18]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_RF)
)

Confusion matrix:
[[296  27   0   0   2]
 [ 15 296   0   0  14]
 [  0   0 324   0   1]
 [  2   4   0 317   2]
 [  6  11   1   1 306]]


---

## Data Split 85/15

In [19]:
# Aim for ~15% of the rows as test data, rounded down to a multiple of the number
# of distinct labels so a stratified split can draw the same count from each class.
test_size = (int(X.shape[0] * 0.15) // len(set(y))) * len(set(y))

In [20]:
# Stratified hold-out split; test_size is an absolute row count. This overwrites
# the X_train/X_test/y_train/y_test produced by the previous split.
# NOTE(review): this seed (4091988) differs from the one used for the 80/20 split (19011954).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=4091988,
    stratify=y,
)

## SVM 2

In [21]:
# RBF-kernel support vector classifier; gamma="auto" means 1 / n_features.
SVM_classifier = svm.SVC(
    kernel='rbf',
    C=1.0,
    gamma="auto",
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
SVM_classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False)

In [22]:
# Predict labels for the held-out test split with the fitted SVM.
y_pred_SVM = SVM_classifier.predict(X_test)

In [23]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_SVM2 = metrics.accuracy_score(y_test, y_pred_SVM)
accuracy_SVM2

0.8647540983606558

In [24]:
# Persist the SVM accuracy as text. The context manager guarantees the file is
# closed even if the write raises.
with open("accuracy_svm2.txt","w") as results:
    results.write(str(accuracy_SVM2))

In [25]:
# Per-class precision, recall and F1 (4 decimals) for the SVM on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        SVM_classifier,
        metrics.classification_report(y_test, y_pred_SVM, digits=4),
    )
)

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

     cycling     0.7406    0.8074    0.7725       244
     driving     0.7642    0.7705    0.7673       244
     jogging     0.9683    1.0000    0.9839       244
    sleeping     0.9554    0.8770    0.9145       244
     walking     0.9138    0.8689    0.8908       244

    accuracy                         0.8648      1220
   macro avg     0.8684    0.8648    0.8658      1220
weighted avg     0.8684    0.8648    0.8658      1220




In [26]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_SVM)
)

Confusion matrix:
[[197  28   4   2  13]
 [ 43 188   1   5   7]
 [  0   0 244   0   0]
 [ 16  13   1 214   0]
 [ 10  17   2   3 212]]


## Random Forest 2

In [27]:
# Random forest with 1000 trees; the fixed seed makes the fitted forest repeatable.
RF_classifier = RandomForestClassifier(
    n_estimators=1000,
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
RF_classifier.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False)

In [28]:
# Predict labels for the held-out test split with the fitted random forest.
y_pred_RF = RF_classifier.predict(X_test)

In [29]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_RF2 = metrics.accuracy_score(y_test, y_pred_RF)
accuracy_RF2

0.9508196721311475

In [30]:
# Persist the random-forest accuracy as text. The context manager guarantees the
# file is closed even if the write raises.
# NOTE(review): file-name casing is inconsistent with earlier cells
# ("accuracy_rf1.txt" vs. "accuracy_RF2.txt"); kept as-is to avoid breaking consumers.
with open("accuracy_RF2.txt","w") as results:
    results.write(str(accuracy_RF2))

In [31]:
# Per-class precision, recall and F1 (4 decimals) for the random forest on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        RF_classifier,
        metrics.classification_report(y_test, y_pred_RF, digits=4),
    )
)

Classification report for classifier RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False):
              precision    recall  f1-score   support

     cycling     0.9292    0.9139    0.9215       244
     driving     0.9073    0.9221    0.9146       244
     jogging     0.9959    1.0000    0.9980       244
    sleeping     0.9876    0.9795    0.9835       244
     walking     0.9347    0.9385    0.9366       244

    accuracy                         0.9508      1220
   macro avg     0.9509    0.9508    0.9508      1220
weighted avg     0.9509    0.9508    0.9508      1220

In [32]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_RF)
)

Confusion matrix:
[[223  13   0   2   6]
 [ 10 225   0   0   9]
 [  0   0 244   0   0]
 [  1   3   0 239   1]
 [  6   7   1   1 229]]


---

## Data Split 75/25

In [33]:
# Aim for ~25% of the rows as test data, rounded down to a multiple of the number
# of distinct labels so a stratified split can draw the same count from each class.
test_size = (int(X.shape[0] * 0.25) // len(set(y))) * len(set(y))

In [34]:
# Stratified hold-out split; test_size is an absolute row count. This overwrites
# the X_train/X_test/y_train/y_test produced by the previous split.
# NOTE(review): this seed (4091988) differs from the one used for the 80/20 split (19011954).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=4091988,
    stratify=y,
)

## SVM 3

In [35]:
# RBF-kernel support vector classifier; gamma="auto" means 1 / n_features.
SVM_classifier = svm.SVC(
    kernel='rbf',
    C=1.0,
    gamma="auto",
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
SVM_classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False)

In [36]:
# Predict labels for the held-out test split with the fitted SVM.
y_pred_SVM = SVM_classifier.predict(X_test)

In [37]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_SVM3 = metrics.accuracy_score(y_test, y_pred_SVM)
accuracy_SVM3

0.8722358722358723

In [38]:
# Persist the SVM accuracy as text. The context manager guarantees the file is
# closed even if the write raises.
with open("accuracy_SVM3.txt","w") as results:
    results.write(str(accuracy_SVM3))

In [39]:
# Per-class precision, recall and F1 (4 decimals) for the SVM on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        SVM_classifier,
        metrics.classification_report(y_test, y_pred_SVM, digits=4),
    )
)

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

     cycling     0.7706    0.8256    0.7972       407
     driving     0.7641    0.7641    0.7641       407
     jogging     0.9667    0.9975    0.9819       407
    sleeping     0.9581    0.8993    0.9278       407
     walking     0.9128    0.8747    0.8934       407

    accuracy                         0.8722      2035
   macro avg     0.8745    0.8722    0.8728      2035
weighted avg     0.8745    0.8722    0.8728      2035




In [40]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_SVM)
)

Confusion matrix:
[[336  45   7   2  17]
 [ 66 311   4   9  17]
 [  1   0 406   0   0]
 [ 22  18   1 366   0]
 [ 11  33   2   5 356]]


## Random Forest 3

In [41]:
# Random forest with 1000 trees; the fixed seed makes the fitted forest repeatable.
RF_classifier = RandomForestClassifier(
    n_estimators=1000,
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
RF_classifier.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False)

In [42]:
# Predict labels for the held-out test split with the fitted random forest.
y_pred_RF = RF_classifier.predict(X_test)

In [43]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_RF3 = metrics.accuracy_score(y_test, y_pred_RF)
accuracy_RF3

0.9547911547911548

In [44]:
# Persist the random-forest accuracy as text. The context manager guarantees the
# file is closed even if the write raises.
with open("accuracy_RF3.txt","w") as results:
    results.write(str(accuracy_RF3))

In [45]:
# Per-class precision, recall and F1 (4 decimals) for the random forest on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        RF_classifier,
        metrics.classification_report(y_test, y_pred_RF, digits=4),
    )
)

Classification report for classifier RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False):
              precision    recall  f1-score   support

     cycling     0.9464    0.9115    0.9287       407
     driving     0.9067    0.9312    0.9188       407
     jogging     0.9951    0.9975    0.9963       407
    sleeping     0.9901    0.9853    0.9877       407
     walking     0.9369    0.9484    0.9426       407

    accuracy                         0.9548      2035
   macro avg     0.9550    0.9548    0.9548      2035
weighted avg     0.9550    0.9548    0.9548      2035

In [46]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_RF)
)

Confusion matrix:
[[371  23   1   2  10]
 [ 14 379   0   0  14]
 [  1   0 406   0   0]
 [  0   4   0 401   2]
 [  6  12   1   2 386]]


---

## Data Split 50/50

In [47]:
# Aim for ~50% of the rows as test data, rounded down to a multiple of the number
# of distinct labels so a stratified split can draw the same count from each class.
test_size = (int(X.shape[0] * 0.50) // len(set(y))) * len(set(y))

In [48]:
# Stratified hold-out split; test_size is an absolute row count. This overwrites
# the X_train/X_test/y_train/y_test produced by the previous split.
# NOTE(review): this seed (4091988) differs from the one used for the 80/20 split (19011954).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=4091988,
    stratify=y,
)

## SVM 4

In [49]:
# RBF-kernel support vector classifier; gamma="auto" means 1 / n_features.
SVM_classifier = svm.SVC(
    kernel='rbf',
    C=1.0,
    gamma="auto",
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
SVM_classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False)

In [50]:
# Predict labels for the held-out test split with the fitted SVM.
y_pred_SVM = SVM_classifier.predict(X_test)

In [51]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_SVM4 = metrics.accuracy_score(y_test, y_pred_SVM)
accuracy_SVM4

0.8626535626535626

In [52]:
# Persist the SVM accuracy as text. The context manager guarantees the file is
# closed even if the write raises.
with open("accuracy_SVM4.txt","w") as results:
    results.write(str(accuracy_SVM4))

In [53]:
# Per-class precision, recall and F1 (4 decimals) for the SVM on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        SVM_classifier,
        metrics.classification_report(y_test, y_pred_SVM, digits=4),
    )
)

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

     cycling     0.7656    0.8145    0.7893       814
     driving     0.7276    0.7482    0.7377       814
     jogging     0.9575    0.9963    0.9765       814
    sleeping     0.9567    0.8956    0.9251       814
     walking     0.9222    0.8587    0.8893       814

    accuracy                         0.8627      4070
   macro avg     0.8659    0.8627    0.8636      4070
weighted avg     0.8659    0.8627    0.8636      4070




In [54]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_SVM)
)

Confusion matrix:
[[663 112  13   3  23]
 [141 609   9  19  36]
 [  3   0 811   0   0]
 [ 39  43   3 729   0]
 [ 20  73  11  11 699]]


## Random Forest 4

In [55]:
# Random forest with 1000 trees; the fixed seed makes the fitted forest repeatable.
RF_classifier = RandomForestClassifier(
    n_estimators=1000,
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
RF_classifier.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False)

In [56]:
# Predict labels for the held-out test split with the fitted random forest.
y_pred_RF = RF_classifier.predict(X_test)

In [57]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_RF4 = metrics.accuracy_score(y_test, y_pred_RF)
accuracy_RF4

0.9506142506142506

In [58]:
# Persist the random-forest accuracy as text. The context manager guarantees the
# file is closed even if the write raises.
with open("accuracy_RF4.txt","w") as results:
    results.write(str(accuracy_RF4))

In [59]:
# Per-class precision, recall and F1 (4 decimals) for the random forest on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        RF_classifier,
        metrics.classification_report(y_test, y_pred_RF, digits=4),
    )
)

Classification report for classifier RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False):
              precision    recall  f1-score   support

     cycling     0.9328    0.9201    0.9264       814
     driving     0.8888    0.9128    0.9006       814
     jogging     0.9951    0.9963    0.9957       814
    sleeping     0.9865    0.9853    0.9859       814
     walking     0.9514    0.9386    0.9450       814

    accuracy                         0.9506      4070
   macro avg     0.9509    0.9506    0.9507      4070
weighted avg     0.9509    0.9506    0.9507      4070

In [60]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_RF)
)

Confusion matrix:
[[749  55   1   1   8]
 [ 32 743   1   8  30]
 [  3   0 811   0   0]
 [  2   9   0 802   1]
 [ 17  29   2   2 764]]


## Data Split 80/10/10

In [61]:
# Aim for ~10% of the rows per held-out partition, rounded down to a multiple of the
# number of distinct labels so a stratified split can draw the same count from each class.
test_size = (int(X.shape[0] * 0.1) // len(set(y))) * len(set(y))

In [62]:
# First stage of the 80/10/10 split: carve off the stratified test partition
# (test_size is an absolute row count); the remainder goes to X_data / y_data.
X_data, X_test, y_data, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=19011954,
    stratify=y,
)

In [63]:
# Second stage: take a validation partition of the same absolute size out of the
# remaining data; what is left becomes the training set.
# NOTE(review): X_val / y_val are not used by any of the visible cells - the
# models below are scored on X_test only.
X_train, X_val, y_train, y_val = train_test_split(
    X_data,
    y_data,
    test_size=test_size,
    random_state=19011954,
    stratify=y_data,
)

## SVM 5

In [64]:
# RBF-kernel support vector classifier; gamma="auto" means 1 / n_features.
SVM_classifier = svm.SVC(
    kernel='rbf',
    C=1.0,
    gamma="auto",
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
SVM_classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False)

In [65]:
# Predict labels for the held-out test split with the fitted SVM.
y_pred_SVM = SVM_classifier.predict(X_test)

In [66]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_SVM5 = metrics.accuracy_score(y_test, y_pred_SVM)
accuracy_SVM5

0.8654320987654321

In [67]:
# Persist the SVM accuracy as text. The context manager guarantees the file is
# closed even if the write raises.
with open("accuracy_SVM5.txt","w") as results:
    results.write(str(accuracy_SVM5))

In [68]:
# Per-class precision, recall and F1 (4 decimals) for the SVM on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        SVM_classifier,
        metrics.classification_report(y_test, y_pred_SVM, digits=4),
    )
)

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=19011954, shrinking=True,
    tol=0.001, verbose=False):
              precision    recall  f1-score   support

     cycling     0.7702    0.7654    0.7678       162
     driving     0.7232    0.7901    0.7552       162
     jogging     0.9759    1.0000    0.9878       162
    sleeping     0.9792    0.8704    0.9216       162
     walking     0.9012    0.9012    0.9012       162

    accuracy                         0.8654       810
   macro avg     0.8699    0.8654    0.8667       810
weighted avg     0.8699    0.8654    0.8667       810




In [69]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_SVM)
)

Confusion matrix:
[[124  29   2   0   7]
 [ 22 128   1   3   8]
 [  0   0 162   0   0]
 [ 12   7   1 141   1]
 [  3  13   0   0 146]]


## Random Forest 5

In [70]:
# Random forest with 1000 trees; the fixed seed makes the fitted forest repeatable.
RF_classifier = RandomForestClassifier(
    n_estimators=1000,
    random_state=19011954,
)
# Fit on the training split; as the last expression, the estimator repr is displayed.
RF_classifier.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False)

In [71]:
# Predict labels for the held-out test split with the fitted random forest.
y_pred_RF = RF_classifier.predict(X_test)

In [72]:
# Fraction of test samples predicted correctly; the bare name displays the value.
accuracy_RF5 = metrics.accuracy_score(y_test, y_pred_RF)
accuracy_RF5

0.9567901234567902

In [73]:
# Persist the random-forest accuracy as text. The context manager guarantees the
# file is closed even if the write raises.
with open("accuracy_RF5.txt","w") as results:
    results.write(str(accuracy_RF5))

In [74]:
# Per-class precision, recall and F1 (4 decimals) for the random forest on the test split.
print(
    "Classification report for classifier %s:\n%s\n"
    % (
        RF_classifier,
        metrics.classification_report(y_test, y_pred_RF, digits=4),
    )
)

Classification report for classifier RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=19011954,
                       verbose=0, warm_start=False):
              precision    recall  f1-score   support

     cycling     0.9735    0.9074    0.9393       162
     driving     0.8807    0.9568    0.9172       162
     jogging     1.0000    1.0000    1.0000       162
    sleeping     0.9936    0.9630    0.9781       162
     walking     0.9451    0.9568    0.9509       162

    accuracy                         0.9568       810
   macro avg     0.9586    0.9568    0.9571       810
weighted avg     0.9586    0.9568    0.9571       810

In [75]:
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print(
    "Confusion matrix:\n%s"
    % metrics.confusion_matrix(y_test, y_pred_RF)
)

Confusion matrix:
[[147  13   0   1   1]
 [  2 155   0   0   5]
 [  0   0 162   0   0]
 [  1   2   0 156   3]
 [  1   6   0   0 155]]
