In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, plot_confusion_matrix
from sklearn.decomposition import PCA
import keras
from keras.utils import to_categorical
from keras.models import Sequential, Model
from keras.layers import Dense, Conv1D, Flatten, concatenate, Input

Using TensorFlow backend.


In [2]:
from dataset import HARDatasetCrops

## Data

### Features

In [3]:
# Load the training crops. 256 matches the first dimension of each sample printed below.
# NOTE(review): the meaning of the two 50s is defined by HARDatasetCrops — confirm there.
dataset = HARDatasetCrops(
    'motionsense-dataset/train',
    256,
    50,
    50,
)

In [4]:
# Peek at the first item to learn the per-sample array shape.
sample, _ = dataset[0]

print('Dataset size:', len(dataset))
print('Datapoints shape:', sample.shape)

Dataset size: 4095
Datapoints shape: (256, 12)


Our dataset is composed of 4095 datapoints, each of shape `(256, 12)`: 12 signals of 256 samples each.

For our baseline model we will use only the **user-acceleration** signals (the last three columns of each datapoint).

In [5]:
# Reload the same crops, this time with the subjects' metadata file attached;
# the cells below unpack every item as a 3-tuple (sample, class, metadata).
dataset = HARDatasetCrops(
    'motionsense-dataset/train',
    256,
    50,
    50,
    metadata_file='motionsense-dataset/data_subjects_info.csv',
)

In [6]:
# Keep only the last three signal columns of every crop.
X = np.array([item[0][:, -3:] for item in dataset])

In [7]:
# Gather the metadata entry of every item and reshape the result to (N, 4, 1).
metadata = np.array([item[2] for item in dataset]).reshape(-1, 4, 1)

### Classes

We first encode each class with a unique integer; the integer labels are then one-hot encoded for the Keras models:

In [8]:
# Fit the encoder on the dataset's class names so that each one maps to an integer id
# (fit returns the encoder itself, so construction and fitting can be chained).
label_encoder = sklearn.preprocessing.LabelEncoder().fit(list(dataset.CLASSES.keys()))

In [11]:
# Integer-encode the class of every item, then one-hot encode the result.
# num_classes is passed explicitly: otherwise to_categorical infers the width from the
# largest label present, which would silently yield a target matrix narrower than the
# network's output layer if the highest-numbered class were missing from the data.
y = to_categorical(
    label_encoder.transform([cls for _, cls, _ in dataset]),
    num_classes=len(label_encoder.classes_),
)

### Train-test splitting

In [12]:
# Shuffle the sample indices with a fixed seed so that the train/test split, and every
# metric reported from it, is reproducible across kernel restarts.
# NOTE(review): if neighbouring crops overlap in time, a per-crop random split may place
# near-duplicate windows in both sets — consider splitting by recording or subject.
indices = np.random.RandomState(42).permutation(X.shape[0])

In [13]:
# The first 80% of the shuffled indices go to training, the remainder to testing.
train_indices, test_indices = np.split(indices, [int(X.shape[0] * 0.8)])

In [14]:
# Apply the same index split to features, targets and metadata so they stay aligned.
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]
metadata_train, metadata_test = metadata[train_indices], metadata[test_indices]

In [15]:
# Report the shape of every split. The "{}" placeholders are now filled via str.format
# (previously they were printed literally), and the "metadata" label typo is fixed.
print("X train shape: {}".format(X_train.shape))
print("y train shape: {}".format(y_train.shape))
print("X test shape: {}".format(X_test.shape))
print("y test shape: {}".format(y_test.shape))
print("metadata train shape: {}".format(metadata_train.shape))
print("metadata test shape: {}".format(metadata_test.shape))

X train shape: {} (3276, 256, 3)
y train shape: {} (3276, 6)
X test shape: {} (819, 256, 3)
y test shape: {} (819, 6)
metdata train shape: {} (3276, 4, 1)
metdata test shape: {} (819, 4, 1)


In [16]:
# Number of rows in each split's target matrix.
print('Number of training examples:', len(y_train))
print('Number of testing examples:', len(y_test))

Number of training examples: 3276
Number of testing examples: 819


## CNN

In [17]:
# Baseline 1D CNN: two stacked convolutions over the (256, 3) crops, flattened into an
# L2-regularised softmax layer with six output units.
clf = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 3)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(),
    Dense(6, activation="softmax", kernel_regularizer=keras.regularizers.l2(0.01)),
])

In [18]:
# Categorical cross-entropy on the one-hot targets, tracked with accuracy.
clf.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# The test split is passed as validation data, so it is only evaluated after each epoch.
clf.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    validation_data=(X_test, y_test),
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x1117b4250>

In [19]:
# Per-class report on the training split (one-hot rows and predicted probabilities are
# both reduced to class indices with argmax).
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict(X_train).argmax(axis=1),
    target_names=label_encoder.classes_,
))

# Same report on the test split.
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict(X_test).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.99      1.00      0.99       298
         jog       1.00      1.00      1.00       304
         sit       0.86      0.98      0.92       788
         std       0.97      0.83      0.90       722
         ups       1.00      1.00      1.00       351
         wlk       1.00      0.99      1.00       813

    accuracy                           0.96      3276
   macro avg       0.97      0.97      0.97      3276
weighted avg       0.96      0.96      0.96      3276

              precision    recall  f1-score   support

         dws       0.81      0.89      0.84        61
         jog       0.99      0.99      0.99        80
         sit       0.89      0.98      0.93       216
         std       0.97      0.85      0.91       178
         ups       0.84      0.81      0.83        85
         wlk       0.94      0.92      0.93       199

    accuracy                           0.92       819
   macro avg       0.91

## Metadata

In [20]:
# Convolutional branch over the signal crops; its flattened activations are exposed
# through the layer named "coefs".
conv = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 3)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(name="coefs"),
])

# Metadata branch: the (4, 1) metadata input is flattened before being merged.
metadata_input_tensor = Input(shape=(4, 1))
metadata_input = Flatten(name="flatten")(metadata_input_tensor)

# Join both branches along axis 1 and classify with a six-unit softmax layer.
last_layer = conv.get_layer("coefs").output
x = concatenate([last_layer, metadata_input], axis=1)
out = Dense(6, activation="softmax")(x)

# Two-input model: signal crops plus metadata.
clf = Model(inputs=[conv.input, metadata_input_tensor], outputs=out)

In [21]:
# Categorical cross-entropy on the one-hot targets, tracked with accuracy.
clf.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Both inputs (signals, metadata) are fed as a list; the test split is passed as
# validation data, so it is only evaluated after each epoch.
clf.fit(
    x=[X_train, metadata_train],
    y=y_train,
    epochs=30,
    validation_data=([X_test, metadata_test], y_test),
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x13f1a5290>

In [22]:
# Per-class report on the training split for the two-input model.
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict([X_train, metadata_train]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

# Same report on the test split.
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict([X_test, metadata_test]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      1.00      1.00       298
         jog       1.00      1.00      1.00       304
         sit       0.99      0.93      0.96       788
         std       0.93      0.99      0.96       722
         ups       1.00      1.00      1.00       351
         wlk       1.00      1.00      1.00       813

    accuracy                           0.98      3276
   macro avg       0.99      0.99      0.99      3276
weighted avg       0.98      0.98      0.98      3276

              precision    recall  f1-score   support

         dws       0.80      0.64      0.71        61
         jog       0.96      1.00      0.98        80
         sit       0.97      0.90      0.94       216
         std       0.89      0.97      0.93       178
         ups       0.71      0.76      0.74        85
         wlk       0.90      0.91      0.91       199

    accuracy                           0.89       819
   macro avg       0.87

## All signals

In [23]:
# Rebuild the feature tensor with every signal column, reusing the earlier index split
# so the existing targets and metadata arrays stay aligned with it.
X = np.array([item[0] for item in dataset])
X_train, X_test = X[train_indices], X[test_indices]

In [24]:
# Report the shape of every split. The "{}" placeholders are now filled via str.format
# (previously they were printed literally), and the "metadata" label typo is fixed.
print("X train shape: {}".format(X_train.shape))
print("y train shape: {}".format(y_train.shape))
print("X test shape: {}".format(X_test.shape))
print("y test shape: {}".format(y_test.shape))
print("metadata train shape: {}".format(metadata_train.shape))
print("metadata test shape: {}".format(metadata_test.shape))

X train shape: {} (3276, 256, 12)
y train shape: {} (3276, 6)
X test shape: {} (819, 256, 12)
y test shape: {} (819, 6)
metdata train shape: {} (3276, 4, 1)
metdata test shape: {} (819, 4, 1)


In [25]:
# Same CNN as the baseline, but fed with all 12 signal channels per crop.
clf = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 12)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(),
    Dense(6, activation="softmax", kernel_regularizer=keras.regularizers.l2(0.01)),
])

In [26]:
# Categorical cross-entropy on the one-hot targets, tracked with accuracy.
clf.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# The test split is passed as validation data, so it is only evaluated after each epoch.
clf.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    validation_data=(X_test, y_test),
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x13cd281d0>

In [27]:
# Per-class report on the training split (argmax turns one-hot rows and predicted
# probabilities into class indices).
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict(X_train).argmax(axis=1),
    target_names=label_encoder.classes_,
))

# Same report on the test split.
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict(X_test).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      1.00      1.00       298
         jog       1.00      1.00      1.00       304
         sit       1.00      1.00      1.00       788
         std       1.00      1.00      1.00       722
         ups       1.00      1.00      1.00       351
         wlk       1.00      1.00      1.00       813

    accuracy                           1.00      3276
   macro avg       1.00      1.00      1.00      3276
weighted avg       1.00      1.00      1.00      3276

              precision    recall  f1-score   support

         dws       1.00      0.93      0.97        61
         jog       0.99      1.00      0.99        80
         sit       1.00      1.00      1.00       216
         std       1.00      1.00      1.00       178
         ups       0.99      0.93      0.96        85
         wlk       0.96      0.99      0.98       199

    accuracy                           0.99       819
   macro avg       0.99

## All signals + metadata

In [34]:
from keras.layers import Dropout

In [35]:
# Convolutional branch over the 12-channel crops; its flattened activations are exposed
# through the layer named "coefs".
conv = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 12)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(name="coefs"),
])

# Metadata branch: the (4, 1) metadata input is flattened before being merged.
metadata_input_tensor = Input(shape=(4, 1))
metadata_input = Flatten(name="flatten")(metadata_input_tensor)

# Join both branches along axis 1, apply 50% dropout to the merged features and
# classify with a six-unit softmax layer.
last_layer = conv.get_layer("coefs").output
x = concatenate([last_layer, metadata_input], axis=1)
out = Dense(6, activation="softmax")(
    Dropout(0.5)(x)
)

# Two-input model: signal crops plus metadata.
clf = Model(inputs=[conv.input, metadata_input_tensor], outputs=out)

In [36]:
# Categorical cross-entropy on the one-hot targets, tracked with accuracy.
clf.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Both inputs (signals, metadata) are fed as a list; the test split is passed as
# validation data, so it is only evaluated after each epoch.
clf.fit(
    x=[X_train, metadata_train],
    y=y_train,
    epochs=30,
    validation_data=([X_test, metadata_test], y_test),
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x13efc1e10>

In [37]:
# Per-class report on the training split for the two-input model.
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict([X_train, metadata_train]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

# Same report on the test split.
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict([X_test, metadata_test]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      1.00      1.00       298
         jog       1.00      1.00      1.00       304
         sit       1.00      1.00      1.00       788
         std       1.00      1.00      1.00       722
         ups       1.00      1.00      1.00       351
         wlk       1.00      1.00      1.00       813

    accuracy                           1.00      3276
   macro avg       1.00      1.00      1.00      3276
weighted avg       1.00      1.00      1.00      3276

              precision    recall  f1-score   support

         dws       1.00      0.95      0.97        61
         jog       1.00      0.97      0.99        80
         sit       1.00      1.00      1.00       216
         std       1.00      1.00      1.00       178
         ups       0.93      0.94      0.94        85
         wlk       0.97      0.98      0.98       199

    accuracy                           0.98       819
   macro avg       0.98

In [46]:
# Count the test crops whose arg-max prediction equals the true class index.
y_true = y_test.argmax(axis=1)
correct = np.sum(clf.predict([X_test, metadata_test]).argmax(axis=1) == y_true)

In [47]:
from statsmodels.stats.proportion import proportion_confint

In [55]:
# Confidence interval for the test accuracy (library defaults for level and method),
# reported as the point estimate plus/minus half of the interval width.
inf, sup = proportion_confint(count=correct, nobs=len(y_true))
print("Accuracy: {} +- {}".format(correct / len(y_true), 0.5 * (sup - inf)))

Accuracy: 0.9841269841269841 +- 0.00855975699251288
