In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, plot_confusion_matrix
from sklearn.decomposition import PCA
import numpy as np

In [2]:
from dataset import HARDatasetCrops

## Data

### Features

In [3]:
# Load the training crops without subject metadata; each item unpacks as a
# (sample, label) pair in the cells below.
# NOTE(review): the meaning of the positional arguments (256, 50, 50) is defined
# in dataset.py; 256 matches the crop length printed below - confirm the two 50s.
dataset = HARDatasetCrops(
    'motionsense-dataset/train',
    256,
    50,
    50,
)

In [4]:
# Number of crops the dataset yields.
print('Dataset size:', len(dataset))

# Peek at the first (sample, label) item to see the per-crop array shape.
sample, _ = dataset[0]
print('Datapoints shape:', sample.shape)

Dataset size: 4095
Datapoints shape: (256, 12)


Our dataset is composed of 4095 datapoints, each of shape `(256, 12)`: 12 signals of 256 samples each.

For our baseline model we will use only the **user-acceleration** signals (the last three channels of each datapoint).

In [10]:
# Rebuild the dataset with the subjects-info CSV attached. From here on each
# item unpacks into three elements: (sample, label, metadata).
dataset = HARDatasetCrops(
    'motionsense-dataset/train',
    256,
    50,
    50,
    metadata_file='motionsense-dataset/data_subjects_info.csv',
)

In [11]:
# Stack every crop into one array, keeping only its last three channels
# (the signals used for the baseline model).
X = np.array([crop[:, -3:] for crop, _, _ in dataset])

In [12]:
# Gather the third element of every dataset item (the subject metadata) and
# reshape to (n_crops, 4, 1), the shape the metadata Input layer is declared with.
metadata = np.array(
    [subject_info for _, _, subject_info in dataset]
).reshape((-1, 4, 1))

### Classes

Before training, we encode each class with a unique integer using `sklearn`'s `LabelEncoder`, and then one-hot encode those integers for Keras:

In [7]:
# Fit an integer encoder on the class names declared by the dataset.
# LabelEncoder.fit returns the fitted encoder, so construction and fitting chain.
label_encoder = sklearn.preprocessing.LabelEncoder().fit(
    list(dataset.CLASSES.keys())
)

In [8]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [9]:
# One-hot encode the class label of every crop.
# The label is the second element of each dataset item. `*_` absorbs any
# trailing elements, so this works both for (sample, label) items and for the
# (sample, label, metadata) items produced once the dataset is rebuilt with a
# metadata file. The previous two-name unpacking failed on 3-element items when
# the notebook was run top to bottom.
labels = [cls for _, cls, *_ in dataset]
y = to_categorical(label_encoder.transform(labels))

### Train-test splitting

In [13]:
# Shuffle all crop indices once. A fixed seed makes the train/test split (and
# therefore every reported metric's underlying data) reproducible across
# kernel restarts; the previous unseeded draw produced a different split on
# every run.
SPLIT_SEED = 42
indices = np.random.RandomState(SPLIT_SEED).permutation(X.shape[0])

In [14]:
# First 80% of the shuffled indices go to training, the remainder to testing.
n_train = int(X.shape[0] * 0.8)
train_indices, test_indices = indices[:n_train], indices[n_train:]

In [15]:
# Apply the same index split to signals, one-hot labels and metadata so the
# three arrays stay aligned row by row.
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]
metadata_train, metadata_test = metadata[train_indices], metadata[test_indices]

In [16]:
# Sanity-check the shapes of every split.
# The "{}" placeholder is now actually filled in via str.format (previously it
# was printed literally because the shape was passed as a second print
# argument), and the "metdata" typo in the labels is fixed.
for split_name, split_array in [
    ("X train", X_train),
    ("y train", y_train),
    ("X test", X_test),
    ("y test", y_test),
    ("metadata train", metadata_train),
    ("metadata test", metadata_test),
]:
    print("{} shape: {}".format(split_name, split_array.shape))

X train shape: {} (3276, 256, 3)
y train shape: {} (3276, 6)
X test shape: {} (819, 256, 3)
y test shape: {} (819, 6)
metdata train shape: {} (3276, 4, 1)
metdata test shape: {} (819, 4, 1)


In [17]:
# Number of rows in each split (len of an array is its first dimension).
print('Number of training examples:', len(y_train))
print('Number of testing examples:', len(y_test))

Number of training examples: 3276
Number of testing examples: 819


## CNN

In [18]:
import keras

In [19]:
from keras.models import Sequential, Model
from keras.layers import Dense, Conv1D, Flatten, concatenate, Input

In [20]:
# Baseline CNN: two 1-D convolutions over the (256, 3) windows, flattened into
# an L2-regularised 6-way softmax classifier.
clf = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 3)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(),
    Dense(6, activation="softmax", kernel_regularizer=keras.regularizers.l2(0.01)),
])

In [21]:
# Categorical cross-entropy matches the one-hot targets; accuracy is tracked per epoch.
clf.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [22]:
# Train for 30 epochs; the test split is passed as validation data, so it is
# evaluated after every epoch.
clf.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=30,
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x143f97350>

In [23]:
# Per-class precision/recall/F1 on the training split. argmax converts the
# one-hot targets and the softmax outputs back into integer class ids.
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict(X_train).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      0.99      0.99       284
         jog       1.00      1.00      1.00       307
         sit       0.87      0.97      0.91       794
         std       0.96      0.84      0.89       726
         ups       0.99      1.00      0.99       343
         wlk       1.00      0.99      0.99       822

    accuracy                           0.95      3276
   macro avg       0.97      0.96      0.96      3276
weighted avg       0.96      0.95      0.95      3276



In [24]:
# Per-class precision/recall/F1 on the held-out test split. argmax converts the
# one-hot targets and the softmax outputs back into integer class ids.
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict(X_test).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.81      0.77      0.79        75
         jog       0.96      0.95      0.95        77
         sit       0.89      0.94      0.92       210
         std       0.91      0.86      0.89       174
         ups       0.76      0.69      0.72        93
         wlk       0.86      0.91      0.88       190

    accuracy                           0.87       819
   macro avg       0.87      0.85      0.86       819
weighted avg       0.87      0.87      0.87       819



## Metadata

In [25]:
# Convolutional feature extractor over the (256, 3) signal windows; its final
# Flatten layer is named "coefs" so its output can be looked up below.
conv = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 3)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(name="coefs"),
])

# Second input branch: the (4, 1) metadata tensor, flattened to a vector.
metadata_input_tensor = Input(shape=(4, 1))
metadata_input = Flatten(name="flatten")(metadata_input_tensor)

# Join the convolutional features with the metadata vector along the feature
# axis and classify into the 6 activities.
last_layer = conv.get_layer("coefs").output
x = concatenate([last_layer, metadata_input], axis=1)
out = Dense(6, activation="softmax")(x)

# Two-input model: [signal window, metadata] -> class probabilities.
clf = Model(inputs=[conv.input, metadata_input_tensor], outputs=out)

In [26]:
# Same optimiser/loss/metric configuration as the baseline model.
clf.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [27]:
# Train the two-input model: each example is a [signal window, metadata] pair.
# The test split is passed as validation data.
clf.fit(
    [X_train, metadata_train],
    y_train,
    validation_data=([X_test, metadata_test], y_test),
    epochs=30,
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x122d27d90>

In [28]:
# Training-split report for the signal + metadata model (argmax maps one-hot
# targets and softmax outputs back to class ids).
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict([X_train, metadata_train]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      1.00      1.00       284
         jog       1.00      1.00      1.00       307
         sit       0.97      0.99      0.98       794
         std       0.99      0.96      0.98       726
         ups       1.00      1.00      1.00       343
         wlk       1.00      1.00      1.00       822

    accuracy                           0.99      3276
   macro avg       0.99      0.99      0.99      3276
weighted avg       0.99      0.99      0.99      3276



In [29]:
# Test-split report for the signal + metadata model (argmax maps one-hot
# targets and softmax outputs back to class ids).
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict([X_test, metadata_test]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.75      0.68      0.71        75
         jog       0.91      0.96      0.94        77
         sit       0.98      0.98      0.98       210
         std       0.97      0.97      0.97       174
         ups       0.68      0.66      0.67        93
         wlk       0.83      0.85      0.84       190

    accuracy                           0.88       819
   macro avg       0.85      0.85      0.85       819
weighted avg       0.88      0.88      0.88       819



## All signals

In [34]:
# Rebuild the feature array with every channel of each crop (no channel slicing)
# and re-apply the existing index split so rows stay aligned with y and metadata.
X = np.array([crop for crop, _, _ in dataset])
X_train, X_test = X[train_indices], X[test_indices]

In [36]:
# Sanity-check the shapes after switching to all signal channels.
# The "{}" placeholder is now actually filled in via str.format (previously it
# was printed literally because the shape was passed as a second print
# argument), and the "metdata" typo in the labels is fixed.
for split_name, split_array in [
    ("X train", X_train),
    ("y train", y_train),
    ("X test", X_test),
    ("y test", y_test),
    ("metadata train", metadata_train),
    ("metadata test", metadata_test),
]:
    print("{} shape: {}".format(split_name, split_array.shape))

X train shape: {} (3276, 256, 12)
y train shape: {} (3276, 6)
X test shape: {} (819, 256, 12)
y test shape: {} (819, 6)
metdata train shape: {} (3276, 4, 1)
metdata test shape: {} (819, 4, 1)


In [20]:
# Same CNN as the baseline, but taking all 12 channels of each 256-sample window.
clf = Sequential([
    Conv1D(16, kernel_size=5, activation="relu", input_shape=(256, 12)),
    Conv1D(32, kernel_size=5, activation="relu"),
    Flatten(),
    Dense(6, activation="softmax", kernel_regularizer=keras.regularizers.l2(0.01)),
])

In [21]:
# Categorical cross-entropy matches the one-hot targets; accuracy is tracked per epoch.
clf.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [22]:
# Train for 30 epochs on the all-channel windows; the test split is passed as
# validation data.
clf.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=30,
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x143f97350>

In [23]:
# Training-split report (argmax maps one-hot targets and softmax outputs back
# to class ids).
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict(X_train).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      0.99      0.99       284
         jog       1.00      1.00      1.00       307
         sit       0.87      0.97      0.91       794
         std       0.96      0.84      0.89       726
         ups       0.99      1.00      0.99       343
         wlk       1.00      0.99      0.99       822

    accuracy                           0.95      3276
   macro avg       0.97      0.96      0.96      3276
weighted avg       0.96      0.95      0.95      3276



In [24]:
# Test-split report (argmax maps one-hot targets and softmax outputs back to
# class ids).
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict(X_test).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.81      0.77      0.79        75
         jog       0.96      0.95      0.95        77
         sit       0.89      0.94      0.92       210
         std       0.91      0.86      0.89       174
         ups       0.76      0.69      0.72        93
         wlk       0.86      0.91      0.88       190

    accuracy                           0.87       819
   macro avg       0.87      0.85      0.86       819
weighted avg       0.87      0.87      0.87       819



## All signals + metadata

In [25]:
# Convolutional feature extractor for the signal windows.
# The input shape is taken from X_train itself: in this section X_train holds
# all channels of each window, so the previously hard-coded (256, 3) no longer
# matched the data and fitting would fail with a shape mismatch.
conv = Sequential()
conv.add(Conv1D(16, kernel_size=5, activation="relu", input_shape=X_train.shape[1:]))
conv.add(Conv1D(32, kernel_size=5, activation="relu"))
# Named so that the flattened feature vector can be looked up below.
conv.add(Flatten(name="coefs"))

# Second input branch: the (4, 1) metadata tensor, flattened to a vector.
metadata_input_tensor = Input(shape=(4, 1))
metadata_input = Flatten(name="flatten")(metadata_input_tensor)

# Join the convolutional features with the metadata vector along the feature
# axis and classify into the 6 activities.
last_layer = conv.get_layer("coefs").output
x = concatenate([last_layer, metadata_input], axis=1)
out = Dense(6, activation="softmax")(x)

# Two-input model: [signal window, metadata] -> class probabilities.
clf = Model([conv.input, metadata_input_tensor], out)

In [26]:
# Same optimiser/loss/metric configuration as the other models.
clf.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [27]:
# Train the two-input model: each example is a [signal window, metadata] pair.
# The test split is passed as validation data.
clf.fit(
    [X_train, metadata_train],
    y_train,
    validation_data=([X_test, metadata_test], y_test),
    epochs=30,
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x122d27d90>

In [28]:
# Training-split report for the signal + metadata model (argmax maps one-hot
# targets and softmax outputs back to class ids).
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict([X_train, metadata_train]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       1.00      1.00      1.00       284
         jog       1.00      1.00      1.00       307
         sit       0.97      0.99      0.98       794
         std       0.99      0.96      0.98       726
         ups       1.00      1.00      1.00       343
         wlk       1.00      1.00      1.00       822

    accuracy                           0.99      3276
   macro avg       0.99      0.99      0.99      3276
weighted avg       0.99      0.99      0.99      3276



In [29]:
# Test-split report for the signal + metadata model (argmax maps one-hot
# targets and softmax outputs back to class ids).
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict([X_test, metadata_test]).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.75      0.68      0.71        75
         jog       0.91      0.96      0.94        77
         sit       0.98      0.98      0.98       210
         std       0.97      0.97      0.97       174
         ups       0.68      0.66      0.67        93
         wlk       0.83      0.85      0.84       190

    accuracy                           0.88       819
   macro avg       0.85      0.85      0.85       819
weighted avg       0.88      0.88      0.88       819



## LSTM

In [39]:
# Go back to the three-channel view of each crop (last three channels) for the
# LSTM experiment, then re-apply the existing index split to every array.
X = np.array([crop[:, -3:] for crop, _, _ in dataset])

X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]
metadata_train, metadata_test = metadata[train_indices], metadata[test_indices]

In [40]:
from keras.layers import LSTM

In [44]:
# Recurrent model: a single 128-unit LSTM over the (256, 3) windows, followed
# by an L2-regularised 6-way softmax classifier.
clf = Sequential([
    LSTM(128, input_shape=(256, 3)),
    Dense(6, activation="softmax", kernel_regularizer=keras.regularizers.l2(0.01)),
])

In [45]:
# Categorical cross-entropy matches the one-hot targets; accuracy is tracked per epoch.
clf.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [46]:
# Train the LSTM for 30 epochs; the test split is passed as validation data.
clf.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=30,
)

Train on 3276 samples, validate on 819 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x1253aeb50>

In [47]:
# Training-split report for the LSTM (argmax maps one-hot targets and softmax
# outputs back to class ids).
print(classification_report(
    y_train.argmax(axis=1),
    clf.predict(X_train).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.00      0.00      0.00       284
         jog       1.00      0.28      0.44       307
         sit       0.35      1.00      0.52       794
         std       0.00      0.00      0.00       726
         ups       0.00      0.00      0.00       343
         wlk       0.57      0.65      0.61       822

    accuracy                           0.43      3276
   macro avg       0.32      0.32      0.26      3276
weighted avg       0.32      0.43      0.32      3276



  _warn_prf(average, modifier, msg_start, len(result))


In [48]:
# Test-split report for the LSTM (argmax maps one-hot targets and softmax
# outputs back to class ids).
print(classification_report(
    y_test.argmax(axis=1),
    clf.predict(X_test).argmax(axis=1),
    target_names=label_encoder.classes_,
))

              precision    recall  f1-score   support

         dws       0.00      0.00      0.00        75
         jog       1.00      0.19      0.33        77
         sit       0.37      1.00      0.54       210
         std       0.00      0.00      0.00       174
         ups       0.00      0.00      0.00        93
         wlk       0.53      0.68      0.60       190

    accuracy                           0.43       819
   macro avg       0.32      0.31      0.24       819
weighted avg       0.31      0.43      0.31       819

