In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

Using TensorFlow backend.


In [2]:
# Activity codes (the three-letter folder-name prefixes) mapped to 1-based class ids.
Activety_Types = dict(dws=1, jog=2, sit=3, std=4, ups=5, wlk=6)
# Activity codes in insertion order, i.e. ordered by class id.
listDict = [code for code in Activety_Types]

In [3]:
# Data folders: every entry matching "*_*" under the data directory,
# skipping anything whose path contains "csv".
# glob returns matches in arbitrary order, so sort them: the sample order
# feeds the seeded train/test split and must be stable between runs.
folders = glob('input/A_DeviceMotion_data/*_*')
folders = sorted(s for s in folders if "csv" not in s)

X = list()
y = list()
n_timesteps = 100  # rows per sample window

# Load all data:
for folder in folders:
    recordings = sorted(glob(os.path.join(folder, '*')))
    # The activity code is the first three characters of the folder's own
    # name; take it from the basename rather than a fixed character offset
    # so the lookup keeps working if the data directory is moved or renamed.
    folder_name = os.path.basename(os.path.normpath(folder))
    activity = Activety_Types[folder_name[:3]]

    for path in recordings:
        data = pd.read_csv(path, index_col=0).to_numpy()

        # Split the recording into consecutive windows of n_timesteps rows
        split_positions = list(range(n_timesteps, len(data), n_timesteps))
        windows = np.vsplit(data, split_positions)

        # Only the last window can be short; drop it so all samples are equal size
        if len(windows[-1]) < n_timesteps:
            windows = windows[:-1]

        X.extend(windows)
        y.extend(np.full(len(windows), activity))

if not X:
    # Fail here with a clear message instead of much later inside the split/model code
    raise FileNotFoundError(
        "No samples were loaded from 'input/A_DeviceMotion_data/*_*'; "
        "check that the dataset is present relative to the working directory."
    )

# X: [samples, time steps, features]; y: one 1-based class id per sample
X = np.array(X)
y = np.array(y)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Remove every 'ups' sample from the training set only; the test set keeps
# them. A boolean mask filters X and y together in one pass, replacing the
# per-sample list membership test, which was quadratic in the training size.
keep_mask = y_train != Activety_Types['ups']
X_train = X_train[keep_mask]
y_train = y_train[keep_mask]

# zero-offset class values
y_train = y_train - 1
y_test = y_test - 1
# one hot encode y
# Pin the width to the full label set: without num_classes the width is
# inferred from the largest label present, so train and test could end up
# with different widths if the removed class were the highest id.
n_classes = len(Activety_Types)
y_train = to_categorical(y_train, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)

# Shape of X: [samples, time steps, features]
n_features, n_outputs = X_train.shape[2], y_train.shape[1]

In [5]:
# Report array shapes: the full dataset first, then the train/test partitions.
for shapes in (
    (X.shape, y.shape),
    (X_train.shape, X_test.shape, y_train.shape, y_test.shape),
):
    print(*shapes)

(13952, 100, 12) (13952,)
(10464, 100, 12) (3488, 100, 12) (10464, 6) (3488, 6)


In [6]:
# Training configuration
verbose, epochs, batch_size = 0, 15, 64

# Single LSTM layer over each window, then dropout, a dense ReLU layer and a
# softmax output with one unit per class.
model = Sequential()
model.add(LSTM(100, input_shape=(n_timesteps,n_features)))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

# Per-class probabilities for every test window.
# `use_multiprocessing` is dropped: Keras documents it as applying only to
# generator / Sequence inputs, so it did nothing for an in-memory array, and
# newer Keras releases no longer accept the argument.
y_pred = model.predict(X_test, batch_size=batch_size)

In [7]:
# Collapse probability rows and one-hot rows to integer class indices.
y_pred_max = tf.math.argmax(y_pred, axis=1)
y_test_max = tf.math.argmax(y_test, axis=1)

# Rows are true labels, columns are predicted labels.
# NOTE(review): this assignment rebinds `confusion_matrix`, which the import
# cell brings in from sklearn.metrics; after this cell runs the name refers
# to this tensor, not the scikit-learn function.
confusion_matrix = tf.math.confusion_matrix(
    labels=y_test_max,
    predictions=y_pred_max,
    num_classes=n_outputs,
)

print(confusion_matrix)

tf.Tensor(
[[289   2   0   1  20  11]
 [  2 320   0   0   0   3]
 [  0   0 839   0   0   0]
 [  1   0   0 783   1   1]
 [ 10   0   0   2 381  10]
 [  2   0   0   0  10 800]], shape=(6, 6), dtype=int32)


In [69]:
# Default confidence cutoff used by to_class when none is passed explicitly.
threshold = 0.98

def to_class(pred, cutoff=None):
    """Map one vector of class scores to a class index, or to an extra "rejected" index.

    Parameters
    ----------
    pred : 1-D sequence of float
        Scores for a single sample, one entry per class.
    cutoff : float, optional
        The winning score must be strictly greater than this value to be
        accepted. When omitted, the notebook-level ``threshold`` is used
        (looked up at call time, so re-assigning ``threshold`` takes effect
        without redefining this function).

    Returns
    -------
    int
        The index of the largest score if it exceeds the cutoff; otherwise
        ``len(pred)``, i.e. one past the last real class index.
    """
    if cutoff is None:
        cutoff = threshold

    if np.max(pred) > cutoff:
        return np.argmax(pred)

    # Not confident enough: report the extra index that follows the real classes
    return len(pred)

# Apply the cutoff rule to each prediction row; rows that fail it get the
# extra index n_outputs, hence the additional class in the matrix below.
y_pred_cutoff = tf.convert_to_tensor([to_class(row) for row in y_pred])

confusion_matrix_cutoff = tf.math.confusion_matrix(
    labels=y_test_max,
    predictions=y_pred_cutoff,
    num_classes=n_outputs + 1,
)

print(confusion_matrix_cutoff)

tf.Tensor(
[[238   0   0   0   0  22  63]
 [  1 318   0   0   0   1   5]
 [  0   0 839   0   0   0   0]
 [  0   0   0 782   0   1   3]
 [ 21   6   0  14   0 143 219]
 [  0   0   0   0   0 779  33]
 [  0   0   0   0   0   0   0]], shape=(7, 7), dtype=int32)
