In [10]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
# Input data is read from the local "input/" (training) and "test/" (evaluation) directories,
# relative to the notebook's working directory.

import os

In [2]:
# Activity types: three-letter activity code -> 1-based integer class label.
Activety_Types = dict(zip(('dws', 'jog', 'sit', 'std', 'ups', 'wlk'), range(1, 7)))
# The activity codes on their own, in label order.
listDict = [*Activety_Types]

In [3]:
# Load the training data.
# Every file inside each folder matching input/A_DeviceMotion_data/*_* is read as a
# CSV, cut into non-overlapping windows of n_timesteps rows, and each window is
# labelled with the activity code taken from the first three characters of the
# folder name. Trailing rows that do not fill a whole window are discarded.
n_timesteps = 100

# Sorted so that sample order does not depend on the filesystem's listing order.
folders = sorted(glob('input/A_DeviceMotion_data/*_*'))
folders = [s for s in folders if "csv" not in s]

X = list()
y = list()

for folder in folders:
    # Read the activity code from the folder's own name rather than from a fixed
    # character offset into the path, so moving the data root cannot silently
    # select the wrong characters.
    folder_name = os.path.basename(os.path.normpath(folder))
    activity = Activety_Types[folder_name[:3]]

    for csv_path in sorted(glob(folder + '/*')):
        recording = pd.read_csv(csv_path, index_col=0).to_numpy()

        # Keep only complete windows; a recording shorter than n_timesteps yields none.
        windows = [
            recording[start:start + n_timesteps]
            for start in range(0, len(recording) - n_timesteps + 1, n_timesteps)
        ]

        X.extend(windows)
        y.extend([activity] * len(windows))

# Fail here with a clear message instead of later with an IndexError on X_train.shape[2].
if not X:
    raise FileNotFoundError(
        "No training windows were loaded from 'input/A_DeviceMotion_data/*_*'; "
        "check that the data directory exists relative to the working directory."
    )

# X_train: [samples, time steps, features]; y_train: one 1-based label per sample.
X_train = np.array(X)
y_train = np.array(y)

In [4]:
# Load the test data.
# Every file inside each entry of test/ is read as a CSV, cut into non-overlapping
# windows of n_timesteps rows, and each window is labelled with the activity code
# taken from the first three characters of the folder name. Trailing rows that do
# not fill a whole window are discarded.
n_timesteps = 100

# Sorted so that sample order does not depend on the filesystem's listing order.
folders = sorted(glob('test/*'))

X = list()
y = list()

for folder in folders:
    # Read the activity code from the folder's own name rather than from a fixed
    # character offset into the path, so moving the data root cannot silently
    # select the wrong characters.
    folder_name = os.path.basename(os.path.normpath(folder))
    activity = Activety_Types[folder_name[:3]]

    for csv_path in sorted(glob(folder + '/*')):
        recording = pd.read_csv(csv_path, index_col=0).to_numpy()

        # Keep only complete windows; a recording shorter than n_timesteps yields none.
        windows = [
            recording[start:start + n_timesteps]
            for start in range(0, len(recording) - n_timesteps + 1, n_timesteps)
        ]

        X.extend(windows)
        y.extend([activity] * len(windows))

# Fail here with a clear message instead of later when the empty array is used.
if not X:
    raise FileNotFoundError(
        "No test windows were loaded from 'test/*'; "
        "check that the data directory exists relative to the working directory."
    )

# X_test: [samples, time steps, features]; y_test: one 1-based label per sample.
X_test = np.array(X)
y_test = np.array(y)

In [5]:
# One-hot encode the labels.
# The loaders produce 1-based labels, so subtract 1 to get zero-based class indices.
# num_classes is pinned to the size of the label dictionary: without it the width
# is inferred from the largest label present, so a split that happens to lack the
# highest-numbered activity would get fewer columns than the other split.
# NOTE: this cell rebinds y_train / y_test; re-running it without first re-running
# the loading cells would try to encode the already-encoded arrays.
y_train = to_categorical(y_train - 1, num_classes=len(Activety_Types))
y_test = to_categorical(y_test - 1, num_classes=len(Activety_Types))

# Shape of X: [samples, time steps, features]
n_features, n_outputs = X_train.shape[2], y_train.shape[1]

In [7]:
# Sanity check: show the shapes of the feature and label arrays for both splits.
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)))

(13952, 100, 12) (121, 100, 12) (13952, 6) (121, 6)


In [8]:
# Build, train and apply an LSTM classifier on the windowed data.
# Architecture: LSTM(100) -> Dropout(0.5) -> Dense(100, relu) -> Dense(n_outputs, softmax).
# NOTE(review): no random seed is set in this cell, so the trained weights and the
# resulting score can differ between runs.
verbose, epochs, batch_size = 0, 15, 64

model = Sequential([
    LSTM(100, input_shape=(n_timesteps, n_features)),
    Dropout(0.5),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

# X_test is an in-memory array, so the use_multiprocessing flag (which only applies
# to generator / Sequence inputs, and is not accepted by predict() in Keras 3) is
# dropped. batch_size is passed by keyword so it cannot be mistaken for another argument.
y_pred = model.predict(X_test, batch_size=batch_size)


In [17]:
# Evaluate the predictions on the test set.
# Collapse the softmax outputs and the one-hot labels to class indices. NumPy is
# used so that scikit-learn's accuracy_score receives plain arrays, not TF tensors.
y_pred_max = np.argmax(y_pred, axis=1)
y_test_max = np.argmax(y_test, axis=1)

# Rows are true classes, columns are predicted classes.
# Stored as conf_matrix so the sklearn `confusion_matrix` function imported at the
# top of the notebook is not overwritten by its own result.
conf_matrix = tf.math.confusion_matrix(y_test_max, y_pred_max, num_classes=n_outputs)
score = accuracy_score(y_test_max, y_pred_max)

print(conf_matrix)
print(score)

tf.Tensor(
[[14  0  0  0 10  2]
 [ 0  0  0  0  0  0]
 [ 3  0 18  0  0  0]
 [ 0  0  0  7  0  0]
 [ 0  0  0  0 15  8]
 [ 7  0  1  0 25 11]], shape=(6, 6), dtype=int32)
0.5371900826446281
