In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import preprocessing
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, LSTM, ConvLSTM2D
from keras.layers.wrappers import Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

Using TensorFlow backend.


In [2]:
# Activety types dict:
Activety_Types = {'dws':1,'jog':2,'sit':3,'std':4,'ups':5,'wlk':6}        
listDict = list(Activety_Types.keys())

In [3]:
# csv = 'input/A_DeviceMotion_data/dws_1/sub_1.csv'
        
# df = pd.read_csv(csv, index_col=0, parse_dates=True, date_parser=lambda epoch: pd.to_datetime(int(epoch) * 20, unit='ms'))
# df.plot(subplots=True)
# plt.show()

# desired_index = pd.date_range('1970-01-01', periods=len(df.index), freq='20ms')

# df2 = pd.read_csv(csv, index_col=0, parse_dates=True, date_parser=lambda epoch: pd.to_datetime(int(epoch) * 22, unit='ms'))

# df2 = df2.asfreq('20ms').interpolate().reindex_like(df)
# df2.plot(subplots=True)
# plt.show()
        

In [4]:
n_timesteps = 120
n_shift = 20
def add_batch(values):
    for start in range(0, len(values) - n_timesteps, n_shift):
        X.append(values[start : start + n_timesteps])
        y.append(activity)

In [5]:
# Data folders:
folders = glob('input/A_DeviceMotion_data/*_*')
folders = [s for s in folders if "csv" not in s]

X = list()
y = list()

# Load All data:
for j  in folders:
    csv = glob(j + '/*' )
    # Get activity type from folder name
    activity = Activety_Types[j[26:29]]

    for i in csv:
        df = pd.read_csv(i, index_col=0, parse_dates=True, date_parser=lambda epoch: pd.to_datetime(int(epoch) * 20, unit='ms'))

        add_batch(df.values)

        df_higher_freq = pd.read_csv(i, index_col=0, parse_dates=True, date_parser=lambda epoch: pd.to_datetime(int(epoch) * 18, unit='ms'))
        df_higher_freq = df_higher_freq.asfreq('20ms').interpolate().reindex_like(df).dropna()
        
        add_batch(df_higher_freq.values)

        df_lower_freq = pd.read_csv(i, index_col=0, parse_dates=True, date_parser=lambda epoch: pd.to_datetime(int(epoch) * 22, unit='ms'))
        df_lower_freq = df_lower_freq.asfreq('20ms').interpolate().reindex_like(df)
        
        add_batch(df_lower_freq.values)

X = np.array(X)
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

X_train = np.array(X_train)
y_train = np.array(y_train)

In [6]:
folders = glob('test/*')

X = list()
y = list()

# Load test data:
for j  in folders:
    csv = glob(j + '/*' )
    # Get activity type from folder name
    activity = Activety_Types[j[5:8]]

    for i in csv:
        df = pd.read_csv(i, index_col=0)
        add_batch(df.values)

X_test_new = np.array(X)
y_test_new = np.array(y)

In [7]:
scalers = {}
for i in range(X_train.shape[-1]):
    scalers[i] = preprocessing.StandardScaler()
    X_train[:, :, i] = scalers[i].fit_transform(X_train[:, :, i]) 

for i in range(X_test.shape[-1]):
    X_test[:, :, i] = scalers[i].transform(X_test[:, :, i])

for i in range(X_test_new.shape[-1]):
    X_test_new[:, :, i] = scalers[i].transform(X_test_new[:, :, i])

In [8]:
# zero-offset class values
y_train = y_train - 1
y_test = y_test - 1
y_test_new = y_test_new - 1
# one hot encode y
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_test_new = to_categorical(y_test_new)

# Shape of X: [samples, time steps, features]
n_features, n_outputs = X_train.shape[2], y_train.shape[1]

In [9]:
# reshape into subsequences (samples, time steps, rows, cols, channels)
n_steps, n_length = 4, 30
X_train = X_train.reshape((X_train.shape[0], n_steps, 1, n_length, n_features))
X_test = X_test.reshape((X_test.shape[0], n_steps, 1, n_length, n_features))
X_test_new = X_test_new.reshape((X_test_new.shape[0], n_steps, 1, n_length, n_features))

In [10]:
verbose, epochs, batch_size = 0, 15, 64
model = Sequential()

model.add(Bidirectional(ConvLSTM2D(16, 
                                       kernel_size = (1, 3),
                                       activation='relu',
                                       input_shape=(n_steps, 1, n_length, n_features),
                                       return_sequences = True)))
model.add(Bidirectional(ConvLSTM2D(32, 
                                       kernel_size = (3, 3),
                                       padding = 'same',
                                       return_sequences = True)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(100, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)


<keras.callbacks.callbacks.History at 0x216c590d1c8>

In [11]:
y_pred = model.predict(X_test, batch_size, use_multiprocessing=True)
y_pred_max = tf.argmax(y_pred, axis=1)
y_test_max = tf.argmax(y_test, axis=1)

confusion_matrix = tf.math.confusion_matrix(y_test_max, y_pred_max, n_outputs)
score = accuracy_score(y_test_max, y_pred_max)

print(confusion_matrix)
print(score)

tf.Tensor(
[[ 4379     7     0     0    41    16]
 [   24  4624     0     0    13     4]
 [    0     0 12081     0     0     0]
 [    3     0     0 10826     0     0]
 [   93     2     0     0  5232    44]
 [   22     0     0     3    26 12290]], shape=(6, 6), dtype=int32)
0.9940076412628193


In [12]:
y_pred_new = model.predict(X_test_new, batch_size, use_multiprocessing=True)
y_pred_new_max = tf.argmax(y_pred_new, axis=1)
y_test_new_max = tf.argmax(y_test_new, axis=1)

confusion_matrix = tf.math.confusion_matrix(y_test_new_max, y_pred_new_max, n_outputs)
score = accuracy_score(y_test_new_max, y_pred_new_max)

print(confusion_matrix)
print(score)

tf.Tensor(
[[89  0  0  0 23 16]
 [ 0  0  0  0  0  0]
 [11  3 80  0  6  0]
 [ 0  0  0 32  0  0]
 [ 6  0  0  0 98  9]
 [68  2 17  0 85 63]], shape=(6, 6), dtype=int32)
0.5953947368421053
