## How to Develop RNN Models for Human Activity Recognition Time Series Classification

Based on Jason Brownlee's MachineLearningMastery article:
https://machinelearningmastery.com/how-to-develop-rnn-models-for-human-activity-recognition-time-series-classification/

The UCI HAR Dataset can be found in our Google Drive: https://drive.google.com/open?id=1leUN60nh7FJGdtMrUpjSBvUS34OkUSZy

In [41]:
import numpy as np
import pandas as pd
import warnings
from keras.utils import to_categorical
warnings.filterwarnings("ignore")

Using TensorFlow backend.


Put the dataset in a data folder that is listed in your .gitignore, so that it is not committed to the repository.
I used drone_steering/Tutorial_DataSets/

In [28]:
# Relative path to the folder holding the downloaded dataset; the cells below
# append 'HARDataset/UCI HAR Dataset/...' to it, so the trailing slash matters.
data_dir = '../../Tutorial_DataSets/'

#### Labels
1) Walking
<br>2) Walking Upstairs
<br>3) Walking Downstairs
<br>4) Sitting
<br>5) Standing
<br>6) Laying

## I. Investigate Basic File Structure

In [34]:
import os
# os.listdir returns entries in arbitrary order; sort them so the listing is
# the same on every run and platform
sorted(os.listdir(data_dir + 'HARDataset/UCI HAR Dataset/train/Inertial Signals/'))

['body_acc_x_train.txt',
 'body_acc_y_train.txt',
 'body_acc_z_train.txt',
 'body_gyro_x_train.txt',
 'body_gyro_y_train.txt',
 'body_gyro_z_train.txt',
 'total_acc_x_train.txt',
 'total_acc_y_train.txt',
 'total_acc_z_train.txt']

In [61]:
# inspect the data files (there are 9 files in total, one for each feature; only 2 of them are inspected here)
# sep=r'\s+' is the supported replacement for the deprecated delim_whitespace=True
total_acc_x_train = pd.read_csv(
    data_dir + 'HARDataset/UCI HAR Dataset/train/Inertial Signals/total_acc_x_train.txt',
    header=None,
    sep=r'\s+'
)
total_acc_y_train = pd.read_csv(
    data_dir + 'HARDataset/UCI HAR Dataset/train/Inertial Signals/total_acc_y_train.txt',
    header=None,
    sep=r'\s+'
)
print(total_acc_x_train.shape)
print(total_acc_y_train.shape)

(7352, 128)
(7352, 128)


In [62]:
# depth-stack the two per-axis tables: the result is (samples, timesteps, features)
stackem = np.dstack((total_acc_x_train, total_acc_y_train))
print(stackem.shape)

(7352, 128, 2)


## II. Load Data

In [26]:
# load a single file as a numpy array
def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a numpy array.

    Args:
        filepath: path of the file to read.

    Returns:
        The parsed table as a 2D numpy array (one row per line of the file).
    """
    # sep=r'\s+' is the supported replacement for delim_whitespace=True,
    # which is deprecated in recent pandas releases
    dataframe = pd.read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values

In [39]:
# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
    """Load every named file and stack the results along a third axis.

    Args:
        filenames: file names to load, in the order they should appear
            along the last axis.
        prefix: string prepended to each file name to form its path.

    Returns:
        A 3D numpy array in which each file occupies one slice of the
        last axis.
    """
    per_file = [load_file(prefix + fname) for fname in filenames]

    # dstack puts each file's 2D table on its own index of the 3rd dimension
    return np.dstack(per_file)

In [12]:
# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    """Load the 9 inertial-signal files and the label file of one group.

    Args:
        group: group name used both as sub-folder and as file-name suffix
            (e.g. 'train' or 'test').
        prefix: path of the dataset root, ending with a path separator.

    Returns:
        A tuple (X, y): the stacked signal array from load_group and the
        label array read from 'y_<group>.txt'.
    """
    signals_dir = prefix + group + '/Inertial Signals/'

    # feature order: total acceleration, body acceleration, body gyroscope,
    # each as x, y, z
    filenames = []
    for signal in ('total_acc', 'body_acc', 'body_gyro'):
        for axis in ('x', 'y', 'z'):
            filenames.append(signal + '_' + axis + '_' + group + '.txt')

    # input data: all 9 files as a single array
    X = load_group(filenames, signals_dir)

    # class output
    y = load_file(prefix + group + '/y_' + group + '.txt')

    return X, y

In [117]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=data_dir):
    """Load the train and test groups and one-hot encode their labels.

    Prints the array shapes after loading each group and again after
    encoding.

    Args:
        prefix: folder containing 'HARDataset/UCI HAR Dataset/'; defaults
            to the value data_dir had when this function was defined.

    Returns:
        A tuple (trainX, trainy, testX, testy) where the y arrays are
        one-hot encoded.
    """
    dataset_root = prefix + 'HARDataset/UCI HAR Dataset/'

    # train split
    trainX, train_labels = load_dataset_group('train', dataset_root)
    print(trainX.shape, train_labels.shape)

    # test split
    testX, test_labels = load_dataset_group('test', dataset_root)
    print(testX.shape, test_labels.shape)

    # shift the class values down by one (zero-offset), then one hot encode
    trainy = to_categorical(train_labels - 1)
    testy = to_categorical(test_labels - 1)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)

    return trainX, trainy, testX, testy

In [42]:
# each of the 9 features was split into fixed windows of 2.56 seconds (128 data points), with 50% overlap
# load_dataset prints the array shapes and returns one-hot encoded labels
trainX, trainy, testX, testy = load_dataset(data_dir)

(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 128, 9) (7352, 6) (2947, 128, 9) (2947, 6)


In [59]:
# overlap: compare the second half of the 1st sample with the first half of
# the 2nd sample, for the 1st feature
np.equal(trainX[0, 64:, 0], trainX[1, :64, 0])

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

## III. Fitting & Evaluating

In [67]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM

In [111]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, *, epochs=15, batch_size=64, verbose=1):
    """Build an LSTM classifier, train it, and return its test accuracy.

    The network is LSTM(100) -> Dropout(0.5) -> Dense(100, relu) ->
    Dense(n_outputs, softmax), compiled with categorical cross-entropy and
    the adam optimizer.

    Args:
        trainX: training inputs; axis 1 is read as the number of timesteps
            and axis 2 as the number of features.
        trainy: one-hot training labels; axis 1 is read as the number of
            classes.
        testX: test inputs, evaluated after training.
        testy: one-hot test labels.
        epochs: number of training epochs.
        batch_size: batch size used for both fitting and evaluation.
        verbose: verbosity passed to model.fit only; evaluation always runs
            silently.

    Returns:
        The accuracy reported by model.evaluate on the test data.
    """
    n_timesteps = trainX.shape[1]  # here 128
    n_features = trainX.shape[2]  # here 9
    n_outputs = trainy.shape[1]  # here 6 (number of labels)

    model = Sequential()
    model.add(LSTM(100, input_shape=(n_timesteps, n_features)))
    model.add(Dropout(0.5))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    # fit network
    model.fit(
        trainX,
        trainy,
        epochs=epochs,
        batch_size=batch_size,
        verbose=verbose
    )

    # evaluate model; the first returned value is the loss, which is not needed
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)

    return accuracy

In [110]:
# training settings kept as notebook-level names
verbose = 0
epochs = 15
batch_size = 64

# layer sizes are read off the loaded arrays
n_timesteps, n_features = trainX.shape[1], trainX.shape[2]  # here 128 and 9
n_outputs = trainy.shape[1]  # here 6 (number of labels)

# same layer stack as evaluate_model, passed as a list instead of add() calls
model = Sequential([
    LSTM(100, input_shape=(n_timesteps, n_features)),
    Dropout(0.5),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (None, 100)               44000     
_________________________________________________________________
dropout_4 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 6)                 606       
Total params: 54,706
Trainable params: 54,706
Non-trainable params: 0
_________________________________________________________________


In [112]:
# train the model built above; verbose is given literally as 1 here
model.fit(x=trainX, y=trainy, batch_size=batch_size, epochs=epochs, verbose=1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1f662365ef0>

## IV. Experiment

In [119]:
# summarize scores
def summarize_results(scores):
    """Print the raw scores, then their mean and standard deviation.

    Args:
        scores: sequence of numeric scores (the caller passes accuracies
            already scaled to percent).
    """
    print(scores)
    mean_score = np.mean(scores)
    score_std = np.std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (mean_score, score_std))

In [115]:
# run an experiment
def run_experiment(repeats=10):
    """Train and evaluate the model several times and summarize the scores.

    Loads the dataset once, calls evaluate_model `repeats` times, prints
    each accuracy as a percentage, then passes all of them to
    summarize_results.

    Args:
        repeats: number of train/evaluate rounds to run.
    """
    # the data is loaded a single time and reused by every round
    trainX, trainy, testX, testy = load_dataset()

    scores = []
    for run_number in range(1, repeats + 1):
        # evaluate_model returns a fraction; scale it to percent
        accuracy_pct = evaluate_model(trainX, trainy, testX, testy) * 100.0
        print('>#%d: %.3f' % (run_number, accuracy_pct))
        scores.append(accuracy_pct)

    summarize_results(scores)

In [120]:
# !!! THIS TAKES A WHILE !!!
# run the experiment with two train/evaluate rounds
run_experiment(repeats=2)

(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 128, 9) (7352, 6) (2947, 128, 9) (2947, 6)
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
>#1: 89.922
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
>#2: 90.567
[89.92195453003053, 90.56667797760434]
Accuracy: 90.244% (+/-0.322)
