# HAR prediction with Raw data

> Raw data signals<br>
> Signals are from Accelerometer and Gyroscope<br>
> The signals are in x,y,z directions<br>
> Sensor signals are filtered to have only body acceleration<br>
> excluding the acceleration due to gravity <br>
> Triaxial acceleration from the accelerometer is total acceleration

In [4]:
import pandas as pd
import numpy as np

In [5]:
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

### Data


In [6]:
# Data directory
DATADIR = 'UCI HAR Dataset'

In [8]:
# SINGALS contains all 9 readings
SIGNALS = ["body_acc_x","body_acc_y", "body_acc_z", "body_gyro_x", "body_gyro_y", "body_gyro_z", "total_acc_x", "total_acc_y", "total_acc_z"]

In [41]:
def _read_csv(filename):
    print(filename)
    return pd.read_csv(filename, delim_whitespace=True, header=None)

def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = 'UCI HAR Dataset/'+subset+'/Inertial Signals/' + signal +'_'+subset+'.txt'
        signals_data.append(_read_csv(filename).as_matrix()) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))


def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = 'UCI HAR Dataset/' + subset + '/y_'+subset+'.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).as_matrix()


def load_data():
     
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [42]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

UCI HAR Dataset/train/Inertial Signals/body_acc_x_train.txt


  # Remove the CWD from sys.path while we load stuff.


UCI HAR Dataset/train/Inertial Signals/body_acc_y_train.txt
UCI HAR Dataset/train/Inertial Signals/body_acc_z_train.txt
UCI HAR Dataset/train/Inertial Signals/body_gyro_x_train.txt
UCI HAR Dataset/train/Inertial Signals/body_gyro_y_train.txt
UCI HAR Dataset/train/Inertial Signals/body_gyro_z_train.txt
UCI HAR Dataset/train/Inertial Signals/total_acc_x_train.txt
UCI HAR Dataset/train/Inertial Signals/total_acc_y_train.txt
UCI HAR Dataset/train/Inertial Signals/total_acc_z_train.txt
UCI HAR Dataset/test/Inertial Signals/body_acc_x_test.txt
UCI HAR Dataset/test/Inertial Signals/body_acc_y_test.txt
UCI HAR Dataset/test/Inertial Signals/body_acc_z_test.txt
UCI HAR Dataset/test/Inertial Signals/body_gyro_x_test.txt
UCI HAR Dataset/test/Inertial Signals/body_gyro_y_test.txt
UCI HAR Dataset/test/Inertial Signals/body_gyro_z_test.txt
UCI HAR Dataset/test/Inertial Signals/total_acc_x_test.txt
UCI HAR Dataset/test/Inertial Signals/total_acc_y_test.txt
UCI HAR Dataset/test/Inertial Signals/total_a



In [43]:
X_train.shape

(7352, 128, 9)

In [28]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout

Using TensorFlow backend.


In [29]:
# Initializing parameters
epochs = 30
batch_size = 16
n_hidden = 32

In [30]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [39]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [32]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                5376      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [34]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fc165e99be0>

In [35]:
# Confusion Matrix

def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 536        1         0        0                   0   
SITTING                  0      374       114        1                   0   
STANDING                 0       95       435        2                   0   
WALKING                  0        0         0      471                   5   
WALKING_DOWNSTAIRS       0        0         0        3                 414   
WALKING_UPSTAIRS         0        1         0       18                  20   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            2  
STANDING                           0  
WALKING                           20  
WALKING_DOWNSTAIRS                 3  
WALKING_UPSTAIRS                 432  


In [36]:
score = model.evaluate(X_test, Y_test)



In [37]:
score

[0.29589248642329097, 0.9032914828639295]

- With a simple 2 layer architecture we got 90.09% accuracy and a loss of 0.30
- We can further imporve the performace with Hyperparameter tuning