In [0]:
# Importing Libraries

In [0]:
import pandas as pd
import numpy as np
np.random.seed(42)

In [0]:
# Class labels for the six-way activity classification task,
# keyed by their zero-based class index.
ACTIVITIES = dict(enumerate([
    'WALKING',
    'WALKING_UPSTAIRS',
    'WALKING_DOWNSTAIRS',
    'SITTING',
    'STANDING',
    'LAYING',
]))

# Utility function to build the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    """Cross-tabulate true against predicted activity names.

    Both arguments are 2-D arrays with one row per sample (one-hot labels
    or per-class scores). For every row the column index of the maximum is
    looked up in ACTIVITIES to obtain the activity name.

    Returns the pandas crosstab whose rows are named 'True' and whose
    columns are named 'Pred'.
    """
    def _to_labels(scores):
        # argmax along axis 1 picks the winning class index of each row
        winners = np.argmax(scores, axis=1)
        return pd.Series([ACTIVITIES[idx] for idx in winners])

    true_labels = _to_labels(Y_true)
    pred_labels = _to_labels(Y_pred)

    return pd.crosstab(true_labels, pred_labels, rownames=['True'], colnames=['Pred'])

In [3]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths
# used by the loading cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


### Data

In [0]:
# Data directory
DATADIR = '/content/drive/My Drive/HAR/UCI_HAR_Dataset'

In [0]:
# Names of the raw inertial signals, one per input channel.
# Readings come from the accelerometer and gyroscope along the x, y, z axes:
#   body_acc  - body acceleration (acceleration due to gravity filtered out)
#   body_gyro - gyroscope signal
#   total_acc - total triaxial acceleration from the accelerometer
SIGNALS = [
    f"{sensor}_{axis}"
    for sensor in ("body_acc", "body_gyro", "total_acc")
    for axis in ("x", "y", "z")
]

In [0]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the raw inertial signals
def load_signals(subset):
    """Load every signal listed in SIGNALS for one dataset split.

    Parameters
    ----------
    subset : str
        Name of the split, used both as the sub-directory and as the file
        suffix (the notebook calls this with 'train' and 'test').

    Returns
    -------
    numpy.ndarray
        Array indexed as (sample, timestep, signal).
    """
    signals_data = []

    for signal in SIGNALS:
        # Build the path from DATADIR so the dataset location is configured
        # in exactly one place.
        filename = f'{DATADIR}/{subset}/Inertial Signals/{signal}_{subset}.txt'
        # DataFrame.as_matrix() was removed from pandas; to_numpy() is the
        # supported way to obtain the underlying array.
        signals_data.append(_read_csv(filename).to_numpy())

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [0]:

def load_y(subset):
    """
    Load the activity labels of one dataset split as a one-hot matrix.

    The objective that we are trying to predict is an integer, from 1 to 6,
    that represents a human activity. Every sample's label is returned as a
    one-hot encoded row produced by pandas.get_dummies
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)

    Parameters: subset - name of the split, used as sub-directory and file suffix
    Returns: 2-D numpy array of dtype uint8 with one row per sample and one
             column per distinct label value found in the file
    """
    # Build the path from DATADIR so the dataset location is configured
    # in exactly one place.
    filename = f'{DATADIR}/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    # dtype is pinned to uint8 because the get_dummies default differs between
    # pandas versions (bool in newer releases); DataFrame.as_matrix() was
    # removed from pandas, so to_numpy() is used to get the array.
    return pd.get_dummies(y, dtype=np.uint8).to_numpy()

In [0]:
def load_data():
    """
    Assemble the train and test splits from the per-signal and label files.
    Returns: X_train, X_test, y_train, y_test
    """
    # Signals first, then labels, each in train/test order.
    X_train = load_signals('train')
    X_test = load_signals('test')
    y_train = load_y('train')
    y_test = load_y('test')

    return X_train, X_test, y_train, y_test

In [9]:
# Importing tensorflow
# Seed NumPy's and TensorFlow's random number generators with 42.
# NOTE(review): tf.set_random_seed looks like the TensorFlow 1.x API —
# confirm against the installed TensorFlow version.
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [0]:
# Configuring a session
# Both the intra-op and inter-op parallelism settings are limited to one thread.
# NOTE(review): presumably done to reduce run-to-run nondeterminism — confirm.
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [11]:
# Import Keras
# Create a TensorFlow session on the default graph using session_conf
# and register it as the session of the Keras backend.
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

Using TensorFlow backend.


In [0]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM, BatchNormalization, Flatten
from keras.layers.core import Dense, Dropout

In [0]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [15]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


In [16]:
# X_train is indexed as [sample][timestep][signal], so the length of one
# sample is the number of timesteps and the length of one timestep is the
# number of input signals.
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
# Number of distinct one-hot rows found in the training labels
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [0]:
data_disp = {}

- Defining the Architecture of LSTM

One LSTM + Dropout(50%)

In [96]:
# Initializing the sequential model
model = Sequential()
# Single LSTM layer with 40 units reading (timesteps, input_dim) sequences
model.add(LSTM(40, input_shape=(timesteps, input_dim)))
# Adding a dropout layer with rate 0.5
model.add(Dropout(0.5))

# Adding a dense output layer with one unit per class and softmax activation
model.add(Dense(n_classes, activation='softmax'))
# Compiling the model: categorical cross-entropy loss, Adam optimizer, accuracy metric
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Model: "sequential_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_47 (LSTM)               (None, 40)                8000      
_________________________________________________________________
dropout_46 (Dropout)         (None, 40)                0         
_________________________________________________________________
dense_30 (Dense)             (None, 6)                 246       
Total params: 8,246
Trainable params: 8,246
Non-trainable params: 0
_________________________________________________________________


Batch Size = 8

In [83]:
# Training the model
# NOTE(review): validation_data is the test split, so the per-epoch
# validation metrics are computed on the same data that is later passed
# to model.evaluate.
model.fit(X_train,
          Y_train,
          batch_size=8,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc6bdb1240>

In [84]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,537,0,0,0,0,0
SITTING,5,359,124,1,0,2
STANDING,0,48,478,6,0,0
WALKING,0,0,1,467,16,12
WALKING_DOWNSTAIRS,0,0,6,4,406,4
WALKING_UPSTAIRS,0,1,5,7,1,457


In [85]:
score = model.evaluate(X_test, Y_test)
score



[0.2727017437300063, 0.9175432643366135]

In [0]:
data_disp['1LSTM_8batch'] = score

Batch Size = 16

In [87]:
# Training the model
# NOTE(review): no cell between the previous fit and this one rebuilds
# `model`, so a top-to-bottom run continues training the already-fitted
# network instead of starting from fresh weights. Re-run the model
# definition cell first if the batch sizes are meant to be compared
# independently.
model.fit(X_train,
          Y_train,
          batch_size=16,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc6bd1ec18>

In [91]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,536,0,0,0,0,1
SITTING,3,419,68,1,0,0
STANDING,0,132,399,1,0,0
WALKING,0,0,0,468,8,20
WALKING_DOWNSTAIRS,0,0,0,0,398,22
WALKING_UPSTAIRS,0,0,0,18,0,453


In [92]:
score = model.evaluate(X_test, Y_test)
score



[0.2531991467388801, 0.9070240922972514]

In [0]:
data_disp['1LSTM_16batch'] = score

Batch Size = 32

In [97]:
# Training the model
# NOTE(review): no cell between the previous fit and this one rebuilds
# `model`, so a top-to-bottom run continues training the already-fitted
# network instead of starting from fresh weights. Re-run the model
# definition cell first if the batch sizes are meant to be compared
# independently.
model.fit(X_train,
          Y_train,
          batch_size=32,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc6b16ada0>

In [98]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,518,0,0,16,0,3
SITTING,2,313,173,0,0,3
STANDING,0,39,491,2,0,0
WALKING,0,2,18,441,9,26
WALKING_DOWNSTAIRS,0,0,0,1,379,40
WALKING_UPSTAIRS,0,0,0,14,3,454


In [99]:
score = model.evaluate(X_test, Y_test)
score



[0.31892726644301617, 0.8808958262639973]

In [0]:
data_disp['1LSTM_32batch'] = score

One LSTM + Dropout(30%)

In [115]:
# Initializing the sequential model
model = Sequential()
# Single LSTM layer with 36 units reading (timesteps, input_dim) sequences
model.add(LSTM(36, input_shape=(timesteps, input_dim)))
# Adding a dropout layer with rate 0.3
model.add(Dropout(0.3))

# Adding a dense output layer with one unit per class and softmax activation
model.add(Dense(n_classes, activation='softmax'))
# Compiling the model: categorical cross-entropy loss, Adam optimizer, accuracy metric
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Model: "sequential_36"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_57 (LSTM)               (None, 36)                6624      
_________________________________________________________________
dropout_56 (Dropout)         (None, 36)                0         
_________________________________________________________________
dense_37 (Dense)             (None, 6)                 222       
Total params: 6,846
Trainable params: 6,846
Non-trainable params: 0
_________________________________________________________________


Batch Size = 8

In [102]:
# Training the model
# NOTE(review): validation_data is the test split, so the per-epoch
# validation metrics are computed on the same data that is later passed
# to model.evaluate.
model.fit(X_train,
          Y_train,
          batch_size=8,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc6b12fd68>

In [103]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,517,0,0,0,0,20
SITTING,0,376,107,1,0,7
STANDING,0,96,430,3,0,3
WALKING,0,0,0,460,12,24
WALKING_DOWNSTAIRS,0,0,0,0,415,5
WALKING_UPSTAIRS,0,0,0,2,0,469


In [104]:
score = model.evaluate(X_test, Y_test)
score



[0.25635178967629957, 0.9049881235154394]

In [0]:
data_disp['1LSTM_Drop3_8batch'] = score

Batch Size = 16

In [116]:
# Training the model
# NOTE(review): no cell between the previous fit and this one rebuilds
# `model`, so a top-to-bottom run continues training the already-fitted
# network instead of starting from fresh weights. Re-run the model
# definition cell first if the batch sizes are meant to be compared
# independently.
model.fit(X_train,
          Y_train,
          batch_size=16,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc6a6c8e80>

In [117]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,536,0,1,0,0,0
SITTING,0,385,102,1,0,3
STANDING,0,99,424,9,0,0
WALKING,0,0,0,467,24,5
WALKING_DOWNSTAIRS,0,0,0,2,418,0
WALKING_UPSTAIRS,0,0,0,23,41,407


In [118]:
score = model.evaluate(X_test, Y_test)
score



[0.3718474003477882, 0.8948082796063793]

In [0]:
data_disp['1LSTM_Drop3_16batch'] = score

Two LSTM + Dropout

In [125]:
# Initializing the sequential model
model = Sequential()
# First LSTM layer with 20 units; return_sequences=True makes it emit its
# output at every timestep so the next LSTM layer receives a sequence
model.add(LSTM(20, input_shape=(timesteps, input_dim), return_sequences=True))
# Adding a dropout layer with rate 0.3
model.add(Dropout(0.3))

# Second LSTM layer with 20 units
model.add(LSTM(20))
# Adding a dropout layer with rate 0.5
model.add(Dropout(0.5))

# Adding a dense output layer with one unit per class and softmax activation
model.add(Dense(n_classes, activation='softmax'))
# Compiling the model: categorical cross-entropy loss, Adam optimizer, accuracy metric
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Model: "sequential_38"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_60 (LSTM)               (None, 128, 20)           2400      
_________________________________________________________________
dropout_59 (Dropout)         (None, 128, 20)           0         
_________________________________________________________________
lstm_61 (LSTM)               (None, 20)                3280      
_________________________________________________________________
dropout_60 (Dropout)         (None, 20)                0         
_________________________________________________________________
dense_39 (Dense)             (None, 6)                 126       
Total params: 5,806
Trainable params: 5,806
Non-trainable params: 0
_________________________________________________________________


Batch Size = 8

In [121]:
# Training the model
# NOTE(review): validation_data is the test split, so the per-epoch
# validation metrics are computed on the same data that is later passed
# to model.evaluate.
model.fit(X_train,
          Y_train,
          batch_size=8,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc6a376358>

In [122]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,537,0,0,0,0,0
SITTING,6,395,70,2,0,18
STANDING,0,104,425,1,0,2
WALKING,0,0,0,467,27,2
WALKING_DOWNSTAIRS,0,0,0,1,417,2
WALKING_UPSTAIRS,0,0,1,2,6,462


In [123]:
score = model.evaluate(X_test, Y_test)
score



[0.3962146586148283, 0.9172039362063115]

In [0]:
data_disp['2LSTM_8batch'] = score

Batch Size = 16

In [126]:
# Training the model
# NOTE(review): no cell between the previous fit and this one rebuilds
# `model`, so a top-to-bottom run continues training the already-fitted
# network instead of starting from fresh weights. Re-run the model
# definition cell first if the batch sizes are meant to be compared
# independently.
model.fit(X_train,
          Y_train,
          batch_size=16,
          validation_data=(X_test, Y_test),
          epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7efc695e8a20>

In [127]:
# Confusion Matrix
pd.DataFrame(confusion_matrix(Y_test, model.predict(X_test)))

Pred,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,521,0,0,0,0,16
SITTING,5,401,64,1,0,20
STANDING,0,96,391,0,0,45
WALKING,0,0,0,460,29,7
WALKING_DOWNSTAIRS,0,0,0,6,411,3
WALKING_UPSTAIRS,0,0,0,21,4,446


In [128]:
score = model.evaluate(X_test, Y_test)
score



[0.46896032411779726, 0.8924329826942654]

In [0]:
data_disp['2LSTM_16batch'] = score

# Conclusion
1. Obtain and load the data from UCI
2. Data cleaning
3. Check for imbalanced data
4. Changing feature names (just remove '-' and replace it with an empty space)
5. EDA: Stationary and Moving Activities
6. EDA: Magnitude of acceleration (box plot)
7. EDA: Position of GravityAccelerationComponents
8. Perform t-SNE with different perplexity values
9. Using the 561 expert-engineered features, we train classical ML models (which achieved 96%)
10. Using the raw accelerometer and gyroscope sensor signals (9 features), we perform hyperparameter tuning on an LSTM (which achieved 92%)
11. Used Divide and Conquer CNN

In [132]:
from prettytable import PrettyTable
# Summarise every recorded experiment: one row per model with its test
# loss and its test accuracy converted to a percentage.
d = PrettyTable()
d.field_names = ['Model', 'Test Loss', 'Test Accuracy %']
for model_name, model_score in data_disp.items():
    test_loss, test_accuracy = model_score[0], model_score[1]
    d.add_row([model_name, test_loss, test_accuracy * 100])
print(d)

+---------------------+---------------------+-------------------+
|        Model        |      Test Loss      |  Test Accuracy %  |
+---------------------+---------------------+-------------------+
|     1LSTM_8batch    |  0.2727017437300063 | 91.75432643366135 |
|    1LSTM_16batch    |  0.2531991467388801 | 90.70240922972515 |
|    1LSTM_32batch    | 0.31892726644301617 | 88.08958262639973 |
|  1LSTM_Drop3_8batch | 0.25635178967629957 | 90.49881235154395 |
| 1LSTM_Drop3_16batch |  0.3718474003477882 | 89.48082796063794 |
|     2LSTM_8batch    |  0.3962146586148283 | 91.72039362063114 |
|    2LSTM_16batch    | 0.46896032411779726 | 89.24329826942655 |
+---------------------+---------------------+-------------------+


In [2]:
from prettytable import PrettyTable
print('Please go to sahiltinky94@gmail.com_2.ipynb to see the result. (Used Divide and Conquer CNN)')

# Results copied from the companion notebook: (model, test loss, test accuracy %)
cnn_results = [
    ['Dvide and Conquer CNN (Static)', 0.288, 92.7],
    ['Dvide and Conquer CNN (Dynamic)', 0.205, 96.4],
    ['Dvide and Conquer CNN (On test stage after merging)', '-', 94.43],
]

d = PrettyTable()
d.field_names = ['Model', 'Test Loss', 'Test Accuracy %']
for row in cnn_results:
    d.add_row(row)
print(d)

Please go to sahiltinky94@gmail.com_2.ipynb to see the result. (Used Divide and Conquer CNN)
+-----------------------------------------------------+-----------+-----------------+
|                        Model                        | Test Loss | Test Accuracy % |
+-----------------------------------------------------+-----------+-----------------+
|            Dvide and Conquer CNN (Static)           |   0.288   |       92.7      |
|           Dvide and Conquer CNN (Dynamic)           |   0.205   |       96.4      |
| Dvide and Conquer CNN (On test stage after merging) |     -     |      94.43      |
+-----------------------------------------------------+-----------+-----------------+
