In [0]:
# Importing Libraries

In [0]:
import pandas as pd
import numpy as np

In [0]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM,BatchNormalization, Conv1D, MaxPooling1D , Flatten
from keras.layers.core import Dense, Dropout

In [0]:
from sklearn.metrics import accuracy_score

In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Data

In [0]:
def data():
    ''' This function is to load the data'''
    SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
    ]
    signals_data = []

    for signal in SIGNALS:
        filename = f'gdrive/My Drive/UCI_HAR_Dataset/train/Inertial Signals/{signal}_train.txt'
        signals_data.append(
            pd.read_csv(filename, delim_whitespace=True, header=None).as_matrix()
        ) 
    X_train = np.transpose(signals_data, (1, 2, 0))
    
    signals_data = []

    for signal in SIGNALS:
        filename = f'gdrive/My Drive/UCI_HAR_Dataset/test/Inertial Signals/{signal}_test.txt'
        signals_data.append(
            pd.read_csv(filename, delim_whitespace=True, header=None).as_matrix()
        ) 
    X_test = np.transpose(signals_data, (1, 2, 0))
    
    filename = f'gdrive/My Drive/UCI_HAR_Dataset/train/y_train.txt'
    y = pd.read_csv(filename, delim_whitespace=True, header=None)[0]
    Y_train = pd.get_dummies(y).as_matrix()
    
    filename = f'gdrive/My Drive/UCI_HAR_Dataset/test/y_test.txt'
    y = pd.read_csv(filename, delim_whitespace=True, header=None)[0]
    Y_test = pd.get_dummies(y).as_matrix()
    
    return X_train , Y_train , X_test , Y_test

In [0]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [0]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [0]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

Using TensorFlow backend.


In [0]:
# Initializing parameters
epochs = 30
batch_size = 32
n_hidden = 32

In [0]:
# Loading the train and test data
X_train, Y_train,X_test, Y_test = data()



In [0]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 6

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [0]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [0]:
# Initiliazing the sequential model
model_1 = Sequential()
# Configuring the parameters
model_1.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model_1.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model_1.add(Dense(n_classes, activation='sigmoid'))
model_1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 32)                5376      
_________________________________________________________________
dropout_155 (Dropout)        (None, 32)                0         
_________________________________________________________________
dense_206 (Dense)            (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Compiling the model
model_1.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [0]:
# Training the model
model_1.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fa10e587978>

In [67]:
model_1.evaluate(X_test, Y_test)



[0.36589546700399767, 0.9019341703427214]

In [68]:
# Confusion Matrix
print(confusion_matrix(Y_train, model_1.predict(X_test)))

Pred                LAYING  SITTING  ...  WALKING_DOWNSTAIRS  WALKING_UPSTAIRS
True                                 ...                                      
LAYING                  77       89  ...                  61                96
SITTING                 58       39  ...                  81               127
STANDING                75       46  ...                 125                53
WALKING                 94      125  ...                  68                80
WALKING_DOWNSTAIRS     120       77  ...                  28                84
WALKING_UPSTAIRS        86       66  ...                  63                44

[6 rows x 6 columns]


We ll be breaking the train data into static and dynamic activities. For that we change the labels of the train data as 1 for dynamic activities and 0 for static activities.

In [0]:
filename = f'gdrive/My Drive/UCI_HAR_Dataset/train/y_train.txt'
y = pd.read_csv(filename, delim_whitespace=True, header=None)[0]
y_train = y.map({1:1,2:1, 3:1,\
                       4:0, 5:0,6:0})

In [0]:
filename = f'gdrive/My Drive/UCI_HAR_Dataset/test/y_test.txt'
y = pd.read_csv(filename, delim_whitespace=True, header=None)[0]
y_test = y.map({1:1,2:1, 3:1,\
                       4:0, 5:0,6:0})

In [0]:
y_test.value_counts()

0    1560
1    1387
Name: 0, dtype: int64

In [0]:
#model to break the data into static and dynamic
model_s = Sequential()
model_s.add(LSTM(32, kernel_initializer='glorot_normal',input_shape=(128,9)))
model_s.add(Dropout(0.5))

model_s.add(Dense(1, activation='sigmoid',kernel_initializer='glorot_normal'))
model_s.summary()

W0811 16:48:25.526549 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0811 16:48:25.533162 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0811 16:48:25.544402 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0811 16:48:25.767262 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0811 16:48

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                5376      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 5,409
Trainable params: 5,409
Non-trainable params: 0
_________________________________________________________________


In [0]:
model_s.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

W0811 16:48:30.244857 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0811 16:48:30.277998 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3376: The name tf.log is deprecated. Please use tf.math.log instead.

W0811 16:48:30.285583 140332016301952 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [0]:
model_s.fit(X_train, y_train, batch_size=32, validation_data=(X_test, y_test),epochs=30)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fa15a195908>

In [0]:
y_pred_test = model_s.predict(X_test)
y_pred=[]
for i in y_pred_test:
    if i>0.5:                             #converting into 0 and 1 for the threshold 0.5
      y_pred.append(1)
    else:
      y_pred.append(0)

In [0]:
accuracy_score(y_test, y_pred)

0.993892093654564

In [0]:
def static_data():
    ''' this function breaks the train and test data on the basis of the static labels and stores them in files'''
    c=0
    k=0
    static = []
    sta = []
    static_y = []
    sta_y = []
    for i in y_train:
        if i==0:
            static.append(X_train[c])
            static_y.append(Y_train[c])
        c+=1
    for i in y_pred:
        if i==0:
            sta.append(X_test[k])
            sta_y.append(Y_test[k])
        k+=1
    X_tr_st = np.array(static)
    X_ts_st = np.array(sta)
    Y_tr_st = np.array(static_y)
    Y_ts_st = np.array(sta_y)

    np.save('X_tr_st', X_tr_st)
    np.save('Y_tr_st', Y_tr_st)
    np.save('X_ts_st', X_ts_st)
    np.save('Y_ts_st', Y_ts_st)
    return X_tr_st, Y_tr_st, X_ts_st, Y_ts_st

In [0]:
def dynamic_data():
    ''' this function breaks the train and test data on the basis of the dynamic labels and stores them in files '''
    c=0
    k=0
    dynamic=[]
    dynamic_y= []
    dyn=[]
    dyn_y= []

    for i in y_train:
        if i==1:
            dynamic.append(X_train[c])
            dynamic_y.append(Y_train[c])
        c+=1
    for i in y_pred:
        if i==1:
            dyn.append(X_test[k])
            dyn_y.append(Y_test[k])
        k+=1
    X_tr_dy = np.array(dynamic)
    X_ts_dy = np.array(dyn)
    Y_tr_dy = np.array(dynamic_y)
    Y_ts_dy = np.array(dyn_y)

    np.save('X_tr_dy', X_tr_dy)
    np.save('Y_tr_dy', Y_tr_dy)
    np.save('X_ts_dy', X_ts_dy)
    np.save('Y_ts_dy', Y_ts_dy)
    return X_tr_dy, Y_tr_dy, X_ts_dy, Y_ts_dy

In [0]:
X_tr_st, Y_tr_st, X_ts_st, Y_ts_st= static_data()      #Calling the static and dynamic data with the functions 
X_tr_dy, Y_tr_dy, X_ts_dy, Y_ts_dy = dynamic_data()


In [0]:
def load_static():
    X_tr_st = np.load('X_tr_st.npy')                      #Function to load the data from the saved files
    Y_tr_st = np.load('Y_tr_st.npy')
    X_ts_st = np.load('X_ts_st.npy')
    Y_ts_st = np.load('Y_ts_st.npy')

    return X_tr_st, Y_tr_st, X_ts_st, Y_ts_st

In [0]:
def load_dynamic():
    X_tr_dy = np.load('X_tr_dy.npy')                  #Function to load the data from the saved files
    Y_tr_dy = np.load('Y_tr_dy.npy')
    X_ts_dy = np.load('X_ts_dy.npy')
    Y_ts_dy = np.load('Y_ts_dy.npy')

    return X_tr_dy, Y_tr_dy, X_ts_dy, Y_ts_dy

****Now we build models separately on the static data and dynamic data with different structures.****

In [0]:
def best_hyperparameters_dy(ker1,ker2,drp,neurons):

    model = Sequential()
    # Configuring the parameters
    model.add(Conv1D(ker1, kernel_size=3, padding='same', input_shape=(128,9), activation='relu', kernel_initializer='glorot_normal', strides=1))
    model.add(MaxPooling1D())
    model.add(Dropout(drp))

    model.add(Conv1D(ker2, kernel_size=2, padding='same',activation='relu', kernel_initializer='glorot_normal', strides=1 ))

    model.add(Flatten())
    model.add(Dense(neurons, kernel_initializer='glorot_normal', activation='relu'))
    model.add(BatchNormalization())

    model.add(Dense(n_classes, activation='sigmoid',kernel_initializer='glorot_normal'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    
    return model

In [0]:
ker1 = [30,35]
ker2= [50,45]
neurons = [40,45]
drp = [0.5,0.6]
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

model = KerasClassifier(build_fn=best_hyperparameters_dy, epochs=10, batch_size=batch_size, verbose=0)
param_grid = dict(ker1=ker1, ker2=ker2 ,neurons=neurons, drp=drp)

grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result_dy = grid.fit(X_tr_dy, Y_tr_dy)

W0811 16:59:14.617902 140332016301952 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0811 17:03:15.637176 140332016301952 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0811 17:03:27.892271 140332016301952 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0811 17:03:40.270517 140332016301952 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0811 17:03:52.866335 140332016301952 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W08

In [0]:
print("Best: %f using %s" % (grid_result_dy.best_score_, grid_result_dy.best_params_))
means = grid_result_dy.cv_results_['mean_test_score']
stds = grid_result_dy.cv_results_['std_test_score']
params = grid_result_dy.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.933638 using {'drp': 0.5, 'ker1': 30, 'ker2': 50, 'neurons': 40}
0.933638 (0.039289) with: {'drp': 0.5, 'ker1': 30, 'ker2': 50, 'neurons': 40}
0.865449 (0.108785) with: {'drp': 0.5, 'ker1': 30, 'ker2': 50, 'neurons': 45}
0.885236 (0.023888) with: {'drp': 0.5, 'ker1': 30, 'ker2': 45, 'neurons': 40}
0.900457 (0.006628) with: {'drp': 0.5, 'ker1': 30, 'ker2': 45, 'neurons': 45}
0.850228 (0.111632) with: {'drp': 0.5, 'ker1': 35, 'ker2': 50, 'neurons': 40}
0.926027 (0.022518) with: {'drp': 0.5, 'ker1': 35, 'ker2': 50, 'neurons': 45}
0.909285 (0.020289) with: {'drp': 0.5, 'ker1': 35, 'ker2': 45, 'neurons': 40}
0.891629 (0.102729) with: {'drp': 0.5, 'ker1': 35, 'ker2': 45, 'neurons': 45}
0.768037 (0.107455) with: {'drp': 0.6, 'ker1': 30, 'ker2': 50, 'neurons': 40}
0.933029 (0.024292) with: {'drp': 0.6, 'ker1': 30, 'ker2': 50, 'neurons': 45}
0.904718 (0.071168) with: {'drp': 0.6, 'ker1': 30, 'ker2': 45, 'neurons': 40}
0.891933 (0.085137) with: {'drp': 0.6, 'ker1': 30, 'ker2': 45, 'neuro

In [0]:
model_dy= best_hyperparameters_dy(30,50,0.5,40)
model_dy.fit(X_tr_dy, Y_tr_dy, epochs=30, batch_size=batch_size, validation_data= (X_ts_dy, Y_ts_dy))
model_dy.evaluate(X_ts_dy,Y_ts_dy)

Train on 3285 samples, validate on 1373 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.15724907209655814, 0.9599417334304443]

In [0]:
# Confusion Matrix
print(confusion_matrix(Y_ts_dy, model_dy.predict(X_ts_dy)))

Pred                WALKING  WALKING_DOWNSTAIRS  WALKING_UPSTAIRS
True                                                             
SITTING                   2                   0                 2
STANDING                  5                   0                 3
WALKING                 480                  16                 0
WALKING_DOWNSTAIRS        5                 410                 0
WALKING_UPSTAIRS          3                  24               443


In [0]:
#Model for the static activities
def best_hyperparameters_st(k1,k2,drp,neurons):
  
    model = Sequential()
    # Configuring the parameters
    model.add(Conv1D(k1, kernel_size=2, padding='same', input_shape=(128,9), activation='relu', kernel_initializer='glorot_normal', strides=1))

    model.add(Conv1D(k2, kernel_size=3, padding='same', activation='relu', kernel_initializer='glorot_normal', strides=1))
    model.add(Dropout(drp))

    model.add(Flatten())

    model.add(Dense(neurons, kernel_initializer='glorot_normal', activation='relu'))
    model.add(Dropout(drp))
    model.add(BatchNormalization())

    model.add(Dense(n_classes, activation='sigmoid',kernel_initializer='glorot_normal'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    
    return model

In [0]:
k1 = [40,32]
k2= [40,45]
neurons = [40,45]
drp = [0.5,0.6]

model = KerasClassifier(build_fn=best_hyperparameters_st, epochs=20, batch_size=batch_size, verbose=0)
param_grid = dict(k1=k1, k2=k2 ,neurons=neurons, drp=drp)

grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result_st = grid.fit(X_tr_st, Y_tr_st)



In [0]:
print("Best: %f using %s" % (grid_result_st.best_score_, grid_result_st.best_params_))
means = grid_result_st.cv_results_['mean_test_score']
stds = grid_result_st.cv_results_['std_test_score']
params = grid_result_st.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.896976 using {'drp': 0.6, 'k1': 40, 'k2': 45, 'neurons': 40}
0.871650 (0.017512) with: {'drp': 0.5, 'k1': 40, 'k2': 40, 'neurons': 40}
0.888616 (0.009429) with: {'drp': 0.5, 'k1': 40, 'k2': 40, 'neurons': 45}
0.890091 (0.003581) with: {'drp': 0.5, 'k1': 40, 'k2': 45, 'neurons': 40}
0.890829 (0.016380) with: {'drp': 0.5, 'k1': 40, 'k2': 45, 'neurons': 45}
0.889353 (0.008317) with: {'drp': 0.5, 'k1': 32, 'k2': 40, 'neurons': 40}
0.885911 (0.002885) with: {'drp': 0.5, 'k1': 32, 'k2': 40, 'neurons': 45}
0.884436 (0.009241) with: {'drp': 0.5, 'k1': 32, 'k2': 45, 'neurons': 40}
0.879764 (0.013734) with: {'drp': 0.5, 'k1': 32, 'k2': 45, 'neurons': 45}
0.882469 (0.030871) with: {'drp': 0.6, 'k1': 40, 'k2': 40, 'neurons': 40}
0.895500 (0.018822) with: {'drp': 0.6, 'k1': 40, 'k2': 40, 'neurons': 45}
0.896976 (0.010639) with: {'drp': 0.6, 'k1': 40, 'k2': 45, 'neurons': 40}
0.883206 (0.005958) with: {'drp': 0.6, 'k1': 40, 'k2': 45, 'neurons': 45}
0.887632 (0.024194) with: {'drp': 0.6, 'k1'

In [0]:
model_st= best_hyperparameters_st(40,45,0.6,40)
model_st.fit(X_tr_st, Y_tr_st, epochs=30, batch_size=16, validation_data= (X_ts_st, Y_ts_st))
model_st.evaluate(X_ts_st,Y_ts_st)

Train on 4067 samples, validate on 1574 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.3950737391419832, 0.9212198221092758]

In [66]:
# Confusion Matrix
print(confusion_matrix(Y_ts_st, model_st.predict(X_ts_st)))

Pred                LAYING  SITTING  STANDING
True                                         
LAYING                 537        0         0
SITTING                  2      382       103
STANDING                 0       74       450
WALKING_DOWNSTAIRS       0        0         5
WALKING_UPSTAIRS         0        0         1


In [0]:
print('Mean accuracy over the static and dynamic data is:',np.add(92.12,95.99)/2.0)

Mean accuracy over the static and dynamic data is: 94.055


## Conclusion

****Following are the steps followed in solving this case study:****

- __Objective:__ Given a Datapoint with various attribute values from the activities performed by an individual, we have to predict the activity that was performed.</br>
- __Data Loading:__ We are provided with data in two forms, one with 561 dimensional and the other data which is created using the actual readings observed from the activity graph. The later data is a 128 dimensional data which is created from a window of 2.56 seconds and an overlap of 50% at each iteration. The 561 dimensional data is expert created data and requires domain knowledge to understand it.</br>
- __Data Cleaning:__ We check for the duplicates and remove the (-,(),,) from the features, we check for the nan values . We check for the data imbalance and find out that our data is balanced. </br>
- __EDA:__ We perform some EDA on the 561 dim data and then apply tsne . We find out that the static activities and the dynamic activities can be differentiated easily. Even After increasing the perplexity we are not able to differentiate the standing and sitting activity.</br>
- __Machine Learning Models:__ We build machine learning models like Logistic regression, SVM, Random Forest and Gradient Boosted decision trees on top of the 561 dimensional data that we are provided. We get a pretty decent accuracy with these models.</br>
- __Deep Learning Models:__ Now its turn for the Deep Learning models, we build LSTM model on th temporal data and we get accuracy around 91%. But this is just using the 128 dimensional data extracted from the wimdows of the activity graph along the three axes. So we want a better accuracy using the same data building more complex models. So we use Divide and conquer method to build better models. We break our data into static and dynamic activity data using an LSTM model.As now we have separate statc and dynamic activity data we build independent models on these data using Conv1D and note the accuracy obtained . Using this method we land up having a better accuracy than just using LSTM model alone.

In [0]:
from prettytable import PrettyTable

In [63]:
x= PrettyTable(['Model','Accuracy'])
x.add_row(['Logistic Regression',96.27])
x.add_row(['Linear SVC',96.61])
x.add_row(['RBF SVM',96.27])
x.add_row(['Decision Tree',86.43])
x.add_row(['RandomForest',91.31])
x.add_row(['Gradient Boosted',91.31])
x.add_row(['LSTM',90.19])
x.add_row(['Divide and Conquer (Static)',96.00])
x.add_row(['Divide and Conquer (Dynamic)',92.13])
x.add_row(['Divide and Conquer avg',94.06])
print(x)

+------------------------------+----------+
|            Model             | Accuracy |
+------------------------------+----------+
|     Logistic Regression      |  96.27   |
|          Linear SVC          |  96.61   |
|           RBF SVM            |  96.27   |
|        Decision Tree         |  86.43   |
|         RandomForest         |  91.31   |
|       Gradient Boosted       |  91.31   |
|             LSTM             |  90.19   |
| Divide and Conquer (Static)  |   96.0   |
| Divide and Conquer (Dynamic) |  92.13   |
|    Divide and Conquer avg    |  94.06   |
+------------------------------+----------+
