# Activity Recognition Using Smartphones Dataset

In [49]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import Dense 
from keras.layers import Flatten 
from keras.layers import Dropout 
from keras.layers import LSTM 
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import ConvLSTM2D
from keras.utils import to_categorical

In [3]:
# Mount Google Drive at /content/drive (Colab-specific) so that files stored
# there can be read through ordinary file paths by the loaders below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Loading data

The recorded movement data consists of x-, y-, and z-axis accelerometer readings (linear acceleration) and gyroscope readings (angular velocity) from a smartphone, specifically a Samsung Galaxy S II. Observations were recorded at 50 Hz (i.e., 50 data points per second). Each subject performed the sequence of activities twice: once with the device on their left-hand side and once with the device on their right-hand side.




In [4]:
def load_file(filepath):
    """Load a header-less, whitespace-delimited text file as a NumPy array.

    Parameters
    ----------
    filepath : str or path-like
        Location of the text file to read.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per line of the file and one column per
        whitespace-separated field.
    """
    # `sep=r'\s+'` is the supported spelling of the `delim_whitespace=True`
    # flag, which is deprecated since pandas 2.2. Runs of spaces/tabs act as
    # a single delimiter and leading whitespace on a line is ignored.
    df = pd.read_csv(filepath, header=None, sep=r'\s+')
    return df.values

In [5]:
#group for whether its test or train folder
def load_group(filenames, prefix=''):
    """Load several files and stack the resulting arrays depth-wise.

    Parameters
    ----------
    filenames : iterable of str
        File names to load, in the order they should appear along the
        last axis of the result.
    prefix : str, optional
        String prepended to every file name (typically a directory path
        ending in a separator).

    Returns
    -------
    numpy.ndarray
        The arrays returned by ``load_file`` combined with ``np.dstack``,
        i.e. one slice along the third axis per input file.
    """
    arrays = [load_file(prefix + name) for name in filenames]
    return np.dstack(arrays)

In [6]:
def load_dataset_group(group, prefix=''):
    """Load the inertial signals and the labels for one dataset split.

    Parameters
    ----------
    group : str
        Name of the split; it is used both as the sub-directory name and
        as the suffix of every file name (the notebook passes 'train' and
        'test').
    prefix : str, optional
        Path prepended to the split directory.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the depth-wise stack of the nine signal
        files and ``y`` is the content of the split's ``y_<group>.txt``.
    """
    signals_dir = prefix + group + '/Inertial Signals/'

    # Order matters: it fixes the feature order along the last axis of X
    # (total acceleration, then body acceleration, then body gyroscope,
    # each as x, y, z).
    signal_kinds = ('total_acc', 'body_acc', 'body_gyro')
    filenames = [
        kind + '_' + axis + '_' + group + '.txt'
        for kind in signal_kinds
        for axis in ('x', 'y', 'z')
    ]

    signals = load_group(filenames, signals_dir)
    labels = load_file(prefix + group + '/y_' + group + '.txt')
    return signals, labels

In [7]:
def load_dataset(prefix):
  """Load the train and test splits and one-hot encode their labels.

  Parameters
  ----------
  prefix : str
      Path prepended to the 'UCI_HAR_Dataset/' directory name.

  Returns
  -------
  tuple
      ``(train_X, train_y, test_X, test_y)`` where the ``*_y`` arrays are
      one-hot encoded with the same number of columns for both splits.

  The shapes of the loaded arrays are printed as a side effect.
  """
  dataset_root = prefix + 'UCI_HAR_Dataset/'

  # load train data
  train_X, train_y = load_dataset_group('train', dataset_root)
  print(train_X.shape, train_y.shape)
  # load test data
  test_X, test_y = load_dataset_group('test', dataset_root)
  print(test_X.shape, test_y.shape)

  # Shift the labels down by one (presumably they are 1-based in the files)
  # so that to_categorical does not create an unused column 0. A new array
  # is produced instead of mutating the loaded one in place.
  train_y = train_y - 1
  test_y = test_y - 1

  # One-hot encode with a shared class count: without it each call infers
  # the width from its own maximum label, so the two splits could end up
  # with different numbers of columns if one lacks the highest class.
  n_classes = int(max(train_y.max(), test_y.max())) + 1
  train_y = to_categorical(train_y, num_classes=n_classes)
  test_y = to_categorical(test_y, num_classes=n_classes)

  print(train_y.shape, test_y.shape)
  return train_X, train_y, test_X, test_y

In [18]:
# LSTM model: a single LSTM layer followed by a dense classification head
def evaluate_model(train_X, train_y, test_X, test_y, epochs=15, batch_size=64, verbose=0):
  """Fit a single-layer LSTM classifier and return its test accuracy.

  Parameters
  ----------
  train_X, test_X : array-like
      Inputs indexed as (samples, time steps, features); the last two
      dimensions of ``train_X`` define the model's input shape.
  train_y, test_y : array-like
      One-hot encoded targets; the second dimension of ``train_y`` sets
      the number of output units.
  epochs : int, optional
      Number of training epochs.
  batch_size : int, optional
      Batch size used for both fitting and evaluation.
  verbose : int, optional
      Verbosity passed to ``model.fit``.

  Returns
  -------
  float
      Accuracy reported by ``model.evaluate`` on the test data.
  """
  n_timesteps, n_features, n_outputs = train_X.shape[1], train_X.shape[2], train_y.shape[1]

  model = Sequential()
  # one LSTM layer; return_sequences is left at its default, so only the
  # final output of the sequence is passed on to the layers below
  model.add(LSTM(100, input_shape=(n_timesteps, n_features)))
  # dropout to reduce overfitting
  model.add(Dropout(0.5))
  # a dense fully connected layer interprets the features extracted by the LSTM
  model.add(Dense(100, activation='relu'))
  # a dense softmax layer outputs the class probabilities
  model.add(Dense(n_outputs, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  model.fit(train_X, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
  _, accuracy = model.evaluate(test_X, test_y, batch_size=batch_size, verbose=0)

  return accuracy

In [9]:
# summarize scores
def summarize_results(scores):
    """Print the raw scores, then their mean and standard deviation.

    Parameters
    ----------
    scores : sequence of float
        Accuracy values (the callers pass percentages).
    """
    print(scores)
    mean_score = np.mean(scores)
    spread = np.std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (mean_score, spread))

In [10]:
# run an experiment
def run_experiment(
    repeats=10,
    model_fn=None,
    prefix='/content/drive/My Drive/Colab Notebooks/HAR/UCI HAR Dataset/',
):
    """Load the dataset, train/evaluate a model several times, and summarize.

    Parameters
    ----------
    repeats : int, optional
        How many times the model is trained from scratch and evaluated.
    model_fn : callable, optional
        Function called as ``model_fn(train_X, train_y, test_X, test_y)``
        that returns an accuracy as a fraction. Defaults to
        ``evaluate_model`` when omitted.
    prefix : str, optional
        Path handed to ``load_dataset``. The default keeps the location
        the notebook was originally run against.

    The per-run score (as a percentage) is printed after every repeat and
    the collected scores are passed to ``summarize_results`` at the end.
    """
    if model_fn is None:
        # resolved at call time so the default stays the plain LSTM model
        model_fn = evaluate_model

    # load data once; every repeat reuses the same arrays
    train_X, train_y, test_X, test_y = load_dataset(prefix)

    # repeat experiment
    scores = []
    for r in range(repeats):
        score = model_fn(train_X, train_y, test_X, test_y) * 100.0
        print('>#%d: %.3f' % (r + 1, score))
        scores.append(score)

    # summarize results
    summarize_results(scores)

In [13]:
# Run the LSTM experiment with a single repeat instead of the default 10.
run_experiment(1)

(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 6) (2947, 6)
>#1: 91.686
[91.68646335601807]
Accuracy: 91.686% (+/-0.000)


### CNN-LSTM Network

In [35]:
def model_cnn_lstm(train_X, train_y, test_X, test_y, n_steps=4, epochs=25, batch_size=64, verbose=0):
  """Fit a CNN-LSTM classifier and return its test accuracy.

  Every window is split into ``n_steps`` consecutive sub-sequences. The
  TimeDistributed Conv1D stack is applied to each sub-sequence and the
  LSTM then reads the resulting sequence of feature vectors.

  Parameters
  ----------
  train_X, test_X : numpy.ndarray
      Inputs indexed as (samples, time steps, features).
  train_y, test_y : array-like
      One-hot encoded targets; the second dimension of ``train_y`` sets
      the number of output units.
  n_steps : int, optional
      Number of sub-sequences each window is split into.
  epochs : int, optional
      Number of training epochs.
  batch_size : int, optional
      Batch size used for both fitting and evaluation.
  verbose : int, optional
      Verbosity passed to ``model.fit``.

  Returns
  -------
  float
      Accuracy reported by ``model.evaluate`` on the test data.

  Raises
  ------
  ValueError
      If the number of time steps cannot be split evenly into ``n_steps``.
  """
  n_timesteps, n_features, n_outputs = train_X.shape[1], train_X.shape[2], train_y.shape[1]

  # Derive the sub-sequence length from the data instead of hard-coding it;
  # for 128-step windows and n_steps=4 this yields the original 32.
  if n_steps <= 0 or n_timesteps % n_steps != 0:
    raise ValueError(
        'cannot split %d time steps into %d equal sub-sequences' % (n_timesteps, n_steps))
  n_length = n_timesteps // n_steps

  # reshape data into time steps of sub-sequences
  train_X = train_X.reshape((train_X.shape[0], n_steps, n_length, n_features))
  test_X = test_X.reshape((test_X.shape[0], n_steps, n_length, n_features))

  # model
  model = Sequential()
  # the convolutional feature extractor is applied to each sub-sequence
  model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None, n_length, n_features)))
  model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
  model.add(TimeDistributed(Dropout(0.5)))
  model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
  model.add(TimeDistributed(Flatten()))
  # the LSTM reads the per-sub-sequence feature vectors in order
  model.add(LSTM(100))
  model.add(Dropout(0.5))
  model.add(Dense(100, activation='relu'))
  model.add(Dense(n_outputs, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  model.fit(train_X, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
  _, accuracy = model.evaluate(test_X, test_y, batch_size=batch_size, verbose=0)

  return accuracy


In [20]:
# run an experiment
def run_experiment_cnnlstm(repeats=10):
    """Train and evaluate the CNN-LSTM model ``repeats`` times.

    The dataset is loaded once, each run's accuracy is printed as a
    percentage, and the collected scores are handed to
    ``summarize_results``.
    """
    train_X, train_y, test_X, test_y = load_dataset('/content/drive/My Drive/Colab Notebooks/HAR/UCI HAR Dataset/')

    scores = []
    for run_number in range(1, repeats + 1):
        percent = model_cnn_lstm(train_X, train_y, test_X, test_y) * 100.0
        print('>#%d: %.3f' % (run_number, percent))
        scores.append(percent)

    summarize_results(scores)

In [37]:
# Run the CNN-LSTM experiment with the default number of repeats.
run_experiment_cnnlstm()

(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 6) (2947, 6)
>#1: 90.940
>#2: 91.585
>#3: 91.076
>#4: 90.838
>#5: 91.110
>#6: 91.890
>#7: 88.836
>#8: 90.635
>#9: 90.499
>#10: 92.060
[90.93993902206421, 91.58466458320618, 91.07567071914673, 90.83814024925232, 91.10960364341736, 91.89005494117737, 88.83610367774963, 90.63454270362854, 90.49881100654602, 92.05971956253052]
Accuracy: 90.947% (+/-0.857)


### ConvLSTM Network Model

The `ConvLSTM2D` class in Keras expects input data with the shape `(samples, time, rows, cols, channels)`. For this dataset:


* Samples: n, for the number of windows in the dataset.
* Time: 4, for the four subsequences that we split a window of 128 time steps into.
* Rows: 1, for the one-dimensional shape of each subsequence.
* Columns: 32, for the 32 time steps in an input subsequence.
* Channels: 9, for the nine input variables.





In [55]:
def model_convLSTM(train_X, train_y, test_X, test_y, n_steps=4, epochs=25, batch_size=64, verbose=0):
  """Fit a ConvLSTM classifier and return its test accuracy.

  Every window is split into ``n_steps`` consecutive sub-sequences and
  reshaped to (samples, n_steps, 1, n_length, features) before being fed
  to a ``ConvLSTM2D`` layer with a (1, 3) kernel.

  Parameters
  ----------
  train_X, test_X : numpy.ndarray
      Inputs indexed as (samples, time steps, features).
  train_y, test_y : array-like
      One-hot encoded targets; the second dimension of ``train_y`` sets
      the number of output units.
  n_steps : int, optional
      Number of sub-sequences each window is split into.
  epochs : int, optional
      Number of training epochs.
  batch_size : int, optional
      Batch size used for both fitting and evaluation.
  verbose : int, optional
      Verbosity passed to ``model.fit``.

  Returns
  -------
  float
      Accuracy reported by ``model.evaluate`` on the test data.

  Raises
  ------
  ValueError
      If the number of time steps cannot be split evenly into ``n_steps``.
  """
  n_timesteps, n_features, n_outputs = train_X.shape[1], train_X.shape[2], train_y.shape[1]

  # Derive the sub-sequence length from the data instead of hard-coding it;
  # for 128-step windows and n_steps=4 this yields the original 32.
  if n_steps <= 0 or n_timesteps % n_steps != 0:
    raise ValueError(
        'cannot split %d time steps into %d equal sub-sequences' % (n_timesteps, n_steps))
  n_length = n_timesteps // n_steps

  # insert a singleton "rows" axis so each sub-sequence is a 1 x n_length grid
  train_X = train_X.reshape((train_X.shape[0], n_steps, 1, n_length, n_features))
  test_X = test_X.reshape((test_X.shape[0], n_steps, 1, n_length, n_features))

  model = Sequential()
  model.add(ConvLSTM2D(filters=64, kernel_size=(1, 3), activation='relu', input_shape=(n_steps, 1, n_length, n_features)))
  model.add(Dropout(0.5))
  model.add(Flatten())
  model.add(Dense(100, activation='relu'))
  model.add(Dense(n_outputs, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  model.fit(train_X, train_y, batch_size=batch_size, epochs=epochs, verbose=verbose)
  _, accuracy = model.evaluate(test_X, test_y, batch_size=batch_size, verbose=0)

  return accuracy

In [58]:
# run an experiment
def run_experiment_convlstm(repeats=10):
    """Train and evaluate the ConvLSTM model ``repeats`` times.

    The dataset is loaded once, each run's accuracy is printed as a
    percentage, and the collected scores are handed to
    ``summarize_results``.
    """
    train_X, train_y, test_X, test_y = load_dataset('/content/drive/My Drive/Colab Notebooks/HAR/UCI HAR Dataset/')

    scores = []
    for run_number in range(1, repeats + 1):
        percent = model_convLSTM(train_X, train_y, test_X, test_y) * 100.0
        print('>#%d: %.3f' % (run_number, percent))
        scores.append(percent)

    summarize_results(scores)

In [59]:
# Run the ConvLSTM experiment with the default number of repeats.
run_experiment_convlstm()

(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 6) (2947, 6)
>#1: 90.024
>#2: 91.076
>#3: 91.008
>#4: 90.397
>#5: 90.193
>#6: 90.668
>#7: 89.447
>#8: 89.141
>#9: 90.906
>#10: 91.042
[90.0237500667572, 91.07567071914673, 91.00780487060547, 90.39701223373413, 90.19341468811035, 90.66847562789917, 89.44689631462097, 89.1414999961853, 90.90600609779358, 91.0417377948761]
Accuracy: 90.390% (+/-0.651)
