# LSTMs for Blink Recognition Time Series Classification

This notebook follows the steps shown in the following tutorial:<br>
https://machinelearningmastery.com/how-to-develop-rnn-models-for-human-activity-recognition-time-series-classification/


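A volunteer was asked to blink and press a button at the same time. This LSTM model is trained to recognize the blink using the button press as a label.

**Note:** the code cells below load a punch recording (`Data/ThrowingPunch-Frontal-Elbow-Outward-ClosedElbow.txt`) and label its `#`-marked lines alternately as punch (1) and retract (2), so the same approach is applied here to punch/retract events.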

In [202]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy.polynomial.polynomial import Polynomial
from sklearn.linear_model import LinearRegression
from pandas import read_csv
from numpy import dstack, mean, std
from tensorflow import keras
from tensorflow.keras import losses
from tensorflow.keras.layers import Dense, Dropout, LSTM, TimeDistributed, Conv1D, MaxPooling1D, Flatten, ConvLSTM2D
from tensorflow.keras.utils import to_categorical

# Basic LSTM Model

This section is divided into the following parts:
1. Load the Data
2. Fit and Evaluate a Model
3. Summarize Results
4. Final Examination

# Parameters

In [203]:
# Number of consecutive samples in each sliding window (the model's time axis)
timesteps = 1000
# Number of samples the window start advances between consecutive windows
shift_amount = 50
# Degree of the polynomial fitted in load_channel to fill NaN readings
poly_regression_degree = 3

## Load the Data

In [204]:
# load one channel (column) of a recording, fill NaN readings from a polynomial
# fit over the row index, and cut the signal into overlapping windows
def load_channel(filepath, columnindex):
  """Load one column of a whitespace-delimited file as sliding windows.

  Parameters
  ----------
  filepath : str or path-like
      Text file without a header row; columns are separated by whitespace.
  columnindex : int
      Zero-based index of the column to read.

  Returns
  -------
  numpy.ndarray
      Float array of shape (n_windows, timesteps). Window ``i`` holds samples
      ``[i * shift_amount, i * shift_amount + timesteps)``. If the recording is
      shorter than one window the result has shape (0, timesteps).

  Notes
  -----
  Reads the notebook-level settings ``timesteps``, ``shift_amount`` and
  ``poly_regression_degree``.
  """
  # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
  dataframe = read_csv(filepath, usecols=[columnindex], names=['voltage'], header=None, sep=r'\s+')

  # Work on a private float copy so the fill below never touches pandas internals
  data = dataframe['voltage'].to_numpy(dtype=float, copy=True)
  mask = np.isnan(data)

  # Only fit when there is something to fill; the fit uses the valid samples only
  if mask.any():
    x = np.arange(len(data))
    p = Polynomial.fit(x[~mask], data[~mask], deg=poly_regression_degree)
    data[mask] = p(x[mask])

  # Not even one full window available
  if len(data) < timesteps:
    return np.empty((0, timesteps))

  # Every possible window as a zero-copy view, then keep each shift_amount-th one.
  # This yields (len(data) - timesteps) // shift_amount + 1 rows.
  windows = np.lib.stride_tricks.sliding_window_view(data, timesteps)[::shift_amount]

  # astype copies, so the caller gets an independent, writable array
  return windows.astype(float)

In [205]:
# Sanity check: window column 0 of the recording and print the resulting array shape
var = load_channel("Data/ThrowingPunch-Frontal-Elbow-Outward-ClosedElbow.txt", 0)
print(var.shape)

(23100, 1000)


In [206]:
# load all channels into a 3D array of [samples, timesteps, features]
# samples = number of rows
# timesteps = size of rolling window
# features = number of channels
def load_group(filename):
  """Window every channel of a recording and stack them on a third axis.

  The channel count is the number of whitespace-separated fields on the first
  line of the file. Each channel is loaded with ``load_channel`` and the
  results are stacked depth-wise.

  Returns a tuple ``(stacked, channel_count)``.
  """
  # the first line alone decides how many columns are read
  with open(filename, 'r') as handle:
    channel_count = len(handle.readline().split())

  per_channel = [load_channel(filename, idx) for idx in range(channel_count)]

  # dstack puts the channels (features) on the last axis
  return dstack(per_channel), channel_count

In [207]:
# var = load_group("Data/ThrowingPunch-Frontal-Elbow-Outward-ClosedElbow.txt")
# print(var.shape)

In [208]:
def replace_comments(val):
  """Return 2 when the string `val` starts with '#', otherwise 0."""
  return 2 if val.startswith('#') else 0

# load a dataset group, such as train or test
def load_dataset_group(filename, detection_fraction=(0.1, 0.4)):
  """Load the windowed inputs and one class label per window.

  Parameters
  ----------
  filename : str or path-like
      Recording to load. Lines containing '#' are treated as event markers.
  detection_fraction : tuple of two floats, optional
      Start and end of the detection zone as fractions of ``timesteps``.
      A window is labelled by the first marker that falls inside this zone.
      The default (0.1, 0.4) matches the previously hard-coded range.

  Returns
  -------
  X : numpy.ndarray
      Whatever ``load_group`` returns as its first element.
  y : pandas.DataFrame
      Single float column 'label' with one row per window:
      0 = no marker in the detection zone, 1 = punch, 2 = retract.

  Notes
  -----
  Reads the notebook-level settings ``timesteps`` and ``shift_amount`` and
  prints the shape of ``y``.
  """
  # load input data
  X, _ = load_group(filename)

  # One flag per line of the file; the context manager closes it even on error
  with open(filename, 'r') as file:
    has_marker = np.array(['#' in line for line in file], dtype=bool)

  # Per-line label: markers alternate 1 (punch), 2 (retract), 1, 2, ...
  # starting with punch; all other lines stay 0
  data = np.zeros(len(has_marker))
  marker_rows = np.flatnonzero(has_marker)
  data[marker_rows] = np.where(np.arange(len(marker_rows)) % 2 == 0, 1.0, 2.0)

  # Number of full windows; clamped so a too-short file gives an empty result
  n_rows = max((len(data) - timesteps) // shift_amount + 1, 0)

  start_detection = int(timesteps * detection_fraction[0])
  end_detection = int(timesteps * detection_fraction[1])

  labels = np.zeros(n_rows)
  for i in range(n_rows):
    start = i * shift_amount
    window = data[start:start + timesteps]
    zone = window[start_detection:end_detection]
    hits = np.flatnonzero(zone)
    # the first marker inside the detection zone decides the window's class
    if hits.size:
      labels[i] = zone[hits[0]]

  y = pd.DataFrame(labels, columns=['label'])
  print(y.shape)

  return X, y

In [209]:
# varX, vary = load_dataset_group("Data/ThrowingPunch-Frontal-Elbow-Outward-ClosedElbow.txt")
# print(vary.loc[2999, 'label'], vary.shape)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# print(vary)

In [213]:
# load the dataset, returns train and test X and y elements
def load_dataset(filename):
  """Load a recording and split it into train and test sets.

  The first 80% of the windows (in file order) form the training set and the
  rest form the test set. Labels are one-hot encoded with a class count taken
  from the full label set, so both splits always have the same number of
  columns even if one split lacks the highest class.

  Returns ``(trainX, trainy, testX, testy)`` and prints the shapes before and
  after encoding.
  """
  # load data
  X, y = load_dataset_group(filename)
  train_size = int(len(X) * 0.8)

  # split into train and test
  trainX, testX = X[0:train_size], X[train_size:]
  trainy, testy = y[0:train_size], y[train_size:]

  print(trainX.shape, trainy.shape)
  print(testX.shape, testy.shape)

  # Labels are used as class indices directly (0 = no event), so no offset.
  # Fix the one-hot width from ALL labels; inferring it per split can give
  # train and test a different number of columns.
  all_labels = np.asarray(y)
  num_classes = int(all_labels.max()) + 1 if all_labels.size else None

  # one hot encode y
  trainy = to_categorical(trainy, num_classes=num_classes)
  testy = to_categorical(testy, num_classes=num_classes)
  print(trainX.shape, trainy.shape, testX.shape, testy.shape)
  return trainX, trainy, testX, testy

In [214]:
# Load the recording once at notebook level; the loaders print the shapes.
# Note that run_experiment() loads the data again itself.
trainX, trainy, testX, testy = load_dataset("Data/ThrowingPunch-Frontal-Elbow-Outward-ClosedElbow.txt")

(23100, 1)
(18480, 1000, 2) (18480, 1)
(4620, 1000, 2) (4620, 1)
(18480, 1000, 2) (18480, 3) (4620, 1000, 2) (4620, 3)


## Fit and Evaluate a Model

In [219]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
  """Train the basic LSTM classifier once and return its test accuracy.

  The network is LSTM(100) -> Dropout(0.5) -> Dense(100, relu) ->
  Dense(n_outputs, softmax), trained for 15 epochs with batch size 64 using
  Adam and categorical cross-entropy.

  Returns the accuracy reported by ``model.evaluate`` on the test data.
  """
  verbose = 0
  epochs = 15
  batch_size = 64

  # input and output sizes come straight from the arrays
  n_timesteps = trainX.shape[1]
  n_features = trainX.shape[2]
  n_outputs = trainy.shape[1]

  model = keras.Sequential()
  model.add(LSTM(100, input_shape=(n_timesteps,n_features)))
  model.add(Dropout(0.5))
  model.add(Dense(100, activation='relu'))
  model.add(Dense(n_outputs, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  # fit network
  model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)

  # evaluate model; the first returned value is the loss, which is not needed
  metrics = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
  _, accuracy = metrics
  return accuracy


## Summarize Results

In [220]:
# summarize scores
def summarize_results(scores):
  """Print the raw scores, then their mean and standard deviation."""
  print(scores)
  average = mean(scores)
  spread = std(scores)
  print('Accuracy: %.3f%% (+/-%.3f)' % (average, spread))

In [223]:
# run an experiment
def run_experiment(repeats=10, filename="Data/ThrowingPunch-Frontal-Elbow-Outward-ClosedElbow.txt"):
  """Train and score the current ``evaluate_model`` several times.

  Parameters
  ----------
  repeats : int, optional
      Number of independent train/evaluate runs.
  filename : str, optional
      Recording to load. Defaults to the punch recording that was previously
      hard-coded here.

  Prints each run's accuracy in percent, then the summary from
  ``summarize_results``. ``evaluate_model`` is looked up when this function
  runs, so the definition executed most recently in the kernel is used.
  """
  # load data
  trainX, trainy, testX, testy = load_dataset(filename)
  # repeat experiment
  scores = list()
  for r in range(repeats):
    # evaluate_model returns a fraction; report it as a percentage
    score = evaluate_model(trainX, trainy, testX, testy) * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores.append(score)
  # summarize results
  summarize_results(scores)

## Final Examination

In [224]:
# Run the default 10 repeats; this uses whichever evaluate_model was defined most recently in the kernel
run_experiment()

(23100, 1)
(18480, 1000, 2) (18480, 1)
(4620, 1000, 2) (4620, 1)
(18480, 1000, 2) (18480, 3) (4620, 1000, 2) (4620, 3)
>#1: 64.719
>#2: 64.307
>#3: 63.463
>#4: 63.658
>#5: 65.866
>#6: 64.221
>#7: 64.740
>#8: 66.450
>#9: 66.970
>#10: 63.420
[64.71861600875854, 64.30736184120178, 63.463205099105835, 63.658010959625244, 65.86580276489258, 64.22078013420105, 64.74025845527649, 66.45021438598633, 66.96969866752625, 63.41991424560547]
Accuracy: 64.781% (+/-1.189)


# CNN-LSTM Network Model

In [225]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, n_steps=10):
  """Train the CNN-LSTM classifier once and return its test accuracy.

  Each window is split into ``n_steps`` equal sub-sequences. Two Conv1D layers
  read every sub-sequence, and an LSTM then reads the sequence of extracted
  features.

  Parameters
  ----------
  trainX, testX : numpy.ndarray
      Arrays indexed as [samples, timesteps, features].
  trainy, testy : numpy.ndarray
      One-hot labels; the number of columns sets the output layer size.
  n_steps : int, optional
      Number of sub-sequences per window. The sub-sequence length is derived
      from the window length, so the default of 10 reproduces the former
      hard-coded 10 x 100 split for 1000-sample windows.

  Raises
  ------
  ValueError
      If ``n_steps`` is not a positive divisor of the window length.

  Notes
  -----
  Executing this cell replaces any earlier ``evaluate_model`` definition in
  the kernel.
  """
  # define model
  verbose, epochs, batch_size = 0, 25, 64
  n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]

  # derive the sub-sequence length instead of assuming 1000-sample windows
  if n_steps <= 0 or n_timesteps % n_steps != 0:
    raise ValueError('n_steps=%d must be a positive divisor of the window length %d'
                     % (n_steps, n_timesteps))
  n_length = n_timesteps // n_steps

  # reshape data into time steps of sub-sequences
  trainX = trainX.reshape((trainX.shape[0], n_steps, n_length, n_features))
  testX = testX.reshape((testX.shape[0], n_steps, n_length, n_features))
  # define model
  model = keras.Sequential([
    TimeDistributed(Conv1D(filters=64, kernel_size=3,
              activation='relu'), input_shape=(None, n_length, n_features)),
    TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')),
    TimeDistributed(Dropout(0.5)),
    TimeDistributed(MaxPooling1D(pool_size=2)),
    TimeDistributed(Flatten()),
    LSTM(100),
    Dropout(0.5),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='softmax')
  ])
  model.compile(loss='categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])
  # fit network
  model.fit(trainX, trainy, epochs=epochs,
            batch_size=batch_size, verbose=verbose)
  # evaluate model; the first returned value is the loss, which is not needed
  _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
  return accuracy


In [226]:
# Run the default 10 repeats; this uses whichever evaluate_model was defined most recently in the kernel
run_experiment()

(23100, 1)
(18480, 1000, 2) (18480, 1)
(4620, 1000, 2) (4620, 1)
(18480, 1000, 2) (18480, 3) (4620, 1000, 2) (4620, 3)
>#1: 69.697
>#2: 71.797
>#3: 72.164
>#4: 70.693
>#5: 70.736
>#6: 71.991
>#7: 70.628
>#8: 69.978
>#9: 71.147
>#10: 71.537
[69.69696879386902, 71.79653644561768, 72.16449975967407, 70.69264054298401, 70.73593139648438, 71.99134230613708, 70.6277072429657, 69.97835636138916, 71.14718556404114, 71.53679728507996]
Accuracy: 71.037% (+/-0.795)


# ConvLSTM Network Model

A further extension of the CNN LSTM idea is to perform the convolutions of the CNN (e.g. how the CNN reads the input sequence data) as part of the LSTM.

In [227]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, n_steps=10):
  """Train the ConvLSTM classifier once and return its test accuracy.

  Each window is split into ``n_steps`` equal sub-sequences and reshaped to
  (samples, time steps, rows=1, cols, channels), the layout passed to
  ConvLSTM2D below.

  Parameters
  ----------
  trainX, testX : numpy.ndarray
      Arrays indexed as [samples, timesteps, features].
  trainy, testy : numpy.ndarray
      One-hot labels; the number of columns sets the output layer size.
  n_steps : int, optional
      Number of sub-sequences per window. The sub-sequence length is derived
      from the window length, so the default of 10 reproduces the former
      hard-coded 10 x 100 split for 1000-sample windows.

  Raises
  ------
  ValueError
      If ``n_steps`` is not a positive divisor of the window length.

  Notes
  -----
  Executing this cell replaces any earlier ``evaluate_model`` definition in
  the kernel.
  """
  # define model
  verbose, epochs, batch_size = 0, 25, 64
  n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]

  # derive the sub-sequence length instead of assuming 1000-sample windows
  if n_steps <= 0 or n_timesteps % n_steps != 0:
    raise ValueError('n_steps=%d must be a positive divisor of the window length %d'
                     % (n_steps, n_timesteps))
  n_length = n_timesteps // n_steps

  # reshape into subsequences (samples, time steps, rows, cols, channels)
  trainX = trainX.reshape((trainX.shape[0], n_steps, 1, n_length, n_features))
  testX = testX.reshape((testX.shape[0], n_steps, 1, n_length, n_features))
  # define model
  model = keras.Sequential([
    ConvLSTM2D(filters=64, kernel_size=(1, 3),
              activation='relu', input_shape=(n_steps, 1, n_length, n_features)),
    Dropout(0.5),
    Flatten(),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='softmax')
  ])
  model.compile(loss='categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])
  # fit network
  model.fit(trainX, trainy, epochs=epochs,
            batch_size=batch_size, verbose=verbose)
  # evaluate model; the first returned value is the loss, which is not needed
  _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
  return accuracy

In [228]:
# Run the default 10 repeats; this uses whichever evaluate_model was defined most recently in the kernel
run_experiment()

(23100, 1)
(18480, 1000, 2) (18480, 1)
(4620, 1000, 2) (4620, 1)
(18480, 1000, 2) (18480, 3) (4620, 1000, 2) (4620, 3)
>#1: 69.697
>#2: 70.866
>#3: 71.580
>#4: 69.307
>#5: 70.455
>#6: 71.753
>#7: 69.589
>#8: 68.312
>#9: 71.623
>#10: 69.784
[69.69696879386902, 70.865797996521, 71.58008813858032, 69.3073570728302, 70.45454382896423, 71.75324559211731, 69.58874464035034, 68.31169128417969, 71.62337899208069, 69.78355050086975]
Accuracy: 70.297% (+/-1.092)


In [229]:
# License:
# ========
# Use of this dataset in publications must be acknowledged by referencing the following publication [1] 

# [1] Davide Anguita, Alessandro Ghio, Luca Oneto, Xavier Parra and Jorge L. Reyes-Ortiz.
# A Public Domain Dataset for Human Activity Recognition Using Smartphones.
# 21th European Symposium on Artificial Neural Networks, Computational Intelligence
# and Machine Learning, ESANN 2013. Bruges, Belgium 24-26 April 2013. 

# This dataset is distributed AS-IS and no responsibility implied or
# explicit can be addressed to the authors or their institutions for
# its use or misuse. Any commercial use is prohibited.

# Other Related Publications:
# ===========================
# [2] Davide Anguita, Alessandro Ghio, Luca Oneto, Xavier Parra, Jorge L.
# Reyes-Ortiz.  Energy Efficient Smartphone-Based Activity Recognition
# using Fixed-Point Arithmetic. Journal of Universal Computer Science.
# Special Issue in Ambient Assisted Living: Home Care.   Volume 19, Issue 9. May 2013

# [3] Davide Anguita, Alessandro Ghio, Luca Oneto, Xavier Parra and
# Jorge L. Reyes-Ortiz. Human Activity Recognition on Smartphones
# using a Multiclass Hardware-Friendly Support Vector Machine. 4th
# International Workshop of Ambient Assited Living, IWAAL 2012,
# Vitoria-Gasteiz, Spain, December 3-5, 2012. Proceedings. Lecture
# Notes in Computer Science 2012, pp 216-223. 

# [4] Jorge Luis Reyes-Ortiz, Alessandro Ghio, Xavier Parra-Llanas,
# Davide Anguita, Joan Cabestany, Andreu Català. Human Activity and
# Motion Disorder Recognition: Towards Smarter Interactive Cognitive
# Environments. 21th European Symposium on Artificial Neural Networks,
# Computational Intelligence and Machine Learning, ESANN 2013. Bruges,
# Belgium 24-26 April 2013.  

# ==================================================================================================
# Jorge L. Reyes-Ortiz, Alessandro Ghio, Luca Oneto, Davide Anguita and Xavier Parra. November 2013.