# Introduction

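This notebook experiments with the [Dataset for ADL Recognition with a Wrist-worn Accelerometer](https://archive.ics.uci.edu/ml/datasets/Dataset+for+ADL+Recognition+with+Wrist-worn+Accelerometer), comparing DTW k-NN, hidden Markov model and LSTM classifiers on its accelerometer recordings.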

In [None]:
import os, sys, shutil, glob, requests, zipfile
import numpy as np, matplotlib.pyplot as plt
from sequentia import *
from tqdm.auto import tqdm

# Silence TensorFlow warnings ('2' filters out INFO and WARNING log messages)
# NOTE(review): this presumably has to be set before TensorFlow is first imported
# (assumed to happen inside `lstm` below) — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Import utility functions and classes.
# The parent directory is appended to the module search path first so that `utils` and
# `lstm` can be imported (presumably they live one level above this notebook — confirm).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from utils import *
from lstm import LSTMClassifier

# ggplot style
plt.style.use('ggplot')

# Set seed for reproducible randomness:
# seed NumPy's global generator, and create a dedicated RandomState (same seed)
# that is passed explicitly to the components accepting a `random_state`.
seed = 0
np.random.seed(seed)
rng = np.random.RandomState(seed)

In [None]:
# `clfs` holds the fitted classifiers and `results` the evaluation results,
# both keyed by model name ('hmm', 'knn', 'lstm'); `results` is further keyed by dataset split.
clfs, results = {}, {'hmm': {}, 'knn': {}, 'lstm': {}}

## Downloading and deserializing the data

In [None]:
# A map of the gesture directories to their class names.
# Keys are glob patterns that are resolved relative to the extracted dataset directory,
# so a trailing `*` lets a single entry match several directories of the same class.
# NOTE(review): presumably the `*` entries pick up variant folders shipped in the
# archive alongside the main one — confirm against the extracted directory listing.
directory_map = {
    'Brush_teeth': 'brush_teeth',
    'Climb_stairs*': 'climb_stairs',
    'Comb_hair': 'comb_hair',
    'Descend_stairs': 'descend_stairs',
    'Drink_glass*': 'drink_glass',
    'Eat_meat': 'eat_meat',
    'Eat_soup': 'eat_soup',
    'Getup_bed*': 'getup_bed',
    'Liedown_bed': 'liedown_bed',
    'Pour_water*': 'pour_water',
    'Sitdown_chair*': 'sitdown_chair',
    'Standup_chair*': 'standup_chair',
    'Use_telephone': 'use_telephone',
    'Walk*': 'walk'
}

# Class labels, in the same order as the entries of the map above
classes = list(directory_map.values())

In [None]:
def file_to_numpy(file):
    """Convert acceleration parameter .txt file to np.ndarray

    Every non-blank line of the file must hold exactly three
    whitespace-separated numeric fields (the x, y and z acceleration values).
    Blank or whitespace-only lines are ignored.

    Parameters
    ----------
    file : str or path-like
        Path of the text file to read.

    Returns
    -------
    np.ndarray
        One row of three floats per non-blank line, in file order.

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly three fields, or if a field
        cannot be parsed as a float.
    """
    X = []
    with open(file) as f:
        for line in f:
            fields = line.split()
            # A blank line (e.g. a trailing empty line at the end of the file)
            # carries no sample: skip it rather than fail on the unpack below.
            if not fields:
                continue
            Ax, Ay, Az = fields
            X.append([float(Ax), float(Ay), float(Az)])
    return np.array(X)

In [None]:
# Download the dataset archive, extract it, read every recording into `X`
# (one array per file) with its class label in `y`, then delete the
# downloaded archive and the extracted directory again.
X, y = [], []
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00283/ADL_Dataset.zip'

# Defined before the `try` so that the cleanup in `finally` can always refer to them
path, zip_path = 'HMP_Dataset', 'ADL_Dataset.zip'

try:
    print('Downloading dataset from {} ...'.format(url))
    response = requests.get(url)
    # Fail here on an HTTP error, instead of writing an error page to disk
    # and failing later with a confusing error while extracting it.
    response.raise_for_status()
    with open(zip_path, 'wb') as f:
        print('Writing {} ...'.format(zip_path))
        f.write(response.content)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        print('Extracting {} ...'.format(zip_path))
        zip_ref.extractall()

    print('Reading data into Numpy arrays ...')
    for pattern, label in directory_map.items():
        # glob returns matches in arbitrary order; sort them so that the order
        # of the examples (and hence the seeded split) is the same on every run.
        for folder in sorted(glob.glob(os.path.join(path, pattern))):
            for txt in sorted(glob.glob(os.path.join(folder, '*.txt'))):
                X.append(file_to_numpy(txt))
                y.append(label)
    print('Done!')
finally:
    # Best-effort cleanup: either artefact may be missing if the download or the
    # extraction failed, and a cleanup error must not mask the original exception.
    if os.path.exists(zip_path):
        os.remove(zip_path)
    shutil.rmtree(path, ignore_errors=True)

## Removing infrequent classes

The `eat_meat` and `eat_soup` classes have too few examples for the dataset to be stratified into training, validation and test sets, so we remove them.

In [None]:
# Remove instances of the infrequent classes from the dataset
remove_classes = ['eat_meat', 'eat_soup']
# Positions of the examples to keep, computed from the labels before `y` is rebound
keep_idx = [i for i, label in enumerate(y) if label not in remove_classes]
# Index directly with the kept positions: one pass per list, instead of an
# `i in keep_idx` scan of the whole index list for every single example.
X = [X[i] for i in keep_idx]
y = [y[i] for i in keep_idx]

# Update classes
classes = [c for c in classes if c not in remove_classes]

## Dataset splits

Create a stratified 65-20-15 training, validation and test set split.

In [None]:
# Create a stratified training, validation and test set split (65-20-15)
# NOTE(review): no split ratios are passed here, so 65-20-15 must be the default of
# `data_split` (from `utils`) — confirm. The seeded `rng` makes the split repeatable.
X_train, X_val, X_test, y_train, y_val, y_test = data_split(X, y, random_state=rng, stratify=True)

In [None]:
# ADL dataset class counts (training set)
# The x tick labels are rotated by 45 degrees and no title is set.
show_class_counts(y_train, classes, xtick_rotation=45, title=None)

In [None]:
# Histogram of ADL dataset gesture durations (training set), using 75 bins
# NOTE(review): "duration" is presumably the number of samples in a recording — confirm in `utils`.
show_durations(X_train, bins=75, title=None)

## Signal visualization

In [None]:
def plot_signal(signal, figsize=(8, 6), title=None):
    """Visualize the three axes of an accelerometer signal.

    Draws columns 0, 1 and 2 of `signal` as the X, Y and Z channels on three
    vertically stacked subplots sharing one x axis, then shows the figure.

    Parameters
    ----------
    signal : np.ndarray
        Two-dimensional array whose first three columns are plotted.
    figsize : tuple
        Figure size forwarded to `plt.subplots`.
    title : str or None
        Figure title; defaults to 'Accelerometer signals' when None.
    """
    if title is None:
        title = 'Accelerometer signals'

    fig, axs = plt.subplots(3, 1, sharex=True, figsize=figsize)

    # One (axis, legend label, line colour) triple per channel
    channels = zip(axs.flat, ('X', 'Y', 'Z'), ('blue', 'red', 'green'))
    for column, (axis, name, colour) in enumerate(channels):
        axis.plot(signal[:, column], label=name, color=colour)
        axis.legend(loc='upper right')

    # Lay out the subplots first, then make room at the top for the title
    fig.tight_layout()
    fig.subplots_adjust(top=0.92)
    fig.suptitle(title)
    plt.show()

In [None]:
# Plot some sample accelerometer signals from the training set
# (the first two training examples, one figure each)
for signal in X_train[:2]:
    plot_signal(signal)

## Preprocessing

In [None]:
# Create a preprocessing pipeline; the transformations are listed in this order:
#   1. Filter        - median filter with a window size of 10
#   2. BinDownsample - downsampling with a bin size of 50, using the 'decimate' method
#   3. Center        - centering
# NOTE(review): the exact semantics of each step come from sequentia — confirm against its docs.
pre = Preprocess([
    Filter(window_size=10, method='median'),
    BinDownsample(bin_size=50, method='decimate'),
    Center()
])
pre.summary()

In [None]:
# Plot the same sample accelerometer signals, but preprocessed
# NOTE(review): `transform` is called here before the pipeline is fitted in a later cell —
# confirm that none of the transformations needs fitting first.
for signal in pre.transform(X_train[:2]):
    plot_signal(signal)

In [None]:
# Histogram of ADL dataset gesture durations (preprocessed) (training set)
# The pipeline is fitted on the training set only, and the transformed training
# sequences are kept in `Xp_train` for fitting the classifiers.
Xp_train = pre.fit_transform(X_train, verbose=True)
show_durations(Xp_train, bins=75, title=None)

In [None]:
# Apply the preprocessing pipeline to the other dataset splits
# (`transform` only, so the validation and test sets are not used for fitting the pipeline)
Xp_val, Xp_test = pre.transform(X_val, verbose=True), pre.transform(X_test, verbose=True)

## DTWKNN classifier

### Fitting the model

In [None]:
%%time
# Fit a DTWKNN classifier on the training data
# k=1: each example is classified from its single nearest training sequence.
# NOTE(review): `radius` is presumably the DTW warping-window radius — confirm in sequentia's docs.
clfs['knn'] = DTWKNN(k=1, radius=1)
clfs['knn'].fit(Xp_train, y_train)

### Evaluating the model

In [None]:
%%time
# Classify the validation examples and display accuracy and confusion matrix
# The result is stored under results['knn']['validation'] and unpacked into `show_results`.
# NOTE(review): n_jobs=-1 presumably runs the predictions on all available cores — confirm.
results['knn']['validation'] = clfs['knn'].evaluate(Xp_val, y_val, labels=classes, n_jobs=-1)
show_results(*results['knn']['validation'], dataset='validation', labels=classes)

## Hidden Markov Model classifier

### Fitting the model

In [None]:
%%time

# Create HMMs to represent each class
#
# One 7-state HMM is created per class, given random initial state and transition
# parameters, and fitted only on the preprocessed training sequences of that class.
# All HMMs draw from the shared seeded `rng`, so the order of the statements below
# (and of `classes`) affects the random parameters each model receives.
#
# NumPy sometimes raises some errors as a result of instability during the Cholesky decomposition.
# According to issue #414 on Pomegranate's GitHub repository, this may be caused by:
# - Too many states in the HMMs
# - Too many dimensions in the input data, which leads to a large covariance matrix
# - Too few training examples
hmms = []
for c in tqdm(classes, desc='Training HMMs'):
    hmm = HMM(label=c, n_states=7, random_state=rng)
    hmm.set_random_initial()
    hmm.set_random_transitions()
    hmm.fit([Xp_train[i] for i, label in enumerate(y_train) if label == c])
    hmms.append(hmm)
    
# Fit a HMM classifier with the HMMs
clfs['hmm'] = HMMClassifier()
clfs['hmm'].fit(hmms)

### Evaluating the model

In [None]:
%%time
# Classify the validation examples and display accuracy and confusion matrix
# The result is stored under results['hmm']['validation'] and unpacked into `show_results`.
results['hmm']['validation'] = clfs['hmm'].evaluate(Xp_val, y_val, labels=classes)
show_results(*results['hmm']['validation'], dataset='validation', labels=classes)

## LSTM classifier

### Fitting the model

In [None]:
%%time
# Fit an LSTM classifier on the training data:
# 50 epochs, batches of 256, Adam optimizer with a learning rate of 0.0015.
# The validation split is passed to `fit`, and the training history is returned
# in `hist` so that it can be plotted afterwards.
from tensorflow.keras.optimizers import Adam
clfs['lstm'] = LSTMClassifier(epochs=50, batch_size=256, optimizer=Adam(learning_rate=0.0015), classes=classes)
hist = clfs['lstm'].fit(Xp_train, y_train, validation_data=(Xp_val, y_val), return_history=True)

In [None]:
# Summarize the model (called after fitting, on the stored LSTM classifier)
clfs['lstm'].summary()

In [None]:
# Display accuracy and loss history during training
# (`hist` is the history returned by the LSTM classifier's `fit` call)
show_accuracy_history(hist)
show_loss_history(hist)

### Evaluating the model

In [None]:
%%time
# Classify the validation examples and display accuracy and confusion matrix
# The result is stored under results['lstm']['validation'] and unpacked into `show_results`.
# NOTE(review): unlike the DTWKNN and HMM cells, `labels=classes` is not passed to `evaluate`
# here — presumably the classifier uses the `classes` given at construction; confirm.
results['lstm']['validation'] = clfs['lstm'].evaluate(Xp_val, y_val)
show_results(*results['lstm']['validation'], dataset='validation', labels=classes)