# Human Activity Recognition using Inertial sensors and Neural Networks

**Elia Bonetto, Filippo Rigotto.**

Department of Information Engineering, University of Padova, Italy.

Human Data Analytics, a.y. 2018/2019

## Part 1 - Data preprocessing

In [0]:
# clear_output is used to hide noisy cell output (Image is imported but not used in the visible cells)
from IPython.display import Image, clear_output
import os
from google.colab import drive
# Mount Google Drive so that the project folder is reachable under /content/drive/
drive.mount('/content/drive/')
clear_output()
# Shell command: list the content of the project folder
!ls /content/drive/My\ Drive/hda-project
# Work from the project folder, so that the relative 'dataset/...' paths used below resolve
os.chdir("/content/drive/My Drive/hda-project")

In [0]:
!pip install transforms3d
clear_output()

import json
import logging
import math

from transforms3d.axangles import axangle2mat

import h5py
import numpy as np
import scipy as sp
import scipy.io

import pandas as pd
pd.set_option('display.precision',3)
pd.set_option('display.float_format', '{:0.3f}'.format)

from sklearn.model_selection import train_test_split

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
mpl.rcParams['figure.figsize'] = (16,10)
mpl.rcParams['axes.grid'] = True

Let's start from the [original datasets](https://www.dlr.de/kn/desktopdefault.aspx/tabid-8500/14564_read-36508/) provided as MATLAB `mat` files and process data items to have the final dataset all the network models will work on.

In [0]:
# MATLAB source files, relative to the working directory; the loading loop reads them in this order
datasets = [
    'dataset/ARS_DLR_DataSet.mat',
    'dataset/ARS_DLR_DataSet_V2.mat',
    'dataset/ARS_DLR_Benchmark_Data_Set.mat'
]

Classes to detect are reduced from the original 17 down to 8.

Furthermore, transitions will not be detected, so the final number of used labels is 7.

In [0]:
# Label strings as spelled in the dataset; the position of each entry is
# its integer code in the full 17-class encoding.
labels = [
    'RUNNING', 'WALKING', 'JUMPING', 'STNDING', 'SITTING', 'XLYINGX', 'FALLING',
    'WALKUPS', 'WALKDWS',
    'JUMPVRT', 'JUMPFWD', 'JUMPBCK',
    'TRANSUP', 'TRANSDW', 'TRNSACC', 'TRNSDCC', 'TRANSIT',
]

# label string -> 17-class code
map_encode = dict(zip(labels, range(len(labels))))

# 17-class code -> 8-class code
map_encode_8 = {code: code for code in range(7)}            # first seven stay untouched
map_encode_8.update({7: 1, 8: 1})                           # walking up/downstairs = walking
map_encode_8.update({9: 2, 10: 2, 11: 2})                   # jumping in place/forward/backward = jumping
map_encode_8.update({code: 7 for code in range(12, 17)})    # every transition kind

# 8-class code -> readable name, useful for plots
map_decode_8 = dict(enumerate([
    'running',
    'walking',
    'jumping',
    'standing',
    'sitting',
    'lying',
    'falling',
    'transition',
]))

The dataset provides the list of labels ("activities") for each segment of tracked data, along with index bounds (start and stop) for each item of the list.

Outside these ranges, data is considered to be marked as transitions between classes.

This structure is flattened into a single list.

**TODO** write about holes and mislabeling errors in the report 

In [0]:
def flatten_labels(labels, bounds):
    """Expand per-segment labels into one label per sample.

    `bounds` holds alternating start/stop values (numbered from 1), one pair
    per entry of `labels`. The result has `bounds[-1]` entries; every sample
    not covered by a segment keeps the code of `map_encode['TRANSIT']`.
    """
    begin = bounds[0::2] - 1  # starts, shifted to zero-based (included)
    end = bounds[1::2]        # stops, used as exclusive upper limits

    # TODO: document holes and mistimings of the dataset in the report
    flat = np.ones(bounds[-1], dtype=np.uint8) * map_encode['TRANSIT']
    last = len(end) - 1
    for idx, label in enumerate(labels):
        # a segment running more than one sample into the next one is
        # reported, and the next segment is made to start where this one stops
        if idx < last and end[idx] > begin[idx + 1] + 1:
            print('Time error: {} > {}'.format(end[idx], begin[idx + 1]))
            begin[idx + 1] = end[idx]
        flat[begin[idx]:end[idx]] = label
    return flat

Datasets contain IMU measurements referred to the sensor frame, but also provide the attitude/cosine matrix to express the measurements w.r.t. the body frame.

In [0]:
def convert_body_frame(imu_data, attitude_matrix):
    """Express one IMU sample in the body frame.

    Entry 0 of both inputs is left out of the computation; the remaining nine
    entries of `attitude_matrix` are reshaped to 3x3 and used transposed.
    Returns a copy of `imu_data` in which entries 1-3, 4-6 and 7-9 are each
    replaced by their product with that matrix. The input is not modified.
    """
    rotation = np.transpose(attitude_matrix[1:].reshape(3, 3))

    body = imu_data.copy()
    # acc, gyro and mag triplets start at entries 1, 4 and 7
    for first in (1, 4, 7):
        triplet = slice(first, first + 3)
        body[triplet] = np.dot(rotation, np.transpose(imu_data[triplet]))
    return body

For each test in a dataset, extract the relevant data and then flatten the labels into a single long list.

The rest of the processing is postponed to operate on the full dataset instead of working on single tests.

In [0]:
def process_single_test(dataset, key):
    """Extract measurements, attitudes and per-sample labels of one test.

    Parameters:
        dataset: mapping as returned by loading one of the `mat` files.
        key: name of the test to extract from `dataset`.

    Returns:
        (imu_data, attitude_matrices, activities_flat), where activities_flat
        holds one integer label (17-class encoding) per row of imu_data.

    Raises:
        AssertionError: if the first column (time) of measurements and
            attitudes disagree, if bounds and labels are inconsistent, or if
            the flattened labels do not cover every measurement row.
    """
    imu_data, attitude_matrices, activities, activities_bounds = dataset[key][0]

    # throwing away useless nested arrays
    activities = np.array([ act[0] for act in activities[0] ])
    activities_bounds = activities_bounds[0]

    # integrity checks on time and length
    # (the comparison is reduced with all(): asserting on the bare list of
    # comparisons would pass for any non-empty list, whatever its content)
    assert len(imu_data) == len(attitude_matrices), \
        'measurements and attitudes have different lengths'
    assert all(
        imu_row[0] == att_row[0]
        for imu_row, att_row in zip(imu_data, attitude_matrices)
    ), 'measurements and attitudes have different time columns'
    assert len(activities_bounds) == 2*len(activities), \
        'expected one start/stop pair per activity'

    # change labels to int numbers
    activities = np.array([ map_encode[a] for a in activities ])

    # get a single array of labels instead of labels + bounds
    activities_flat = flatten_labels(activities, activities_bounds)
    assert len(imu_data) == len(activities_flat), \
        'flattened labels do not cover every measurement'

    return imu_data, attitude_matrices, activities_flat

Numpy arrays are used to store all the collected data from every test in each dataset.

In [0]:
# Per-test chunks are collected in lists and joined once after the loop:
# np.append returns a new copy of the whole array at every call, so appending
# inside the loop would re-copy everything loaded so far for each test.
# The empty seeds fix the column count / dtype of the results.
imu_chunks = [np.empty((0,10))]
attitude_chunks = [np.empty((0,10))]
activity_chunks = [np.empty(0, dtype=np.uint8)]

# loop datasets
for ds in datasets:
    dataset = scipy.io.loadmat(ds)
    # loadmat also returns metadata entries, whose names contain '__'
    keys = [ k for k in dataset if '__' not in k ]
    # loop keys=tests
    for test in keys:
        print('Loading {}:'.format(test).ljust(52,' '), end='')
        imu, mat, act = process_single_test(dataset, test)
        imu_chunks.append(imu)
        attitude_chunks.append(mat)
        activity_chunks.append(np.ravel(act))

        print('{} elements'.format(imu.shape[0]).rjust(15,' '))

imu_sensor = np.concatenate(imu_chunks, axis=0)
attitude_mat = np.concatenate(attitude_chunks, axis=0)
activities = np.concatenate(activity_chunks)

assert(imu_sensor.shape==attitude_mat.shape)
num_data = imu_sensor.shape[0]
clear_output()
print('Read {} records'.format(num_data))
print('IMU data shape:   {}'.format(imu_sensor.shape))
print('Attitudes shape:  {}'.format(attitude_mat.shape))
print('Activities shape: {}'.format(activities.shape))

We save this checkpoint, even if we don't use it.

In [0]:
# Checkpoint of the raw concatenated arrays: one HDF5 dataset per array.
raw_arrays = {
    'imu_sensor': imu_sensor,
    'attitudes': attitude_mat,
    'activities': activities,
}
with h5py.File('dataset/ARS-raw.h5', 'w') as h5f:
    for h5_name, h5_data in raw_arrays.items():
        h5f.create_dataset(h5_name, data=h5_data)

In [0]:
# optional reload if messing up below
#with h5py.File('dataset/ARS-raw.h5','r') as h5f:
#    imu_sensor = h5f['imu_sensor'][:]
#    attitude_mat = h5f['attitudes'][:]
#    activities = h5f['activities'][:]

Performing some checks:

- Displaying minutes for each activity

In [0]:
# Minutes recorded for each of the 17 original classes, plus a grand total.
codes, samples = np.unique(activities, return_counts=True)
minutes = samples / 100 / 60  # 100 Hz, 60 seconds
names = np.array([labels[code].lower() for code in codes])

act = np.append(names, 'total')
cnt = np.append(minutes, sum(minutes))

# one row per class: name and minutes rounded to the nearest integer
table = np.array([act, np.round(cnt)]).T
df = pd.DataFrame(table, columns=['Activity', 'Time (min)'])
df.index = [''] * len(df)  # blank index
display(df)

#TODO other if needed


More processing on the whole dataset:
- Reduce the number of tracked activities to 8
- Remove items labelled as transitions
- Convert measurements to the body frame
- Remove time columns (in both measurements and attitude matrices)

In [0]:
# remap activities
print('Old labels: {}'.format(np.unique(activities)))
# map_encode_8 has one entry for each code 0..16, so it can be turned into a
# lookup table and applied to the whole label array in a single indexing
# operation, instead of one dictionary lookup per sample.
remap_table = np.array([ map_encode_8[code] for code in range(len(map_encode_8)) ])
activities = remap_table[np.asarray(activities).astype(np.intp)]
print('New labels: {}'.format(np.unique(activities)))

In [0]:
# remove transitions
transit_label = map_encode_8[map_encode['TRANSIT']]
# the mask is computed once and shared by the three arrays, so they are
# guaranteed to be filtered with the same selection
keep_mask = activities != transit_label
transit_number = int(np.count_nonzero(~keep_mask))
print('Transit label is {}, found {} elements'.format(transit_label, transit_number))

num_data = len(imu_sensor)
print('Old num data: {}'.format(num_data))
imu_sensor = imu_sensor[keep_mask]
attitude_mat = attitude_mat[keep_mask]
activities = activities[keep_mask]

# every array must have lost exactly the transition samples
assert(imu_sensor.shape[0] == num_data-transit_number)
assert(attitude_mat.shape[0] == num_data-transit_number)
assert(activities.shape[0] == num_data-transit_number)

num_data = len(imu_sensor)
print('New num data: {}'.format(num_data))

num_labels = len(np.unique(activities))
print('New num labels: {}'.format(num_labels))

In [0]:
# convert from sensor frame to body frame
# Batched form of the per-sample conversion: columns 1..9 of every attitude row
# are reshaped to a 3x3 matrix, and each acc/gyro/mag triplet is multiplied by
# the TRANSPOSE of that matrix (the einsum contracts over the matrix's first
# index). This is the product convert_body_frame computes for a single row,
# done for all rows at once instead of in a Python loop over every sample.
attitude_3x3 = attitude_mat[:, 1:].reshape(-1, 3, 3)
imu_body = imu_sensor.copy()  # column 0 (time) is kept as it is
for first_col in (1, 4, 7):   # acc, gyro, mag
    imu_body[:, first_col:first_col + 3] = np.einsum(
        'nji,nj->ni', attitude_3x3, imu_sensor[:, first_col:first_col + 3])

In [0]:
# remove time column from data
# (column 0 of each array; running this cell twice would drop a second column)
imu_sensor, imu_body, attitude_mat = (
    table[:, 1:] for table in (imu_sensor, imu_body, attitude_mat)
)

Performing some checks:

- Displaying minutes for each activity

In [0]:
# Minutes recorded for each remaining class after the remapping, plus a grand total.
codes, samples = np.unique(activities, return_counts=True)
minutes = samples / 100 / 60
names = np.array([map_decode_8[code] for code in codes])

act = np.append(names, 'total')
cnt = np.append(minutes, sum(minutes))

# one row per class: name and minutes rounded to the nearest integer
table = np.array([act, np.round(cnt)]).T
df = pd.DataFrame(table, columns=['Activity', 'Time (min)'])
df.index = [''] * len(df)
display(df)

#TODO other if needed

Group by activity and organize data in overlapping windows (overlapping ratio is regulated by `stride_len`).


In [0]:
window_len = 128
stride_len = round(window_len / 2)
x, y = [], []

# The train/test split built from these windows is documented as using
# body-referenced data, so windows are cut from imu_body (not imu_sensor).
window_source = imu_body

for activity in np.unique(activities):
    # all the samples of this activity, in their original order
    tmp = window_source[activities == activity]

    # Only complete windows are kept, so no zero padding is needed.
    # The "+ 1" keeps the last window when it ends exactly on the final
    # sample; classes shorter than one window produce no window at all.
    for i in range(0, len(tmp) - window_len + 1, stride_len):
        x.append(tmp[i:i+window_len])
        y.append(activity)

x = np.array(x) # TODO move to right pos
y = np.array(y)
print(x.shape)
print(len(y))

Save again the entire dataset, even if it will not be used directly. 

In [0]:
# Full per-sample dataset: one HDF5 dataset per array.
full_arrays = {
    'imu_sensor': imu_sensor,
    'imu_body': imu_body,
    'attitudes': attitude_mat,
    'activities': activities,
}
with h5py.File('dataset/ARS-full.h5', 'w') as h5f:
    for h5_name, h5_data in full_arrays.items():
        h5f.create_dataset(h5_name, data=h5_data)

Pre-compute and save an 80/20 train/test split of the dataset, using body-referenced data and corresponding labels. 

This is the final version used for training all models.

`random_state` is the seed of the PRNG.

In [0]:
# Stratified split of the windows: 20% goes to the test set, with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.2, random_state=1, stratify=y
)

for caption, size in (
    ("X_train shape: ", X_train.shape),
    ("Y_train shape: ", len(Y_train)),
    ("X_test shape:  ", X_test.shape),
    ("Y_test shape:  ", len(Y_test)),
):
    print(caption + str(size))

In [0]:
# Train/test split as produced by the split cell (written before normalization).
split_arrays = {
    'X_train': X_train,
    'X_test': X_test,
    'Y_train': Y_train,
    'Y_test': Y_test,
}
with h5py.File('dataset/ARS-train-test.h5', 'w') as h5f:
    for h5_name, h5_data in split_arrays.items():
        h5f.create_dataset(h5_name, data=h5_data)

Data normalization using training set's mean and std.

May improve results (or may not, this is a thing to test).

In [0]:
# TODO this surely works when data has 3D shape, not 2D. merge framing before this
# Each channel (last axis) is standardized in place, using the mean and std of
# the TRAINING values of that channel for both the training and the test set.
for channel in range(X_train.shape[-1]):
    train_flat = X_train[:, :, channel].flatten()
    test_flat = X_test[:, :, channel].flatten()
    mean, std = np.mean(train_flat), np.std(train_flat)
    X_train[:, :, channel] = ((train_flat - mean) / std).reshape(X_train.shape[:2])
    X_test[:, :, channel] = ((test_flat - mean) / std).reshape(X_test.shape[:2])

In [0]:
# checks
# print mean and std of every training channel, followed by an empty line
for channel in range(X_train.shape[-1]):
    channel_values = X_train[:, :, channel].flatten()
    print(np.mean(channel_values), np.std(channel_values), '', sep='\n')

In [0]:
# Same four arrays as the plain split file, written after the normalization step.
norm_arrays = {
    'X_train': X_train,
    'X_test': X_test,
    'Y_train': Y_train,
    'Y_test': Y_test,
}
with h5py.File('dataset/ARS-train-test-norm.h5', 'w') as h5f:
    for h5_name, h5_data in norm_arrays.items():
        h5f.create_dataset(h5_name, data=h5_data)

Data augmentation: new data is created:
- By applying a rotation of a random angle around a random axis,
- By permutation of data in a window (currently unimplemented).

In [0]:
def rotate_random(data):
    """Apply a single random rotation to every 3-axis triplet of `data`.

    One axis and one angle are drawn per call, and the resulting rotation
    matrix is shared by all the triplets found along the last dimension.
    `data` may be a single sample (1D, length multiple of 3) or a whole
    window (2D, time steps x channels): in the latter case every time step
    receives the same rotation.

    Returns a rotated copy; `data` itself is not modified.
    """
    # axis to apply the rotation to
    axis = np.random.uniform(low=-1, high=1, size = 3)
    # rotation entity
    angle = np.random.uniform(low=-np.pi, high=np.pi)
    # the matrix depends only on axis and angle: build it once, not per triplet
    rotation = axangle2mat(axis, angle)

    result = data.copy()
    for i in range(0, data.shape[-1], 3):
        result[..., i:i+3] = np.matmul(data[..., i:i+3], rotation)

    return result

def rotate_values(values):
    """Rotate every window of `values` (windows x time steps x channels).

    Each window gets ONE random rotation shared by all its time steps: drawing
    a new rotation for every time step would scramble the signal along time.
    `values` is modified in place and also returned.
    """
    for i in range(values.shape[0]):
        values[i] = rotate_random(values[i])
    return values

def permute_values(values):
    """Placeholder for the permutation augmentation.

    Currently not implemented: `values` is left untouched and None is returned.
    """
    return None

In [0]:
# TODO check if only for training or for whole dataset
# NOTE: windows are drawn from the full x / y arrays (train and test together).
permute = False
rotate = True
augm_factor = 0.35

# Classes and counts come from the same array (y), and every count stays paired
# with its own label: a label is never used as a position in the count list,
# so labels do not need to be 0..k-1 without gaps.
classes, count = np.unique(y, return_counts=True)
# every class is brought up to this fraction of the most represented one
most_repr = int(count.max() * augm_factor)

x_add = []
y_add = []
for activity, class_count in zip(classes, count):
    augment_number = most_repr - class_count
    if augment_number > 0:
        print(f"Class {activity} has {class_count} entries and needs to be augmented with {augment_number} values")
        # random windows of this class, drawn with replacement
        picked = np.random.choice(int(class_count), size=augment_number)
        values = x[y == activity][picked]
        if rotate:
            values = rotate_values(values)
        if permute:
            permute_values(values)
        x_add.extend(values)
        # shape x_add would have as an array, without building that array
        print((len(x_add),) + values.shape[1:])
        y_add.extend([activity]*values.shape[0])

    else:
        print(f"Class {activity} has enough values")

In [0]:
#if necessary
#X_train, X_test, Y_train, Y_test = \
 #   train_test_split(x, y, test_size=0.2, random_state=1, stratify=y) 

In [0]:
#with h5py.File('dataset/ARS-train-test-norm-augm.h5','w') as h5f:
#    h5f.create_dataset('X_train', data=X_train)
#    h5f.create_dataset('X_test',  data=X_test)
#    h5f.create_dataset('Y_train', data=Y_train)
#    h5f.create_dataset('Y_test',  data=Y_test)

Checks

In [0]:
#TODO

In [0]:
!git status

In [0]:
!git add HAR-Preprocessing.ipynb;

In [0]:
!git config --global user.name 'Elia Bonetto'; git config user.email 'eliabntt94@gmail.com'

In [0]:
!git commit -m "Framing"

In [0]:
!git push origin master