# Human Activity Recognition using Inertial sensors and Neural Networks

Elia Bonetto, Filippo Rigotto. 

Department of Information Engineering, University of Padova, Italy.

Human Data Analytics, a.y. 2018/2019

## Part 1 - Data preprocessing

In [0]:
from IPython.display import Image, clear_output
import os
from google.colab import drive
# Mount Google Drive so the project folder is reachable from the Colab VM.
drive.mount('/content/drive/')
# Clear whatever the mount step printed in the cell output.
clear_output()
# List the project folder, then make it the working directory so that the
# relative 'dataset/...' paths used in the following cells resolve against it.
!ls /content/drive/My\ Drive/hda-project
os.chdir("/content/drive/My Drive/hda-project")

In [0]:
import json
import logging

import h5py
import numpy as np
import scipy as sp
import scipy.io

import pandas as pd
pd.set_option('display.precision',3)
pd.set_option('display.float_format', '{:0.3f}'.format)

from sklearn.model_selection import train_test_split

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
mpl.rcParams['figure.figsize'] = (16,10)
mpl.rcParams['axes.grid'] = True

Here we start from the [original datasets](https://www.dlr.de/kn/desktopdefault.aspx/tabid-8500/14564_read-36508/) and process them into the final dataset that all our network models will work on.

In [0]:
# Paths (relative to the working directory) of the .mat archives that are
# merged into a single dataset by the loading cell.
datasets = [
    'dataset/{}.mat'.format(stem)
    for stem in (
        'ARS_DLR_DataSet',
        'ARS_DLR_DataSet_V2',
        'ARS_DLR_Benchmark_Data_Set',
    )
]

We reduce the labels to detect from the original 17 down to 8.

Furthermore, we don't detect transitions, so the final number of labels we work with is 7.

In [0]:
# Original activity names; the position in this list is the integer code.
labels = [
    # base activities (codes 0-6)
    'RUNNING', 'WALKING', 'JUMPING', 'STNDING', 'SITTING', 'XLYINGX', 'FALLING',
    # stairs variants (codes 7-8)
    'WALKUPS', 'WALKDWS',
    # jump variants (codes 9-11)
    'JUMPVRT', 'JUMPFWD', 'JUMPBCK',
    # transitions (codes 12-16)
    'TRANSUP', 'TRANSDW', 'TRNSACC', 'TRNSDCC', 'TRANSIT',
]

# activity name -> original integer code
map_encode = dict(zip(labels, range(len(labels))))

# original code -> reduced code (8 classes, code 7 collecting transitions)
map_encode_8 = {code: code for code in range(7)}
# walking up and downstairs = walking
map_encode_8.update(dict.fromkeys((7, 8), 1))
# jumping in place, forward and backward = jumping
map_encode_8.update(dict.fromkeys((9, 10, 11), 2))
# all transitions
map_encode_8.update(dict.fromkeys(range(12, 17), 7))

# reduced code -> human readable name
map_decode_8 = dict(enumerate([
    'running',
    'walking',
    'jumping',
    'standing',
    'sitting',
    'lying',
    'falling',
    'transition',
]))

num_classes = len(map_encode)
print("Num original classes: {}".format(num_classes))

The dataset provides the list of labels ("activities") for each segment of tracked data, along with the index bounds of each item in the list. We flatten this structure into a single array holding one label per sample.

In [0]:
def flatten_labels(labels, bounds):
    """Expand (labels, bounds) into one label per sample.

    `bounds` is read as a flat sequence of pairs: even positions hold the
    first sample of a segment and odd positions the last one, both counted
    from 1. The i-th pair belongs to `labels[i]`.

    Samples that no segment covers keep the code of 'TRANSIT'. When a segment
    overlaps the following one by more than one sample, the following
    segment's start is moved to the end of the current one, so the earlier
    label is kept over the overlap.

    Returns a uint8 array of length `bounds[-1]`.
    """
    # Convert to half-open [begin, end) ranges counted from 0:
    # begin = start - 1, end = stop (included when counted from 1).
    begins = bounds[0::2] - 1
    ends = bounds[1::2]
    segment_count = len(ends)

    # TODO: describe the holes and the mistimed bounds in the report.
    flat = np.full(bounds[-1], map_encode['TRANSIT'], dtype=np.uint8)
    for idx, label in enumerate(labels):
        following = idx + 1
        if following < segment_count and ends[idx] > begins[following] + 1:
            begins[following] = ends[idx]
        flat[begins[idx]:ends[idx]] = label
    return flat

The dataset contains IMU measurements referred to the sensor frame, but also provides the attitude/cosine matrix to express the measurements w.r.t. the body frame.

In [0]:
def convert_body_frame(imu_data, attitude_matrix):
    """Rotate one IMU sample from the sensor frame to the body frame.

    Element 0 of both arguments is skipped by the slicing below (the notebook
    later removes it as the time column). The remaining nine values of
    `attitude_matrix` are reshaped to 3x3 and transposed to obtain the
    rotation; the remaining nine values of `imu_data` are treated as three
    consecutive triplets (acc, gyro, mag), each multiplied by that rotation.

    Returns a new array; `imu_data` is left untouched.
    """
    rotation = attitude_matrix[1:].reshape(3, 3).T

    converted = imu_data.copy()
    # first index of the acc, gyro and mag triplets respectively
    for first in (1, 4, 7):
        triplet = slice(first, first + 3)
        converted[triplet] = np.dot(rotation, imu_data[triplet].T)
    return converted

For each test in a dataset, we extract the relevant data and then flatten the labels into a single long array.

The rest of the processing is postponed to operate on the full dataset.

In [0]:
summacumlaude = 0
def process_single_test(dataset, key, summacumlaude):
    """Extract measurements, attitude matrices and per-sample labels of one test.

    Parameters:
        dataset: mapping indexed by test name; `dataset[key][0]` must unpack
            into (imu_data, attitude_matrices, activities, activities_bounds).
        key: name of the test to extract.
        summacumlaude: unused; kept so existing calls keep working.

    Returns:
        (imu_data, attitude_matrices, activities_flat), where
        `activities_flat` holds one integer label per row of `imu_data`.

    Raises:
        AssertionError: if the time columns of measurements and attitude
            matrices differ, if bounds and labels do not pair up, or if the
            flattened labels do not match the number of measurements.
    """
    imu_data, attitude_matrices, activities, activities_bounds = dataset[key][0]

    # throwing away useless nested arrays
    activities = np.array([act[0] for act in activities[0]])
    activities_bounds = activities_bounds[0]

    # integrity checks on time and length
    # (asserting a list is always true when the list is non-empty, so the
    # per-row comparison has to be reduced to a single boolean)
    assert np.array_equal(
        np.asarray(imu_data)[:, 0], np.asarray(attitude_matrices)[:, 0]
    ), "time columns of IMU data and attitude matrices differ in '{}'".format(key)
    assert len(activities_bounds) == 2 * len(activities)

    # change labels to int numbers
    activities = np.array([map_encode[a] for a in activities])

    # get a single array of labels instead of labels + bounds
    activities_flat = flatten_labels(activities, activities_bounds)
    assert len(imu_data) == len(activities_flat)

    return imu_data, attitude_matrices, activities_flat

We use numpy arrays to store all the collected data from every test in each dataset.

In [0]:
# Per-test arrays are collected in lists and concatenated once at the end:
# calling np.append inside the loop copies everything accumulated so far on
# every iteration. The empty seeds keep the result's dtype and column count
# the same as with the incremental version.
imu_chunks = [np.empty((0, 10))]
attitude_chunks = [np.empty((0, 10))]
activity_chunks = [np.empty(0, dtype=np.uint8)]

# loop datasets
for ds in datasets:
    dataset = scipy.io.loadmat(ds)
    # loadmat also returns metadata entries whose names contain '__'
    keys = [k for k in dataset if '__' not in k]
    # loop keys=tests
    for test in keys:
        print('Loading {}:'.format(test).ljust(52, ' '), end='')
        imu, mat, act = process_single_test(dataset, test, summacumlaude)
        imu_chunks.append(imu)
        attitude_chunks.append(mat)
        activity_chunks.append(np.ravel(act))

        print('{} elements'.format(imu.shape[0]).rjust(15, ' '))

imu_sensor = np.concatenate(imu_chunks, axis=0)
attitude_mat = np.concatenate(attitude_chunks, axis=0)
activities = np.concatenate(activity_chunks)  # 1-D, one label per sample

assert(imu_sensor.shape==attitude_mat.shape)
num_data = imu_sensor.shape[0]
clear_output()
print('Read {} records'.format(num_data))
print('IMU data shape:   {}'.format(imu_sensor.shape))
print('Attitudes shape:  {}'.format(attitude_mat.shape))
print('Activities shape: {}'.format(activities.shape))

We save this checkpoint, even if we don't use it.

In [0]:
# Checkpoint of the merged arrays, one HDF5 dataset per array.
raw_checkpoint = (
    ('imu_sensor', imu_sensor),
    ('attitudes', attitude_mat),
    ('activities', activities),
)
with h5py.File('dataset/ARS-raw.h5', 'w') as h5f:
    for dataset_name, array in raw_checkpoint:
        h5f.create_dataset(dataset_name, data=array)

In [0]:
# optional reload if messing up below
#with h5py.File('dataset/ARS-raw.h5','r') as h5f:
#    imu_sensor = h5f['imu_sensor'][:]
#    attitude_mat = h5f['attitudes'][:]
#    activities = h5f['activities'][:]

Checks

In [0]:
# Time recorded for each original activity, in minutes.
act, cnt = np.unique(activities, return_counts=True)
cnt_sum = cnt.sum()  # total number of samples
cnt = cnt / 100 / 60 # 100 Hz, 60 seconds
act = np.array([labels[a].lower() for a in act])

# The total must be converted to minutes as well, otherwise the last row of
# the 'Time (min)' column would show a number of samples.
act = np.append(act, 'total')
cnt = np.append(cnt, cnt_sum / 100 / 60)

# One column per array keeps the durations numeric, so the pandas float
# format configured at import time applies; stacking names and numbers in a
# single array would turn the numbers into strings.
df = pd.DataFrame({'Activity': act, 'Time (min)': cnt},
                  columns=['Activity', 'Time (min)'])
df.index = [''] * len(df) # blank index
display(df)

#TODO other if needed


More processing:
- reduce the number of tracked activities to 8
- removal of items labelled as transitions
- conversion of measurements to the body frame
- removal of time column (in both measurements and attitude matrices)

In [0]:
# remap activities
print('Old labels: {}'.format(np.unique(activities)))
activities = np.array([ map_encode_8[act] for act in activities])
print('New labels: {}'.format(np.unique(activities)))

In [0]:
# remove transitions
transit_label = map_encode_8[map_encode['TRANSIT']]
# The mask is computed once and reused for the three arrays; counting is done
# by NumPy instead of iterating over every sample with the builtin sum.
is_transit = activities == transit_label
keep = ~is_transit
transit_number = np.count_nonzero(is_transit)
print('Transit label is {}, found {} elements'.format(transit_label, transit_number))

num_data = len(imu_sensor)
print('Old num data: {}'.format(num_data))
imu_sensor = imu_sensor[keep]
assert(imu_sensor.shape[0] == num_data-transit_number)
attitude_mat = attitude_mat[keep]
assert(attitude_mat.shape[0] == num_data-transit_number)
activities = activities[keep]
assert(activities.shape[0] == num_data-transit_number)

num_data = len(imu_sensor)
print('New num data: {}'.format(num_data))

num_labels = len(np.unique(activities))
print('New num labels: {}'.format(num_labels))

In [0]:
# Convert every sample from the sensor frame to the body frame, pairing each
# measurement row with the attitude row at the same position.
imu_body = imu_sensor.copy()
for row, (sample, attitude) in enumerate(zip(imu_sensor, attitude_mat)):
    imu_body[row] = convert_body_frame(sample, attitude)

In [0]:
# Drop the first (time) column from the three arrays.
# Run this cell once only: every run removes one more leading column.
imu_sensor, imu_body, attitude_mat = (
    table[:, 1:] for table in (imu_sensor, imu_body, attitude_mat)
)

Checks

In [0]:
# Time recorded for each of the remaining activities, in minutes.
act, cnt = np.unique(activities, return_counts=True)
cnt_sum = cnt.sum()  # total number of samples
cnt = cnt / 100 / 60
act = np.array([map_decode_8[a] for a in act])

# The total must be converted to minutes as well, otherwise the last row of
# the 'Time (min)' column would show a number of samples.
act = np.append(act, 'total')
cnt = np.append(cnt, cnt_sum / 100 / 60)

# One column per array keeps the durations numeric, so the pandas float
# format configured at import time applies; stacking names and numbers in a
# single array would turn the numbers into strings.
df = pd.DataFrame({'Activity': act, 'Time (min)': cnt},
                  columns=['Activity', 'Time (min)'])
df.index = [''] * len(df)
display(df)

#TODO other if needed

Framing


In [0]:
import math

In [0]:
# Cut the samples of each activity into fixed-length, half-overlapping windows.
window_len = 128
stride_len = window_len // 2  # integer, so it can be used directly as a step
x, y = [], []

for activity in np.unique(activities):
    # NOTE(review): the boolean selection joins the samples of this activity
    # from the whole merged array, so a window may span samples that were not
    # contiguous in the recordings - confirm this is acceptable.
    samples = imu_sensor[activities == activity]

    # Every window that fits entirely inside the data is kept; an incomplete
    # trailing window is dropped, so no zero padding is needed. This also
    # covers activities whose length is an exact multiple of the stride,
    # which previously produced no windows at all.
    for start in range(0, len(samples) - window_len + 1, stride_len):
        x.append(samples[start:start + window_len])
        y.append(activity)


In [0]:
# Stack the list of windows into a single array (expected layout: windows x
# samples per window x channels); y is left as a plain Python list.
x = np.array(x)

In [0]:
# Sanity check: shape of the stacked windows and number of window labels.
print(x.shape, len(y), sep='\n')

Normalization

In [0]:
#TODO

Checks

In [0]:
#TODO

We now save the entire dataset, even if we won't use this directly. 

In [0]:
# Full processed dataset, one HDF5 dataset per array.
full_dataset = (
    ('imu_sens', imu_sensor),
    ('imu_body', imu_body),
    ('attitudes', attitude_mat),
    ('activities', activities),
)
with h5py.File('dataset/ARS-full.h5', 'w') as h5f:
    for dataset_name, array in full_dataset:
        h5f.create_dataset(dataset_name, data=array)

We pre-compute and save an 80/20 train/test split of the dataset, using body-referenced data and corresponding labels. 

This is the final version we will use for training all our models.

`random_state` is the seed of the PRNG.

In [0]:
# 80/20 split of the body-frame samples and their labels, with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    imu_body, activities, test_size=0.2, random_state=1
)

# Report the shape of the inputs and of each part of the split.
for caption, array in (
    ("IMU shape:     ", imu_body),
    ("Labels shape:  ", activities),
    ("X_train shape: ", X_train),
    ("Y_train shape: ", Y_train),
    ("X_test shape:  ", X_test),
    ("Y_test shape:  ", Y_test),
):
    print(caption + str(array.shape))

In [0]:
# Train/test split, one HDF5 dataset per array.
split_arrays = (
    ('X_train', X_train),
    ('X_test', X_test),
    ('Y_train', Y_train),
    ('Y_test', Y_test),
)
with h5py.File('dataset/ARS-train-test.h5', 'w') as h5f:
    for dataset_name, array in split_arrays:
        h5f.create_dataset(dataset_name, data=array)

**Done**. Move to part 2 to see DL in action.

In [0]:
# Show the state of the working tree.
!git status

In [0]:
# Stage the preprocessing notebook.
!git add HAR-Preprocessing.ipynb;

In [0]:
# Commit the staged changes.
!git commit -m "Framing"

In [0]:
# Set the committer identity: the name is written to the global configuration,
# the e-mail only to the current repository (no --global on the second command).
# NOTE(review): this cell comes after the commit cell - presumably it has to
# run before committing on a fresh machine; confirm the intended order.
!git config --global user.name 'Elia Bonetto'; git config user.email 'eliabntt94@gmail.com'

In [0]:
# Push the master branch to the 'origin' remote.
!git push origin master