In [37]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns

# Root folder of the dataset. The path is relative, so it is resolved against
# the working directory the notebook is run from.
dataset_dir = Path('UCI HAR Dataset')

# 1. Loading Data

## Total Body acceleration

In [21]:
# Total-acceleration signals: one header-less, whitespace-separated text file
# per axis and per split. `sep=r'\s+'` is the documented equivalent of the
# deprecated `delim_whitespace=True`; `.values` yields a NumPy array.
train_total_acc_x = pd.read_csv(dataset_dir/'train/Inertial Signals/total_acc_x_train.txt',
                                header=None, sep=r'\s+').values
train_total_acc_y = pd.read_csv(dataset_dir/'train/Inertial Signals/total_acc_y_train.txt',
                                header=None, sep=r'\s+').values
train_total_acc_z = pd.read_csv(dataset_dir/'train/Inertial Signals/total_acc_z_train.txt',
                                header=None, sep=r'\s+').values

test_total_acc_x = pd.read_csv(dataset_dir/'test/Inertial Signals/total_acc_x_test.txt',
                               header=None, sep=r'\s+').values
test_total_acc_y = pd.read_csv(dataset_dir/'test/Inertial Signals/total_acc_y_test.txt',
                               header=None, sep=r'\s+').values
test_total_acc_z = pd.read_csv(dataset_dir/'test/Inertial Signals/total_acc_z_test.txt',
                               header=None, sep=r'\s+').values

In [22]:
# Sanity check: dimensions of the training-split total-acceleration x-axis array.
train_total_acc_x.shape

(7352, 128)

In [23]:
# Sanity check: dimensions of the test-split total-acceleration x-axis array.
test_total_acc_x.shape

(2947, 128)

## Estimated Body acceleration

In [25]:
# Body-acceleration signals: one header-less, whitespace-separated text file
# per axis and per split. `sep=r'\s+'` is the documented equivalent of the
# deprecated `delim_whitespace=True`; `.values` yields a NumPy array.
train_body_acc_x = pd.read_csv(dataset_dir/'train/Inertial Signals/body_acc_x_train.txt',
                               header=None, sep=r'\s+').values
train_body_acc_y = pd.read_csv(dataset_dir/'train/Inertial Signals/body_acc_y_train.txt',
                               header=None, sep=r'\s+').values
train_body_acc_z = pd.read_csv(dataset_dir/'train/Inertial Signals/body_acc_z_train.txt',
                               header=None, sep=r'\s+').values

test_body_acc_x = pd.read_csv(dataset_dir/'test/Inertial Signals/body_acc_x_test.txt',
                              header=None, sep=r'\s+').values
test_body_acc_y = pd.read_csv(dataset_dir/'test/Inertial Signals/body_acc_y_test.txt',
                              header=None, sep=r'\s+').values
test_body_acc_z = pd.read_csv(dataset_dir/'test/Inertial Signals/body_acc_z_test.txt',
                              header=None, sep=r'\s+').values

In [26]:
# Sanity check: dimensions of the training-split body-acceleration x-axis array.
train_body_acc_x.shape

(7352, 128)

In [27]:
# Sanity check: dimensions of the test-split body-acceleration x-axis array.
test_body_acc_x.shape

(2947, 128)

## Triaxial Angular velocity from the gyroscope

In [29]:
# Gyroscope signals: one header-less, whitespace-separated text file per axis
# and per split. `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`; `.values` yields a NumPy array.
train_gyro_x = pd.read_csv(dataset_dir/'train/Inertial Signals/body_gyro_x_train.txt',
                           header=None, sep=r'\s+').values
train_gyro_y = pd.read_csv(dataset_dir/'train/Inertial Signals/body_gyro_y_train.txt',
                           header=None, sep=r'\s+').values
train_gyro_z = pd.read_csv(dataset_dir/'train/Inertial Signals/body_gyro_z_train.txt',
                           header=None, sep=r'\s+').values

test_gyro_x = pd.read_csv(dataset_dir/'test/Inertial Signals/body_gyro_x_test.txt',
                          header=None, sep=r'\s+').values
test_gyro_y = pd.read_csv(dataset_dir/'test/Inertial Signals/body_gyro_y_test.txt',
                          header=None, sep=r'\s+').values
test_gyro_z = pd.read_csv(dataset_dir/'test/Inertial Signals/body_gyro_z_test.txt',
                          header=None, sep=r'\s+').values

In [30]:
# Sanity check: dimensions of the training-split gyroscope x-axis array.
train_gyro_x.shape

(7352, 128)

In [31]:
# Sanity check: dimensions of the test-split gyroscope x-axis array.
test_gyro_x.shape

(2947, 128)

## Engineered features

In [40]:
# Pre-computed (engineered) feature matrices for each split, read from
# header-less, whitespace-separated text files. `sep=r'\s+'` is the documented
# equivalent of the deprecated `delim_whitespace=True`.
train_eng = pd.read_csv(dataset_dir/'train/X_train.txt',
                        header=None, sep=r'\s+').values

test_eng = pd.read_csv(dataset_dir/'test/X_test.txt',
                       header=None, sep=r'\s+').values

In [41]:
# Sanity check: dimensions of the training-split engineered-feature matrix.
train_eng.shape

(7352, 561)

In [42]:
# Sanity check: dimensions of the test-split engineered-feature matrix.
test_eng.shape

(2947, 561)

## Subjects

In [35]:
# Subject file for the training split (header-less, whitespace-separated).
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`.
train_subjects = pd.read_csv(dataset_dir/'train/subject_train.txt', sep=r'\s+', header=None).values

# Display the array dimensions as a quick sanity check.
train_subjects.shape

(7352, 1)

In [36]:
# Subject file for the test split (header-less, whitespace-separated).
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`.
test_subjects = pd.read_csv(dataset_dir/'test/subject_test.txt', sep=r'\s+', header=None).values

# Display the array dimensions as a quick sanity check.
test_subjects.shape

(2947, 1)

## Labels

There are 6 types of activities to be recognized in the dataset:

1. WALKING
2. WALKING_UPSTAIRS
3. WALKING_DOWNSTAIRS
4. SITTING
5. STANDING
6. LAYING

In [68]:
# Activity labels for the training split (header-less, whitespace-separated).
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`.
train_labels = pd.read_csv(dataset_dir/'train/y_train.txt', sep=r'\s+', header=None).values

# Display the array dimensions as a quick sanity check.
train_labels.shape

(7352, 1)

In [69]:
# Activity labels for the test split (header-less, whitespace-separated).
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`.
test_labels = pd.read_csv(dataset_dir/'test/y_test.txt', sep=r'\s+', header=None).values

# Display the array dimensions as a quick sanity check.
test_labels.shape

(2947, 1)

# 2. Data transformation

For each sample, the sampled signals are arranged as a 2D matrix in which each column is a different feature (signal channel) and each row is a successive timestep. Stacking these matrices over all samples gives a final 3D array indexed as [sample, timestep, feature].

In [64]:
def group_signals(signals_list):
    """Stack per-channel signal arrays depth-wise into a single array.

    Parameters
    ----------
    signals_list : iterable of array-like
        The arrays to combine, one per channel; they are passed to
        ``np.dstack`` in iteration order.

    Returns
    -------
    numpy.ndarray
        The result of ``np.dstack``. For 2D inputs of shape (rows, cols)
        the output has shape (rows, cols, number_of_inputs).
    """
    # Materialise the iterable into a list first, then let NumPy stack the
    # channels along the third axis.
    return np.dstack(list(signals_list))

In [66]:
# Channel order: total acceleration (x, y, z), body acceleration (x, y, z),
# then gyroscope (x, y, z).
train_signals = [
    train_total_acc_x, train_total_acc_y, train_total_acc_z,
    train_body_acc_x, train_body_acc_y, train_body_acc_z,
    train_gyro_x, train_gyro_y, train_gyro_z,
]

# Combine the per-channel arrays into one array and display its dimensions.
train_set_signals = group_signals(train_signals)
train_set_signals.shape

(7352, 128, 9)

In [67]:
# Channel order: total acceleration (x, y, z), body acceleration (x, y, z),
# then gyroscope (x, y, z).
test_signals = [
    test_total_acc_x, test_total_acc_y, test_total_acc_z,
    test_body_acc_x, test_body_acc_y, test_body_acc_z,
    test_gyro_x, test_gyro_y, test_gyro_z,
]

# Combine the per-channel arrays into one array and display its dimensions.
test_set_signals = group_signals(test_signals)
test_set_signals.shape

(2947, 128, 9)

# 3. Model training

## 3.1. Model 1: WiSARD

This model uses Thermometer encoding and the engineered features to make the prediction.

In [None]:
from thermometer_encoder import ThermometerEncoder

enc = ThermometerEncoder()