# Imports

In [547]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn import svm

# Settings

In [548]:
# The dataset is expected in a 'motionsense-dataset' folder inside the
# current working directory.
root_path = os.path.join(os.getcwd(), 'motionsense-dataset')
subjects_info_path = os.path.join(root_path, 'data_subjects_info.csv')

# Folder holding one sub-directory per experiment; each experiment
# directory is read as files sub_1.csv .. sub_<NUM_PARTICIPANTS>.csv.
DATA_ROOT_PATH = os.path.join(root_path, 'A_DeviceMotion_data')
NUM_PARTICIPANTS = 24
# Maps the first three characters of an experiment directory name
# (e.g. 'sit_5' -> 'sit') to an integer class label.
# NOTE(review): 'sit' / 'wlk' are sitting / walking; the other codes are
# presumably downstairs, jogging, standing and upstairs - confirm
# against the dataset documentation.
EXPERIMENT_LABELS = {
    'sit': 0,
    'dws': 1,
    'jog': 2,
    'std': 3,
    'ups': 4,
    'wlk': 5
}

# Data Preparation

In [549]:
def get_experiment_label(dir_name):
    '''
    Receives an experiment dirname (e.g. 'wlk_7')
    and returns the numeric label registered in EXPERIMENT_LABELS
    for its three-character prefix.
    Raises KeyError when the prefix is not a known experiment type.
    '''
    experiment_code = dir_name[:3]
    return EXPERIMENT_LABELS[experiment_code]

In [550]:
def dataframe_from_dir(dir_name):
    '''
    Receives a single experiment dirname
    and returns a tuple (dfs, labels):
    dfs is a list with one dataframe per subject of the specified
    experiment (read from sub_1.csv .. sub_<NUM_PARTICIPANTS>.csv),
    labels is a parallel list repeating the experiment's numeric
    label once per dataframe.
    Raises KeyError if the dirname prefix is not a known experiment.
    '''
    # The label depends only on the directory, so resolve it once;
    # an unknown prefix now fails before any file is read.
    label = get_experiment_label(dir_name)
    experiment_path = os.path.join(DATA_ROOT_PATH, dir_name)

    dfs = [
        pd.read_csv(os.path.join(experiment_path, 'sub_{}.csv'.format(i)))
        for i in range(1, NUM_PARTICIPANTS + 1)
    ]
    labels = [label] * len(dfs)
    return dfs, labels

In [551]:
def dataframes_from_files(data_dirs):
    '''
    Receives a list of experiment directories
    and returns a tuple (dfs, labels) concatenating, in the given
    directory order, the per-subject dataframes and the matching
    labels produced by dataframe_from_dir for each directory.
    '''
    collected_frames, collected_labels = [], []
    for experiment_dir in data_dirs:
        frames, frame_labels = dataframe_from_dir(experiment_dir)
        collected_frames += frames
        collected_labels += frame_labels
    return collected_frames, collected_labels

In [552]:
def dataframes_to_feature_vectors(dfs, feature, num_cols):
    '''
    Builds one fixed-length vector per dataframe from a single feature.

    For every dataframe in dfs, the first num_cols values of the
    column named feature become one row, so each column of the
    result corresponds to one timestamp. num_cols is meant to be
    the length of the shortest experiment, so all rows have the
    same length.

    Returns a tuple (feature_df, data_matrix): the rows as a
    dataframe and the same rows as a plain list of lists.
    '''
    rows = [frame[feature].iloc[:num_cols].tolist() for frame in dfs]
    return pd.DataFrame(rows), rows

# Sit vs. Walk

In [553]:
# Train on two 'sit' and two 'wlk' experiments; the test set is a
# single held-out 'wlk' experiment, so every test sample has the
# same label.
train_dirs = ['sit_5', 'sit_13', 'wlk_7', 'wlk_8']
test_dirs = ['wlk_15']

In [554]:
# Load one dataframe (and one label) per subject for every listed experiment.
train_dfs, train_labels = dataframes_from_files(train_dirs)
test_dfs, test_labels = dataframes_from_files(test_dirs)
# Train and test recordings together determine the common length.
dfs = train_dfs + test_dfs
num_cols = min(df.shape[0] for df in dfs) # Cut to minimum experiment length
# Every column except the first is treated as a feature.
# NOTE(review): the first column is presumably a row-index column
# from the CSV export - confirm against the data files.
features = dfs[0].columns.tolist()[1:]

### SVM

In [555]:
def run_SVM(train_set, train_labels, test_set, test_labels):
    '''
    Trains an SVC with default parameters on the training data
    and evaluates it on the test data.

    Returns a tuple (predicted_labels, loss):
    predicted_labels is a list with one prediction per test sample,
    loss is the 0-1 loss, i.e. the fraction of test samples whose
    prediction differs from the given label.

    Raises ValueError if test_labels is empty, since the loss
    would be undefined.
    '''
    if len(test_labels) == 0:
        raise ValueError('test_labels must not be empty')

    classifier = svm.SVC()
    classifier.fit(train_set, train_labels)

    # A single batched call replaces one predict() call per sample;
    # list() keeps the return type a plain list as before.
    predicted_labels = list(classifier.predict(test_set))

    num_errors = sum(
        prediction != label
        for prediction, label in zip(predicted_labels, test_labels)
    )
    loss = num_errors / len(test_labels)

    return predicted_labels, loss

### Run SVM for all features

In [556]:
# Accuracy (1 - 0-1 loss) of a separate SVM per feature, keyed by feature name.
scores = dict()

for feature in features:
    # Each sample is the first num_cols values of this one feature.
    train_feature_df, train_set = dataframes_to_feature_vectors(train_dfs, feature, num_cols)
    test_feature_df, test_set = dataframes_to_feature_vectors(test_dfs, feature, num_cols)
    
    predictions, loss = run_SVM(train_set, train_labels, test_set, test_labels)
    scores[feature] = 1 - loss

In [557]:
scores

{'attitude.pitch': 0.9583333333333334,
 'attitude.roll': 0.7916666666666666,
 'attitude.yaw': 0.625,
 'gravity.x': 0.8333333333333334,
 'gravity.y': 0.9583333333333334,
 'gravity.z': 1.0,
 'rotationRate.x': 1.0,
 'rotationRate.y': 1.0,
 'rotationRate.z': 0.875,
 'userAcceleration.x': 0.20833333333333337,
 'userAcceleration.y': 0.9583333333333334,
 'userAcceleration.z': 0.625}

These scores are not a reliable measure of performance: the test set contains only 24 samples, and all of them share the same label (walking).

# Combine all experiments

In [558]:
# Every immediate sub-directory of the data root is one experiment
# (e.g. 'wlk_7'). Only the top level is listed (no recursive walk), a
# missing data root raises FileNotFoundError, and the names are sorted
# so the sample order does not depend on the filesystem.
with os.scandir(DATA_ROOT_PATH) as entries:
    all_dirs = sorted(entry.name for entry in entries if entry.is_dir())
all_dfs, all_labels = dataframes_from_files(all_dirs)

In [559]:
# Remove short experiments (< 900 timestamps)
short_indices = [i for i, df in enumerate(all_dfs) if df.shape[0] < 900]
# Delete from the highest index down: removing an element shifts every
# later element one position left, so ascending deletion would remove
# the wrong experiments (or run past the end of the list).
for i in reversed(short_indices):
    del all_dfs[i]
    del all_labels[i]

num_cols =  min(df.shape[0] for df in all_dfs) # Cut to minimum experiment length

### Prepare data and test sets

In [560]:
# Hold out a random 20% of the samples as the test set.
all_size = len(all_dfs)
test_size = round(0.2 * all_size)
# Sampled without replacement so no index is picked twice.
# No random seed is set here, so the split differs between runs.
test_indices = np.random.choice(all_size, test_size, replace=False)
test_dfs = []
test_labels = []

# Pop from the highest index down so that removing one sample does not
# shift the positions of the samples still to be removed. After this
# loop all_dfs / all_labels hold only the training samples.
for i in sorted(test_indices, reverse=True):
    test_dfs.append(all_dfs.pop(i))
    test_labels.append(all_labels.pop(i))

### Multiclass SVM

In [561]:
# Accuracy (1 - 0-1 loss) of a separate SVM per feature, keyed by feature name.
all_scores = dict()

for feature in features:
    # all_dfs / all_labels now contain only the training samples,
    # because the test samples were popped out of them above.
    train_feature_df, train_set = dataframes_to_feature_vectors(all_dfs, feature, num_cols)
    test_feature_df, test_set = dataframes_to_feature_vectors(test_dfs, feature, num_cols)
    
    predictions, loss = run_SVM(train_set, all_labels, test_set, test_labels)
    all_scores[feature] = 1 - loss

In [562]:
all_scores

{'attitude.pitch': 0.3623188405797102,
 'attitude.roll': 0.46376811594202894,
 'attitude.yaw': 0.5072463768115942,
 'gravity.x': 0.2753623188405797,
 'gravity.y': 0.30434782608695654,
 'gravity.z': 0.2753623188405797,
 'rotationRate.x': 0.4347826086956522,
 'rotationRate.y': 0.6521739130434783,
 'rotationRate.z': 0.37681159420289856,
 'userAcceleration.x': 0.17391304347826086,
 'userAcceleration.y': 0.30434782608695654,
 'userAcceleration.z': 0.24637681159420288}

# Deep learning solutions

In [563]:
import tensorflow as tf