# Library Imports

In [1]:
import sys

import numpy as np
import pandas as pd
from scipy import stats
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Functions

In [2]:
def df_to_list(dataframe):
    """Split one tester's readings into ankle, hand and pocket lists.

    Assumed: the dataframe belongs to a single tester and its rows are
    ordered as 3 ankle readings, 3 hand readings, then 3 pocket readings.

    Returns a 4-tuple ``(ankle, hand, pocket, tester)``. The first three are
    lists of ``[x_accel, y_accel, z_accel]`` rows; ``tester`` is the first
    three entries of the 'tester' column, so it lines up with any single
    location list when used as a label vector (e.g. for GaussianNB).
    """
    axes = ['x_accel', 'y_accel', 'z_accel']
    readings = dataframe[axes].values.tolist()

    # Rows 0-2, 3-5 and 6-8 correspond to the three recording locations.
    ankle, hand, pocket = (readings[start:start + 3] for start in (0, 3, 6))
    tester = dataframe['tester'].tolist()[:3]

    return ankle, hand, pocket, tester

In [3]:
def full_df_to_list(dataframe):
    """Return every acceleration row plus the matching tester labels.

    Unlike ``df_to_list`` this ignores where the data was recorded.
    Assumed: the dataframe is for the same tester.

    Returns ``(total_list, tester)``: a list of
    ``[x_accel, y_accel, z_accel]`` rows and the full 'tester' column as a
    list of the same length.
    """
    axes = ['x_accel', 'y_accel', 'z_accel']
    return dataframe[axes].values.tolist(), dataframe['tester'].tolist()

In [4]:
# Runs the test `tests` times (200 by default) and returns the average score
def average_score(model, x, y, tests=200):
    """Average a model's validation score over repeated random splits.

    Each round draws a fresh ``train_test_split`` of ``(x, y)`` with its
    default settings, refits ``model`` on the training part and scores it on
    the held-out part.

    Parameters
    ----------
    model : object exposing ``fit(x, y)`` and ``score(x, y)``
    x, y : features and labels accepted by ``train_test_split``
    tests : int, optional
        Number of split/fit/score rounds (default 200).

    Returns
    -------
    The mean of the per-round scores, rounded to 5 decimal places.

    Raises
    ------
    ValueError
        If ``tests`` is not positive (there would be nothing to average).
    """
    if tests <= 0:
        raise ValueError("tests must be a positive integer")

    scores = []
    for _ in range(tests):
        x_train, x_valid, y_train, y_valid = train_test_split(x, y)
        model.fit(x_train, y_train)
        scores.append(model.score(x_valid, y_valid))

    # np.round works whether score() returned numpy scalars or plain Python
    # floats; calling .round(5) on the average fails for a plain float.
    return np.round(np.mean(scores), 5)

In [5]:
def gaussian(x, y):
    """Score a Gaussian naive Bayes classifier on (x, y) via average_score."""
    return average_score(GaussianNB(), x, y)

In [6]:
def polyfit(x, y):
    """Score a degree-9 polynomial least-squares fit on (x, y) via average_score.

    The features are expanded with ``PolynomialFeatures`` (bias column
    included) and fed to ``LinearRegression`` with its own intercept disabled,
    so the bias column supplies the constant term.

    NOTE(review): LinearRegression is a regressor, so the averaged score is
    presumably R^2 rather than classification accuracy, and non-numeric labels
    are unlikely to be accepted - confirm this is the intended model.
    """
    expand = PolynomialFeatures(degree=9, include_bias=True)
    regress = LinearRegression(fit_intercept=False)
    return average_score(make_pipeline(expand, regress), x, y)

In [7]:
def neighbors(x, y):
    """Score a 9-nearest-neighbours classifier on (x, y) via average_score.

    Features are standardised first so that every axis contributes on a
    comparable scale to the distance computation.
    """
    scaler = StandardScaler()
    knn = KNeighborsClassifier(n_neighbors=9)
    return average_score(make_pipeline(scaler, knn), x, y)

In [8]:
def decision(x, y):
    """Score a depth-limited (max_depth=8) decision tree on (x, y) via average_score."""
    return average_score(DecisionTreeClassifier(max_depth=8), x, y)

In [9]:
def svc(x, y):
    """Score a PCA + linear SVC pipeline on (x, y) via average_score.

    Up to 250 principal components are kept, but never more than the data can
    support: PCA cannot extract more components than there are feature
    columns or training rows, so a fixed 250 fails on small/narrow inputs.

    Parameters
    ----------
    x : 2-D array-like of shape (n_samples, n_features)
    y : labels, one per row of ``x``
    """
    n_samples, n_features = np.shape(x)
    # average_score splits with train_test_split's defaults, which hold out
    # 25% of the rows (rounded up); the rest is what PCA is fitted on.
    n_train = n_samples - int(np.ceil(0.25 * n_samples))
    n_components = max(1, min(250, n_features, n_train))

    model = make_pipeline(
        PCA(n_components),
        SVC(kernel='linear', C=2.0)
    )
    return average_score(model, x, y)

In [10]:
def mlp(x, y):
    """Score an L-BFGS-trained MLPClassifier on (x, y) via average_score.

    NOTE(review): hidden_layer_sizes=() requests no hidden layers at all -
    presumably intentional, but confirm.
    """
    classifier = MLPClassifier(hidden_layer_sizes=(), solver='lbfgs')
    return average_score(classifier, x, y)

In [11]:
def triple(data):
    """Return the x, y and z acceleration entries of ``data`` as a 3-item list."""
    return [data[axis] for axis in ('x_accel', 'y_accel', 'z_accel')]

# Data Manipulation

In [12]:
# One label per row, joined onto each file by row index.
# Assumes every CSV holds 9 rows ordered ankle x3, hand x3, pocket x3 - TODO confirm.
location = pd.DataFrame({'location': ['ankle'] * 3 + ['hand'] * 3 + ['pocket'] * 3})


def _load_with_location(csv_name):
    """Read one CSV and attach the recording-location column by row index."""
    return pd.read_csv(csv_name).join(location)


# Per-tester summaries (mean and standard deviation files)
p1_data_mean = _load_with_location('p1-mean.csv')
p1_data_std = _load_with_location('p1-std.csv')
p2_data_mean = _load_with_location('p2-mean.csv')
p2_data_std = _load_with_location('p2-std.csv')
p3_data_mean = _load_with_location('p3-mean.csv')
p3_data_std = _load_with_location('p3-std.csv')
p4_data_mean = _load_with_location('p4-mean.csv')
p4_data_std = _load_with_location('p4-std.csv')

# Left-side vs right-side recordings
left_data_mean = _load_with_location('left-mean.csv')
left_data_std = _load_with_location('left-std.csv')
right_data_mean = _load_with_location('right-mean.csv')
right_data_std = _load_with_location('right-std.csv')

In [13]:
# Combines all user data into one frame per type to do machine learning tests.
# pd.concat replaces the chained DataFrame.append calls (append was removed in
# pandas 2.0); like append, it keeps each frame's original row index.
data_mean = pd.concat([p1_data_mean, p2_data_mean, p3_data_mean, p4_data_mean])

data_std = pd.concat([p1_data_std, p2_data_std, p3_data_std, p4_data_std])

side_mean = pd.concat([left_data_mean, right_data_mean])
# Left first, then right, mirroring side_mean. Starting from right_data_std
# here would stack the right-side frame twice and leave the left side out.
side_std = pd.concat([left_data_std, right_data_std])

# Stats Tests

In [14]:
# Groups data by location and averages each group's numeric columns.
# numeric_only=True skips the string 'location' column, which cannot be
# averaged (pandas >= 2.0 raises instead of silently dropping it).
data_ankle = data_mean[data_mean['location']=='ankle'].mean(numeric_only=True)
data_hand = data_mean[data_mean['location']=='hand'].mean(numeric_only=True)
data_pocket = data_mean[data_mean['location']=='pocket'].mean(numeric_only=True)

In [15]:
# One-way ANOVA across the three locations: are their mean accelerations different?
location_groups = [triple(group) for group in (data_ankle, data_hand, data_pocket)]
anova = stats.f_oneway(*location_groups)
print(anova.pvalue)

0.4626254658191993


In [16]:
# Group data by user and average each user's numeric columns.
# numeric_only=True skips the string 'location' column, which cannot be
# averaged (pandas >= 2.0 raises instead of silently dropping it).
# NOTE(review): data_mean is built from four testers' files but only testers
# 1-3 are summarised here - confirm whether tester 4 is left out on purpose.
data_p1 = data_mean[data_mean['tester']==1].mean(numeric_only=True)
data_p2 = data_mean[data_mean['tester']==2].mean(numeric_only=True)
data_p3 = data_mean[data_mean['tester']==3].mean(numeric_only=True)

In [17]:
# One-way ANOVA across testers 1-3: are their mean accelerations different?
tester_groups = [triple(group) for group in (data_p1, data_p2, data_p3)]
anova = stats.f_oneway(*tester_groups)
print(anova.pvalue)

0.46475862442251603


In [18]:
# Group data by side and average each side's numeric columns.
# numeric_only=True skips the string 'location' column, which cannot be
# averaged (pandas >= 2.0 raises instead of silently dropping it).
# Rows are selected by the 'tester' value: 3 in the left file, 4 in the right.
data_left = left_data_mean[left_data_mean['tester']==3].mean(numeric_only=True)
data_right = right_data_mean[right_data_mean['tester']==4].mean(numeric_only=True)

In [19]:
# One-way ANOVA between the two sides: are their mean accelerations different?
side_groups = [triple(group) for group in (data_left, data_right)]
anova = stats.f_oneway(*side_groups)
print(anova.pvalue)

0.7814905995850858


# Machine Learning Classifiers

In [20]:
# Runs each classifier with each test
# NOTE(review): every score list below is filled by gaussian(), so the lists
# named for the other classifiers (p_, n_, d_, s_, m_) actually hold repeated
# GaussianNB runs. They were presumably meant to call polyfit, neighbors,
# decision, svc and mlp - confirm and switch once those functions are
# verified to run on this data.
feature_columns = ['x_accel', 'y_accel', 'z_accel']

# (frame, label column) for the location, tester and side tests, in that order
experiments = [
    (data_mean, 'location'),
    (data_mean, 'tester'),
    (side_mean, 'tester'),
]

g_data, p_data, n_data, d_data, s_data, m_data = ([] for _ in range(6))

for frame, target in experiments:
    for scores in (g_data, p_data, n_data, d_data, s_data, m_data):
        scores.append(gaussian(frame[feature_columns], frame[target]))

In [21]:
rows = ['location', 'tester', 'side']
# dtype=object keeps the scores numeric. Without it NumPy coerces the mixed
# str/float rows to a single string dtype, so every score would become text.
data = np.array([rows, g_data, p_data, n_data, d_data, s_data, m_data], dtype=object)
# One row per test, one column per classifier (plus the test name first)
data = data.transpose()

In [22]:
# Summary table: one row per test, one column per classifier
column_names = ['Test', 'Gaussian', 'Polyfit', 'Neighbors', 'Decision', 'SVC', 'MLP']
results = pd.DataFrame(data, columns=column_names).set_index('Test')
results

Unnamed: 0_level_0,Gaussian,Polyfit,Neighbors,Decision,SVC,MLP
Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
location,0.50944,0.53889,0.51611,0.54,0.52833,0.52667
tester,0.91,0.91111,0.91167,0.91444,0.90444,0.90222
side,0.9,0.913,0.912,0.916,0.884,0.924
