In [1]:
import glob, os
import re

In [2]:
def flatten(l):
    """Flatten one level of nesting: a list of lists becomes one flat list."""
    return [item for sublist in l for item in sublist]


def find_recordings(n_subjects=25, root=""):
    """Collect the recording CSVs of every subject folder, in a stable order.

    Subject folders are named ``0`` .. ``n_subjects - 1`` and are looked up
    under ``root`` (the current working directory when ``root`` is empty).
    Only files whose name starts with a digit and ends in ``.csv`` are kept.

    ``glob.glob`` makes no promise about the order of its results, but the
    files are later paired two-by-two, so each folder's listing is sorted to
    keep that pairing deterministic across machines and file systems.

    Returns a flat list of paths, grouped by subject in ascending order.
    """
    per_subject = [
        sorted(glob.glob(os.path.join(glob.escape(root), str(subject), "[0-9]*.csv")))
        for subject in range(n_subjects)
    ]
    return flatten(per_subject)


glob_list = find_recordings()

In [3]:
# Pair consecutive files: even positions with the odd position that follows.
# A trailing unpaired file (odd-length list) is dropped by zip.
lr_pairs = list(zip(glob_list[0::2], glob_list[1::2]))

In [4]:
import pandas as pd

def _subject_and_sign(path):
    """Return ``(subject, sign)`` parsed from a recording path.

    The subject is the name of the folder holding the file; the sign is the
    part of the file name before its first underscore.

    Raises ValueError when the file name contains no underscore.
    """
    subject = os.path.basename(os.path.dirname(path))
    sign, sep, _ = os.path.basename(path).partition('_')
    if not sep:
        raise ValueError("cannot parse sign from file name: " + repr(path))
    return subject, sign


# One frame of row-to-row differences per (left, right) recording pair.
df_list = []

for left_path, right_path in lr_pairs:
    # 'Unnamed: 0' is dropped - presumably a row index written out when the
    # CSVs were saved; confirm against the data files.
    df_left = pd.read_csv(left_path, index_col=None).drop(['Unnamed: 0'], axis=1)
    df_right = pd.read_csv(right_path, index_col=None).drop(['Unnamed: 0'], axis=1)

    # Prefix every column with its hand so the two sides stay distinguishable.
    df_left = df_left.add_prefix('left')
    df_right = df_right.add_prefix('right')

    # Inner-join the hands on their timestamps; keep a single time column.
    df = pd.merge(df_left, df_right, left_on='leftTime', right_on='rightTime').drop('rightTime', axis=1)

    # Convert the first column to a datetime. The last three characters are
    # cut off before parsing - presumably extra fractional digits that '%f'
    # would reject; confirm against the raw timestamps.
    df['leftTime'] = pd.to_datetime(df['leftTime'].str[:-3], format='%H:%M:%S.%f')

    # Difference between consecutive rows; the first row has no predecessor
    # (all NaN after diff), so it is discarded.
    df = df.diff().iloc[1:]
    df['leftTime'] = df['leftTime'].dt.total_seconds()

    # Add sign and subject, taken from the left-hand file's path. Both are
    # inserted at position 0, so the final column order is Sign, Subject, ...
    subject, sign = _subject_and_sign(left_path)
    df.insert(loc=0, column='Subject', value=subject)
    df.insert(loc=0, column='Sign', value=sign)

    df_list.append(df)

In [5]:
# Stack every per-recording frame vertically; each keeps its own row index.
sign_frame = pd.concat(df_list)

In [6]:
# Per-recording feature means: one column per recording, labelled
# "<sign>-<subject>", plus the matching list of sign labels.
sign_list = []
mean_columns = {}
for frame in df_list:
    sign = frame['Sign'].iloc[0]
    label = sign + '-' + frame['Subject'].iloc[0]
    sign_list.append(sign)
    mean_columns[label] = frame.drop(columns=['Sign', 'Subject']).mean()

# A repeated label would overwrite a column while sign_list keeps growing,
# silently shifting the features against their labels further down.
if len(mean_columns) != len(sign_list):
    raise ValueError("duplicate '<sign>-<subject>' label among the recordings")

# Build the frame in one go instead of inserting columns one at a time,
# which fragments the frame when there are many recordings.
mean_df = pd.DataFrame(mean_columns, dtype=float)

In [7]:
from sklearn.preprocessing import StandardScaler
# Rows = recordings, columns = features.
mean_dfT = mean_df.T
Y = sign_list

# Standardise every feature to zero mean and unit variance.
# NOTE(review): the scaler is fit on all rows, including the ones that are
# later sliced off as the test set - consider fitting on the training rows only.
scaler = StandardScaler()
X = scaler.fit_transform(mean_dfT.to_numpy())

In [8]:
# Ordered hold-out: the first 84% of the rows train, the remainder tests.
# No shuffling takes place, so the test rows are the last ones in file order.
split_1 = int(len(Y) * 0.84)
X_train, X_test = X[:split_1], X[split_1:]
Y_train, Y_test = Y[:split_1], Y[split_1:]

In [9]:
# LDA on means - 25% accuracy
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Fit LDA on the training rows and report accuracy on the held-out rows.
# clf.scalings_ gives the LDA equivalent of eigenvectors if they are needed.
clf = LinearDiscriminantAnalysis().fit(X_train, Y_train)
clf.score(X_test, Y_test)



0.25

In [10]:
# Accuracy increases to 34% when left hand is excluded.
# Right-hand features are picked by their 'right' column prefix rather than
# by a fixed column offset, so the selection follows the data layout.
right_cols = [col for col in mean_dfT.columns if col.startswith('right')]
assert right_cols, "no right-hand feature columns found"

scaler1 = StandardScaler()
X_right = scaler1.fit_transform(mean_dfT[right_cols])

# Split into train and test set (same boundary as the two-handed model)
X_train_r = X_right[:split_1]
X_test_r = X_right[split_1:]

clf_r = LinearDiscriminantAnalysis()
clf_r.fit(X_train_r, Y_train)
clf_r.score(X_test_r, Y_test)



0.3409090909090909

In [11]:
# Per-recording feature standard deviations: one column per recording,
# labelled "<sign>-<subject>".
std_columns = {}
for frame in df_list:
    label = frame['Sign'].iloc[0] + '-' + frame['Subject'].iloc[0]
    std_columns[label] = frame.drop(columns=['Sign', 'Subject']).std()

# Build the frame in one go instead of inserting columns one at a time,
# which fragments the frame when there are many recordings.
std_df = pd.DataFrame(std_columns, dtype=float)

In [12]:
# Mean and std features of both hands together: 50% accuracy.
std_dfT = std_df.T
# Column names clash between the two frames, hence the 'mean'/'std' suffixes.
std_mean_df = mean_dfT.join(std_dfT, lsuffix='mean', rsuffix='std')

# scaler1 is re-fit here on the combined feature table.
X_std_mean = scaler1.fit_transform(std_mean_df)
X_std_mean_train, X_std_mean_test = X_std_mean[:split_1], X_std_mean[split_1:]

clf_std_mean = LinearDiscriminantAnalysis().fit(X_std_mean_train, Y_train)
clf_std_mean.score(X_std_mean_test, Y_test)



0.5

In [13]:
# 81% using mean and std when left hand is ignored.
# Right-hand features are picked by their 'right' column prefix rather than
# by a fixed column offset, so the selection follows the data layout.
right_cols = [col for col in mean_dfT.columns if col.startswith('right')]
assert right_cols, "no right-hand feature columns found"

# Column names clash between the two frames, hence the 'mean'/'std' suffixes.
std_mean_df = mean_dfT[right_cols].join(std_dfT[right_cols], lsuffix='mean', rsuffix='std')

# scaler1 is re-fit here on the right-hand feature table.
scaler1.fit(std_mean_df)
X_std_mean = scaler1.transform(std_mean_df)
X_std_mean_train = X_std_mean[:split_1]
X_std_mean_test = X_std_mean[split_1:]

clf_std_mean = LinearDiscriminantAnalysis()
clf_std_mean.fit(X_std_mean_train, Y_train)
clf_std_mean.score(X_std_mean_test, Y_test)



0.8181818181818182