In [3]:

def get_gesture_counts(participant):
	gestures = {"drag":0,
				"draw":0,
				"ui":0,
				"tap":0,
				"doubletap":0,
				"tripletap":0,
				"hold":0,
				"pinch":0,
				"rev_pinch":0,
				"lasso":0,
				"box":0,
				"voice":0,
				"other":0}

	#Get all the events
	events = []
	for task in participant["tasks"].keys():
		events.extend(participant["tasks"][task])

	for event in events:
	 	if event["event_type"] == "tap":
	 		#Taps need special handling, as they might be double, triple, or hold
	 		if event["hold"]:
	 			gestures["hold"] += 1
	 		elif event["count"] == 2:
	 			gestures["doubletap"] += 1
	 		elif event["count"] == 3:
	 			gestures["tripletap"] += 1
	 		else:
	 			gestures["tap"] += 1
		elif event["event_type"] == "drag":
			#Drags might be drag or might be draw
			if event["draw"] is None:
				gestures["drag"] += 1
			else:
				gestures["draw"] += 1
		elif event["event_type"] == "pinch":
			#pinch can be pinch or reverse
			if event["reverse"]:
				gestures["rev_pinch"] += 1
			else:
				gestures["pinch"] += 1
		elif event["event_type"] == "voice_command":
			gestures["voice"] += 1
		elif event["event_type"] == "ui":
			gestures["ui"] += 1
		elif event["event_type"] == "memo":
			#Don't do anything with memos
			pass
		elif event["event_type"] == "lasso":
			gestures["lasso"] += 1
		elif event["event_type"] == "box_select":
			gestures["box"] += 1
		elif event["event_type"] == "other":
			gestures["other"] += 1		
		else:
			#This is an error, some event type wasn't handled
			print event["event_type"]

	return gestures

In [4]:
def average_counts(counts):
	participant_count = len(counts.keys())
	totals = {}
	#Collect the total counts
	for p in counts.keys():
		p_count = counts[p]
		for gesture in p_count.keys():
			if gesture in totals.keys():
				totals[gesture] += p_count[gesture]
			else:
				totals[gesture] = p_count[gesture]
	#Average across participants
	for gesture in totals.keys():
		totals[gesture] = totals[gesture]/float(participant_count)

	return totals

In [5]:
import all_data_handler
import pandas

In [349]:
adh = all_data_handler.UserData()

What I actually want to do here is have a dataframe where each user is also tagged with what condition they were in, so I can run ANOVA on that, with the condition as my categorical variable. 

In [350]:
data = []
for condition in adh.conditionMap.keys():
    counts = adh.applyCondition(get_gesture_counts, condition)
    #Convert to a list of dicts with user as a parameter of the dictionary
    #First put the user ID and the condition in the data
    for entry in counts:
        counts[entry]["user"] = entry
        counts[entry]["condition"] = condition
        #Tag as a multi-robot or more-or-less single robot condition
        if condition == "one" or condition == "unknown":
            counts[entry]["multi"] = False
        else:
            counts[entry]["multi"] = True
    data.extend(counts.values())

In [351]:
df = pandas.DataFrame(data)

In [352]:
df.set_index("user")

Unnamed: 0_level_0,box,condition,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11,0,unknown,0,19,3,1,0,False,1,1,0,0,0,0,2
26,0,unknown,5,25,4,2,0,False,1,0,4,4,1,0,0
21,0,unknown,0,13,3,2,1,False,1,1,0,2,0,0,0
16,0,unknown,0,34,1,0,0,False,0,0,0,0,0,0,0
31,0,unknown,1,0,47,0,0,False,1,1,0,13,1,0,0
36,0,unknown,0,0,15,1,0,False,5,0,1,0,1,0,0
1,0,unknown,7,28,5,5,0,False,0,1,2,2,0,0,0
6,0,unknown,0,20,18,0,0,False,2,1,3,32,0,0,0
25,0,thousand,0,17,5,0,0,True,0,1,0,0,0,0,8
15,0,thousand,0,36,21,0,10,True,0,0,2,0,0,0,2


So that gets my data into a nice frame, now how do I tell jupyter to do ANOVA to it?

In [353]:
import statsmodels
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [354]:
model = ols('drag ~ multi', data=df).fit()

In [355]:
table = sm.stats.anova_lm(model, typ=1)

In [356]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
multi,1,1765.8375,1765.8375,2.938415,0.094643
Residual,38,22836.0625,600.949013,,


I have two problems here. The first is that I'm not sure that I'm expressing the dependence between the condition and the variable correctly, and the second is that I don't know how to interpret the output. I think that a low PR(>F) is a good thing, but I'm not sure how low is good enough to say that a given gesture is related to the condition. 

In [357]:
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   drag   R-squared:                       0.072
Model:                            OLS   Adj. R-squared:                  0.047
Method:                 Least Squares   F-statistic:                     2.938
Date:                Wed, 25 Apr 2018   Prob (F-statistic):             0.0946
Time:                        13:43:08   Log-Likelihood:                -183.70
No. Observations:                  40   AIC:                             371.4
Df Residuals:                      38   BIC:                             374.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept        13.3125      6.129      2.172

I think that what this means is that I can accept that drag is conditional on whether it's a multirobot condition with 1-0.052 = 0.948 likelyhood (before adding Dalton's coding, it changed after that)

In [358]:
df = pandas.DataFrame(data)

In [359]:
df.set_index('user')

Unnamed: 0_level_0,box,condition,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11,0,unknown,0,19,3,1,0,False,1,1,0,0,0,0,2
26,0,unknown,5,25,4,2,0,False,1,0,4,4,1,0,0
21,0,unknown,0,13,3,2,1,False,1,1,0,2,0,0,0
16,0,unknown,0,34,1,0,0,False,0,0,0,0,0,0,0
31,0,unknown,1,0,47,0,0,False,1,1,0,13,1,0,0
36,0,unknown,0,0,15,1,0,False,5,0,1,0,1,0,0
1,0,unknown,7,28,5,5,0,False,0,1,2,2,0,0,0
6,0,unknown,0,20,18,0,0,False,2,1,3,32,0,0,0
25,0,thousand,0,17,5,0,0,True,0,1,0,0,0,0,8
15,0,thousand,0,36,21,0,10,True,0,0,2,0,0,0,2


In [360]:
model = ols('lasso ~ multi', data=df).fit()

In [361]:
table = sm.stats.anova_lm(model, typ=1)

In [362]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
multi,1,355.266667,355.266667,4.848605,0.033811
Residual,38,2784.333333,73.27193,,


In [363]:
model = ols('lasso ~ condition', data=df).fit()

In [364]:
table = sm.stats.anova_lm(model, typ=1)

In [365]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
condition,4,692.1,173.025,2.474311,0.062199
Residual,35,2447.5,69.928571,,


This makes it seem like whether the condition is a multi-robot condition is a stronger predictor (0.052 vs 0.163) of the use of lasso than what the actual condition is. Adding Dalton's coding dropped it to 0.0338 vs 0.062, which is great, as now the condition is becoming a stronger predictor in its own right, which supports the alternative hypothesis. 

In [366]:
df.corr(method='pearson')

Unnamed: 0,box,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
box,1.0,-0.088733,-0.124806,-0.075871,0.284119,-0.040727,0.263234,-0.114935,-0.04026,-0.074044,0.164977,-0.091801,0.708895,-0.022588
doubletap,-0.088733,1.0,-0.028564,-0.043473,0.39743,-0.0041,-0.171123,-0.10425,0.034074,0.052786,0.322175,0.071614,0.010817,-0.106013
drag,-0.124806,-0.028564,1.0,-0.20538,-0.001792,0.144368,0.267911,-0.027478,0.282714,-0.175009,0.250602,0.208201,-0.126988,-0.103395
draw,-0.075871,-0.043473,-0.20538,1.0,-0.108795,-0.014002,0.146435,-0.080846,-0.071976,0.01356,-0.037214,-0.041396,-0.085914,-0.088628
hold,0.284119,0.39743,-0.001792,-0.108795,1.0,0.242365,0.01134,-0.192305,-0.167743,0.378449,0.373875,0.213561,0.262748,-0.195403
lasso,-0.040727,-0.0041,0.144368,-0.014002,0.242365,1.0,0.336388,-0.102361,0.323113,0.215536,-0.056701,0.453181,-0.0815,-0.099511
multi,0.263234,-0.171123,0.267911,0.146435,0.01134,0.336388,1.0,0.256381,0.118525,0.107884,0.069867,0.087186,0.128262,0.153624
other,-0.114935,-0.10425,-0.027478,-0.080846,-0.192305,-0.102361,0.256381,1.0,-0.035163,0.146566,-0.094135,-0.043286,-0.036533,-0.09726
pinch,-0.04026,0.034074,0.282714,-0.071976,-0.167743,0.323113,0.118525,-0.035163,1.0,-0.093771,0.085196,-0.071647,-0.069387,-0.040132
rev_pinch,-0.074044,0.052786,-0.175009,0.01356,0.378449,0.215536,0.107884,0.146566,-0.093771,1.0,-0.036185,-0.067816,-0.040249,-0.117406


Correlation shows that whether the condition is a multirobot condition correlates most strongly with box, drag, other, and lasso. I am not sure why drag and other got in there, although drag is also the most common gesture by far. I suspect that because there were a few multi-robot conditions that had very high counts of "other", but no single-robot conditions that have high counts of "other", the correlation isn't really representative of people's choices so much as it is a couple of outliers. 

Box and UI got a really strong correlation (0.73), which is probably because of box select and menu interactions being the main interaction method of RTS games. Adding whether a person plays RTS games to the data set would help confirm this, as box, ui, and RTS should all be highly correlated if that is the case. 

In [367]:
model = ols('ui ~ box', data=df).fit()

In [368]:
table = sm.stats.anova_lm(model, typ=1)

In [369]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
box,1,1596.593242,1596.593242,38.386766,3.068829e-07
Residual,38,1580.506758,41.592283,,


This makes intuitive sense to me, as the use of box and UI are highly correlated, so one can predict the other.

In [370]:
model = ols('box ~ ui', data=df).fit()
table = sm.stats.anova_lm(model, typ=1)

In [371]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
ui,1,644.13248,644.13248,38.386766,3.068829e-07
Residual,38,637.64252,16.780066,,


Should switching those have changed the PR(>F) value? I don't suppose it should, correlation goes both ways. 

In [372]:
import scipy.stats as stats

Scipy stats has f_oneway, so I can calculate the averages of each group, and then do that to see if the groups have the same population mean, although the means of means is probably not really what I want there. I could also get the totals for each group, but that also seems like a bad plan. 

In [373]:
all_counts = {}
for condition in adh.conditionMap.keys():
    counts = adh.applyCondition(get_gesture_counts, condition)
    avg = average_counts(counts)
    all_counts[condition] = avg

In [374]:
as_lists = {}
for condition in adh.conditionMap.keys():
    as_lists[condition] = [all_counts[condition][x] for x in sorted(all_counts[condition].keys())]

In [375]:
stats.f_oneway(as_lists['unknown'], as_lists['one'], as_lists['ten'], as_lists['hundred'], as_lists['thousand'])

F_onewayResult(statistic=0.86613547851159089, pvalue=0.48958646083756785)

That's not great, I want a very low P (e.g. less than 0.05). Check to see if unknown and one have different pop means (they shouldn't...)

In [376]:
stats.f_oneway(as_lists['unknown'], as_lists['one'])

F_onewayResult(statistic=0.12808686860292812, pvalue=0.72355241191366382)

In [377]:
for x in as_lists.keys():
    for y in as_lists.keys():
        if x != y:
            print x, y, stats.f_oneway(as_lists[x], as_lists[y])

unknown thousand F_onewayResult(statistic=0.50872619737346314, pvalue=0.48256922228487886)
unknown hundred F_onewayResult(statistic=0.67826119631997861, pvalue=0.41829522597032021)
unknown ten F_onewayResult(statistic=2.0474697745631181, pvalue=0.16535221696661093)
unknown one F_onewayResult(statistic=0.12808686860292812, pvalue=0.72355241191366382)
thousand unknown F_onewayResult(statistic=0.50872619737346303, pvalue=0.48256922228487886)
thousand hundred F_onewayResult(statistic=0.080861589182261842, pvalue=0.77857228848247562)
thousand ten F_onewayResult(statistic=0.18982026874118335, pvalue=0.66695979269511851)
thousand one F_onewayResult(statistic=0.80323891877904341, pvalue=0.37902870823159895)
hundred unknown F_onewayResult(statistic=0.67826119631997861, pvalue=0.41829522597032021)
hundred thousand F_onewayResult(statistic=0.080861589182261842, pvalue=0.77857228848247562)
hundred ten F_onewayResult(statistic=0.86690867430020579, pvalue=0.36108452067499341)
hundred one F_onewayRes

None of these appear to be very different. I could also normalize the data by dividing by the total gestures the user made, so rather than having a count, each user would have the proportion of their gestures that were a specific gesture. 

In [378]:
data

[{'box': 0,
  'condition': 'unknown',
  'doubletap': 0,
  'drag': 19,
  'draw': 3,
  'hold': 1,
  'lasso': 0,
  'multi': False,
  'other': 1,
  'pinch': 1,
  'rev_pinch': 0,
  'tap': 0,
  'tripletap': 0,
  'ui': 0,
  'user': u'11',
  'voice': 2},
 {'box': 0,
  'condition': 'unknown',
  'doubletap': 5,
  'drag': 25,
  'draw': 4,
  'hold': 2,
  'lasso': 0,
  'multi': False,
  'other': 1,
  'pinch': 0,
  'rev_pinch': 4,
  'tap': 4,
  'tripletap': 1,
  'ui': 0,
  'user': u'26',
  'voice': 0},
 {'box': 0,
  'condition': 'unknown',
  'doubletap': 0,
  'drag': 13,
  'draw': 3,
  'hold': 2,
  'lasso': 1,
  'multi': False,
  'other': 1,
  'pinch': 1,
  'rev_pinch': 0,
  'tap': 2,
  'tripletap': 0,
  'ui': 0,
  'user': u'21',
  'voice': 0},
 {'box': 0,
  'condition': 'unknown',
  'doubletap': 0,
  'drag': 34,
  'draw': 1,
  'hold': 0,
  'lasso': 0,
  'multi': False,
  'other': 0,
  'pinch': 0,
  'rev_pinch': 0,
  'tap': 0,
  'tripletap': 0,
  'ui': 0,
  'user': u'16',
  'voice': 0},
 {'box': 0,


In [379]:
normalized = []
for entry in data:
    total = 0
    for key in entry.keys():
        if key != "user" and key != "condition":
            total += entry[key]
    for key in entry.keys():
        if key != "user" and key != "condition":
            entry[key] = entry[key]/float(total)
    normalized.append(entry)

In [380]:
normalized

[{'box': 0.0,
  'condition': 'unknown',
  'doubletap': 0.0,
  'drag': 0.7037037037037037,
  'draw': 0.1111111111111111,
  'hold': 0.037037037037037035,
  'lasso': 0.0,
  'multi': 0.0,
  'other': 0.037037037037037035,
  'pinch': 0.037037037037037035,
  'rev_pinch': 0.0,
  'tap': 0.0,
  'tripletap': 0.0,
  'ui': 0.0,
  'user': u'11',
  'voice': 0.07407407407407407},
 {'box': 0.0,
  'condition': 'unknown',
  'doubletap': 0.10869565217391304,
  'drag': 0.5434782608695652,
  'draw': 0.08695652173913043,
  'hold': 0.043478260869565216,
  'lasso': 0.0,
  'multi': 0.0,
  'other': 0.021739130434782608,
  'pinch': 0.0,
  'rev_pinch': 0.08695652173913043,
  'tap': 0.08695652173913043,
  'tripletap': 0.021739130434782608,
  'ui': 0.0,
  'user': u'26',
  'voice': 0.0},
 {'box': 0.0,
  'condition': 'unknown',
  'doubletap': 0.0,
  'drag': 0.5652173913043478,
  'draw': 0.13043478260869565,
  'hold': 0.08695652173913043,
  'lasso': 0.043478260869565216,
  'multi': 0.0,
  'other': 0.043478260869565216,

In [381]:
normf = pandas.DataFrame(normalized)

In [382]:
normf.set_index('user')

Unnamed: 0_level_0,box,condition,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11,0.0,unknown,0.0,0.703704,0.111111,0.037037,0.0,0.0,0.037037,0.037037,0.0,0.0,0.0,0.0,0.074074
26,0.0,unknown,0.108696,0.543478,0.086957,0.043478,0.0,0.0,0.021739,0.0,0.086957,0.086957,0.021739,0.0,0.0
21,0.0,unknown,0.0,0.565217,0.130435,0.086957,0.043478,0.0,0.043478,0.043478,0.0,0.086957,0.0,0.0,0.0
16,0.0,unknown,0.0,0.971429,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31,0.0,unknown,0.015625,0.0,0.734375,0.0,0.0,0.0,0.015625,0.015625,0.0,0.203125,0.015625,0.0,0.0
36,0.0,unknown,0.0,0.0,0.652174,0.043478,0.0,0.0,0.217391,0.0,0.043478,0.0,0.043478,0.0,0.0
1,0.0,unknown,0.14,0.56,0.1,0.1,0.0,0.0,0.0,0.02,0.04,0.04,0.0,0.0,0.0
6,0.0,unknown,0.0,0.263158,0.236842,0.0,0.0,0.0,0.026316,0.013158,0.039474,0.421053,0.0,0.0,0.0
25,0.0,thousand,0.0,0.53125,0.15625,0.0,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.25
15,0.0,thousand,0.0,0.5,0.291667,0.0,0.138889,0.013889,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778


In [383]:
model = ols('lasso ~ condition', data=normf).fit()
table = sm.stats.anova_lm(model, typ=1)

In [384]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
condition,4,0.061516,0.015379,1.718972,0.167848
Residual,35,0.313134,0.008947,,


In [385]:
model = ols('lasso ~ multi', data=normf).fit()
table = sm.stats.anova_lm(model, typ=1)

In [386]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
multi,1,0.000234,0.000234,0.023772,0.878281
Residual,38,0.374416,0.009853,,


In [387]:
normf_means = normf.groupby('condition').mean()

In [388]:
normf_means

Unnamed: 0_level_0,box,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
hundred,0.0908,0.010133,0.271563,0.19101,0.018902,0.079544,0.018631,0.09802,0.0,0.007534,0.093197,0.0,0.025011,0.095656
one,0.0,0.054702,0.439141,0.144172,0.03676,0.024802,0.0,0.001179,0.0,0.003205,0.206334,0.0,0.064839,0.024866
ten,0.028791,0.028468,0.334922,0.167458,0.026861,0.109382,0.010227,0.010191,0.020045,0.022819,0.164192,0.01169,0.064162,0.000791
thousand,0.0,0.010417,0.410368,0.297089,0.003289,0.023944,0.022239,0.114013,0.007196,0.023894,0.035092,0.0,0.014831,0.037629
unknown,0.0,0.03304,0.450873,0.260058,0.038869,0.005435,0.0,0.045198,0.016162,0.026239,0.104761,0.010105,0.0,0.009259


In [389]:
normf_means.loc['hundred'].values

array([ 0.09079997,  0.01013251,  0.27156288,  0.19100998,  0.0189024 ,
        0.07954402,  0.01863063,  0.09801951,  0.        ,  0.0075336 ,
        0.09319736,  0.        ,  0.02501066,  0.0956565 ])

In [390]:
stats.f_oneway(normf_means.loc['unknown'].values, normf_means.loc['one'].values, normf_means.loc['ten'].values, normf_means.loc['hundred'].values, normf_means.loc['thousand'].values)

F_onewayResult(statistic=4.3440895527101618e-32, pvalue=1.0)

In [391]:
stats.f_oneway(normf_means.loc['unknown'].values, normf_means.loc['one'].values)

F_onewayResult(statistic=8.8146192798064849e-33, pvalue=1.0)

In [392]:
stats.f_oneway(normf_means.loc['unknown'].values, normf_means.loc['hundred'].values)

F_onewayResult(statistic=3.7284619180820212e-32, pvalue=1.0)

This doesn't appear to be useful either, as this is saying that the population means are identical (or NaN for p values, which seems even less useful). This is probably because the normalization means that the mean for a user should be 1.0, so the mean for a population should be very close to 1.0 as well, and so now everything has a population mean of 1.0, and so of course they're not different. 

In [393]:
nonnorm_means = df.groupby('condition').mean()

In [394]:
nonnorm_means

Unnamed: 0_level_0,box,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
hundred,5.875,0.625,20.125,12.0,1.25,6.875,True,5.75,0.0,0.375,8.625,0.0,2.375,2.875
one,0.0,4.0,9.25,5.25,1.375,0.375,False,0.125,0.0,0.125,10.875,0.0,2.5,0.625
ten,3.25,2.75,43.5,11.375,2.875,10.625,True,1.25,2.25,2.0,24.0,1.125,7.5,0.125
thousand,0.0,0.375,17.0,52.0,0.125,1.5,True,5.75,0.25,1.375,2.125,0.0,0.875,1.375
unknown,0.0,1.625,17.375,12.0,1.375,0.125,False,1.375,0.625,1.25,6.625,0.375,0.0,0.25


In [395]:
stats.f_oneway(nonnorm_means.loc['unknown'].values, nonnorm_means.loc['one'].values, nonnorm_means.loc['ten'].values, nonnorm_means.loc['hundred'].values, nonnorm_means.loc['thousand'].values)

F_onewayResult(statistic=0.87828333447642881, pvalue=0.48192060096578482)

In [396]:
for x in nonnorm_means.index:
    for y in nonnorm_means.index:
        if x != y:
            print x, y, stats.f_oneway(nonnorm_means.loc[x], nonnorm_means.loc[y])
    print

hundred one F_onewayResult(statistic=1.7153801396431338, pvalue=0.20174422101450154)
hundred ten F_onewayResult(statistic=0.84550329445339134, pvalue=0.36628029721936173)
hundred thousand F_onewayResult(statistic=0.080174055755057261, pvalue=0.77930395211229819)
hundred unknown F_onewayResult(statistic=0.71666094821331183, pvalue=0.40496617279529901)

one hundred F_onewayResult(statistic=1.7153801396431345, pvalue=0.20174422101450154)
one ten F_onewayResult(statistic=2.8290345303649866, pvalue=0.10454816847910733)
one thousand F_onewayResult(statistic=0.83151954458327393, pvalue=0.37021138434980894)
one unknown F_onewayResult(statistic=0.12484527261096767, pvalue=0.72668655380757241)

ten hundred F_onewayResult(statistic=0.84550329445339145, pvalue=0.36628029721936173)
ten one F_onewayResult(statistic=2.8290345303649866, pvalue=0.10454816847910733)
ten thousand F_onewayResult(statistic=0.18744057077299967, pvalue=0.66862579020976642)
ten unknown F_onewayResult(statistic=2.0583179122029

hundred one F_onewayResult(statistic=3.0445401036633575, pvalue=0.093803195615485752)  
hundred ten F_onewayResult(statistic=0.62995257328961762, pvalue=0.43515030889820316)  
hundred thousand F_onewayResult(statistic=0.74646064762372855, pvalue=0.39615212673610622)  
hundred unknown F_onewayResult(statistic=0.81132768286727652, pvalue=0.37667821218931918)  

one hundred F_onewayResult(statistic=3.0445401036633579, pvalue=0.093803195615485752)  
one ten F_onewayResult(statistic=2.8456501473790681, pvalue=0.10457865347925255)
one thousand F_onewayResult(statistic=0.6589196563127182, pvalue=0.42491972670585898)  
one unknown F_onewayResult(statistic=0.55200384002671354, pvalue=0.46471106985119826)  

ten hundred F_onewayResult(statistic=0.62995257328961785, pvalue=0.43515030889820316)  
ten one F_onewayResult(statistic=2.8456501473790681, pvalue=0.10457865347925255)  
ten thousand F_onewayResult(statistic=1.6498127555285791, pvalue=0.21124481628590228)  
ten unknown F_onewayResult(statistic=1.7036814881039264, pvalue=0.20418288297121795)  

thousand hundred F_onewayResult(statistic=0.74646064762372843, pvalue=0.39615212673610622)  
thousand one F_onewayResult(statistic=0.6589196563127182, pvalue=0.42491972670585898)  
thousand ten F_onewayResult(statistic=1.6498127555285791, pvalue=0.21124481628590228)  
thousand unknown F_onewayResult(statistic=0.002188696239585675, pvalue=0.96307289865537826)  

unknown hundred F_onewayResult(statistic=0.81132768286727719, pvalue=0.37667821218931918)  
unknown one F_onewayResult(statistic=0.55200384002671332, pvalue=0.46471106985119826)  
unknown ten F_onewayResult(statistic=1.7036814881039262, pvalue=0.20418288297121795)  
unknown thousand F_onewayResult(statistic=0.0021886962395856746, pvalue=0.96307289865537826)  

This is the values from the averages that I calculated with python, before adding Dalton's codes for 31-40, it seems pretty close to what pandas came up with (although not identical). Also, adding Dalton's codes affected the P-values, although some went up and some went down, which is not what I'd really like to see (all of them going down).  

In [397]:
groups = df.groupby('condition').groups

In [398]:
tens = df.loc[groups['ten']].drop(['condition', 'multi'],axis=1)

In [399]:
unknowns = df.loc[groups['unknown']].drop(['condition', 'multi'],axis=1)

In [400]:
thousands = df.loc[groups['thousand']].drop(['condition', 'multi'],axis=1)

In [401]:
ones = df.loc[groups['one']].drop(['condition', 'multi'],axis=1)

In [402]:
hundreds = df.loc[groups['hundred']].drop(['condition', 'multi'],axis=1)

In [403]:
stats.f_oneway(unknowns, ones, tens, hundreds, thousands)

F_onewayResult(statistic=array([ 1.88713693,  0.88146811,  2.45232225,  0.90849319,  2.70454545,
        2.47431052,  1.3027372 ,  1.58964143,  0.68120805,  1.39311282,
        1.82142857,  0.81381698,  0.13333333,  0.93439948]), pvalue=array([ 0.13458201,  0.48502228,  0.06401522,  0.46970958,  0.04605856,
        0.0621986 ,  0.28802133,  0.19878305,  0.60966472,  0.2564933 ,
        0.14672665,  0.52497917,  0.96905741,  0.45538302]))

Again, this makes it look like there isn't a statistically significant variation in the data on each class. 

In [404]:
stats.f_oneway(unknowns['lasso'].values, ones['lasso'].values, tens['lasso'].values, hundreds['lasso'].values, thousands['lasso'].values)

F_onewayResult(statistic=2.4743105209397354, pvalue=0.062198601284028536)

In [405]:
groups = df.groupby('multi').groups

In [406]:
multis = df.loc[groups[True]].drop(['condition','multi'], axis = 1)
singles = df.loc[groups[False]].drop(['condition','multi'], axis = 1)

In [407]:
stats.f_oneway(multis, singles)

F_onewayResult(statistic=array([ 2.82912927,  1.14632773,  2.93841484,  0.83270301,  0.00488746,
        4.84860529,  2.67352677,  0.54143395,  0.44749213,  0.18640216,
        0.29106383,  0.63559657,  0.43263757,  0.91848794]), pvalue=array([ 0.10076998,  0.29107272,  0.09464255,  0.36724538,  0.94463162,
        0.03381051,  0.11028785,  0.466357  ,  0.50757223,  0.66836795,
        0.59268706,  0.43026341,  0.51466255,  0.34393057]))

In [408]:
stats.f_oneway(multis['lasso'].values, singles['lasso'].values)

F_onewayResult(statistic=4.8486052915120315, pvalue=0.033810508298004853)

Now we're getting somewhere, one of the statistics dropped into statistical relevance (0.03 < 0.05) so for that case at least, the null hypothesis has a 95% chance of being wrong. 

In [410]:
df.groupby('condition').describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,box,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
hundred,count,8.0,8.0,8.0,8.0,8.0,8.0,8,8.0,8.0,8.0,8.0,8.0,8.0,8.0
hundred,mean,5.875,0.625,20.125,12.0,1.25,6.875,1,5.75,0.0,0.375,8.625,0.0,2.375,2.875
hundred,std,8.935283,1.06066,22.630179,10.810048,1.581139,10.722973,0,10.166472,0.0,0.517549,18.415347,0.0,4.897157,6.664136
hundred,min,0.0,0.0,0.0,0.0,0.0,0.0,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0
hundred,25%,0.0,0.0,3.75,2.0,0.0,0.0,1,0.0,0.0,0.0,1.5,0.0,0.0,0.0
hundred,50%,0.0,0.0,12.0,10.5,0.5,1.0,1,1.0,0.0,0.0,2.0,0.0,0.0,0.0
hundred,75%,10.5,1.0,30.5,20.25,2.25,9.5,1,5.0,0.0,1.0,4.25,0.0,1.75,1.0
hundred,max,23.0,3.0,58.0,30.0,4.0,29.0,True,28.0,0.0,1.0,54.0,0.0,14.0,19.0
one,count,8.0,8.0,8.0,8.0,8.0,8.0,8,8.0,8.0,8.0,8.0,8.0,8.0,8.0
one,mean,0.0,4.0,9.25,5.25,1.375,0.375,0,0.125,0.0,0.125,10.875,0.0,2.5,0.625


The standard deviations are all over the place, ANOVA expects groups to have similar standard deviations. There is probably some normalization method to account for this. 

In [411]:
for col in df.columns:
    if col != 'condition' and col != 'multi':
        print col, stats.shapiro(df[col])

box (0.35982054471969604, 5.251906548542351e-12)
doubletap (0.4398201107978821, 3.550655255923907e-11)
drag (0.7786376476287842, 2.5019537588377716e-06)
draw (0.2987669110298157, 1.3634892910910357e-12)
hold (0.7797384262084961, 2.6301042908016825e-06)
lasso (0.5040141940116882, 1.8958599967699996e-10)
other (0.4696446657180786, 7.599407908509548e-11)
pinch (0.30901169776916504, 1.6995674615924439e-12)
rev_pinch (0.4500930905342102, 4.5998031772409576e-11)
tap (0.5541698336601257, 7.800968204740855e-10)
tripletap (0.31869006156921387, 2.0973014991376715e-12)
ui (0.31989210844039917, 2.1531118901685353e-12)
user (0.05174773931503296, 1.2088892339696001e-14)
voice (0.36331653594970703, 5.68887428484266e-12)


The _really tiny_ p values on all of these seems to indicate that my data is sampled from a normal distribution, which is good for ANOVA. I probably only have to worry about fixing the standard deviations, not normalizing the distribution. 

In [412]:
groups = df.groupby('condition').groups
for group in groups:
    g = df.loc[groups[group]]
    print group
    for col in g.columns:
        if col != 'condition' and col != 'multi':
            print col, stats.shapiro(g[col])

unknown
box (1.0, 1.0)
doubletap (0.6664437055587769, 0.0009347599698230624)
drag (0.9239009022712708, 0.4623080790042877)
draw (0.7242077589035034, 0.004218011628836393)
hold (0.8083069324493408, 0.03510454297065735)
lasso (0.418398380279541, 1.0472282383489073e-06)
other (0.7485483288764954, 0.007871860638260841)
pinch (0.6412020921707153, 0.0004790576349478215)
rev_pinch (0.8146909475326538, 0.04102260619401932)
tap (0.6749218106269836, 0.001168547780252993)
tripletap (0.6412020921707153, 0.0004790576349478215)
ui (1.0, 1.0)
user (0.18247562646865845, 1.0155577490067458e-09)
voice (0.418398380279541, 1.0472282383489073e-06)
thousand
box (1.0, 1.0)
doubletap (0.418398380279541, 1.0472282383489073e-06)
drag (0.9453262090682983, 0.6641181707382202)
draw (0.4827684760093689, 6.400577603926649e-06)
hold (0.418398380279541, 1.0472282383489073e-06)
lasso (0.5095005035400391, 1.344303473160835e-05)
other (0.5912467241287231, 0.0001255650568054989)
pinch (0.5659406185150146, 6.32297233096323

The normality test mostly stays small when checking within groups, but not for all gestures within a group. Of course, if a gesture is never used, all the counts will be 0, and I suspect that the normality test is not well-behaved in that degnerate case. 

In [413]:
groups = df.groupby('condition').groups
znorm = []
for group in groups:
    g = df.loc[groups[group]]
    means = g.mean(numeric_only=True)
    std_devs = g.std(numeric_only=True)
    #print means['box'], std_devs['box']
    #For each row in the group
    gdata = []
    for row in g.iterrows():
        #For each value in the row, calculate its zscore
        zscores = {}
        for index in df.axes[1].tolist():
            if index != 'condition' and index != "multi" and index != "user":
                if std_devs[index] != 0:
                    zscores[index] = (row[1][index] - means[index])/std_devs[index]   
                else: 
                    zscores[index] = 0.0
        #Put the user ID back in
        zscores['user'] = row[1]['user']
        zscores['condition'] = row[1]['condition']
        znorm.append(zscores)
        
    

This is an attempt to z-score the data within each gesture and within each group, so taking the mean and standard deviation of (for example) "drag" in the 10-robot case, and then using those values to z-score each user in the 10-robot case. 

It would also be possible to take the mean and std. dev. of all of a user's gestures, and then z-score all of their gestures, so that "gestures used" would have a mean of 0 and std. dev. of 1. I'd expect that to have a similar result to the attempt to normalize gestures by dividing by total gesture count per user, where the counts would get turned into a proportion.

In [414]:
znorm_df = pandas.DataFrame(znorm)

In [415]:
znorm_df.set_index('user')

Unnamed: 0_level_0,box,condition,doubletap,drag,draw,hold,lasso,other,pinch,rev_pinch,tap,tripletap,ui,voice
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
11,0.0,unknown,-0.585745,0.130833,-0.583383,-0.22255,-0.353553,-0.23467,0.724569,-0.790569,-0.595672,-0.724569,0.0,2.474874
26,0.0,unknown,1.216548,0.613906,-0.518563,0.370916,-0.353553,-0.23467,-1.207615,1.739253,-0.236021,1.207615,0.0,-0.353553
21,0.0,unknown,-0.585745,-0.352241,-0.583383,0.370916,2.474874,-0.23467,0.724569,-0.790569,-0.415847,-0.724569,0.0,-0.353553
16,0.0,unknown,-0.585745,1.338517,-0.713024,-0.816015,-0.353553,-0.860456,-1.207615,-0.790569,-0.595672,-0.724569,0.0,-0.353553
31,0.0,unknown,-0.225287,-1.398901,2.268713,-0.816015,-0.353553,-0.23467,0.724569,-0.790569,0.573194,1.207615,0.0,-0.353553
36,0.0,unknown,-0.585745,-1.398901,0.194461,-0.22255,-0.353553,2.268475,-1.207615,-0.158114,-0.595672,1.207615,0.0,-0.353553
1,0.0,unknown,1.937465,0.855443,-0.453743,2.151312,-0.353553,-0.860456,0.724569,0.474342,-0.415847,-0.724569,0.0,-0.353553
6,0.0,unknown,-0.585745,0.211345,0.388922,-0.816015,-0.353553,0.391116,0.724569,1.106797,2.281537,-0.724569,0.0,-0.353553
25,0.0,thousand,-0.353553,0.0,-0.382644,-0.353553,-0.433013,-0.53736,1.620185,-0.495631,-0.95203,0.0,-0.353553,2.388038
15,0.0,thousand,-0.353553,1.48236,-0.252382,-0.353553,2.453739,-0.53736,-0.540062,0.225287,-0.95203,0.0,-0.353553,0.225287


In [416]:
model = ols('lasso ~ condition', data=znorm_df).fit()
table = sm.stats.anova_lm(model, typ=1)

In [417]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
condition,4,8.966552e-30,2.241638e-30,2.241638e-30,1.0
Residual,35,35.0,1.0,,


In [418]:
groups = znorm_df.groupby('condition').groups

In [419]:
unknowns = znorm_df.loc[groups['unknown']].drop(['condition'],axis=1)

In [420]:
tens = znorm_df.loc[groups['ten']].drop(['condition'],axis=1)

In [421]:
stats.f_oneway(unknowns, tens)

F_onewayResult(statistic=array([  0.00000000e+00,   7.10542736e-15,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
        -7.10542736e-15,   0.00000000e+00,  -1.06581410e-14,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.06666667e-01,   0.00000000e+00]), pvalue=array([ 1.        ,  0.99999993,  1.        ,  1.        ,  1.        ,
        1.        ,         nan,  1.        ,         nan,  1.        ,
        1.        ,  1.        ,  0.74880287,  1.        ]))

Clearly, Z-scores were not the way to go here. What I would want to see is lower p-values, these are nearly one. These are normalized with the means and standard deviations of COLUMNS, because I want to be able to compare the same gestures across users, and normalizing across all the gestures that a user did seems like the wrong thing to do. 

In [422]:
stats.f_oneway(unknowns['lasso'].values, tens['lasso'].values)

F_onewayResult(statistic=1.8488927466117464e-32, pvalue=1.0)

In [423]:
df.set_index('user')
row_stddevs = df.std(axis=1, numeric_only=True)
row_means = df.mean(axis=1, numeric_only=True)

In [424]:
rownormed = []
for row in df.iterrows():
    rowdata = {}
    for index in df.axes[1].tolist():
        if index == 'user' or index == 'condition' or index == 'multi':
            rowdata[index] = row[1][index]
        else:
            if row_stddevs[row[0]] == 0:
                rowdata[index] = 0
            else:
                rowdata[index] = (row[1][index]-row_means[row[0]])/row_stddevs[row[0]]
    rownormed.append(rowdata)

In [425]:
rownormed_df = pandas.DataFrame(rownormed)
rownormed_df.set_index('user')

Unnamed: 0_level_0,box,condition,doubletap,drag,draw,hold,lasso,multi,other,pinch,rev_pinch,tap,tripletap,ui,voice
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11,-0.385757,unknown,-0.385757,3.414661,0.214309,-0.185735,-0.385757,False,-0.185735,-0.185735,-0.385757,-0.385757,-0.385757,-0.385757,0.014287
26,-0.503843,unknown,0.262874,3.329743,0.109531,-0.197156,-0.503843,False,-0.350499,-0.503843,0.109531,0.109531,-0.350499,-0.503843,-0.503843
21,-0.481698,unknown,-0.481698,3.330001,0.397925,0.104717,-0.188491,False,-0.188491,-0.188491,-0.481698,0.104717,-0.481698,-0.481698,-0.481698
16,-0.275627,unknown,-0.275627,3.472896,-0.165376,-0.275627,-0.275627,False,-0.275627,-0.275627,-0.275627,-0.275627,-0.275627,-0.275627,-0.275627
31,-0.360587,unknown,-0.281708,-0.360587,3.346696,-0.360587,-0.360587,False,-0.281708,-0.281708,-0.360587,0.664832,-0.281708,-0.360587,-0.360587
36,-0.403772,unknown,-0.403772,-0.403772,3.28284,-0.157998,-0.403772,False,0.825099,-0.403772,-0.157998,-0.403772,-0.157998,-0.403772,-0.403772
1,-0.482101,unknown,0.462817,3.297574,0.192841,0.192841,-0.482101,False,-0.482101,-0.347113,-0.212125,-0.212125,-0.482101,-0.482101,-0.482101
6,-0.532822,unknown,-0.532822,1.430207,1.233904,-0.532822,-0.532822,False,-0.336519,-0.434671,-0.238368,2.608024,-0.532822,-0.532822,-0.532822
25,-0.470463,thousand,-0.470463,3.028606,0.558675,-0.470463,-0.470463,True,-0.470463,-0.264635,-0.470463,-0.470463,-0.470463,-0.470463,1.176158
15,-0.482697,thousand,-0.482697,2.896185,1.488317,-0.482697,0.455881,True,-0.482697,-0.482697,-0.294982,-0.482697,-0.482697,-0.482697,-0.294982


In [426]:
model = ols('lasso ~ condition', data=znorm_df).fit()
table = sm.stats.anova_lm(model, typ=1)

In [427]:
table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
condition,4,8.966552e-30,2.241638e-30,2.241638e-30,1.0
Residual,35,35.0,1.0,,


I imported 10 of Dalton's codings, the other ten are not yet available. I'm not great at stats, but what I've learned so far is that if you just attempt to norm things in a kind of ham-fisted groping-in-the-dark way, you're going to have a bad time. 