# Analysis of U20 lab-based replication

Load the libraries and read in the data.

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import pandas as pd
import seaborn as sns
# Wildcard import: NumPy's public names are bound directly in the notebook namespace.
from numpy import *

# Disabled preprocessing step, kept for reference:
# %run clean-data.py # concatenates and cleans data into DataFrame called df
# Read the CSV file into the DataFrame `df`, which the Python cells below operate on.
df = pd.read_csv('../data/U20Indonesian_TestData.csv')

Check that data were loaded correctly.

In [3]:
# Display 10 randomly drawn rows as a quick sanity check of the loaded data.
df.sample(n=10)

Unnamed: 0,ID,sex,condition,trials,phase,trial,type,correctChoice,choice1,choice2,...,mod,oldNoun,oldMod1,oldMod2,choiceType1,choiceType2,choiceType3,choiceType4,choice,binaryCorrect
3999,id_Thu Jul 13 2017 07:24:48 GMT+0700 (WIB),female,condition_AdjDem,testTrial48_inner-outer_1_this round pig_pig t...,test,testTrial48,inner-outer,1,this round pig,pig this round,...,pig,True,False,True,Outer-Inner-N,N-Outer-Inner,Inner-Outer-N,N-Inner-Outer,1,1
2039,id_Sat Jul 08 2017 15:32:15 GMT+0700 (SE Asia ...,female,condition_AdjDem,testTrial8_inner-outer_4_round that table_tabl...,test,testTrial8,inner-outer,4,round that table,table round that,...,table,True,True,True,Inner-Outer-N,N-Inner-Outer,N-Outer-Inner,Outer-Inner-N,4,1
5496,id_Tue Jun 27 2017 14:30:41 GMT+0700 (WIB) 8407,female,condition_AdjNum,testTrial25_inner-outer_1_six purple pillow_pi...,test,testTrial25,inner-outer,1,six purple pillow,pillow purple six,...,pillow,True,True,True,Outer-Inner-N,N-Inner-Outer,N-Outer-Inner,Inner-Outer-N,1,1
2439,id_Sat Jun 24 2017 19:28:28 GMT+0700 (SE Asia ...,male,condition_AdjDem,testTrial8_inner-outer_4_shoe this spotted_spo...,test,testTrial8,inner-outer,4,shoe this spotted,spotted this shoe,...,shoe,True,True,True,N-Outer-Inner,Inner-Outer-N,N-Inner-Outer,Outer-Inner-N,4,1
293,id_Fri Jul 14 2017 21:14:41 GMT+0700 (WIB),,condition_NumDem,testTrial22_outer_2_hat that_that hat_----_---...,test,testTrial22,outer,2,hat that,that hat,...,hat,True,True,,,,,,2,1
874,id_Fri Jun 30 2017 20:09:25 GMT+0700 (SE Asia ...,female,condition_NumDem,testTrial43_outer_2_pear that_that pear_----_-...,test,testTrial43,outer,2,pear that,that pear,...,pear,False,True,,,,,,2,1
4713,id_Thu Jun 29 2017 23:44:21 GMT+0700 (WIB) 7622,male,condition_AdjDem,testTrial42_inner-outer_4_pillow dirty this_di...,test,testTrial42,inner-outer,4,pillow dirty this,dirty this pillow,...,pillow,False,True,True,N-Inner-Outer,Inner-Outer-N,N-Outer-Inner,Outer-Inner-N,2,0
5107,id_Tue Jul 11 2017 21:16:23 GMT+0700 (SE Asia ...,female,condition_NumDem,testTrial36_outer_1_this horse_horse this_----...,test,testTrial36,outer,1,this horse,horse this,...,horse,True,True,,,,,,1,1
2204,id_Sat Jul 15 2017 22:36:02 GMT+0700 (SE Asia ...,female,condition_AdjNum,testTrial13_inner-outer_3_tie soft four_soft f...,test,testTrial13,inner-outer,3,tie soft four,soft four tie,...,tie,True,True,True,N-Inner-Outer,Inner-Outer-N,Outer-Inner-N,N-Outer-Inner,3,1
992,id_Mon Jul 03 2017 11:43:20 GMT+0700 (SE Asia ...,male,condition_AdjDem,testTrial1_inner-outer_1_this purple cherry_pu...,test,testTrial1,inner-outer,1,this purple cherry,purple this cherry,...,cherry,True,True,True,Outer-Inner-N,Inner-Outer-N,N-Inner-Outer,N-Outer-Inner,4,0


Select data from the test phase only.

Check how many participants we have in each condition:

In [5]:
# Number of distinct participant IDs within each condition.
df.groupby('condition')['ID'].nunique()

condition
condition_AdjDem    28
condition_AdjNum    31
condition_NumDem    32
Name: ID, dtype: int64

Check by-participant accuracy on single-modifier trials.  The critical column is `post`, which denotes whether (1) or not (0) a response was postnominal.

In [None]:
# Mean of `post` for each (participant, condition) pair, restricted to rows with nbMods == 1.
# NOTE(review): the `df.sample` preview earlier in this notebook lists columns such as
# `ID` and `condition`; confirm that `nbMods`, `suj`, `cond` and `post` exist in this file.
(
    df.loc[df['nbMods'] == 1]
      .groupby(['suj', 'cond'])['post']
      .mean()
)

Plot histogram of single-modifier trial accuracy.

In [None]:
# Histogram of by-participant accuracy on single-modifier trials.
# The per-group mean of `post` is computed first so that each participant contributes
# one accuracy value; calling `.hist()` directly on the grouped column would instead
# draw a separate histogram of raw 0/1 responses for every group.
# NOTE(review): confirm that `nbMods`, `suj`, `cond` and `post` exist in the loaded data.
df[df.nbMods == 1].groupby(['suj', 'cond']).post.mean().hist()

In [None]:
# Aggregation spec: average the `iso` column within each group.
# The string 'mean' selects pandas' built-in groupby mean, so this cell does not rely on
# a bare `mean` name supplied by the wildcard NumPy import.
aggregators = {'iso': 'mean'}

# One row per (participant, condition) pair for rows with nbMods == 2;
# as_index=False keeps `suj` and `cond` as ordinary columns for plotting.
gp = df[df.nbMods == 2].groupby(['suj', 'cond'], as_index=False).agg(aggregators)

In [None]:
# Swarm plot of the aggregated `iso` values in `gp`, one swarm per `cond` level.
ax = sns.swarmplot(data=gp, x='cond', y='iso')
# Pad the y-axis slightly beyond [0, 1] so points at the extremes are not clipped.
ax.set(ylim=(-0.05, 1.05))

Prepare to do R modelling.

In [None]:
# Load the rpy2 IPython extension, which the `%%R` cells below rely on.
%load_ext rpy2.ipython

Load in data and create subsets on which each model will be run.

In [None]:
%%R
library(lme4)

# NOTE(review): this reads '../data/test/ENGtest.csv', whereas the Python cell at the
# top of the notebook reads '../data/U20Indonesian_TestData.csv' -- confirm this is
# the intended input for the models.
d <- read.csv('../data/test/ENGtest.csv')

# Keep only the two-modifier trials.
test <- subset(d, nbMods == 2)

# One data frame per condition; each model below is fitted on one of these.
testAdjDem <- subset(test, cond == "dem-adj")
testAdjNum <- subset(test, cond == "num-adj")
testNumDem <- subset(test, cond == "dem-num")

Run models for the adj-dem condition, and test whether the intercept is significantly different from chance level.

In [None]:
%%R

# Null model: no fixed intercept (`~ 0`), i.e. the log-odds of `iso` are fixed at 0;
# random intercepts for participant (suj) and noun (nounSing).
m0AdjDem <- glmer(iso ~ 0 + (1 | suj) + (1 | nounSing),
                  data = testAdjDem, family = binomial)
# Full model: same random effects plus a freely estimated intercept.
m1AdjDem <- glmer(iso ~ 1 + (1 | suj) + (1 | nounSing),
                  data = testAdjDem, family = binomial)

# Compare the two fits to test whether the intercept improves the model.
anova(m1AdjDem, m0AdjDem)

Run models for the adj-num condition, and test whether the intercept is significantly different from chance level.

In [None]:
%%R

# Null model: no fixed intercept (`~ 0`), i.e. the log-odds of `iso` are fixed at 0;
# random intercepts for participant (suj) and noun (nounSing).
m0AdjNum <- glmer(iso ~ 0 + (1 | suj) + (1 | nounSing),
                  data = testAdjNum, family = binomial)
# Full model: same random effects plus a freely estimated intercept.
m1AdjNum <- glmer(iso ~ 1 + (1 | suj) + (1 | nounSing),
                  data = testAdjNum, family = binomial)

# Compare the two fits to test whether the intercept improves the model.
anova(m1AdjNum, m0AdjNum)

Run models for the num-dem condition, and test whether the intercept is significantly different from chance level.

In [None]:
%%R

# Null model: no fixed intercept (`~ 0`), i.e. the log-odds of `iso` are fixed at 0;
# random intercepts for participant (suj) and noun (nounSing).
m0NumDem <- glmer(iso ~ 0 + (1 | suj) + (1 | nounSing),
                  data = testNumDem, family = binomial)
# Full model: same random effects plus a freely estimated intercept.
m1NumDem <- glmer(iso ~ 1 + (1 | suj) + (1 | nounSing),
                  data = testNumDem, family = binomial)

# Compare the two fits to test whether the intercept improves the model.
anova(m1NumDem, m0NumDem)