# Loading Responses

In [2]:
# Print warnings as they occur rather than deferring them.
options(warn = 1)

# Attach every package used by this notebook. A package that cannot be
# attached is installed from the CRAN cloud mirror first and then attached.
invisible(lapply(
  c("lme4", "readr", "marginaleffects", "emmeans", "car"),
  function(pkg) {
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg, repos = "https://cloud.r-project.org")
      library(pkg, character.only = TRUE)
    }
  }
))
# Read the response table, keeping only the columns used below and fixing
# each column's type explicitly instead of relying on type guessing.
# NOTE(review): `na = "empty"` replaces readr's default missing-value strings
# with the literal string "empty" -- confirm this is intended.
df <- read_csv(
  "gaze-analysis/result_1743457603_20250506_20250506F.csv",
  na = "empty",
  col_select = c(
    "Accuracy", "Group", "GroupKind", "Angle", "Proximity", "n_candidates",
    "Actor", "Candidates", "Stimulus_ID", "Prompt_ID", "Participant_ID",
    "list_id", "Run_ID", "Part"
  ),
  col_types = cols(
    Accuracy = col_logical(),
    Group = col_factor(),
    GroupKind = col_factor(),
    Angle = col_factor(),
    Proximity = col_integer(),
    n_candidates = col_integer(),
    # Fixed level order, so "X" is the reference level of Actor.
    Actor = col_factor(c("X", "Y")),
    Candidates = col_factor(),
    Stimulus_ID = col_factor(),
    Prompt_ID = col_factor(),
    Participant_ID = col_factor(),
    list_id = col_factor(),
    Run_ID = col_character(),
    Part = col_character()
  ),
  show_col_types = TRUE
)
# Keep only the "gpt-4o" responses, excluding rows whose list_id is -1 and
# rows belonging to part "p0". The mask is wrapped in which() so that a
# comparison evaluating to NA drops the row; a bare logical index would
# instead insert an all-NA row, which would turn both means below into NA.
df <- df[which(df$Group == "gpt-4o" & df$list_id != -1 & df$Part != "p0"), ]

# Offset = log-odds of a success probability of 1 / n_candidates (presumably
# the chance level of a uniform guess among the candidates). It must be
# computed here, before n_candidates is mean-centred below.
# Note: n_candidates == 1 yields an infinite offset.
df$offset <- qlogis(1 / df$n_candidates)

# Mean-centre the numeric predictors so the model intercept refers to
# average Proximity and an average number of candidates.
df$Proximity <- df$Proximity - mean(df$Proximity)
df$n_candidates <- df$n_candidates - mean(df$n_candidates)

Loading required package: car

Loading required package: carData

[1mRows: [22m[34m156780[39m [1mColumns: [22m[34m14[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (2): Run_ID, Part
[32mint[39m (2): Proximity, n_candidates
[33mlgl[39m (1): Accuracy
[31mfct[39m (9): Stimulus_ID, Prompt_ID, Participant_ID, Group, GroupKind, Angle, Ac...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


# Fitting models

In [3]:
# Largest specification: fixed effects for Angle, Proximity, n_candidates and
# Actor, with random intercepts for Actor:Candidates, Stimulus_ID and
# Prompt_ID. `offset` refers to the df$offset column created during loading.
model0 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Actor:Candidates) +
    (1 | Stimulus_ID) +
    (1 | Prompt_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

boundary (singular) fit: see help('isSingular')



In [4]:
# Same fixed effects as model0, without the Actor:Candidates random
# intercept: random intercepts for Stimulus_ID and Prompt_ID only.
model1 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Stimulus_ID) +
    (1 | Prompt_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

In [12]:
# Display the fitted model1 (fit statistics, random-effect SDs, fixed effects).
model1

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
Accuracy ~ Angle + Proximity + n_candidates + Actor + (1 | Stimulus_ID) +  
    (1 | Prompt_ID)
   Data: df
 Offset: offset
      AIC       BIC    logLik  deviance  df.resid 
 7939.456  7996.296 -3961.728  7923.456      8992 
Random effects:
 Groups      Name        Std.Dev.
 Stimulus_ID (Intercept) 3.06101 
 Prompt_ID   (Intercept) 0.08895 
Number of obs: 9000, groups:  Stimulus_ID, 900; Prompt_ID, 12
Fixed Effects:
 (Intercept)     Angleleft    Angleright     Proximity  n_candidates  
      0.8451       -0.1712       -0.1276       -0.4647       -0.5869  
      ActorY  
     -0.9398  

In [5]:
# Same fixed effects as model0, without the Prompt_ID random intercept:
# random intercepts for Actor:Candidates and Stimulus_ID only.
model2 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Actor:Candidates) +
    (1 | Stimulus_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

boundary (singular) fit: see help('isSingular')



In [8]:
# Display the fitted model2 (fit statistics, random-effect SDs, fixed effects).
model2

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
Accuracy ~ Angle + Proximity + n_candidates + Actor + (1 | Actor:Candidates) +  
    (1 | Stimulus_ID)
   Data: df
 Offset: offset
      AIC       BIC    logLik  deviance  df.resid 
 7940.747  7997.587 -3962.374  7924.747      8992 
Random effects:
 Groups           Name        Std.Dev. 
 Stimulus_ID      (Intercept) 3.058e+00
 Actor:Candidates (Intercept) 8.831e-05
Number of obs: 9000, groups:  Stimulus_ID, 900; Actor:Candidates, 18
Fixed Effects:
 (Intercept)     Angleleft    Angleright     Proximity  n_candidates  
      0.8427       -0.1698       -0.1262       -0.4639       -0.5871  
      ActorY  
     -0.9382  

In [6]:
# Reduced specification: the only random effect is a Stimulus_ID intercept.
model3 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Stimulus_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

In [7]:
# Alternative reduced specification: the only random effect is an
# Actor:Candidates intercept (no Stimulus_ID term).
model3_5 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Actor:Candidates),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

# Model selection

In [9]:
# Likelihood-ratio comparison of model1 against model3: the two differ only
# in the Prompt_ID random intercept, which model1 has and model3 lacks.
anova(model1, model3)

Unnamed: 0_level_0,npar,AIC,BIC,logLik,deviance,Chisq,Df,Pr(>Chisq)
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
model3,7,7938.748,7988.483,-3962.374,7924.748,,,
model1,8,7939.456,7996.296,-3961.728,7923.456,1.291672,1.0,0.25574


In [10]:
# Compare the two single-random-effect models (Stimulus_ID vs Actor:Candidates).
# NOTE(review): these models are not nested and have the same number of
# parameters, so the Chisq/Df/p-value columns are not meaningful here;
# only the AIC/BIC/logLik columns should be read.
anova(model3, model3_5)

Unnamed: 0_level_0,npar,AIC,BIC,logLik,deviance,Chisq,Df,Pr(>Chisq)
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
model3,7,7938.748,7988.483,-3962.374,7924.748,,,
model3_5,7,11911.919,11961.654,-5948.959,11897.919,0.0,0.0,


In [13]:
# Carry model3 (random intercept for Stimulus_ID only) forward as the model
# used in the interpretation cells.
model <- model3

# Interpretation

In [14]:
# Full summary of the selected model, including the fixed-effect coefficient
# table with standard errors and Wald z tests.
summary(model)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
Accuracy ~ Angle + Proximity + n_candidates + Actor + (1 | Stimulus_ID)
   Data: df
 Offset: offset

     AIC      BIC   logLik deviance df.resid 
  7938.7   7988.5  -3962.4   7924.7     8993 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.9170 -0.3529  0.1520  0.3589  3.0744 

Random effects:
 Groups      Name        Variance Std.Dev.
 Stimulus_ID (Intercept) 9.35     3.058   
Number of obs: 9000, groups:  Stimulus_ID, 900

Fixed effects:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.8428     0.2180   3.865 0.000111 ***
Angleleft     -0.1698     0.2797  -0.607 0.543743    
Angleright    -0.1262     0.2784  -0.453 0.650207    
Proximity     -0.4639     0.1401  -3.311 0.000929 ***
n_candidates  -0.5871     0.1527  -3.844 0.000121 ***
ActorY        -0.9382     0.2329  -4.027 5.64e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [15]:
# Estimated marginal means for each Angle on the logit (link) scale, followed
# by their pairwise contrasts. The argument is spelled `specs` in full: the
# previous `spec =` only worked through partial argument matching.
log_odds_means <- emmeans(model, specs = "Angle")
print(log_odds_means)
pairs(log_odds_means)

# Same estimates after regridding to the response (probability) scale.
# Despite its name, `log_odds_means` holds probability-scale results from
# this point on; the name is kept because it is a notebook-level variable.
log_odds_means <- emmeans(model, specs = "Angle", regrid = "response")
print(log_odds_means)
pairs(log_odds_means)
               
# Random-effect variances, one entry per grouping factor (squared SDs).
vc_list <- lapply(VarCorr(model), function(re_block) attr(re_block, "stddev")^2)
# Total random-effect variance summed over all grouping factors.
vc <- sum(vapply(vc_list, sum, numeric(1)))
# Standardiser: square root of the random-effect variance plus pi^2 / 3
# (the variance of the standard logistic distribution).
effect_size_denom_t <- sqrt(vc + pi^2 / 3)

# Fixed-effect coefficient table as a data frame with the term names in a
# regular column, plus each estimate divided by the standardiser.
coef_matrix <- summary(model)$coefficients
res <- data.frame(
  term = rownames(coef_matrix),
  coef_matrix,
  check.names = FALSE,
  row.names = NULL
)
res[["effect_size_denom"]] <- effect_size_denom_t
res[["effect_size"]] <- res[["Estimate"]] / effect_size_denom_t
res

 Angle  emmean    SE  df asymp.LCL asymp.UCL
 front  0.0234 0.195 Inf    -0.358     0.405
 left  -0.1465 0.202 Inf    -0.543     0.250
 right -0.1029 0.200 Inf    -0.495     0.289

Results are averaged over the levels of: Actor 
Results are given on the logit (not the response) scale. 
Confidence level used: 0.95 


 contrast      estimate    SE  df z.ratio p.value
 front - left    0.1698 0.280 Inf   0.607  0.8162
 front - right   0.1262 0.278 Inf   0.453  0.8929
 left - right   -0.0436 0.284 Inf  -0.154  0.9871

Results are averaged over the levels of: Actor 
Results are given on the log odds ratio (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

 Angle  prob     SE  df asymp.LCL asymp.UCL
 front 0.506 0.0461 Inf     0.415     0.596
 left  0.465 0.0476 Inf     0.372     0.559
 right 0.476 0.0472 Inf     0.383     0.568

Results are averaged over the levels of: Actor 
Confidence level used: 0.95 


 contrast      estimate     SE  df z.ratio p.value
 front - left    0.0402 0.0661 Inf   0.608  0.8159
 front - right   0.0299 0.0659 Inf   0.454  0.8928
 left - right   -0.0103 0.0670 Inf  -0.154  0.9871

Results are averaged over the levels of: Actor 
P value adjustment: tukey method for comparing a family of 3 estimates 

term,Estimate,Std. Error,z value,Pr(>|z|),effect_size_denom,effect_size
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
(Intercept),0.8427575,0.2180374,3.8651962,0.0001109999,3.55532,0.23704122
Angleleft,-0.169847,0.2797391,-0.6071621,0.5437433,3.55532,-0.04777262
Angleright,-0.1262396,0.2783831,-0.4534744,0.6502072,3.55532,-0.03550723
Proximity,-0.4639305,0.1401113,-3.3111582,0.0009291066,3.55532,-0.13048909
n_candidates,-0.5870535,0.1527254,-3.8438498,0.0001211191,3.55532,-0.16511972
ActorY,-0.9381818,0.2329477,-4.0274351,5.638863e-05,3.55532,-0.26388109
