# Loading Responses

In [1]:
# Print each warning as soon as it is raised rather than deferring it.
options(warn = 1)

# Attach every package used by this notebook, installing any that are missing.
# requireNamespace() only tests whether the package is available; library()
# does the attaching, so a package that still cannot be loaded after the
# install attempt stops the notebook with an error.
invisible(lapply(
  c("lme4", "readr", "marginaleffects", "emmeans", "car"),
  function(pkg) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, repos = "https://cloud.r-project.org")
    }
    library(pkg, character.only = TRUE)
  }
))
# Read only the columns used below, with explicit column types.
# NOTE(review): `na = "empty"` makes readr treat the literal string "empty" as
# the missing-value marker -- confirm that this matches the CSV.
df <- read_csv(
  "gaze-analysis/result_1743457603_20250506_20250506F.csv",
  na = "empty",
  col_select = c(
    "Accuracy", "Group", "GroupKind", "Angle", "Proximity", "n_candidates",
    "Actor", "Candidates", "Stimulus_ID", "Prompt_ID", "Participant_ID",
    "list_id", "Run_ID", "Part"
  ),
  col_types = cols(
    Accuracy = col_logical(),
    Group = col_factor(),
    GroupKind = col_factor(),
    Angle = col_factor(),
    Proximity = col_integer(),
    n_candidates = col_integer(),
    Actor = col_factor(c("X", "Y")),
    Candidates = col_factor(),
    Stimulus_ID = col_factor(),
    Prompt_ID = col_factor(),
    Participant_ID = col_factor(),
    list_id = col_factor(),
    Run_ID = col_character(),
    Part = col_character()
  ),
  show_col_types = TRUE
)

# Keep one Group, drop rows whose list_id is "-1" and rows from part "p0".
# which() discards rows where the condition is NA; indexing with the raw
# logical vector would insert all-NA rows for them, and those would turn the
# means used for centring below into NA.
df <- df[which(
  df$Group == "internlm-xcomposer2-vl-7b" &
    df$list_id != "-1" &
    df$Part != "p0"
), ]

# Fail early with a clear message instead of handing an empty data set or a
# non-finite offset to the model fits.
stopifnot(
  "no rows left after filtering" = nrow(df) > 0,
  "n_candidates must be > 1 for a finite offset" = all(df$n_candidates > 1L)
)

# Offset = logit(1 / n_candidates), i.e. the log-odds of picking one of
# n_candidates uniformly at random. Computed before n_candidates is centred.
df$offset <- log(1 / df$n_candidates / (1 - 1 / df$n_candidates))

# Mean-centre the two numeric predictors.
df$Proximity <- df$Proximity - mean(df$Proximity)
df$n_candidates <- df$n_candidates - mean(df$n_candidates)

Loading required package: lme4

Loading required package: Matrix

Loading required package: readr

Loading required package: marginaleffects

Loading required package: emmeans

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

Loading required package: car

Loading required package: carData

Rows: 156780 Columns: 14
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Run_ID, Part
int (2): Proximity, n_candidates
lgl (1): Accuracy
fct (9): Stimulus_ID, Prompt_ID, Participant_ID, Group, GroupKind, Angle, Ac...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


# Fitting models

In [2]:
# Largest model: fixed effects for Angle, Proximity, n_candidates and Actor,
# random intercepts for Actor:Candidates, Stimulus_ID and Prompt_ID, and the
# `offset` column of df as the offset.
# NOTE: the recorded run of this cell ended with a convergence warning
# (max|grad| = 0.00645188, tol = 0.002).
model0 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Actor:Candidates) + (1 | Stimulus_ID) + (1 | Prompt_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

“Model failed to converge with max|grad| = 0.00645188 (tol = 0.002, component 1)”


In [3]:
# Same fixed effects and offset as model0, but without the Actor:Candidates
# random intercept: random intercepts for Stimulus_ID and Prompt_ID only.
model1 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Stimulus_ID) + (1 | Prompt_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

In [4]:
# Same fixed effects and offset as model0, but without the Prompt_ID random
# intercept: random intercepts for Actor:Candidates and Stimulus_ID only.
# NOTE: the recorded run of this cell reported a boundary (singular) fit.
model2 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor +
    (1 | Actor:Candidates) + (1 | Stimulus_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

boundary (singular) fit: see help('isSingular')



In [5]:
# Smallest random-effects structure: the same fixed effects and offset as the
# other fits, with a random intercept for Stimulus_ID only.
model3 <- glmer(
  Accuracy ~ Angle + Proximity + n_candidates + Actor + (1 | Stimulus_ID),
  data = df,
  family = binomial(link = "logit"),
  offset = offset
)

# Model selection

In [6]:
# Compare model1 (random intercepts for Stimulus_ID and Prompt_ID) with
# model3 (random intercept for Stimulus_ID only).
anova(model1, model3)

Unnamed: 0_level_0,npar,AIC,BIC,logLik,deviance,Chisq,Df,Pr(>Chisq)
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
model3,7,5739.069,5788.803,-2862.534,5725.069,,,
model1,8,5739.471,5796.311,-2861.736,5723.471,1.59716,1.0,0.2063062


In [7]:
# Use model3 (random intercept for Stimulus_ID only) for the interpretation
# cells that follow.
model <- model3

# Interpretation

In [8]:
# Print the fit summary of the selected model.
summary(model)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
Accuracy ~ Angle + Proximity + n_candidates + Actor + (1 | Stimulus_ID)
   Data: df
 Offset: offset

     AIC      BIC   logLik deviance df.resid 
  5739.1   5788.8  -2862.5   5725.1     8993 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.99537 -0.12875 -0.05732  0.12649  3.10520 

Random effects:
 Groups      Name        Variance Std.Dev.
 Stimulus_ID (Intercept) 30.02    5.479   
Number of obs: 9000, groups:  Stimulus_ID, 900

Fixed effects:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.2368     0.4317   0.548   0.5834    
Angleleft      0.1730     0.5482   0.316   0.7523    
Angleright     0.2448     0.5482   0.447   0.6552    
Proximity     -0.6574     0.2776  -2.368   0.0179 *  
n_candidates  -1.6443     0.3313  -4.963 6.93e-07 ***
ActorY        -1.1439     0.4663  -2.453   0.0142 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [9]:
# Estimated marginal means per Angle on the logit scale, then their pairwise
# contrasts. `specs` is spelled out in full rather than relying on partial
# argument matching of `spec`.
log_odds_means <- emmeans(model, specs = "Angle")
print(log_odds_means)
pairs(log_odds_means)

# The same summaries after regridding to the response scale.
# NOTE: the variable name is reused, so from here on `log_odds_means` holds
# response-scale estimates despite its name.
log_odds_means <- emmeans(model, specs = "Angle", regrid = "response")
print(log_odds_means)
pairs(log_odds_means)

# Effect-size denominator: square root of the summed random-effect variances
# plus pi^2 / 3 (the variance of the standard logistic distribution).
vc_list <- lapply(VarCorr(model), function(x) attr(x, "stddev")^2)
vc <- sum(unlist(vc_list))
effect_size_denom_t <- sqrt(vc + pi^2 / 3)

# Fixed-effect coefficient table with the term names as a regular column, plus
# the denominator and each estimate divided by it.
res <- as.data.frame(summary(model)$coefficients)
res <- cbind(term = rownames(res), res)
rownames(res) <- NULL
res$effect_size_denom <- effect_size_denom_t
res$effect_size <- res$Estimate / effect_size_denom_t
res

 Angle emmean    SE  df asymp.LCL asymp.UCL
 front -0.685 0.385 Inf     -1.44    0.0701
 left  -0.512 0.397 Inf     -1.29    0.2651
 right -0.441 0.397 Inf     -1.22    0.3370

Results are averaged over the levels of: Actor 
Results are given on the logit (not the response) scale. 
Confidence level used: 0.95 


 contrast      estimate    SE  df z.ratio p.value
 front - left   -0.1730 0.548 Inf  -0.316  0.9466
 front - right  -0.2448 0.548 Inf  -0.447  0.8959
 left - right   -0.0718 0.557 Inf  -0.129  0.9909

Results are averaged over the levels of: Actor 
Results are given on the log odds ratio (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

 Angle  prob     SE  df asymp.LCL asymp.UCL
 front 0.347 0.0809 Inf     0.188     0.505
 left  0.384 0.0867 Inf     0.214     0.554
 right 0.400 0.0877 Inf     0.228     0.571

Results are averaged over the levels of: Actor 
Confidence level used: 0.95 


 contrast      estimate    SE  df z.ratio p.value
 front - left   -0.0372 0.118 Inf  -0.316  0.9466
 front - right  -0.0531 0.119 Inf  -0.447  0.8959
 left - right   -0.0159 0.123 Inf  -0.129  0.9909

Results are averaged over the levels of: Actor 
P value adjustment: tukey method for comparing a family of 3 estimates 

term,Estimate,Std. Error,z value,Pr(>|z|),effect_size_denom,effect_size
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
(Intercept),0.236775,0.4317159,0.5484509,0.5833823,5.771574,0.04102434
Angleleft,0.1730034,0.5481663,0.3156039,0.7523032,5.771574,0.02997508
Angleright,0.2448312,0.5482314,0.4465836,0.6551757,5.771574,0.04242018
Proximity,-0.657397,0.2776101,-2.368059,0.01788168,5.771574,-0.11390255
n_candidates,-1.6442912,0.3312829,-4.9634055,6.926775e-07,5.771574,-0.28489476
ActorY,-1.1438885,0.4663476,-2.4528665,0.01417229,5.771574,-0.1981935
