**Imports**

In [2]:
library(tidyverse)
library(emmeans)
library(rstatix)

options(pillar.sigfig = 6, digits=6, warn=0, dplyr.summarise.inform = FALSE)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──
[32m✔[39m [34mggplot2[39m 3.3.0     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.1     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.0.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m  masks [34mstats[39m::filter()
[31m✖[39m [34mpurrr[39m::[32mflatten()[39m masks [34mjsonlite[39m::flatten()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m     masks [34mstats[39m::lag()

Attaching package: ‘rstatix’

The following object is masked from ‘package:stats’:

    filter



# Section 2

## Comparisons of familiarization-stage performance between groups

In [72]:
f <- function() { 
    # Prepare data
    df <- tbl_df(read.csv('data/clean_data.csv')) %>%
        dplyr::filter(stage=='train') %>%
        dplyr::select(sid,group,activity,correct) %>%
        dplyr::mutate(
            correct = as.numeric(correct=='True'),
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            sid = as.factor(sid)
        )

    # Calculate PC per task per subject
    df <- group_by(df, sid , activity) %>% dplyr::summarize(group = head(group, 1), pc = mean(correct))

    # Perform mixed ANOVA
    AOV_results <- aov(pc ~ group * activity + Error(sid), df)
    print(summary(AOV_results))
    
    # Post hoc analysis
    linmod <- lm(pc ~ group * activity, data=df, contrasts=list(group=contr.treatment(2), activity=contr.treatment(4)))
    posthoc <- emmeans(linmod, 'activity', adjust = "tukey")
    print(pairs(posthoc))
}

f()


Error: sid
           Df Sum Sq Mean Sq F value Pr(>F)
group       1   0.07  0.0657    2.26   0.13
Residuals 328   9.55  0.0291               

Error: Within
                Df Sum Sq Mean Sq F value Pr(>F)    
activity         3   9.83    3.28  141.10 <2e-16 ***
group:activity   3   0.11    0.04    1.57   0.19    
Residuals      984  22.84    0.02                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


NOTE: Results may be misleading due to involvement in interactions


 contrast estimate     SE   df t.ratio p.value
 A1 - A2    0.0664 0.0123 1312  5.418  <.0001 
 A1 - A3    0.1473 0.0123 1312 12.015  <.0001 
 A1 - A4    0.2299 0.0123 1312 18.750  <.0001 
 A2 - A3    0.0809 0.0123 1312  6.597  <.0001 
 A2 - A4    0.1634 0.0123 1312 13.332  <.0001 
 A3 - A4    0.0826 0.0123 1312  6.736  <.0001 

Results are averaged over the levels of: group 
P value adjustment: tukey method for comparing a family of 4 estimates 


## Comparisons of activity preferences between groups

In [74]:
f <- function() { 
    # Prepare data
    df <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial<=60+250) %>%
        dplyr::select(sid,group,trial,activity) %>% 
        dplyr::group_by(group,sid,activity) %>% 
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
            relt=(count-15)/250,
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            sid = as.factor(sid)
        )

    # Print stats
    (dplyr::group_by(df, group, activity) %>% dplyr::summarize(mean=mean(relt), std=sd(relt)))

    # Fit a linear mixed effects model
    df <- within(df, activity <- relevel(activity, ref='A1'))
    linmod <- lm(
      relt ~ group * activity, 
      data = df, 
      contrasts = list(group=contr.treatment(2), activity=contr.sum(4))
    )
    print(summary(linmod), digits=6)

    # Perform mixed ANOVA
    AOV_results <- aov(relt ~ group * activity + Error(sid), df)
    print(summary(AOV_results), digits=6)
}

f()


Call:
lm(formula = relt ~ group * activity, data = df, contrasts = list(group = contr.treatment(2), 
    activity = contr.sum(4)))

Residuals:
       Min         1Q     Median         3Q        Max 
-0.3882955 -0.1256250 -0.0409740  0.0981161  0.7143864 

Coefficients:
                     Estimate   Std. Error  t value   Pr(>|t|)    
(Intercept)       2.50000e-01  7.54760e-03 33.12313 < 2.22e-16 ***
group2            1.18405e-16  1.03350e-02  0.00000  1.0000000    
activity1         3.80779e-02  1.30728e-02  2.91276  0.0036432 ** 
activity2        -1.69870e-02  1.30728e-02 -1.29941  0.1940299    
activity3        -2.10649e-02  1.30728e-02 -1.61135  0.1073432    
group2:activity1 -1.26464e-01  1.79007e-02 -7.06477 2.6038e-12 ***
group2:activity2 -3.53539e-02  1.79007e-02 -1.97500  0.0484773 *  
group2:activity3  2.34968e-02  1.79007e-02  1.31262  0.1895416    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.187326 on 1312 degrees of freed

## Analyses of learning achievement

### Unweighted average score

In [75]:
f <- function() {   
    # Prepare data
    df <- read.csv('data/learning_data.csv') %>% 
        dplyr::select(sid, group, fpc) %>%
        dplyr::mutate(group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'))
    
    print(dplyr::group_by(df, group) %>% dplyr::summarize(mean=mean(fpc), std=sd(fpc)))
    
    # Compare combined scores in the two groups
    scores.IG <- df[df$group=='IG', 'fpc']
    scores.EG <- df[df$group=='EG', 'fpc']
    t.test(scores.IG, scores.EG)
}

f()

[38;5;246m# A tibble: 2 x 3[39m
  group  mean   std
  [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m IG    0.769 0.120
[38;5;250m2[39m EG    0.798 0.114



	Welch Two Sample t-test

data:  scores.IG and scores.EG
t = -2.189, df = 316.8, p-value = 0.0293
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.0537667 -0.0028711
sample estimates:
mean of x mean of y 
 0.769408  0.797727 


### Weighted average score (dwfPC)

In [76]:
f <- function() {   
    # Prepare data
    df <- read.csv('data/learning_data.csv') %>%
        dplyr::select(sid, group, dwfpc) %>%
        dplyr::mutate(group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'))
    
    print(dplyr::group_by(df, group) %>% dplyr::summarize(mean=mean(dwfpc), std=sd(dwfpc)))
    
    # Compare combined scores in the two groups
    scores.IG <- df[df$group=='IG', 'dwfpc']
    scores.EG <- df[df$group=='EG', 'dwfpc']
    print(t.test(scores.IG, scores.EG), digits=6)
}

f()

[38;5;246m# A tibble: 2 x 3[39m
  group  mean   std
  [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m IG    0.734 0.128
[38;5;250m2[39m EG    0.767 0.124

	Welch Two Sample t-test

data:  scores.IG and scores.EG
t = -2.424, df = 319.2, p-value = 0.0159
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.06109326 -0.00634902
sample estimates:
mean of x mean of y 
 0.733766  0.767487 



### Compare learning between groups at each level of NAM

In [53]:
f <- function() {   
    # Prepare data
    df <- read.csv('data/learning_data.csv') %>%
        dplyr::select(sid,group,nam,dwfpc,fpc) %>%
        dplyr::filter(nam > 0) %>%
        dplyr::mutate(nam=as.factor(nam), group=as.factor(group))

    # Fit linear model and perform get post hoc contrasts
    linmod <- lm(dwfpc ~ group * nam, data=df)
    print(summary(aov(dwfpc ~ group * nam, data=df)))
    posthoc <- emmeans(linmod, pairwise ~ group | nam, adjust = "tukey")
    print(posthoc)
}

f()

             Df Sum Sq Mean Sq F value Pr(>F)    
group         1  0.082   0.082    9.37 0.0024 ** 
nam           2  1.799   0.900  103.39 <2e-16 ***
group:nam     2  0.015   0.007    0.85 0.4304    
Residuals   314  2.732   0.009                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$emmeans
nam = 1:
 group emmean      SE  df lower.CL upper.CL
 0      0.623 0.01624 314    0.591    0.655
 1      0.637 0.01829 314    0.601    0.673

nam = 2:
 group emmean      SE  df lower.CL upper.CL
 0      0.725 0.01157 314    0.703    0.748
 1      0.747 0.01236 314    0.722    0.771

nam = 3:
 group emmean      SE  df lower.CL upper.CL
 0      0.841 0.01306 314    0.815    0.866
 1      0.832 0.00994 314    0.813    0.852

Confidence level used: 0.95 

$contrasts
nam = 1:
 contrast estimate     SE  df t.ratio p.value
 0 - 1    -0.01462 0.0245 314 -0.598  0.5505 

nam = 2:
 contrast estimate     SE  df t.ratio p.value
 0 - 1    -0.02131 0.0169 314 -1.259  0.2089 

nam

### Count NAMs in each group

In [77]:
f <- function() {   
    # Get data
    df <- read.csv('data/learning_data.csv') %>%
        dplyr::select(sid,group,nam) %>%
        dplyr::group_by(group, nam) %>%
        dplyr::summarize(count=n()) %>%
        dplyr::mutate(percentage=count/sum(count)*100) %>% 
        dplyr::ungroup() %>%
        dplyr::mutate(
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam=dplyr::recode(as.factor(nam), '0'='NAM-0', '1'='NAM-1', '2'='NAM-2', '3'='NAM-3')
        )
    
    print(df)
}

f()

[38;5;246m# A tibble: 8 x 4[39m
  group nam   count percentage
  [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<int>[39m[23m      [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m IG    NAM-0     5       3.25
[38;5;250m2[39m IG    NAM-1    33      21.4 
[38;5;250m3[39m IG    NAM-2    65      42.2 
[38;5;250m4[39m IG    NAM-3    51      33.1 
[38;5;250m5[39m EG    NAM-0     5       2.84
[38;5;250m6[39m EG    NAM-1    26      14.8 
[38;5;250m7[39m EG    NAM-2    57      32.4 
[38;5;250m8[39m EG    NAM-3    88      50   


### Examine relationships between activity selection and NAM

In [81]:
f <- function() {
    # Prepare data
    df <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial<=60+250) %>%
        dplyr::select(sid,group,trial,activity) %>% 
        dplyr::group_by(group,sid,activity) %>% 
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
            relt=(count-15)/250,
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            sid = as.factor(sid)
        )

    # Get NAM labels
    nam_df <- read.csv('data/nam_data.csv') %>%
        dplyr::select(sid, nam) %>%
        dplyr::mutate(
            sid = as.factor(sid),
            nam = as.factor(nam)
        )

    # Join datasets and exclude NAM0 cases
    df <- dplyr::inner_join(df, nam_df, by='sid') %>%
        dplyr::filter(nam != '0')
    rm(nam_df)

    # Join NAM1 and NAM2 groups
    df$nam <- dplyr::recode(df$nam, '2'='1')

    # Perform an ANOVA for each instruction group
    for (i in c('IG','EG')) {
        cat('\nMixed ANOVA for group', i, '\n=======================================')
        subdf <- dplyr::filter(df, group==i)
        print(summary(
            aov(relt ~ nam * activity + Error(sid), data=subdf)
        ), digits=6)
    }
}

f()


Mixed ANOVA for group IG 
Error: sid
           Df     Sum Sq     Mean Sq F value  Pr(>F)
nam         1 2.0120e-31 2.01222e-31 0.32216 0.57118
Residuals 147 9.1817e-29 6.24605e-31                

Error: Within
              Df   Sum Sq  Mean Sq  F value     Pr(>F)    
activity       3  0.36757 0.122525  2.49552   0.059328 .  
nam:activity   3  1.76659 0.588863 11.99363 1.4604e-07 ***
Residuals    441 21.65219 0.049098                        
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Mixed ANOVA for group EG 
Error: sid
           Df      Sum Sq     Mean Sq F value  Pr(>F)
nam         1 2.77000e-31 2.76982e-31 0.57624 0.44885
Residuals 169 8.12329e-29 4.80668e-31                

Error: Within
              Df   Sum Sq  Mean Sq  F value   Pr(>F)    
activity       3  5.23931 1.746437 42.03967  < 2e-16 ***
nam:activity   3  0.26296 0.087653  2.10995 0.098028 .  
Residuals    507 21.06210 0.041543                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.0

## Relationships between learning and self-challenge (SC)

### Performance as a function of SC

In [18]:
f <- function() {
    # Prepare data
    df <- read.csv('data/learning_data.csv') %>%
        # we exclude people who did not master anything
        dplyr::filter(nam > 0) %>%
        dplyr::select(sid, group, nam, dwfpc, dwipc, fpc, ipc, sc_flat) %>%
        dplyr::mutate(
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam = as.factor(nam),
            sid = as.factor(sid),
            sc = (sc_flat-mean(sc_flat))/sd(sc_flat)
        ) %>%
        dplyr::mutate(sc2 = sc^2)

    # Fit linear model
    linmod.flat <-lm(dwfpc ~ dwipc + group + sc, data=df)
    
    # Fit linear-quadratic model
    linmod.quad <-lm(dwfpc ~ dwipc + group + sc + sc2, data=df)
    # Print quadratic model summary
    print(summary(linmod.quad), digits=6)
    
    # Print quadratic model summary of unweighted scores for comparison
    print(summary(lm(fpc ~ ipc + group + sc + sc2, data=df)), digits=6)
    
    # Compaire AIC
    delta_aic <- AIC(linmod.flat) - AIC(linmod.quad)
    cat('Delta AIC =', delta_aic)
    
}

f()


Call:
lm(formula = dwfpc ~ dwipc + group + sc + sc2, data = df)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.3256512 -0.0635611  0.0077414  0.0838477  0.2692800 

Coefficients:
               Estimate  Std. Error  t value   Pr(>|t|)    
(Intercept)  0.56739207  0.03873727 14.64719 < 2.22e-16 ***
dwipc        0.31032459  0.05979591  5.18973 3.7813e-07 ***
groupEG      0.01788061  0.01354584  1.32001   0.187791    
sc          -0.00171340  0.00731289 -0.23430   0.814905    
sc2         -0.01576450  0.00549718 -2.86774   0.004413 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.113935 on 315 degrees of freedom
Multiple R-squared:  0.116451,	Adjusted R-squared:  0.105232 
F-statistic: 10.3792 on 4 and 315 DF,  p-value: 6.57339e-08


Call:
lm(formula = fpc ~ ipc + group + sc + sc2, data = df)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.3490687 -0.0569411  0.0074006  0.0717155  0.2142473 


### Model of SC as a function of instruction and NAM

In [93]:
f <- function() {
    # Prepare data
    df <- read.csv('data/learning_data.csv') %>%
        dplyr::filter(nam > 0) %>%
        dplyr::select(sid, group, nam, sc_lep) %>%
        dplyr::mutate(
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam = as.factor(nam),
            sid = as.factor(sid)
        )

    # Fit linear model
    linmod <-lm(sc_lep ~ group * nam, data=df)
    print(summary(linmod))
    
    # Compare SC between groups across different levels of NAM
    emmeans(linmod, pairwise ~ group | nam, adjust='Tukey')

}

f()


Call:
lm(formula = sc_lep ~ group * nam, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.3132 -0.0849 -0.0033  0.0822  0.3257 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)    0.3355     0.0211   15.90   <2e-16 ***
groupEG        0.0863     0.0318    2.72    0.007 ** 
nam2          -0.0146     0.0259   -0.56    0.573    
nam3           0.0425     0.0271    1.57    0.118    
groupEG:nam2   0.0093     0.0386    0.24    0.810    
groupEG:nam3  -0.0624     0.0383   -1.63    0.104    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.121 on 314 degrees of freedom
Multiple R-squared:  0.0899,	Adjusted R-squared:  0.0754 
F-statistic: 6.21 on 5 and 314 DF,  p-value: 1.67e-05



$emmeans
nam = 1:
 group emmean     SE  df lower.CL upper.CL
 IG     0.335 0.0211 314    0.294    0.377
 EG     0.422 0.0238 314    0.375    0.469

nam = 2:
 group emmean     SE  df lower.CL upper.CL
 IG     0.321 0.0150 314    0.291    0.350
 EG     0.416 0.0161 314    0.385    0.448

nam = 3:
 group emmean     SE  df lower.CL upper.CL
 IG     0.378 0.0170 314    0.345    0.411
 EG     0.402 0.0129 314    0.376    0.427

Confidence level used: 0.95 

$contrasts
nam = 1:
 contrast estimate     SE  df t.ratio p.value
 IG - EG   -0.0863 0.0318 314 -2.716  0.0070 

nam = 2:
 contrast estimate     SE  df t.ratio p.value
 IG - EG   -0.0956 0.0220 314 -4.347  <.0001 

nam = 3:
 contrast estimate     SE  df t.ratio p.value
 IG - EG   -0.0239 0.0213 314 -1.121  0.2632 



# Section 3

## Model comparisons

### Main analysis

In [11]:
f <- function() {
    # Compute baseline (random-choice) model likelihood and AIC
    log_lik <- rep(.25, times=250) %>% log() %>% sum()
    n_params <- 0
    baseline_AIC <- -2*log_lik + 2*n_params
    options(digits=6)
    cat('Baseline AIC =', baseline_AIC)

    # Load data
    df.long <- tbl_df(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::mutate(sid=as.factor(sid)) %>%
        dplyr::filter(!stringr::str_detect(vars, 'abst')) %>%
        dplyr::select(sid,vars,aic,group) %>%
        dplyr::mutate(
            vars=dplyr::recode(vars, 'rpc,rlp'='biv'),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG')
        )

    # Get AIC means and SD per model form
    message('Average AIC scores per model form')
    stats_summary <- dplyr::select(df.long, vars, group, aic) %>%
        dplyr::group_by(vars, group) %>%
        dplyr::summarize(mean=mean(aic), std=sd(aic)) %>% ungroup()
    print(stats_summary)
    aov(lm(aic ~ group*vars, data=df.long)) %>% summary() %>% print(digits=6)

    # Contrast bivariate models' AIC to the best univariate
    df.wide <- tidyr::spread(df.long, vars, aic) %>%
        dplyr::mutate(min_uni_aic=pmin(rpc, rlp)) %>%
        dplyr::mutate(
            biv_best = biv < min_uni_aic,
            biv_adv = min_uni_aic - biv,
        ) %>%
        dplyr::mutate(two_points = biv_adv >= 2)

    # Print summaries
    message('Useful stats')
    print(
        dplyr::summarize(
            df.wide, 
            biv_best = mean(biv_best), 
            biv_two_points = mean(two_points),
            mean_diff = mean(biv_adv),
            std_diff = sd(biv_adv)
        )
    )

    # Compare bivariate to the best univariate model (Wilcoxon signed-rank test)
    # The test is appropriate when the difference between repeated measures is asymmetrical and non-normal
    message('Wilcoxon sign test (for testing hypothesis that difference in medians is 0)')
    test_results <- dplyr::select(df.wide, sid, min_uni_aic, biv) %>%
        tidyr::pivot_longer(cols=c('min_uni_aic', 'biv'), names_to='nvars', values_to='aic') %>%
        dplyr::mutate(nvars=dplyr::recode(as.factor(nvars), 'min_uni_aic'='1', 'biv'='2')) %>%
        rstatix::sign_test(aic~nvars) %>% add_significance() %>%
        print(digits=6)

    # Compare bivariate to the best univariate model (Mann-Whitney U)
    # This test is less appropriate, because we have paired AIC scores (repeated measures)
    test_results <- with(df.wide, wilcox.test(min_uni_aic, biv))
}

f()

Baseline AIC = 693.147

Average AIC scores per model form


[38;5;246m# A tibble: 6 x 4[39m
  vars  group    mean      std
  [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<fct>[39m[23m   [3m[38;5;246m<dbl>[39m[23m    [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m rlp   IG    646.003  65.994[4m9[24m
[38;5;250m2[39m rlp   EG    641.017  77.330[4m2[24m
[38;5;250m3[39m rpc   IG    586.362 123.026 
[38;5;250m4[39m rpc   EG    586.267 114.557 
[38;5;250m5[39m biv   IG    554.616 129.178 
[38;5;250m6[39m biv   EG    544.150 127.161 
             Df   Sum Sq Mean Sq  F value  Pr(>F)    
group         1     6415    6415  0.53919 0.46295    
vars          2  1444046  722023 60.68428 < 2e-16 ***
group:vars    2     4287    2143  0.18015 0.83517    
Residuals   954 11350715   11898                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


Useful stats


[38;5;246m# A tibble: 1 x 4[39m
  biv_best biv_two_points mean_diff std_diff
     [3m[38;5;246m<dbl>[39m[23m          [3m[38;5;246m<dbl>[39m[23m     [3m[38;5;246m<dbl>[39m[23m    [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m  0.756[4m2[24m[4m5[24m        0.631[4m2[24m[4m5[24m   21.017[4m6[24m  38.275[4m7[24m


Wilcoxon sign test (for testing hypothesis that difference in medians is 0)


[38;5;246m# A tibble: 1 x 9[39m
  .y.   group1 group2    n1    n2 statistic    df           p p.signif
  [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m  [3m[38;5;246m<chr>[39m[23m  [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m     [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m       [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m   
[38;5;250m1[39m aic   2      1        320   320        78   320 1.080[4m0[24m[4m0[24m[38;5;246me[39m[31m-20[39m ****    


## Analyses of fitted coefficients

### Descriptive stats

In [35]:
f <- function() {
    # Prepare data
    df <- tbl_df(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        dplyr::mutate(
            sid=as.factor(sid),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam=as.factor(nam)
        ) %>%
        # normalize by Euclidean norm
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        )

    # Print coefficient stats
    message('Descriptive stats')
    df.stats <- dplyr::group_by(df, group) %>%
        dplyr::summarize(
            dplyr::across(c(rpc,rlp,nrpc,nrlp), list(mean=mean, std=sd), .names='{.col}.{.fn}')
        )
    print(df.stats)

    # Examine correlations
    message('Correlations')
    for (g in c('IG','EG')) {
        cat('Correlation analysis in', g, '\n')
        print(
            cor.test(data=dplyr::filter(df, group==g), ~ nrpc + nrlp, method = "pearson")
        )
    }

    # Compare coefficients between groups with ANOVA
    message('ANOVA of PC between groups')
    lm(nrpc ~ group, data=df) %>% aov() %>% summary() %>% print(digits=6)
    message('ANOVA of LP between groups')
    lm(nrlp ~ group, data=df) %>% aov() %>% summary() %>% print(digits=6)
}

f()

Descriptive stats


[38;5;246m# A tibble: 2 x 9[39m
  group  rpc.mean  rpc.std  rlp.mean  rlp.std nrpc.mean nrpc.std nrlp.mean
  [3m[38;5;246m<fct>[39m[23m     [3m[38;5;246m<dbl>[39m[23m    [3m[38;5;246m<dbl>[39m[23m     [3m[38;5;246m<dbl>[39m[23m    [3m[38;5;246m<dbl>[39m[23m     [3m[38;5;246m<dbl>[39m[23m    [3m[38;5;246m<dbl>[39m[23m     [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m IG     0.108[4m8[24m[4m8[24m[4m6[24m 0.620[4m7[24m[4m8[24m[4m0[24m 0.025[4m9[24m[4m5[24m[4m7[24m0 0.555[4m5[24m[4m1[24m[4m5[24m  0.157[4m7[24m[4m6[24m[4m7[24m 0.729[4m5[24m[4m8[24m[4m4[24m 0.094[4m9[24m[4m8[24m[4m4[24m3
[38;5;250m2[39m EG    -[31m0[39m[31m.[39m[31m200[4m5[24m[4m6[24m[4m2[24m[39m 0.587[4m8[24m[4m0[24m[4m9[24m 0.050[4m9[24m[4m7[24m[4m8[24m7 0.513[4m1[24m[4m8[24m[4m8[24m -[31m0[39m[31m.[39m[31m324[4m7[24m[4m9[24m[4m2[24m[39m 0.696[4m4[24m[4m8[24m[4m5[24m 0.075[4m2[24m[4m8[24m[4m8[2

Correlations


Correlation analysis in IG 

	Pearson's product-moment correlation

data:  nrpc and nrlp
t = -0.6168, df = 147, p-value = 0.538
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.209894  0.110897
sample estimates:
       cor 
-0.0508091 

Correlation analysis in EG 

	Pearson's product-moment correlation

data:  nrpc and nrlp
t = 1.575, df = 169, p-value = 0.117
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.030321  0.265577
sample estimates:
     cor 
0.120299 



ANOVA of PC between groups


             Df  Sum Sq  Mean Sq F value     Pr(>F)    
group         1  18.541 18.54102 36.5658 4.1373e-09 ***
Residuals   318 161.245  0.50706                       
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


ANOVA of LP between groups


             Df   Sum Sq  Mean Sq F value Pr(>F)
group         1   0.0309 0.030888 0.07292 0.7873
Residuals   318 134.6941 0.423566               


### Relationships between behavior and coefficients

In [15]:
f <- function() {
    # Prepare coefficients data
    df <- tbl_df(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        dplyr::mutate(
            sid=as.factor(sid),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam=as.factor(nam)
        ) %>%
        # normalize by Euclidean norm
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        )

    df2 <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial <= 60+250) %>%
        dplyr::select(sid, trial, activity) %>% 
        dplyr::group_by(sid, activity) %>% 
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
            relt=count-15,
            sid = as.factor(sid)
        ) %>%
        dplyr::select(sid, activity, relt) %>%
        tidyr::spread(activity, relt) %>%
        dplyr::mutate(
            A4_A3 = A4-A3, 
            A3_A1 = A3-A1,
            A4_A123 = A4-(A1+A2+A3)/3, 
            A3_A12 = A3-(A1+A2)/2)

    # Join datasets
    df <- dplyr::inner_join(df, df2, by='sid') %>%
        dplyr::filter(nrpc <= 0)
    rm(df2)

    # Fit linear models
    message('Linear models of coefficients as a function of activity preferences')
#     lm(nrpc ~ group*(A3_A12 + A4_A123), data=df) %>% summary() %>% print(digits=6)
#     lm(nrlp ~ A3_A12 + A4_A123, data=df) %>% summary() %>% print(digits=6)
    
    lm(A3_A12 ~ group * nrpc, data=df) %>% summary() %>% print(digits=6)
    lm(A4_A123 ~ group * nrpc, data=df) %>% summary() %>% print(digits=6)
    
    lm(A3_A12 ~ group * nrlp, data=df) %>% summary() %>% print(digits=6)
    lm(A4_A123 ~ group * nrlp, data=df) %>% summary() %>% print(digits=6)
    
}

f()

Linear models of coefficients as a function of activity preferences



Call:
lm(formula = A3_A12 ~ group * nrpc, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-99.3052 -30.1053 -11.5489  28.1005 124.3234 

Coefficients:
             Estimate Std. Error  t value   Pr(>|t|)    
(Intercept)   61.9611    14.8086  4.18412 4.5009e-05 ***
groupEG      -27.4649    17.8477 -1.53884    0.12563    
nrpc          31.4356    19.7893  1.58852    0.11395    
groupEG:nrpc -28.1850    23.6557 -1.19146    0.23507    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 43.4837 on 177 degrees of freedom
Multiple R-squared:  0.0217674,	Adjusted R-squared:  0.00518723 
F-statistic: 1.31286 on 3 and 177 DF,  p-value: 0.271756


Call:
lm(formula = A4_A123 ~ group * nrpc, data = df)

Residuals:
       Min         1Q     Median         3Q        Max 
-118.34157  -45.42170   -8.05818   46.04904  138.71738 

Coefficients:
              Estimate Std. Error  t value  Pr(>|t|)   
(Intercept)    6.75844   21.09902  0.32032

# Supplementary analyses

## Supplementary model comparisons

In [None]:
f <- function() {
    # Compute baseline (random-choice) model likelihood and AIC
    log_lik <- rep(.25, times=250) %>% log() %>% sum()
    n_params <- 0
    baseline_AIC <- -2*log_lik + 2*n_params
    options(digits=6)
    cat('Baseline AIC =', baseline_AIC)

    # Load data
    df.long <- tbl_df(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::mutate(sid=as.factor(sid)) %>%
        dplyr::select(sid,vars,aic) %>%
        dplyr::mutate(vars=dplyr::recode(vars,
            'rpc,rlp'='rpc_rlp',
            'rpc,abst'='rpc_abst',
            'rlp,abst'='rlp_abst',
            'rpc,rlp,abst'='triv',
        ))

    # Get AIC means and SD per model form
    message('Average AIC scores per model form')
    stats_summary <- dplyr::select(df.long, vars, aic) %>%
        dplyr::group_by(vars) %>%
        dplyr::summarize(mean=mean(aic), std=sd(aic)) %>% ungroup() %>%
        dplyr::arrange(mean)
    print(stats_summary)

    # Contrast trivaraite models' AIC to next best model
    df.wide <- tidyr::spread(df.long, vars, aic) %>%
        dplyr::mutate(min_bivorless_aic=pmin(rpc, rlp, abst, rpc_rlp, rpc_abst, rlp_abst)) %>%
        dplyr::mutate(min_uni_aic=pmin(rpc, rlp, abst)) %>%
        dplyr::mutate(
            triv_best = triv < min_bivorless_aic,
            triv_adv = min_bivorless_aic - triv,
            rpc_rlp_best = rpc_rlp < min_uni_aic,
            rpc_rlp_adv = min_uni_aic - rpc_rlp
        ) %>%
        dplyr::mutate(
            two_points = triv_adv >= 2,
            two_points_ = rpc_rlp_adv >= 2
        )

    # Print summaries
    message('Useful stats')
    print(
        dplyr::summarize(
            df.wide, 
            triv_best = mean(triv_best), 
            triv_two_points = mean(two_points),
            mean_diff = mean(triv_adv),
            std_diff = sd(triv_adv),
            rpc_rlp_best = mean(rpc_rlp_best), 
            two_points_2 = mean(two_points_),
            mean_diff_2 = mean(rpc_rlp_adv),
            std_diff_2 = sd(rpc_rlp_adv)
        ), width=Inf
    )

    # Compare bivariate to the best univariate model (Wilcoxon signed-rank test)
    # The test is appropriate when the difference between repeated measures is asymmetrical and non-normal
    message('Wilcoxon sign test (for testing hypothesis that difference in medians is 0)')
    test_results <- dplyr::select(df.wide, sid, min_bivorless_aic, triv) %>%
        tidyr::pivot_longer(cols=c('min_bivorless_aic', 'triv'), names_to='nvars', values_to='aic') %>%
        dplyr::mutate(nvars=dplyr::recode(as.factor(nvars), 'min_bivorless_aic'='1', 'triv'='2')) %>%
        rstatix::sign_test(aic~nvars) %>% add_significance() %>%
        print(digits=6)

    message('Same test comparing PC+LP with other univariate models')
    test_results <- dplyr::select(df.wide, sid, min_uni_aic, rpc_rlp) %>%
        tidyr::pivot_longer(cols=c('min_uni_aic', 'rpc_rlp'), names_to='nvars', values_to='aic') %>%
        dplyr::mutate(nvars=dplyr::recode(as.factor(nvars), 'min_uni_aic'='1', 'rpc_rlp'='2')) %>%
        rstatix::sign_test(aic~nvars) %>% add_significance() %>%
        print(digits=6)
}

f()

## Interest ratings analysis

In [None]:
f <- function() {
    # Load nam dataset
    nam_df <- tbl_df(read.csv('data/nam_data.csv')) %>% 
        dplyr::select(nam,sid) %>% 
        dplyr::distinct() %>% mutate(sid=factor(sid), nam=factor(nam))

    # Load self-reports dataset and merge with NAM data
    df <- tbl_df(read.csv('data/combined_extra.csv')) %>% 
        dplyr::mutate(sid=factor(sid), group=factor(group)) %>% 
        dplyr::inner_join(nam_df, by='sid')

    # Select data for one questionnaire item
    df <- dplyr::filter(df, item=='int', nam!=0) %>% rename(int = rating_norm)

    # Load and join relative time on activities data
    df2 <- tbl_df(read.csv('data/model_data.csv')) %>% 
        dplyr::select(sid, trial, relt1, relt2, relt3, relt4) %>%
        dplyr::filter(trial==250) %>%
        dplyr::rename(A1=relt1, A2=relt2, A3=relt3, A4=relt4) %>%
        tidyr::gather(activity, relt, A1:A4) %>% 
        dplyr::mutate(sid=factor(sid), activity=factor(activity))

    df <- dplyr::left_join(df, df2, by=c('sid'='sid', 'activity'='activity'))
    df <- within(df, nam <- relevel(nam, ref = '2'))

    # Fit linear model
    lm_ <- lm(int ~ relt * group, data=df)
    summary(lm_)

    # # Perform post hoc analysis
    posthoc <- emmeans(lm_, ~ time*nam | group)
    print(pairs(posthoc))
}

f()