# Tools

In [137]:
library(reshape2)    # `melt` function to convert wide format to long
library(dplyr)       # common data transformation functions
library(lme4)        # `lmer` function to perform mixed linear estimation
library(emmeans)     # `emmeans` function for performing pst hoc analyses on fitted models
library(data.table)  # `setnames` function for changing column names

# ANOVAs

## Compare performance during Familiarization

In [45]:
df <- read.csv('data/long_ntm_data.csv')

# Select data to work with and recode ugly labels
df <- df[, c('sid','grp','tid','pc_first')]
names(df)[4] <- 'pc'
df$grp <- recode(df$grp, F=0, S=1)
df$tid <- recode(df$tid, '1_1D'='A1', '2_I1D'='A2', '3_2D'='A3', '4_R'='A4')
# df <- df[df$tid=='A4' | df$tid=='A1', ]

# Factorize categorical variables
df$sid <- factor(df$sid)
df$grp <- factor(df$grp)
df$tid <- factor(df$tid)

# Fit a linear mixed effects model
LM_model <- lm(pc ~ grp * tid, data=df)
print(anova(LM_model))

# Perform a post hoc analysis
posthoc <- emmeans(LM_model, 'tid', adjust = "tukey")
print(pairs(posthoc))

Analysis of Variance Table

Response: pc
            Df  Sum Sq Mean Sq  F value Pr(>F)    
grp          1  0.0600 0.06004   2.4495 0.1178    
tid          3  9.0812 3.02707 123.4869 <2e-16 ***
grp:tid      3  0.1203 0.04010   1.6357 0.1793    
Residuals 1200 29.4160 0.02451                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


NOTE: Results may be misleading due to involvement in interactions


 contrast estimate     SE   df t.ratio p.value
 A1 - A2    0.0615 0.0128 1200  4.820  <.0001 
 A1 - A3    0.1419 0.0128 1200 11.115  <.0001 
 A1 - A4    0.2305 0.0128 1200 18.054  <.0001 
 A2 - A3    0.0804 0.0128 1200  6.295  <.0001 
 A2 - A4    0.1690 0.0128 1200 13.235  <.0001 
 A3 - A4    0.0886 0.0128 1200  6.940  <.0001 

Results are averaged over the levels of: grp 
P value adjustment: tukey method for comparing a family of 4 estimates 


## Compare performance during first 25 trials of free play

In [120]:
# df <- read.csv('data/long_ntm_data_freeplay.csv')
# print(names(df))
# # Factorize categorical variables
# df$sid <- factor(df$sid)
# df$grp <- factor(df$grp)
# df$tid <- factor(df$tid)

# # If you want to obtain type-III sums of squares set the default contrasts to effect coding:
# options(contrasts=c('contr.sum', 'contr.poly'))

# # For this 2-way mixed ANOVA we need to specify the `Error` term to indicate that `tid` varied within `sid`
# linmod <- lme(pc1 ~ grp*tid, data=df, random=~1|sid, correlation=corCompSymm(form = ~1|sid))
# aovmod <- anova(linmod, type='marginal')

# print(aovmod)

# summary(glht(linmod, linfct=mcp(tid = "Tukey", interaction_average=TRUE)), 
#         test = adjusted(type = "bonferroni"))

[1] "X"   "sid" "grp" "tid" "pc1"


## Compare final performance

In [119]:
df <- read.csv('data/long_ntm_data.csv')
print(names(df))
# Select data to work with and recode ugly labels
df <- df[, c('sid','grp','tid','pc_grand')]
names(df)[4] <- 'pc'
df$grp <- recode(df$grp, F=0, S=1)
df$tid <- recode(df$tid, '1_1D'='A1', '2_I1D'='A2', '3_2D'='A3', '4_R'='A4')
# df <- df[df$tid=='A4' | df$tid=='A1', ]

# Factorize categorical variables
df$sid <- factor(df$sid)
df$grp <- factor(df$grp)
df$tid <- factor(df$tid)

# Fit a linear mixed effects model
LM_model <- lm(pc ~ grp * tid, data=df)
res <- summary(LM_model)
adjusted_pvals <- p.adjust(res$coefficients[, 'Pr(>|t|)'], method='bonferroni')

options(scipen=999)
print(round(cbind(res$coefficients, Bonferroni=adjusted_pvals),5))
# Perform a post hoc analysis
# posthoc <- emmeans(LM_model, 'tid', adjust = "tukey")
# print(pairs(posthoc))

 [1] "X"        "sid"      "grp"      "cnd"      "fam"      "tid"     
 [7] "tord"     "pc_grand" "pc_first" "pc_last"  "dpc"      "alltime" 
[13] "freetime" "nblocks"  "avstreak" "lrn"      "int"      "comp"    
[19] "time"     "prog"     "rule"     "lrn2"     "nlrn"     "nint"    
[25] "ncomp"    "ntime"    "nprog"    "nrule"    "nlrn2"    "lp1"     
[31] "lp2"      "lp3"      "ntm"      "good_ord"
            Estimate Std. Error   t value Pr(>|t|) Bonferroni
(Intercept)  0.83369    0.00949  87.87653  0.00000    0.00000
grp1        -0.01957    0.01299  -1.50644  0.13222    1.00000
tidA2       -0.07710    0.01342  -5.74687  0.00000    0.00000
tidA3       -0.21471    0.01342 -16.00324  0.00000    0.00000
tidA4       -0.34434    0.01342 -25.66487  0.00000    0.00000
grp1:tidA2   0.01705    0.01838   0.92770  0.35375    1.00000
grp1:tidA3   0.06299    0.01838   3.42799  0.00063    0.00503
grp1:tidA4   0.03075    0.01838   1.67331  0.09453    0.75621


# Compare task choices

In [95]:
# Load data
df <- read.csv('data/ntm_data_freeplay.csv')[, c('sid','grp','trial','st1','st2','st3','st4')]

# Factorize categorical variables
df$sid <- factor(df$sid)
df$grp <- factor(df$grp)

# Aggregate 
relt <- function(vec){sum(vec)/250}
cols_to_aggr <- list(relt1 = df$st1, relt2 = df$st2, 
                     relt3 = df$st3, relt4 = df$st4)
groupings <- list(grp = df$grp, sid = df$sid)
df <- aggregate(cols_to_aggr, by=groupings, FUN=relt)

# Convert to long format
setnames(df, old = c('relt1', 'relt2', 'relt3', 'relt4'), new = as.character(c(1:4)))
df <- melt(data = df, 
           id.vars = c('grp', 'sid'), 
           measure.vars = , 
           variable.name = 'tid',
           value.name = 'relt')

# Fit a linear mixed effects model
df <- within(df, tid <- relevel(tid, ref=2))
LM_model <- lm(relt ~ grp * tid, data=df)
res <- summary(LM_model)
adjusted_pvals <- p.adjust(res$coefficients[, 'Pr(>|t|)'], method='bonferroni')

options(scipen=999)
print(round(cbind(res$coefficients, Bonferroni=adjusted_pvals),5))

            Estimate Std. Error  t value Pr(>|t|) Bonferroni
(Intercept)  0.23301    0.01510 15.43624  0.00000    0.00000
grp1        -0.03535    0.02067 -1.71040  0.08743    0.69942
tid1         0.05506    0.02135  2.57942  0.01000    0.08004
tid3        -0.00408    0.02135 -0.19102  0.84854    1.00000
tid4         0.01696    0.02135  0.79451  0.42704    1.00000
grp1:tid1   -0.09111    0.02923 -3.11683  0.00187    0.01494
grp1:tid3    0.05885    0.02923  2.01325  0.04429    0.35434
grp1:tid4    0.17368    0.02923  5.94133  0.00000    0.00000


# Comparing numbers of switches in two groups

In [78]:
df <- read.csv('data/joint_data.csv', check.names=FALSE)[, c('# group','sid','trial','switch')]
names(df) <- c('grp','sid','trial','switch')
df <- df[df$trial > 60, ]
df[df$trial == 61, ] <- 0

by_grp <- aggregate(switch ~ grp + sid, df, sum)
res <- wilcox.test(
    x = by_grp[by_grp$grp==0, 'switch'], 
    y = by_grp[by_grp$grp==1, 'switch'],
    alternative = "two.sided"
)

print(res)

sem <- function(x) sqrt(var(x)/length(x))
print(aggregate(switch ~ grp, by_grp, mean))
print(aggregate(switch ~ grp, by_grp, sem))


	Wilcoxon rank sum test with continuity correction

data:  by_grp[by_grp$grp == 0, "switch"] and by_grp[by_grp$grp == 1, "switch"]
W = 4254, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0

  grp    switch
1   0  6.246753
2   1 10.659091
  grp    switch
1   0 0.2941456
2   1 0.2873720
