**Imports**

In [None]:
library(tidyverse)
library(emmeans)
library(rstatix)
library(MASS)
library(stargazer)


options(pillar.sigfig = 6, digits=6, warn=0, dplyr.summarise.inform = FALSE)

# Section 2

## Comparisons of familiarization-stage performance between groups

In [None]:
f <- function() { 
    # Load the familiarization-stage ('train') trials and code accuracy as 0/1
    train <- tibble::as_tibble(read.csv('data/clean_data.csv'))
    train <- dplyr::filter(train, stage=='train')
    train <- dplyr::select(train, sid, group, activity, correct)
    train <- dplyr::mutate(
        train,
        correct = as.numeric(correct=='True'),
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        sid = as.factor(sid)
    )

    # Mean of `correct` (PC) for each subject x activity cell; the first group
    # label found in the cell is carried along
    pc_by_cell <- train %>%
        dplyr::group_by(sid, activity) %>%
        dplyr::summarize(group = dplyr::first(group), pc = mean(correct))

    # Mixed ANOVA with subject as the error stratum
    anova_fit <- aov(pc ~ group * activity + Error(sid), data=pc_by_cell)
    print(summary(anova_fit), digits=6)

    # Post hoc: Tukey-adjusted pairwise activity contrasts within each group,
    # computed from a plain linear model with treatment contrasts
    treatment_contrasts <- list(group=contr.treatment(2), activity=contr.treatment(4))
    cell_model <- lm(pc ~ group * activity, data=pc_by_cell, contrasts=treatment_contrasts)
    activity_pairs <- emmeans(cell_model, pairwise ~ activity | group, adjust = "tukey")
    print(pairs(activity_pairs))
}

f()

## Comparisons of activity preferences between groups

In [None]:
f <- function() { 
    # Prepare data: count trials per participant and activity within the first
    # 60+250 trials and convert the counts to relative time allocation.
    # NOTE(review): the 15 subtracted from each count is presumably the number
    # of familiarization trials per activity (4 x 15 = 60) — confirm.
    df <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial<=60+250) %>%
        dplyr::select(sid,group,trial,activity) %>% 
        dplyr::group_by(group,sid,activity) %>% 
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
            relt=(count-15)/250,
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            # read.csv() returns character columns on R >= 4.0, and relevel()
            # below only accepts factors, so convert explicitly
            activity = as.factor(activity),
            sid = as.factor(sid)
        )

    # Print mean and SD of relative time per group and activity
    dplyr::group_by(df, group, activity) %>% 
    dplyr::summarize(mean=mean(relt), std=sd(relt)) %>%
    dplyr::ungroup() %>%
    print(digits=6)

    # Fit an ordinary linear model (no random effects) with A1 / IG as the
    # reference levels; activity uses sum-to-zero contrasts
    df <- within(df, activity <- relevel(activity, ref='A1'))
    df <- within(df, group <- relevel(group, ref='IG'))
    linmod <- lm(
      relt ~ group * activity, 
      data = df, 
      contrasts = list(group=contr.treatment(2), activity=contr.sum(4))
    )
    print(summary(linmod), digits=6)

    # Perform mixed ANOVA with subject as the error stratum
    AOV_results <- aov(relt ~ group * activity + Error(sid), df)
    print(summary(AOV_results), digits=6)
}

f()

## Analyses of learning achievement

### Unweighted average score

In [None]:
f <- function() {   
    # Read the per-participant unweighted scores (fpc) with labelled groups
    df <- read.csv('data/learning_data.csv')
    df <- dplyr::select(df, sid, group, fpc)
    df <- dplyr::mutate(df, group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'))

    # Mean and SD of fpc in each group
    df %>%
        dplyr::group_by(group) %>%
        dplyr::summarize(mean=mean(fpc), std=sd(fpc)) %>%
        print()

    # Welch two-sample t-test (t.test default) of IG vs EG scores; the result
    # is the function's visible return value
    scores.IG <- df$fpc[df$group=='IG']
    scores.EG <- df$fpc[df$group=='EG']
    t.test(scores.IG, scores.EG)
}

f()

### Weighted average score (dwfPC)

In [None]:
f <- function() {   
    # Read the per-participant weighted scores (dwfpc) with labelled groups
    df <- read.csv('data/learning_data.csv')
    df <- dplyr::select(df, sid, group, dwfpc)
    df <- dplyr::mutate(df, group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'))

    # Mean and SD of dwfpc in each group
    df %>%
        dplyr::group_by(group) %>%
        dplyr::summarize(mean=mean(dwfpc), std=sd(dwfpc)) %>%
        print()

    # Welch two-sample t-test (t.test default) of IG vs EG scores
    scores.IG <- df$dwfpc[df$group=='IG']
    scores.EG <- df$dwfpc[df$group=='EG']
    print(t.test(scores.IG, scores.EG), digits=6)
}

f()

### Compare learning between groups at each level of NAM

In [None]:
f <- function() {   
    # Keep participants with nam > 0 and treat nam and group as factors
    # (group keeps its raw codes here; it is not relabelled to IG/EG)
    scores <- read.csv('data/learning_data.csv')
    scores <- dplyr::select(scores, sid, group, nam, dwfpc, fpc)
    scores <- dplyr::filter(scores, nam > 0)
    scores <- dplyr::mutate(scores, nam=as.factor(nam), group=as.factor(group))

    # Two-way ANOVA table for dwfpc ~ group x nam
    print(summary(aov(dwfpc ~ group * nam, data=scores)))

    # Tukey-adjusted group contrasts within each NAM level, from the
    # equivalent linear model
    score_model <- lm(dwfpc ~ group * nam, data=scores)
    group_by_nam <- emmeans(score_model, pairwise ~ group | nam, adjust = "tukey")
    print(group_by_nam)
}

f()

### Count NAMs in each group

In [None]:
f <- function() {   
    # Count participants per NAM level inside each group; the table stays
    # grouped by `group`, so the percentages are computed within group
    nam_counts <- read.csv('data/learning_data.csv') %>%
        dplyr::select(sid, group, nam) %>%
        dplyr::group_by(group) %>%
        dplyr::count(nam, name='count') %>%
        dplyr::mutate(percentage=count/sum(count)*100) %>%
        dplyr::ungroup()

    # Replace the numeric codes with readable labels
    nam_counts <- dplyr::mutate(
        nam_counts,
        group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        nam=dplyr::recode(as.factor(nam), '0'='NAM-0', '1'='NAM-1', '2'='NAM-2', '3'='NAM-3')
    )

    print(nam_counts)
}

f()

### Examine relationships between activity selection and NAM

In [None]:
f <- function() {
    # Relative time per participant and activity over the first 60+250 trials
    time_alloc <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial<=60+250) %>%
        dplyr::select(sid,group,trial,activity) %>%
        dplyr::group_by(group,sid,activity) %>%
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup()
    time_alloc <- dplyr::mutate(
        time_alloc,
        relt=(count-15)/250,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        sid = as.factor(sid)
    )

    # NAM label of each participant
    nam_labels <- read.csv('data/nam_data.csv') %>%
        dplyr::select(sid, nam) %>%
        dplyr::mutate(sid = as.factor(sid), nam = as.factor(nam))

    # Attach the NAM labels, drop the NAM-0 participants and merge level '2'
    # into level '1'
    time_alloc <- time_alloc %>%
        dplyr::inner_join(nam_labels, by='sid') %>%
        dplyr::filter(nam != '0') %>%
        dplyr::mutate(nam = dplyr::recode(nam, '2'='1'))

    # One mixed ANOVA (subject as error stratum) per instruction group
    for (grp in c('IG','EG')) {
        cat('\nMixed ANOVA for group', grp, '\n=======================================')
        group_rows <- dplyr::filter(time_alloc, group==grp)
        group_aov <- aov(relt ~ nam * activity + Error(sid), data=group_rows)
        print(summary(group_aov), digits=6)
    }
}

f()

## Relationships between learning and self-challenge (SC)

### Performance as a function of SC

In [None]:
f <- function() {
    # Load learning data, leaving out participants with nam == 0
    # (those who did not master anything)
    df <- read.csv('data/learning_data.csv')
    df <- dplyr::filter(df, nam > 0)
    df <- dplyr::select(df, sid, group, nam, dwfpc, dwipc, fpc, ipc, sc_flat)

    # Label the factors, z-score self-challenge (sc) and add its square
    df <- dplyr::mutate(
        df,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        nam = as.factor(nam),
        sid = as.factor(sid),
        sc = (sc_flat-mean(sc_flat))/sd(sc_flat),
        sc2 = sc^2
    )

    # Models of the weighted final score with and without the quadratic SC term
    linmod.flat <- lm(dwfpc ~ dwipc + group + sc, data=df)
    linmod.quad <- lm(dwfpc ~ dwipc + group + sc + sc2, data=df)

    # Quadratic model summary for the weighted scores ...
    print(summary(linmod.quad), digits=6)

    # ... and for the unweighted scores, for comparison
    print(summary(lm(fpc ~ ipc + group + sc + sc2, data=df)), digits=6)

    # Compare AIC: positive values favour the quadratic model
    cat('Delta AIC =', AIC(linmod.flat) - AIC(linmod.quad))
}

f()

### Model of SC as a function of instruction and NAM

In [None]:
f <- function() {
    # Load self-challenge scores for participants with nam > 0
    df <- read.csv('data/learning_data.csv')
    df <- dplyr::filter(df, nam > 0)
    df <- dplyr::select(df, sid, group, nam, sc_flat)
    df <- dplyr::mutate(
        df,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        nam = as.factor(nam),
        sid = as.factor(sid)
    )

    # Linear model of SC with the group x NAM interaction
    linmod <- lm(sc_flat ~ group * nam, data=df)
    print(summary(linmod))

    # Group contrasts within each NAM level; this is the function's visible
    # return value, so it is auto-printed by the top-level call
    emmeans(linmod, pairwise ~ group | nam, adjust='Tukey')
}

f()

# Section 3

## Model comparisons

In [None]:
f <- function() {
    # Set the print precision for this function only and restore the caller's
    # options on exit, instead of changing global state permanently
    old_opts <- options(digits=6)
    on.exit(options(old_opts), add=TRUE)

    # Compute baseline (random-choice) model likelihood and AIC:
    # 250 choices, each with probability 1/4, and no free parameters
    log_lik <- sum(log(rep(.25, times=250)))
    n_params <- 0
    baseline_AIC <- -2*log_lik + 2*n_params
    cat('Baseline AIC =', baseline_AIC)

    # Load per-participant AIC scores, dropping model forms whose `vars`
    # mention 'relt' or 'abst'; the 'rpc,rlp' form is renamed to 'biv'
    df.long <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::mutate(sid=as.factor(sid)) %>%
        dplyr::filter(!stringr::str_detect(vars, 'relt')) %>%
        dplyr::filter(!stringr::str_detect(vars, 'abst')) %>%
        dplyr::select(sid,vars,aic,group) %>%
        dplyr::mutate(
            vars=dplyr::recode(vars, 'rpc,rlp'='biv'),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG')
        )

    # Get AIC means and SD per model form
    # (group by `vars, group` instead for a two-way breakdown)
    message('Average AIC scores per model form')
    df.long %>% 
        dplyr::select(vars, group, aic) %>%
        dplyr::group_by(vars) %>%
        dplyr::summarize(mean=mean(aic), std=sd(aic)) %>% 
        dplyr::ungroup() %>%
        print()
    message('ANOVA of AIC ~ GROUP x VARS')
    aov(lm(aic ~ group*vars, data=df.long)) %>% summary() %>% print(digits=6)

    # Contrast the bivariate model's AIC with the better of the two
    # univariate models (rpc, rlp) for each participant
    df.wide <- tidyr::pivot_wider(df.long, id_cols=c(sid, group), names_from=vars, values_from=aic) %>% 
        dplyr::mutate(
            min_uni_aic = pmin(rpc, rlp),
            biv_best = biv < min_uni_aic,
            biv_adv = min_uni_aic - biv,
            two_points = biv_adv >= 2
        )

    # Print summaries: share of participants for whom the bivariate model wins
    # (at all, and by at least 2 AIC points) and the mean/SD of its advantage
    message('Useful stats')
    df.wide %>% 
        dplyr::group_by(group) %>%
        dplyr::summarize(
            biv_best = mean(biv_best), 
            biv_two_points = mean(two_points),
            mean_diff = mean(biv_adv),
            std_diff = sd(biv_adv)
        ) %>%
        print()

    # Compare bivariate to the best univariate model within each group with a
    # paired sign test (rstatix::sign_test). This is NOT the Wilcoxon
    # signed-rank test: the sign test does not assume that the paired
    # differences are symmetric, which is why it is used here.
    # (An unpaired Mann-Whitney U test would be less appropriate because the
    # AIC scores are repeated measures on the same participants.)
    for (g in c('EG', 'IG')) {
        message('[', g, '] Paired sign test (for testing hypothesis that difference in medians is 0)')
        dplyr::filter(df.wide, group==g) %>%
            dplyr::select(sid, min_uni_aic, biv) %>%
            tidyr::pivot_longer(cols=c('min_uni_aic', 'biv'), names_to='nvars', values_to='aic') %>%
            dplyr::mutate(nvars=dplyr::recode(as.factor(nvars), 'min_uni_aic'='1', 'biv'='2')) %>%
            rstatix::sign_test(aic~nvars) %>%
            rstatix::add_significance() %>%
            print(digits=6)
    }
}

f()

## Analyses of fitted coefficients

### Descriptive stats

In [None]:
f <- function() {
    # Coefficients of the bivariate ('rpc,rlp') model form, one row per fit
    coefs <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean.csv'))
    coefs <- dplyr::filter(coefs, vars=='rpc,rlp')
    coefs <- dplyr::select(coefs, sid, group, nam, rpc, rlp, tau)
    coefs <- dplyr::mutate(
        coefs,
        sid=as.factor(sid),
        group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        nam=as.factor(nam)
    )

    # Scale each (rpc, rlp) pair to unit Euclidean length
    coefs <- dplyr::mutate(
        coefs,
        norm = sqrt(rpc^2+rlp^2),
        nrpc = rpc / norm,
        nrlp = rlp / norm
    )

    # Mean and SD of the normalized coefficients per group
    message('Descriptive stats')
    coefs %>%
        dplyr::group_by(group) %>%
        dplyr::summarize(
            dplyr::across(c(nrpc,nrlp), list(mean=mean, std=sd), .names='{.col}.{.fn}')
        ) %>%
        print(width=Inf)

    # Pearson correlation between the two normalized coefficients, per group
    message('Correlations')
    for (grp in c('IG','EG')) {
        cat('Correlation analysis in', grp, '\n')
        group_coefs <- dplyr::filter(coefs, group==grp)
        print(cor.test(~ nrpc + nrlp, data=group_coefs, method = "pearson"))
    }

    # One-way ANOVA of each normalized coefficient between groups
    message('ANOVA of PC between groups')
    print(summary(aov(lm(nrpc ~ group, data=coefs))), digits=6)
    message('ANOVA of LP between groups')
    print(summary(aov(lm(nrlp ~ group, data=coefs))), digits=6)
}

f()

### Coefficients and learning

In [None]:
f <- function() {
    # Coefficients of the bivariate ('rpc,rlp') model form, scaled so that
    # each (rpc, rlp) pair has unit Euclidean length
    coefs <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean.csv'))
    coefs <- dplyr::filter(coefs, vars=='rpc,rlp')
    coefs <- dplyr::select(coefs, sid, group, nam, rpc, rlp, tau)
    coefs <- dplyr::mutate(
        coefs,
        sid=as.factor(sid),
        group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        nam=as.factor(nam),
        norm = sqrt(rpc^2+rlp^2),
        nrpc = rpc / norm,
        nrlp = rlp / norm
    )

    # Learning scores joined with the coefficients (participants present in
    # both files), plus standardized versions of the predictors
    learning <- tibble::as_tibble(read.csv('data/learning_data.csv'))
    learning <- dplyr::select(learning, sid, progressive, dwfpc, dwipc, fpc, ipc, sc_flat)
    learning <- dplyr::mutate(learning, sid=as.factor(sid))
    df <- dplyr::inner_join(learning, coefs, by='sid')
    df <- dplyr::mutate(
        df,
        z_dwipc = (dwipc - mean(dwipc)) / sd(dwipc),
        z_nrpc = (nrpc - mean(nrpc)) / sd(nrpc),
        z_nrlp = (nrlp - mean(nrlp)) / sd(nrlp),
        abs_nrlp = abs(nrlp),
        z_nrlp2 = z_nrlp^2
    )

    # Regress the weighted final score on the weighted initial score and the
    # two normalized coefficients (the standardized columns are not used here)
    message('Linear model of learning as a function of coefs')
    print(summary(lm(dwfpc ~ dwipc + nrpc + nrlp, data=df)), digits=6)
}

f()

### LP- vs PC-dominant (task selection)

In [None]:
f <- function() {
    # Prepare coefficients data
    # NOTE(review): every other block reads 'param_fits_clean.csv' (without the
    # trailing underscore) — confirm that this path is intentional.
    df <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean_.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        dplyr::mutate(
            sid=as.factor(sid),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG')
        ) %>%
        # normalize by Euclidean norm
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        ) %>%
        # split the observed range of each coefficient into 3 equal-width bins
        dplyr::mutate(
            nrpc_bin = cut(nrpc, breaks=3, labels=c(1,2,3)),
            nrlp_bin = cut(nrlp, breaks=3, labels=c(1,2,3))
        ) %>%
        # 0/1 flags for two specific bin combinations: 'PC-dominant' is
        # (nrpc bin 1, nrlp bin 2); 'LP-dominant' is (nrpc bin 2, nrlp bin 3)
        dplyr::mutate(
            nrpc_dom = (nrpc_bin==1) * (nrlp_bin==2),
            nrlp_dom = (nrpc_bin==2) * (nrlp_bin==3)
        )

    # Share of participants with nam >= 2 and nam == 3, by LP-dominance flag
    df %>% 
        dplyr::group_by(nrlp_dom) %>% 
        dplyr::summarize(
            nam32_prop = sum(nam >= 2) / n(),
            nam3_prop = sum(nam == 3) / n()
        ) %>% print()

    # Trials per participant and activity within the first 60+250 trials
    # (minus 15), one column per activity. pivot_wider() replaces the
    # superseded tidyr::spread() and matches the other blocks in this file.
    df2 <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial <= 60+250) %>%
        dplyr::select(sid, trial, activity) %>% 
        dplyr::group_by(sid, activity) %>% 
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
            relt = count-15,
            sid = as.factor(sid)
        ) %>%
        dplyr::select(sid, activity, relt) %>%
        tidyr::pivot_wider(names_from = activity, values_from = relt)

    # Join datasets and keep only participants flagged as PC- or LP-dominant
    df <- dplyr::inner_join(df, df2, by='sid') %>%
        dplyr::select(sid, group, nrpc_dom, nrlp_dom, A3, A4) %>%
        dplyr::filter(nrpc_dom | nrlp_dom)
    rm(df2)
    print(head(df))
    
    # Print group sizes
    dplyr::group_by(df, group) %>% dplyr::summarize(n1=sum(nrpc_dom), n2=sum(nrlp_dom)) %>% print()
    
    # Pivot-longer for modeling: one row per participant x activity (A3, A4),
    # with `drive` labelling the dominance flag and 'PC' as reference level
    df <- dplyr::select(df, sid, group, nrlp_dom, A3, A4) %>%
        tidyr::pivot_longer(cols=c(A3,A4), names_to='activity', values_to='ntrials') %>%
        dplyr::mutate(drive = dplyr::recode(as.factor(nrlp_dom), '0'='PC', '1'='LP')) %>%
        within(drive <- relevel(drive, ref = 'PC'))
    print(head(df))
    
    # Fit linear model separately in each instruction group
    message('[IG] Linear model of time allocation as a function of Activity (A3, A4) and Drive (PC, LP)')
    linmod <- lm(ntrials ~ activity*drive, data=dplyr::filter(df, group=='IG')) %>% summary() %>% print(digits=6)
    
    message('[EG] Linear model of time allocation as a function of Activity (A3, A4) and Drive (PC, LP)')
    linmod <- lm(ntrials ~ activity*drive, data=dplyr::filter(df, group=='EG')) %>% summary() %>% print(digits=6)
}

f()

# Supplementary analyses

## Demographics

In [None]:
f <- function() {
    # Stack the two raw questionnaire files and keep the demographic columns
    extra_files <- c('data/raw/ig_extra.csv', 'data/raw/eg_extra.csv')
    extra_tables <- lapply(extra_files, function(path) tibble::as_tibble(read.csv(path)))
    dft <- dplyr::select(do.call(rbind, extra_tables), c(age, gender, race))

    # Age summary (missing values ignored): median, mean, range
    print(median(dft$age, na.rm = TRUE))
    print(mean(dft$age, na.rm = TRUE))
    print(range(dft$age, na.rm = TRUE))

    # Number of participants per race and per age
    # NOTE(review): gender is selected above but never tabulated — confirm
    # whether a gender count was intended.
    print(count(group_by(dft, race)))
    print(count(group_by(dft, age)))
}

f()

## Comparison of performance between excluded and retained participants

In [None]:
f <- function() {
    # Load the unfiltered trial data and summarize it per participant.
    # tibble::as_tibble() replaces dplyr::tbl_df(), which is deprecated.
    df <- tibble::as_tibble(read.csv('data/unclean_data.csv')) %>%
    dplyr::mutate(
        correct = as.numeric(correct=='True'),
        sid = as.factor(sid), 
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG')
    ) %>%
#     dplyr::filter(rt<1000*60) %>% # Apply this filtering for response time tests only
    dplyr::select(sid, activity, correct, rt, alloc_bias, resp_bias) %>%
    dplyr::group_by(sid) %>%
    dplyr::summarize(
        pc = mean(correct),
        # weighted accuracy: A1, A2 and A3 get weights 1/6, 1/3 and 1/2;
        # A4 does not enter the score
        wpc = mean(correct[activity=='A1'])/6 + mean(correct[activity=='A2'])/3 + mean(correct[activity=='A3'])/2,
        rt = mean(rt), 
        # the first bias value of each participant is kept
        ab=alloc_bias[1], 
        rb=resp_bias[1]) %>% 
    dplyr::ungroup() %>%
    # flag participants whose response bias exceeds the sample mean by more
    # than 2 standard deviations
    dplyr::mutate(
        high_rb = rb > mean(rb) + 2*sd(rb)) %>%
    dplyr::mutate(outlier = as.factor(high_rb))
    
    # Summarize outliers by response-bias vs rest
    dplyr::group_by(df, high_rb) %>% 
    dplyr::summarize(
        wpc.mean = mean(wpc), 
        wpc.sd = sd(wpc),
        rt.mean = mean(rt),
        rt.std = sd(rt)
    ) %>%
    print()
    
    # Compare RT (Welch t-test, unequal variances)
    message('Compare RTs')
    t.test(
        dplyr::filter(df, high_rb==TRUE)$rt,
        dplyr::filter(df, high_rb==FALSE)$rt,
        var.equal = FALSE) %>% print()
    
    # Compare weighted PC (Welch t-test, unequal variances)
    message('Compare PCs')
    t.test(
        dplyr::filter(df, high_rb==TRUE)$wpc,
        dplyr::filter(df, high_rb==FALSE)$wpc,
        var.equal = FALSE) %>% print()
}

f()

## Subjective ratings analyses

### Subjective and objective progress

In [None]:
f <- function() {
    # Load nam dataset (one NAM label per participant)
    nam_df <- tibble::as_tibble(read.csv('data/nam_data.csv')) %>% 
        dplyr::select(nam,sid) %>% 
        dplyr::distinct() %>% 
        dplyr::mutate(
            sid=as.factor(sid), 
            nam=as.factor(nam))

    # Load self-reports dataset and merge with NAM data
    df <- tibble::as_tibble(read.csv('data/combined_extra.csv')) %>% 
        dplyr::mutate(
            sid = as.factor(sid), 
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG')) %>% 
        dplyr::inner_join(nam_df, by='sid')

    # Show group stats for the raw 'prog' ratings
    dplyr::filter(df, item=='prog') %>%
        dplyr::group_by(group, activity, item) %>% 
        dplyr::summarize(
            mean = mean(rating),
            std = sd(rating)
        ) %>%
    print(digits=5)
    
    # Select normalized data for one questionnaire item ('prog'), leaving out
    # the participants with nam == 0
    df <- dplyr::filter(df, item=='prog', nam!=0) %>% 
        dplyr::rename(prog = rating_norm)

    # Load the per-activity pc values at trials 1 and 250.
    # tibble::as_tibble() replaces dplyr::tbl_df(), which is deprecated.
    df2 <- tibble::as_tibble(read.csv('data/model_data.csv')) %>% 
        dplyr::select(sid, trial, pc1, pc2, pc3, pc4) %>%
        dplyr::filter(trial==250 | trial==1)
    
    first <- dplyr::filter(df2, trial==1) %>% 
        dplyr::select(sid,pc1,pc2,pc3,pc4) %>%
        tibble::column_to_rownames('sid')
    last <- dplyr::filter(df2, trial==250) %>% 
        dplyr::select(sid,pc1,pc2,pc3,pc4) %>%
        tibble::column_to_rownames('sid')

    # Data-frame subtraction is positional, so line the trial-1 rows up with
    # the trial-250 rows by participant id before taking the difference
    first <- first[rownames(last), , drop = FALSE]
    df2 <- last - first 
    
    # Reshape the differences to one row per participant x activity
    df2 <- tibble::rownames_to_column(df2, var = 'sid') %>% 
        tibble::as_tibble() %>%
        tidyr::pivot_longer(
            cols = c(pc1, pc2, pc3, pc4),
            names_to = c('.value', 'activity'),
            # split each column name just before its trailing digit
            names_sep = '(?=\\d)'
        ) %>%
        dplyr::mutate(
            sid = as.factor(sid), 
            activity = dplyr::recode(as.factor(activity), '1'='A1', '2'='A2', '3'='A3', '4'='A4')
        ) %>%
        dplyr::select(sid,activity,pc)
    
    # Join datasets
    df <- dplyr::left_join(df, df2, by=c('sid'='sid', 'activity'='activity'))
    df <- within(df, nam <- relevel(nam, ref = '2'))
    
    # IG correlation
    message('Subjective and objective progress made in IG')
    df.IG <- dplyr::filter(df, group=='IG')
    cor.test(df.IG$prog, df.IG$pc) %>% print(digits=6)
    
    # EG correlation
    message('Subjective and objective progress made in EG')
    df.EG <- dplyr::filter(df, group=='EG')
    cor.test(df.EG$prog, df.EG$pc) %>% print(digits=6)
}

f()

### Subjective interest and objective time spent

In [None]:
f <- function() {
    # Load nam dataset (one NAM label per participant)
    nam_df <- tibble::as_tibble(read.csv('data/nam_data.csv')) %>% 
        dplyr::select(nam,sid) %>% 
        dplyr::distinct() %>% 
        dplyr::mutate(
            sid=as.factor(sid), 
            nam=as.factor(nam))

    # Load self-reports dataset and merge with NAM data
    df <- tibble::as_tibble(read.csv('data/combined_extra.csv')) %>% 
        dplyr::mutate(
            sid = as.factor(sid), 
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG')) %>% 
        dplyr::inner_join(nam_df, by='sid')

    # Show group stats for the raw 'int' ratings
    dplyr::filter(df, item=='int') %>%
        dplyr::group_by(group, activity, item) %>% 
        dplyr::summarize(
            mean = mean(rating),
            std = sd(rating)
        ) %>%
    print(digits=5)
    
    # Select normalized data for one questionnaire item (interest), leaving
    # out the participants with nam == 0
    df <- dplyr::filter(df, item=='int', nam!=0) %>% 
        dplyr::rename(int = rating_norm)

    # Load the per-activity relt values at trial 250, one row per
    # participant x activity.
    # tibble::as_tibble() replaces dplyr::tbl_df(), which is deprecated.
    df2 <- tibble::as_tibble(read.csv('data/model_data.csv')) %>% 
        dplyr::select(sid, trial, relt1, relt2, relt3, relt4) %>%
        dplyr::filter(trial==250) %>%
        tidyr::pivot_longer(
            cols = c(relt1, relt2, relt3, relt4),
            names_to = c('.value', 'activity'),
            # split each column name just before its trailing digit
            names_sep = '(?=\\d)'
        ) %>%
        dplyr::mutate(
            sid = as.factor(sid), 
            activity = dplyr::recode(as.factor(activity), '1'='A1', '2'='A2', '3'='A3', '4'='A4')
        ) %>%
        dplyr::select(sid,activity,relt)
    
    # Join datasets
    df <- dplyr::left_join(df, df2, by=c('sid'='sid', 'activity'='activity'))
    df <- within(df, nam <- relevel(nam, ref = '2'))
    
    # IG correlation between interest rating and relative time.
    # The messages previously said "progress" (copied from the progress
    # analysis) although interest and time are what is correlated here.
    message('Subjective interest and objective time spent in IG')
    df.IG <- dplyr::filter(df, group=='IG')
    cor.test(df.IG$int, df.IG$relt) %>% print(digits=6)
    
    # EG correlation between interest rating and relative time
    message('Subjective interest and objective time spent in EG')
    df.EG <- dplyr::filter(df, group=='EG')
    cor.test(df.EG$int, df.EG$relt) %>% print(digits=6)
}

f()

### Interest ratings

In [None]:
f <- function() {
    # Load nam dataset (one NAM label per participant).
    # tibble::as_tibble() replaces dplyr::tbl_df(), which is deprecated, here
    # and in the two other loads below.
    nam_df <- tibble::as_tibble(read.csv('data/nam_data.csv')) %>% 
        dplyr::select(nam,sid) %>% 
        dplyr::distinct() %>% 
        dplyr::mutate(
            sid=as.factor(sid), 
            nam=as.factor(nam))

    # Load self-reports dataset and merge with NAM data
    df <- tibble::as_tibble(read.csv('data/combined_extra.csv')) %>% 
        dplyr::mutate(
            sid = as.factor(sid), 
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG')) %>% 
        dplyr::inner_join(nam_df, by='sid')

    # Show group stats for the raw 'int' ratings
    dplyr::filter(df, item=='int') %>%
        dplyr::group_by(group, activity, item) %>% 
        dplyr::summarize(
            mean = mean(rating),
            std = sd(rating)
        ) %>%
    print(digits=5)
    
    # Select normalized data for one questionnaire item (interest), leaving
    # out the participants with nam == 0
    df <- dplyr::filter(df, item=='int', nam!=0) %>% 
        dplyr::rename(int = rating_norm)

    # Load the per-activity relt and pc values at trial 250, one row per
    # participant x activity
    df2 <- tibble::as_tibble(read.csv('data/model_data.csv')) %>% 
        dplyr::select(sid, trial, relt1, relt2, relt3, relt4, pc1, pc2, pc3, pc4) %>%
        dplyr::filter(trial==250) %>%
        tidyr::pivot_longer(
            cols = c(relt1, relt2, relt3, relt4, pc1, pc2, pc3, pc4),
            names_to = c('.value', 'activity'),
            # split each column name just before its trailing digit
            names_sep = '(?=\\d)'
        ) %>%
        dplyr::mutate(
            sid = as.factor(sid), 
            activity = dplyr::recode(as.factor(activity), '1'='A1', '2'='A2', '3'='A3', '4'='A4')
        ) %>%
        dplyr::select(sid,activity,relt,pc)
    
    # Join datasets
    df <- dplyr::left_join(df, df2, by=c('sid'='sid', 'activity'='activity'))
    df <- within(df, nam <- relevel(nam, ref = '2'))
    
    # Fit linear model of interest on relative time and pc, each interacting
    # with group
    lm(int ~ (relt + pc) * group, data=df) %>% summary() %>% print(digits=6)
}

f()

## Mastery criteria 

In [None]:
f <- function() {
    # Load and clean up data (the cleaned table is printed as it is assigned)
    dft <- tibble::as_tibble(read.csv('data/mcrits.csv')) %>%
        dplyr::mutate(
            sid = as.factor(sid),
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            activity = dplyr::recode(as.factor(activity), '1'='A1', '2'='A2', '3'='A3', '4'='A4'),
            crit_val = as.factor(crit_val),
            mastery = as.factor(mastery)
        ) %>%
    print()

    # Contingency tables of mastery against group, criterion value, activity
    xtabs(~mastery + group, data = dft) %>% print()
    xtabs(~mastery + crit_val, data = dft) %>% print()
    xtabs(~mastery + activity, data = dft) %>% print()

    # Successive-difference contrasts for the criterion value; the size is
    # taken from the data instead of being hard-coded to 5 levels
    contrasts(dft$crit_val) <- MASS::contr.sdif(nlevels(dft$crit_val))

    # Logistic model for each task. The printed `Call:` line shows the loop
    # variable `act`; the message above each summary names the activity.
    for (act in c('A1', 'A2', 'A3', 'A4')) {
        message(act)
        logmod <- glm(
            mastery ~ group * crit_val,
            data = dplyr::filter(dft, activity==act),
            family = "binomial"
        )
        summary(logmod) %>% print(digits=6)
    }
}

f()

## Understanding SC

### SC ~ preference + preference^2

In [None]:
# Print a compact text table of a fitted model with stargazer.
#
# model: a fitted model whose formula has a response followed by at least two
#        predictor variables (e.g. sc_flat ~ z + z_squared).
# Returns (invisibly) the stargazer output collapsed into one string; the
# table itself is printed by stargazer as a side effect.
stargazerize <- function(model) {
    # 'Constant' followed by the first two predictors: position 2 of the
    # combined vector is the response variable and is skipped.
    # Base c() replaces purrr::prepend(), which is deprecated.
    model_vars <- c('Constant', all.vars(formula(model)))[c(1,3,4)]
    tbl <- stargazer(model,
        type = 'text', 
        omit.stat = 'all',
        report = ('vctp*'),
        # NOTE(review): presumably puts the intercept row first, then the
        # predictors — confirm against stargazer's `order` argument
        order = model_vars,
        single.row=TRUE) %>% 
    toString()
    invisible(tbl)
}


f <- function() {
    # Local helper: centre on the mean and scale by the standard deviation
    zscore <- function(x) (x - mean(x)) / sd(x)

    # Learning data without the participants who did not master anything
    # (nam == 0), plus standardized SC and its square
    df1 <- tibble::as_tibble(read.csv('data/learning_data.csv'))
    df1 <- dplyr::filter(df1, nam > 0)
    df1 <- dplyr::select(df1, sid, group, nam, dwfpc, dwipc, fpc, ipc, sc_flat)
    df1 <- dplyr::mutate(
        df1,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        nam = as.factor(nam),
        sid = as.factor(sid),
        z_sc = zscore(sc_flat),
        z_sc2 = z_sc^2
    )

    # Relative time per participant over the first 60+250 trials, one column
    # per activity
    df2 <- tibble::as_tibble(read.csv('data/clean_data.csv')) %>%
        dplyr::filter(trial<=60+250) %>%
        dplyr::select(sid,group,trial,activity) %>%
        dplyr::group_by(group,sid,activity) %>%
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup()
    df2 <- dplyr::mutate(
        df2,
        relt=(count-15)/250,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        sid = as.factor(sid)
    )
    df2 <- dplyr::select(df2, sid, activity, relt)
    df2 <- tidyr::pivot_wider(df2, names_from = activity, values_from = relt)

    # Join, then derive the pairwise preferences (difference in relative time
    # between two activities), their z-scores and the squared z-scores
    df <- dplyr::left_join(df1, df2, by=c('sid'='sid'))
    df <- dplyr::mutate(
        df,
        A2A1 = A2 - A1,
        A3A1 = A3 - A1,
        A3A2 = A3 - A2,
        A4A1 = A4 - A1,
        A4A2 = A4 - A2,
        A4A3 = A4 - A3,
        z_A2A1 = zscore(A2A1),
        z_A3A1 = zscore(A3A1),
        z_A3A2 = zscore(A3A2),
        z_A4A1 = zscore(A4A1),
        z_A4A2 = zscore(A4A2),
        z_A4A3 = zscore(A4A3),
        z_A2A1_2 = z_A2A1^2,
        z_A3A1_2 = z_A3A1^2,
        z_A3A2_2 = z_A3A2^2,
        z_A4A1_2 = z_A4A1^2,
        z_A4A2_2 = z_A4A2^2,
        z_A4A3_2 = z_A4A3^2
    )
    print(df)

    # One quadratic model of SC per pairwise preference, each printed through
    # stargazerize()

    # A2 preference
    message('Quadratic model of SC as a function of A2 vs A1 preference')
    linmod <- stargazerize(lm(sc_flat ~ z_A2A1 + z_A2A1_2, data=df))

    # A3 preference
    message('Quadratic model of SC as a function of A3 vs A1 preference')
    linmod <- stargazerize(lm(sc_flat ~ z_A3A1 + z_A3A1_2, data=df))

    message('Quadratic model of SC as a function of A3 vs A2 preference')
    linmod <- stargazerize(lm(sc_flat ~ z_A3A2 + z_A3A2_2, data=df))

    # A4 preference
    message('Quadratic model of SC as a function of A4 vs A1 preference')
    linmod <- stargazerize(lm(sc_flat ~ z_A4A1 + z_A4A1_2, data=df))

    message('Quadratic model of SC as a function of A4 vs A2 preference')
    linmod <- stargazerize(lm(sc_flat ~ z_A4A2 + z_A4A2_2, data=df))

    message('Quadratic model of SC as a function of A4 vs A3 preference')
    linmod <- stargazerize(lm(sc_flat ~ z_A4A3 + z_A4A3_2, data=df))
}

f()

### dwfPC ~ preference + preference^2

In [None]:
f <- function() {
    # Local helper: centre on the mean and scale by the standard deviation
    zscore <- function(x) (x - mean(x)) / sd(x)

    # Learning scores without the participants who did not master anything
    # (nam == 0)
    df1 <- tibble::as_tibble(read.csv('data/learning_data.csv'))
    df1 <- dplyr::filter(df1, nam > 0)
    df1 <- dplyr::select(df1, sid, group, nam, dwfpc, dwipc, fpc, ipc, sc_flat)
    df1 <- dplyr::mutate(
        df1,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        sid = as.factor(sid)
    )
    df1 <- dplyr::select(df1, sid, group, dwfpc, fpc, dwipc, ipc)

    # Relative time per participant over the first 60+250 trials, one column
    # per activity
    df2 <- tibble::as_tibble(read.csv('data/clean_data.csv')) %>%
        dplyr::filter(trial<=60+250) %>%
        dplyr::select(sid,group,trial,activity) %>%
        dplyr::group_by(group,sid,activity) %>%
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup()
    df2 <- dplyr::mutate(
        df2,
        relt=(count-15)/250,
        group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
        sid = as.factor(sid)
    )
    df2 <- dplyr::select(df2, sid, activity, relt)
    df2 <- tidyr::pivot_wider(df2, names_from = activity, values_from = relt)

    # Join, then standardize the A3 and A4 time allocations and square them
    df <- dplyr::left_join(df1, df2, by=c('sid'='sid'))
    df <- dplyr::mutate(
        df,
        z_A3 = zscore(A3),
        z_A4 = zscore(A4),
        z_A3_2 = z_A3^2,
        z_A4_2 = z_A4^2
    )
    print(df)

    # A3 preference: quadratic summary, then AIC(linear) - AIC(quadratic)
    message('Quadratic model of dwfPC as a function of A3 preference')
    linmod.quad <- lm(dwfpc ~ dwipc + z_A3 + z_A3_2, data=df)
    linmod.nonq <- lm(dwfpc ~ dwipc + z_A3, data=df)
    print(summary(linmod.quad), digits=5)
    print(AIC(linmod.nonq) - AIC(linmod.quad))

    # A4 preference: quadratic summary, then AIC(linear) - AIC(quadratic)
    message('Quadratic model of dwfPC as a function of A4 preference')
    linmod.quad <- lm(dwfpc ~ dwipc + z_A4 + z_A4_2, data=df)
    linmod.nonq <- lm(dwfpc ~ dwipc + z_A4, data=df)
    print(summary(linmod.quad), digits=5)
    print(AIC(linmod.nonq) - AIC(linmod.quad))
}

f()

### SC and computational-model coefficients

In [None]:
f <- function() {
    # Model `sc_flat` as a function of the norm-scaled `rpc` and `rlp`
    # coefficients of the 'rpc,rlp' model form, including their interaction.
    #
    # Reads data/learning_data.csv and data/model_results/param_fits_clean.csv,
    # prints the joined table and the lm summary. Called for its printed
    # output; the model summary is returned invisibly.

    # Get learning data
    df1 <- tibble::as_tibble(read.csv('data/learning_data.csv')) %>%
        # we exclude people who did not master anything
        dplyr::filter(nam > 0) %>%
        dplyr::select(sid, group, sc_flat) %>%
        dplyr::mutate(
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            sid = as.factor(sid)
        )

    # Get coefficients data (only the model form with both rpc and rlp)
    df2 <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        dplyr::mutate(
            sid=as.factor(sid),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG')
        ) %>%
        # normalize by Euclidean norm so (nrpc, nrlp) lies on the unit circle
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        ) %>%
        dplyr::select(sid, nrpc, nrlp)

    # Join on participant id. dplyr joins take `by`, not `on`: the original
    # `on='sid'` was swallowed by `...`, so the join key was only inferred
    # from the shared column names (and newer dplyr rejects the argument).
    df <- dplyr::left_join(df1, df2, by='sid') %>% print()

    message('Linear model of SC as a function of coefficients')
    fit_summary <- lm(sc_flat ~ nrpc * nrlp, data=df) %>% summary() %>% print(digits=6)

    invisible(fit_summary)
}

f()

## Supplementary model comparisons

In [None]:
f <- function() {
    # Compare AIC across the model forms stored in param_fits_clean.csv.
    #
    # Prints, in order: the AIC of a parameter-free random-choice baseline,
    # the mean/SD of AIC per model form, per-participant comparison summaries,
    # and two paired sign tests. Called for its printed output; the result of
    # the last test is returned invisibly.

    # Compute baseline (random-choice) model likelihood and AIC:
    # 250 choices, each with probability .25, and no free parameters
    log_lik <- rep(.25, times=250) %>% log() %>% sum()
    n_params <- 0
    baseline_AIC <- -2*log_lik + 2*n_params
    options(digits=6)
    # End the line so the next printed table does not run on from it
    cat('Baseline AIC =', baseline_AIC, '\n')

    # Load data (one row per participant and model form), giving the
    # multi-variable model forms names that are valid column names
    df.long <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::mutate(sid=as.factor(sid)) %>%
        dplyr::select(sid,vars,aic) %>%
        dplyr::mutate(vars=dplyr::recode(vars,
            'rpc,rlp'='rpc_rlp',
            'rpc,abst'='rpc_abst',
            'rlp,abst'='rlp_abst',
            'rpc,rlp,abst'='triv'
        ))

    # Get AIC means and SD per model form
    message('Average AIC scores per model form')
    stats_summary <- dplyr::select(df.long, vars, aic) %>%
        dplyr::group_by(vars) %>%
        dplyr::summarize(mean=mean(aic), std=sd(aic)) %>%
        dplyr::ungroup() %>%
        dplyr::arrange(mean)
    print(stats_summary)

    # Contrast the trivariate model's AIC with the best of the other models,
    # and the rpc+rlp model's AIC with the best univariate model.
    # One row per participant, one AIC column per model form.
    df.wide <- tidyr::pivot_wider(df.long, names_from = vars, values_from = aic) %>%
        dplyr::mutate(min_nontriv_aic=pmin(rpc, rlp, abst, rpc_rlp, rpc_abst, rlp_abst)) %>%
        dplyr::mutate(min_uni_aic=pmin(rpc, rlp, abst)) %>%
        dplyr::mutate(
            triv_best = triv < min_nontriv_aic,
            triv_adv = min_nontriv_aic - triv,
            rpc_rlp_best = rpc_rlp < min_uni_aic,
            rpc_rlp_adv = min_uni_aic - rpc_rlp
        ) %>%
        dplyr::mutate(
            # model containing rPC and rLP (either biv or triv) is best
            # NOTE(review): the `(rpc_rlp < triv)` term is TRUE whenever the
            # rpc+rlp model beats the trivariate one, even if some other model
            # beats both — confirm this matches the intended definition.
            rpc_rlp_plus = rpc_rlp_best | triv_best | (rpc_rlp < triv)
        ) %>%
        dplyr::mutate(
            # advantage of at least 2 AIC points
            two_points = triv_adv >= 2,
            two_points_ = rpc_rlp_adv >= 2
        )

    # Print summaries (means of logical columns are proportions of participants)
    message('Comparisons stats')
    df.wide %>%
        dplyr::summarize(
            triv_best = mean(triv_best),
            triv_two_points = mean(two_points),
            mean_diff = mean(triv_adv),
            std_diff = sd(triv_adv),
            rpc_rlp_best = mean(rpc_rlp_best),
            two_points_2 = mean(two_points_),
            mean_diff_2 = mean(rpc_rlp_adv),
            std_diff_2 = sd(rpc_rlp_adv),
            rpc_rlp_plus_best = mean(rpc_rlp_plus)
        ) %>%
        print(width=Inf)

    # Compare the trivariate model with the best non-trivariate model.
    # The call below is rstatix::sign_test, i.e. a paired sign test on the
    # per-participant AIC pairs, not the Wilcoxon signed-rank test.
    # NOTE(review): the printed label says 'Wilcoxon sign test' — confirm
    # which test is meant and align the label or the call accordingly.
    message('Wilcoxon sign test (for testing hypothesis that difference in medians is 0)')
    dplyr::select(df.wide, sid, min_nontriv_aic, triv) %>%
        tidyr::pivot_longer(cols=c('min_nontriv_aic', 'triv'), names_to='nvars', values_to='aic') %>%
        dplyr::mutate(nvars=dplyr::recode(as.factor(nvars), 'min_nontriv_aic'='1', 'triv'='2')) %>%
        rstatix::sign_test(aic~nvars) %>% add_significance() %>%
        print(digits=6)

    # Same test, comparing the rpc+rlp model with the best univariate model
    message('Same test comparing PC+LP with other univariate models')
    test_results <- dplyr::select(df.wide, sid, min_uni_aic, rpc_rlp) %>%
        tidyr::pivot_longer(cols=c('min_uni_aic', 'rpc_rlp'), names_to='nvars', values_to='aic') %>%
        dplyr::mutate(nvars=dplyr::recode(as.factor(nvars), 'min_uni_aic'='1', 'rpc_rlp'='2')) %>%
        rstatix::sign_test(aic~nvars) %>% add_significance() %>%
        print(digits=6)

    invisible(test_results)
}

f()

## Relationship between coefficients and behavioral activity preferences

In [None]:
f <- function() {
    # Relate the norm-scaled `rpc` / `rlp` coefficients of the 'rpc,rlp' model
    # form to two activity-preference contrasts, with group as a moderator.
    #
    # Prints the summaries of four linear models. Called for its printed output.

    # Prepare coefficients data
    # NOTE(review): other cells in this file read 'param_fits_clean.csv'
    # (no trailing underscore) — confirm this file name is intended.
    df <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean_.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        dplyr::mutate(
            sid=as.factor(sid),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam=as.factor(nam)
        ) %>%
        # normalize by Euclidean norm
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        )

    # Per-participant activity counts (row count minus 15) over rows with
    # trial <= 60 + 250, one column per activity, plus preference contrasts
    df2 <- read.csv('data/clean_data.csv') %>%
        dplyr::filter(trial <= 60+250) %>%
        dplyr::select(sid, trial, activity) %>%
        dplyr::group_by(sid, activity) %>%
        dplyr::summarize(count=n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
            relt = count-15,
            sid = as.factor(sid)
        ) %>%
        dplyr::select(sid, activity, relt) %>%
        tidyr::pivot_wider(names_from = activity, values_from = relt) %>%
        dplyr::mutate(
            # A4 against the mean of the three other activities; the sum of
            # three terms is divided by 3 (the original divided it by 2)
            A4_A123 = A4-(A1+A2+A3)/3,
            # A3 against the mean of A1 and A2
            A3_A12 = A3-(A1+A2)/2)

    # Join datasets; participants missing from either table are dropped.
    # An optional restriction, dplyr::filter(nrpc <= 0), was left disabled
    # in the original code.
    df <- dplyr::inner_join(df, df2, by='sid')

    # Fit linear models
    message('pref ~ wPC * Group')
    lm(A3_A12 ~ group * nrpc, data=df) %>% summary() %>% print(digits=6)
    lm(A4_A123 ~ group * nrpc, data=df) %>% summary() %>% print(digits=6)

    message('pref ~ LP * Group')
    lm(A3_A12 ~ group * nrlp, data=df) %>% summary() %>% print(digits=6)
    lm(A4_A123 ~ group * nrlp, data=df) %>% summary() %>% print(digits=6)

}

f()

## Comparison of performance between motivated and unmotivated learners

In [None]:
f <- function() {
    # Compare test scores between two participant subsets defined from the
    # binned, norm-scaled `rpc` / `rlp` coefficients of the 'rpc,rlp' model
    # form ("motivated" vs "unmotivated"), before and after.
    #
    # Prints the long-format data, the subset sizes, an ANOVA table and two
    # sets of pairwise contrasts. Called for its printed output; the last
    # emmeans result is returned invisibly.

    # Prepare coefficients data
    df <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        dplyr::mutate(
            sid=as.factor(sid),
            group=dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            nam=as.factor(nam)
        ) %>%
        # normalize by Euclidean norm
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        ) %>%
        # cut() with breaks=3 splits the observed range of each coefficient
        # into three equal-width bins, labelled 1 (lowest) to 3 (highest)
        dplyr::mutate(
            nrpc_bin = cut(nrpc, breaks=3, labels=c(1,2,3)),
            nrlp_bin = cut(nrlp, breaks=3, labels=c(1,2,3))
        ) %>%
        # Both subsets sit in the middle nrpc bin and differ in the nrlp bin.
        # A broader definition was left disabled in the original code:
        #   motivated   = ((nrpc_bin==1) & (nrlp_bin==2)) | ((nrpc_bin==2) & (nrlp_bin==3))
        #   unmotivated = ((nrpc_bin==3) & (nrlp_bin==2)) | ((nrpc_bin==2) & (nrlp_bin==1))
        dplyr::mutate(
            motivated = (nrpc_bin==2) & (nrlp_bin==3),
            unmotivated = (nrpc_bin==2) & (nrlp_bin==1)
        )

    # Load learning data
    df2 <- tibble::as_tibble(read.csv('data/learning_data.csv')) %>%
        dplyr::mutate(sid=as.factor(sid)) %>%
        dplyr::select(sid, ipc, fpc, dwipc, dwfpc)

    # Join datasets and reshape to one row per participant and score column
    df <- dplyr::inner_join(df, df2, by='sid') %>%
        dplyr::select(sid, group, motivated, unmotivated, ipc, fpc, dwipc, dwfpc) %>%
        dplyr::filter(motivated | unmotivated) %>%
        # Turn the two logical flags into a single `motiv` factor
        tidyr::pivot_longer(cols=c(motivated, unmotivated), names_to='motiv', values_to='keep') %>%
        dplyr::mutate(motiv=as.factor(motiv)) %>%
        dplyr::filter(keep) %>%
        dplyr::select(-keep) %>%
        tidyr::pivot_longer(cols=c(ipc, fpc, dwipc, dwfpc), names_to='test_time', values_to='score') %>%
        # Encode each score column as '<pre|post>_<weighted flag>' so it can
        # be split into a test_time factor and a logical `weighted` column
        dplyr::mutate(test_time=dplyr::recode(as.factor(test_time), ipc='pre_FALSE', fpc='post_FALSE', dwipc='pre_TRUE', dwfpc='post_TRUE')) %>%
        tidyr::separate(test_time, into=c('test_time', 'weighted'), sep='_', convert=TRUE) %>%
        dplyr::mutate(test_time=as.factor(test_time)) %>%
        print()

    # Print group sizes: distinct participants per subset (each participant
    # contributes four rows here, one per score column)
    dplyr::group_by(df, motiv) %>% dplyr::summarize(n=dplyr::n_distinct(sid)) %>% print()

    # Stats: keep only the rows flagged `weighted` (dwipc / dwfpc) and set
    # the reference levels
    df <- dplyr::filter(df, weighted)
    df <- within(df, motiv <- relevel(motiv, ref = 'unmotivated'))
    df <- within(df, test_time <- relevel(test_time, ref = 'pre'))
    # Fit linear model
    message('Linear model of learning score as a function test time and motivation')
    linmod <- lm(score ~ motiv*test_time, data=df)
    # Report ANOVA
    aov(linmod) %>% summary() %>% print(digits=6)
    # Report contrasts: pre vs post within each subset, then subset vs subset
    # within each test time
    emmeans(linmod, pairwise ~ test_time | motiv, adjust = 'Tukey') %>% print(digits=6)
    posthoc <- emmeans(linmod, pairwise ~ motiv | test_time, adjust = 'Tukey') %>% print(digits=6)

    invisible(posthoc)
}

f()

## LP-driven vs PC-driven (ratings)

In [None]:
f <- function() {
    # Compare normalized ratings of activities A3 and A4 between two
    # participant subsets defined from the binned, norm-scaled `rpc` / `rlp`
    # coefficients of the 'rpc,rlp' model form (labelled 'PC' and 'LP').
    #
    # Prints the prepared data and the summaries of two linear models, one
    # per rating item ('int', 'lrn2'). Called for its printed output; the
    # last model summary is returned invisibly.

    # Prepare coefficients data
    # NOTE(review): other cells in this file read 'param_fits_clean.csv'
    # (no trailing underscore) — confirm this file name is intended.
    df <- tibble::as_tibble(read.csv('data/model_results/param_fits_clean_.csv')) %>%
        dplyr::filter(vars=='rpc,rlp') %>%
        dplyr::select(sid,group,nam,rpc,rlp,tau) %>%
        # normalize by Euclidean norm
        dplyr::mutate(norm=sqrt(rpc^2+rlp^2)) %>%
        dplyr::mutate(
            nrpc = rpc / norm,
            nrlp = rlp / norm
        ) %>%
        # cut() with breaks=3 splits the observed range of each coefficient
        # into three equal-width bins, labelled 1 (lowest) to 3 (highest)
        dplyr::mutate(
            nrpc_bin = cut(nrpc, breaks=3, labels=c(1,2,3)),
            nrlp_bin = cut(nrlp, breaks=3, labels=c(1,2,3))
        ) %>%
        # Multiplying the two logical tests gives 0/1 membership indicators
        dplyr::mutate(
            nrpc_dom = (nrpc_bin==1) * (nrlp_bin==2),
            nrlp_dom = (nrpc_bin==2) * (nrlp_bin==3)
        ) %>%
        dplyr::filter(nrpc_dom == 1 | nrlp_dom == 1) %>%
        dplyr::select(sid, nrlp_dom) %>%
        # Attach the ratings; `sid` is joined before conversion to a factor
        dplyr::inner_join(tibble::as_tibble(read.csv('data/combined_extra.csv')), by='sid') %>%
        dplyr::filter(activity == 'A3' | activity == 'A4') %>%
        dplyr::filter(item == 'int' | item == 'lrn2') %>%
        dplyr::mutate(
            sid = as.factor(sid),
            group = dplyr::recode(as.factor(group), '0'='IG', '1'='EG'),
            # nrlp_dom == 0 rows are the nrpc_dom subset kept by the filter above
            drive = dplyr::recode(as.factor(nrlp_dom), '0'='PC', '1'='LP')
        ) %>%
        print()

    # Fit linear model
    message('Linear model of INT as a function of Activity (A3, A4) and Drive (PC, LP)')
    lm(rating_norm ~ activity*drive, data=dplyr::filter(df, item=='int')) %>% summary() %>% print(digits=6)

    message('Linear model of LRN2 as a function of Activity (A3, A4) and Drive (PC, LP)')
    fit_summary <- lm(rating_norm ~ activity*drive, data=dplyr::filter(df, item=='lrn2')) %>% summary() %>% print(digits=6)

    invisible(fit_summary)
}

f()