Based on https://bambinos.github.io/bambi/notebooks/Strack_RRR_re_analysis.html

In [None]:
from glob import glob

import arviz as az
import bambi as bmb
import pandas as pd
from pathlib import Path
from data_utils import get_csv_list, download_csvs

In [None]:
# Select the "arviz-darkgrid" style for the plots drawn later in the notebook.
az.style.use("arviz-darkgrid")

In [None]:
# Location of the facial-feedback data inside the Bambi repository on GitHub.
bambi_data_url = "https://github.com/bambinos/bambi/tree/main/docs/notebooks/data/facial_feedback"

# Local data folder; create it (together with any missing parents) and do
# nothing if it is already there.
data_dir = Path("./data")
data_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Ask the project helper which CSV files are available at the Bambi data URL.
# NOTE(review): the shape of the return value (URLs vs. file names) is defined
# in data_utils.get_csv_list — confirm there.
csv_list = get_csv_list(bambi_data_url)
# Bare expression so the notebook displays the result.
csv_list

In [None]:
# Fetch the listed CSVs with the project helper; data_dir is presumably the
# destination directory (the on-disk layout is decided by data_utils.download_csvs).
download_csvs(csv_list, data_dir)

In [None]:
# Glob pattern matching the per-site CSV files.
# NOTE(review): the download cell hands "./data" to download_csvs, while this
# pattern looks in data/facial_feedback — confirm the helper writes there.
DL_PATH = "data/facial_feedback/*csv"

# Names assigned to the first 22 columns of every site's file.
columns = [
    "subject",
    "cond_id",
    "condition",
    "correct_c1",
    "correct_c2",
    "correct_c3",
    "correct_c4",
    "correct_total",
    "rating_t1",
    "rating_t2",
    "rating_c1",
    "rating_c2",
    "rating_c3",
    "rating_c4",
    "self_perf",
    "comprehension",
    "awareness",
    "transcript",
    "age",
    "gender",
    "student",
    "occupation",
]

# glob() returns matches in arbitrary order; sort them so that the integer
# study index assigned to each file is the same on every run and machine.
study_files = sorted(glob(DL_PATH))
if not study_files:
    # Fail with an actionable message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No study files match {DL_PATH!r}; download the CSVs before running this cell."
    )

dfs = []
# Offset added to each site's subject ids. It is advanced past the id range of
# every processed site so that uids from different sites can never coincide.
count = 0
for idx, study in enumerate(study_files):
    # Skip the first two rows of the file (presumably header rows), read
    # without a header, and keep only the first 22 columns.
    data = pd.read_csv(
        study, encoding="latin1", skiprows=2, header=None, index_col=False
    ).iloc[:, :22]
    data.columns = columns
    # Add study name
    data["study"] = idx
    # Some sites used the same subject id numbering schemes, so shift every
    # site's ids into its own, non-overlapping range to create unique ids.
    # Note that if we don't do this, Bambi would have no way of distinguishing
    # two subjects who share the same id, which would hose our results.
    subject = data["subject"].astype(float)
    lowest, highest = subject.min(), subject.max()
    if pd.isna(lowest):
        # No usable subject ids in this file: leave the offset well-defined
        # so the following sites are not poisoned with NaN.
        lowest = highest = 0.0
    data["uid"] = subject - lowest + count
    # Reserve this site's whole id range before moving on to the next site.
    count += highest - lowest + 1
    dfs.append(data)


# Stack all sites and coerce every value to a number; anything that cannot be
# parsed (e.g. free-text answers) becomes NaN.
data = pd.concat(dfs, axis=0).apply(pd.to_numeric, errors="coerce", axis=1)

In [None]:
# Bare expression: the notebook renders the combined DataFrame.
data

In [None]:
# Restrict the analysis to subjects who
#   (i)   responded appropriately on all four trials,
#   (ii)  understood the cartoons, and
#   (iii) reported no awareness of the hypothesis or underlying theory.
valid = data.loc[
    (data["correct_total"] == 4)
    & (data["comprehension"] == 1)
    & (data["awareness"] == 0)
]

# Reshape to long format: one row per subject and cartoon rating. The name of
# the originating rating column goes to "stimulus"; the rating itself lands in
# the default "value" column.
long = valid.melt(
    id_vars=["uid", "condition", "gender", "age", "study", "self_perf"],
    value_vars=["rating_c1", "rating_c2", "rating_c3", "rating_c4"],
    var_name="stimulus",
)

In [None]:
# Bare expression: the notebook renders the long-format DataFrame.
long

In [None]:
# Custom prior for the group-specific terms (just for illustration): a
# zero-centred Normal whose standard deviation gets a HalfNormal(sigma=10)
# hyperprior.
group_specific_sd = bmb.Prior("HalfNormal", sigma=10)
group_specific_prior = bmb.Prior("Normal", mu=0, sigma=group_specific_sd)

# Build the model on the long-format data: rating as a function of condition,
# with an intercept per subject (uid). Rows with missing values are dropped.
model = bmb.Model("value ~ condition + (1|uid)", data=long, dropna=True)
model.set_priors(group_specific=group_specific_prior)

# Sample the posterior, drawing 1,000 MCMC draws per chain.
results = model.fit(draws=1000)

In [None]:
# Plot the model's priors; the trailing semicolon keeps the notebook from
# echoing the call's return value.
model.plot_priors();

In [None]:
# Trace plots for the fitted model, limited to the variables listed in
# var_names. The trailing semicolon suppresses the notebook's echo of the
# returned axes.
az.plot_trace(
    results,
    var_names=["Intercept", "condition", "sigma", "1|uid_sigma"],
    compact=False,
);

In [None]:
# Summary table for the same variables that were shown in the trace plot.
az.summary(results, var_names=["Intercept", "condition", "sigma", "1|uid_sigma"])

In [None]:
# Same illustrative prior as before for every group-specific term: a
# zero-centred Normal whose standard deviation gets a HalfNormal(sigma=10)
# hyperprior.
group_specific_sd = bmb.Prior("HalfNormal", sigma=10)
group_specific_prior = bmb.Prior("Normal", mu=0, sigma=group_specific_sd)

# Extended model: adds age and gender as predictors, keeps the per-subject
# intercept, and lets both the intercept and the condition effect vary by
# study and by stimulus. Rows with missing values are dropped.
model = bmb.Model(
    "value ~ condition + age + gender + (1|uid) + (condition|study) + (condition|stimulus)",
    data=long,
    dropna=True,
)
model.set_priors(group_specific=group_specific_prior)

# Note: we use 2000 samples for tuning and increase target_accept to 0.99,
# because the default values result in divergences.
results = model.fit(draws=1000, tune=2000, target_accept=0.99)

In [None]:
# Trace plots for the extended model, limited to the variables listed in
# var_names; compact=True is passed so multi-valued variables share a panel
# (see the ArviZ plot_trace documentation). The trailing semicolon suppresses
# the notebook's echo of the returned axes.
az.plot_trace(
    results,
    var_names=[
        "Intercept",
        "age",
        "gender",
        "condition",
        "sigma",
        "1|study",
        "1|stimulus",
        "condition|study",
        "condition|stimulus",
        "1|study_sigma",
        "1|stimulus_sigma",
        "condition|study_sigma",
    ],
    compact=True,
);