# Group-level comparison across confidence SNR

This notebook simulates 20 participants (100 trials per participant at each noise level) across 5 levels of type-2 noise
(lower noise = higher confidence SNR, defined here as 1 / noise) and compares four group-level approaches:

- MLE per-subject mean
- MLE pooled counts (per condition)
- Pooled Bayesian (per condition)
- Hierarchical Bayesian (rm1way, condition offsets)


In [1]:
import os
os.environ.setdefault("PYTENSOR_FLAGS", "compiledir=/tmp/pytensor")
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")

import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt

from metadpy.utils import type2_SDT_simuation, ratings2df
from metadpy.mle import metad
from metadpy.bayesian import hmetad, hmetad_pooled


In [2]:
# --- Reproducibility ---------------------------------------------------------
# A single seed value drives both NumPy's global RNG and the `random_seed`
# argument handed to the Bayesian fitting calls.
random_seed = 123
np.random.seed(random_seed)

# --- Simulation design -------------------------------------------------------
n_subjects, n_trials, n_ratings = 20, 100, 4
d, c = 1.0, 0.0  # forwarded as `d` and `c` to the simulator

# Type-2 noise per condition; SNR is defined as its reciprocal.
noise_levels = np.array([1.0, 0.8, 0.6, 0.4, 0.2])
snr_levels = 1.0 / noise_levels

# --- Sampler settings --------------------------------------------------------
num_samples, tune = 400, 400
num_chains = 2
target_accept = 0.9


In [3]:
# Simulate one dataset per (subject, noise level). Response counts are stored
# in dense (subject, level, 2 * n_ratings) arrays, and the trial-level frames
# are stacked into one long table.
n_levels = noise_levels.size
count_shape = (n_subjects, n_levels, 2 * n_ratings)
nR_S1_all = np.zeros(count_shape)
nR_S2_all = np.zeros(count_shape)
frames = []

# np.ndindex walks the (subject, level) grid in row-major order, so the
# simulator is called in the same sequence as a subject-outer / level-inner
# nested loop.
for subj, idx in np.ndindex(n_subjects, n_levels):
    nR_S1, nR_S2 = type2_SDT_simuation(
        d=d,
        noise=float(noise_levels[idx]),
        c=c,
        nRatings=n_ratings,
        nTrials=n_trials,
    )
    nR_S1, nR_S2 = nR_S1.astype(int), nR_S2.astype(int)
    nR_S1_all[subj, idx] = nR_S1
    nR_S2_all[subj, idx] = nR_S2
    # Tag each trial-level frame with its subject index and SNR label.
    frames.append(
        ratings2df(nR_S1, nR_S2).assign(Subject=subj, Condition=snr_levels[idx])
    )

sim_df = pd.concat(frames, ignore_index=True)
sim_df.head()


In [4]:
# Maximum-likelihood fit with `Subject` and `Condition` passed as the
# subject / within-subject columns of the long-format table.
mle_df = metad(
    data=sim_df,
    nRatings=n_ratings,
    stimuli="Stimuli",
    accuracy="Accuracy",
    confidence="Confidence",
    subject="Subject",
    within="Condition",
    padding=False,
    verbose=0,
)


def _standard_error(values):
    """Return the sample standard deviation (ddof=1) divided by sqrt(count)."""
    return values.std(ddof=1) / np.sqrt(values.count())


# Per-condition mean of m_ratio with a normal-approximation interval
# (mean ± 1.96 × SEM).
mle_group = (
    mle_df.groupby("Condition")["m_ratio"]
    .agg(mean="mean", sem=_standard_error)
    .reset_index()
    .assign(
        lower=lambda g: g["mean"] - 1.96 * g["sem"],
        upper=lambda g: g["mean"] + 1.96 * g["sem"],
        method="MLE (per-subject mean)",
    )
)


  dx = [(x0[i] + h[i]) - x0[i] for i in range(n)]
  self.f = self.J.dot(self.x)
  results_df = pd.concat([results_df, results])
  dx = [(x0[i] + h[i]) - x0[i] for i in range(n)]
  self.f = self.J.dot(self.x)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  c_ineq + s))
  return diag_elements*vec
  results_df = pd.concat([results_df, results])
  dx = [(x0[i] + h[i]) - x0[i] for i in range(n)]
  self.f = self.J.dot(self.x)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  c_ineq + s))
  return diag_elements*vec
  results_df = pd.concat([results_df, results])
  dx = [(x0[i] + h[i]) - x0[i] for i in range(n)]
  self.f = self.J.dot(self.x)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  c_ineq + s))
  return diag_elements*vec
  results_df = pd.concat([results_df, results])
  dx = [(x0[i] + h[i]) - x0[i] for i in range(n)]
  self.f = self.J.dot(self.x)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  c_ineq + s))
  return diag_elements*vec
  re

In [5]:
# Collapse the count arrays over subjects (axis 0) once, leaving one row of
# pooled response counts per condition.
nR_S1_by_cond = nR_S1_all.sum(axis=0)
nR_S2_by_cond = nR_S2_all.sum(axis=0)

pooled_rows = []
for snr, nR_S1_pool, nR_S2_pool in zip(snr_levels, nR_S1_by_cond, nR_S2_by_cond):
    res = metad(
        nR_S1=nR_S1_pool,
        nR_S2=nR_S2_pool,
        nRatings=n_ratings,
        padding=False,
        verbose=0,
    )
    m_ratio = float(res["m_ratio"].iloc[0])
    # Only a point estimate is taken from the pooled fit, so both interval
    # bounds are set equal to it.
    pooled_rows.append(
        dict(
            Condition=float(snr),
            mean=m_ratio,
            lower=m_ratio,
            upper=m_ratio,
            method="MLE (pooled counts)",
        )
    )

pooled_mle = pd.DataFrame(pooled_rows)


In [6]:
def summarize_posterior(da, hdi_prob=0.94):
    """Summarise posterior draws as a mean and a highest-density interval.

    Parameters
    ----------
    da
        Posterior samples with ``chain`` and ``draw`` dimensions. If the
        object exposes ``to_array`` it is converted with that method first.
    hdi_prob : float
        Probability mass forwarded to ``az.hdi``.

    Returns
    -------
    tuple
        ``(mean, lower, upper)``, each passed through ``np.squeeze``.
    """

    def _as_array(obj):
        # Objects offering `to_array` are flattened; anything else is returned as-is.
        return obj.to_array() if hasattr(obj, "to_array") else obj

    samples = _as_array(da)
    posterior_mean = samples.mean(dim=("chain", "draw")).values
    interval = _as_array(az.hdi(samples, hdi_prob=hdi_prob))
    # The interval bounds are selected by the "lower" / "higher" labels on the `hdi` coordinate.
    lower, upper = (
        interval.sel(hdi=label).values for label in ("lower", "higher")
    )
    return np.squeeze(posterior_mean), np.squeeze(lower), np.squeeze(upper)

# Pooled Bayesian fit: only `within` is supplied (no `subject` column), and
# the result is iterated below as a mapping of condition -> (model, trace).
pooled_models = hmetad_pooled(
    data=sim_df,
    nRatings=n_ratings,
    stimuli="Stimuli",
    accuracy="Accuracy",
    confidence="Confidence",
    within="Condition",
    sample_model=True,
    num_samples=num_samples,
    num_chains=num_chains,
    tune=tune,
    target_accept=target_accept,
    random_seed=random_seed,
    progressbar=False,
    output="model",
)

pooled_bayes_rows = []
for cond, (_model, trace) in pooled_models.items():
    # m_ratio is formed draw-by-draw as meta_d / d1 before summarising.
    mean, lower, upper = summarize_posterior(
        trace.posterior["meta_d"] / trace.posterior["d1"]
    )
    pooled_bayes_rows.append(
        dict(
            Condition=float(cond),
            mean=float(mean),
            lower=float(lower),
            upper=float(upper),
            method="Bayes (pooled)",
        )
    )

pooled_bayes = pd.DataFrame(pooled_bayes_rows)


Initializing NUTS using jitter+adapt_diag...
ERROR (pytensor.graph.rewriting.basic): SequentialGraphRewriter apply <pytensor.tensor.rewriting.elemwise.FusionOptimizer object at 0x15f3ed910>
ERROR (pytensor.graph.rewriting.basic): Traceback:
ERROR (pytensor.graph.rewriting.basic): Traceback (most recent call last):
  File "/Users/yifei/anaconda3/envs/metadpy-dev/lib/python3.11/site-packages/pytensor/graph/rewriting/basic.py", line 289, in apply
    sub_prof = rewriter.apply(fgraph)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/yifei/anaconda3/envs/metadpy-dev/lib/python3.11/site-packages/pytensor/tensor/rewriting/elemwise.py", line 886, in apply
    scalar_inputs, scalar_outputs = self.elemwise_to_scalar(inputs, outputs)
                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/yifei/anaconda3/envs/metadpy-dev/lib/python3.11/site-packages/pytensor/tensor/rewriting/elemwise.py", line 538, in elemwise_to_scalar
    scalar_inputs = [replacement[inp]

In [7]:
# Hierarchical Bayesian fit with both subject and within-subject condition columns.
hb_model, hb_trace = hmetad(
    data=sim_df,
    nRatings=n_ratings,
    stimuli="Stimuli",
    accuracy="Accuracy",
    confidence="Confidence",
    subject="Subject",
    within="Condition",
    sample_model=True,
    num_samples=num_samples,
    num_chains=num_chains,
    tune=tune,
    target_accept=target_accept,
    random_seed=random_seed,
    progressbar=False,
)

posterior = hb_trace.posterior
# Condition-level quantity: (mu_meta_d + condition_offset) / mu_d1, per draw.
group_mratio = (
    posterior["mu_meta_d"] + posterior["condition_offset"]
) / posterior["mu_d1"]
hb_mean, hb_lower, hb_upper = summarize_posterior(group_mratio)

# NOTE(review): summaries are paired with `snr_levels` by position; this
# assumes the posterior's condition axis follows the same order — confirm.
hb_rows = [
    {
        "Condition": float(cond),
        "mean": float(mean),
        "lower": float(lower),
        "upper": float(upper),
        "method": "Bayes (hierarchical)",
    }
    for cond, mean, lower, upper in zip(snr_levels, hb_mean, hb_lower, hb_upper)
]

hb_summary = pd.DataFrame(hb_rows)


                                                                                
                              Step      Grad      Sampli…                       
  Progre…   Draws   Diverg…   size      evals     Speed     Elapsed   Remaini…  
 ────────────────────────────────────────────────────────────────────────────── 
  ━━━━━━━   1400    0         0.046     511       8.54      0:02:43   0:00:00   
                                                  draws/s                       
  ━━━━━━━   1400    7         0.056     63        34.72     0:00:40   0:00:00   
                                                  draws/s                       
                                                                                


In [8]:
# Stack the per-method summaries into one long table keyed by `method` and
# `Condition`.
summary = pd.concat([mle_group, pooled_mle, pooled_bayes, hb_summary], ignore_index=True)
summary = summary.sort_values(["method", "Condition"])

fig, ax = plt.subplots(figsize=(8, 4))
methods = ["MLE (per-subject mean)", "MLE (pooled counts)", "Bayes (pooled)", "Bayes (hierarchical)"]
markers = {
    "MLE (per-subject mean)": "o",
    "MLE (pooled counts)": "s",
    "Bayes (pooled)": "D",
    "Bayes (hierarchical)": "^",
}

for method in methods:
    sub = summary[summary["method"] == method].sort_values("Condition")
    # `errorbar` takes distances from the point, so convert the absolute
    # lower/upper bounds into (below, above) offsets.
    yerr = np.vstack([sub["mean"] - sub["lower"], sub["upper"] - sub["mean"]])
    ax.errorbar(
        sub["Condition"],
        sub["mean"],
        yerr=yerr,
        label=method,
        marker=markers[method],
        capsize=3,
    )

ax.set_xlabel("Confidence SNR (1 / noise)")
ax.set_ylabel("Group m_ratio")
ax.set_title("Group-level meta-d'/d' across type-2 noise levels")
ax.legend(frameon=False)

# `savefig` does not create missing directories, and the working directory
# depends on how the notebook is launched; create the target folder first so
# the cell does not fail with FileNotFoundError.
figure_path = "notebooks/group_level_methods_comparison.png"
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path, dpi=150, bbox_inches="tight")
plt.show()




## Notes
- Lower noise (higher SNR) should yield higher estimated m_ratio across methods.
- The hierarchical model typically provides more stable group-level trends
  when per-subject estimates are noisy (low SNR / few trials).
- Sampling settings (`num_samples`, `tune`, `num_chains`) are intentionally light for speed; increase them for final runs.
