In [11]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.special import expit  # sigmoid

import importlib
import utils.data_processing as data_processing
importlib.reload(data_processing)
from utils.data_processing import set_seed, save_output, get_df

In [17]:
# Load the trial table through the project helper `get_df`
# (imported from utils.data_processing in the first cell).
# NOTE(review): the schema is not visible in this notebook; the cells below
# read the columns m1, m2, uc, c4, gameLength and subject — confirm they exist.
df = get_df()
# Optional filter (currently disabled): keep only the first row of each subject.
# df = df.groupby('subject').head(1).reset_index(drop=True) # first trial only

In [18]:
# --- Features ---
# dR: column m2 minus column m1, row by row.
# NOTE(review): presumably the mean-reward difference (option B − option A);
# confirm the meaning of m1/m2 against get_df.
df["dR"] = df["m2"] - df["m1"]

def compute_dI(uc):
    """Translate a `uc` code into the information-difference regressor.

    Args:
        uc: scalar code compared against 1 and 3.

    Returns:
        -1 when ``uc == 1``, 1 when ``uc == 3``, and 0 for any other value.
    """
    if uc == 1:
        # A once, B thrice → B less informative
        delta_info = -1
    elif uc == 3:
        # A thrice, B once → B more informative
        delta_info = 1
    else:
        # both twice
        delta_info = 0
    return delta_info
# dI: compute_dI applied element-wise to the `uc` column (values -1, 0 or 1).
df["dI"] = df["uc"].apply(compute_dI)

# dS is identically zero, so a `bias * dS` term built from this column is zero
# whatever the bias value is — the bias cannot be estimated from it.
df["dS"] = 0  # optional (no side info)
# Aliases used by the fitting and accuracy cells below.
df["choice"] = df["c4"]      # first free choice
df["horizon"] = df["gameLength"]  # later cells select the values 1 and 6

In [19]:
def neg_log_likelihood(params, dR, dI, dS, choice):
    """Negative log-likelihood of a logistic choice model.

    The probability of choosing B is ``sigmoid((dR + alpha*dI + bias*dS) / sigma)``
    with ``sigma = exp(log_sigma)``; `choice` weights the log-probability of B
    and ``1 - choice`` weights the log-probability of the alternative.

    The log-probabilities are evaluated with ``np.logaddexp`` rather than
    ``log(p + eps)``: the eps form caps every trial's penalty at
    ``-log(1e-9) ≈ 20.7`` once the sigmoid saturates, which under-penalises
    confidently wrong predictions and flattens the surface there.

    Args:
        params: sequence ``(alpha, bias, log_sigma)``.
        dR, dI, dS: array-likes of equal length (regressors); combined
            positionally after conversion to float arrays.
        choice: array-like of the same length; 1 where B was chosen, 0 otherwise.

    Returns:
        Scalar sum of the per-trial negative log-likelihoods.
    """
    alpha, bias, log_sigma = params
    sigma = np.exp(log_sigma)
    dR, dI, dS, choice = (np.asarray(v, dtype=float) for v in (dR, dI, dS, choice))

    # sigma can underflow to 0 for very negative log_sigma; let the division
    # produce ±inf quietly and mask zero-weight terms so 0 * inf never leaks a NaN.
    with np.errstate(divide="ignore", invalid="ignore"):
        z = (dR + alpha * dI + bias * dS) / sigma
        # -log sigmoid(z) = log(1 + exp(-z));  -log(1 - sigmoid(z)) = log(1 + exp(z))
        nll_b = np.where(choice != 0, choice * np.logaddexp(0.0, -z), 0.0)
        nll_a = np.where(choice != 1, (1.0 - choice) * np.logaddexp(0.0, z), 0.0)
    return np.sum(nll_b + nll_a)

# Fit (alpha, bias, log_sigma) separately for each horizon by minimising the
# negative log-likelihood; `results` maps horizon -> fitted parameter vector.
results = {}
for horizon in [1, 6]:
    subset = df[df["horizon"] == horizon]
    x0 = [0, 0, 0]  # alpha, bias, log_sigma initial guess
    res = minimize(
        neg_log_likelihood,
        x0,
        args=(subset["dR"], subset["dI"], subset["dS"], subset["choice"]),
    )
    # Surface optimiser failures instead of silently reporting parameters
    # from a fit that did not converge.
    if not res.success:
        print(f"H{horizon} WARNING: optimizer did not converge ({res.message})")
    results[horizon] = res.x
    print(f"H{horizon} α={res.x[0]:.3f}, bias={res.x[1]:.3f}, σ={np.exp(res.x[2]):.3f}")



H1 α=-2.686, bias=0.000, σ=10.899
H6 α=2.760, bias=0.000, σ=15.667


In [20]:
def predict_acc(params, df):
    """Share of rows where the thresholded model prediction equals the choice.

    Args:
        params: sequence ``(alpha, bias, log_sigma)``.
        df: frame providing the columns "dR", "dI", "dS" and "choice".

    Returns:
        Mean of the element-wise comparison between ``P(B) > 0.5`` and
        ``df["choice"]``.
    """
    alpha, bias, log_sigma = params
    noise_scale = np.exp(log_sigma)
    value_gap = df["dR"] + alpha * df["dI"] + bias * df["dS"]
    prob_b = expit(value_gap / noise_scale)
    predicted_b = prob_b > 0.5
    hits = predicted_b == df["choice"]
    return np.mean(hits)

# Pooled accuracy: score each horizon's fitted parameters on all trials of
# that horizon.
for horizon in (1, 6):
    subset = df.loc[df["horizon"] == horizon]
    acc = predict_acc(params=results[horizon], df=subset)
    print(f"H{horizon} accuracy: {acc:.3f}")



H1 accuracy: 0.757
H6 accuracy: 0.699


In [21]:
def subject_accuracy(df, params):
    """Prediction accuracy of `params` on the rows of `df`.

    Same computation as `predict_acc`, with the argument order swapped; it
    delegates so the prediction rule is defined in one place.

    Args:
        df: frame providing the columns "dR", "dI", "dS" and "choice".
        params: sequence ``(alpha, bias, log_sigma)``.

    Returns:
        Fraction of rows where the thresholded prediction equals "choice".
    """
    return predict_acc(params, df)

# Per-subject accuracies under each horizon's pooled fit, keyed by horizon.
subject_accs = {h: [] for h in (1, 6)}

for horizon in (1, 6):
    sub_df = df.loc[df["horizon"] == horizon]
    for subj, group in sub_df.groupby("subject"):
        acc = subject_accuracy(df=group, params=results[horizon])
        subject_accs[horizon].append(acc)

# Across-subject mean, standard error (sample SD / sqrt(n)) and min–max range
# of the per-subject accuracies, per horizon.
for horizon in (1, 6):
    arr = np.asarray(subject_accs[horizon])
    mean_acc = np.mean(arr)
    sem_acc = np.std(arr, ddof=1) / np.sqrt(arr.size)
    print(f"H{horizon}: mean={mean_acc:.3f}, sem={sem_acc:.3f}, range=({arr.min():.3f}–{arr.max():.3f})")

H1: mean=0.761, sem=0.003, range=(0.150–1.000)
H6: mean=0.704, sem=0.003, range=(0.156–1.000)
