In [1]:
import numpy as np
from scipy import stats

In [2]:
# Simulation settings

n_clusters = 30
n_obs = 60

In [3]:
# Specific priors that vary over models

# g-priors for theta
rscale = 0.5
prior_g_theta = 1/np.random.gamma(shape=.5, scale=(rscale**2)/2)
prior_g_nu = 1/np.random.gamma(shape=.5, scale=(rscale**2)/2)
prior_nu = np.random.normal(0, np.sqrt(prior_g_nu))

# theta prior for each model
theta_prior_unconstr = np.random.normal(prior_nu, np.sqrt(prior_g_theta), size=n_clusters) # in code: general
theta_prior_pos_eff = stats.truncnorm.rvs(loc=prior_nu, scale=1, a=-prior_nu, b=np.inf, size=n_clusters)
theta_prior_com_eff = np.repeat(prior_nu, n_clusters) # in code: one
theta_prior_null = np.repeat(0, n_clusters) 

In [4]:
# Common priors for all models

# Scale param for alpha_i: prior_g_alpha
prior_g_alpha = 1/np.random.gamma(2**2)

# Scale param for beta: prior_g_beta
prior_g_beta = 1/np.random.gamma(2**2)

In [5]:
# Final priors for linear model
mu = np.random.normal(0, 1) # Grand mean intercept # just standard-normal instead of Jeffreys prior as we don't have sigma anymore
alpha_i = np.random.normal(0, prior_g_alpha, size=n_clusters) # Person-specific intercept
beta = np.random.normal(0, prior_g_beta) # Effect of factual truth of statement
theta_i = theta_prior_unconstr # Individuals truth effect as stated by selected model

In [54]:
%%time

# FOR-LOOP VERSION 

# Simulate a dataset X of binary responses

X = []

t_j = np.repeat([0,1],n_obs/2) # truth status of statement j / assumed to be 50/50
x_k = np.repeat([0,1,0,1],n_obs/4) # repetition condition (new=0 / old=1) / assumed to be 50/50
print(len(t_j)==len(x_k)) # check if both vectors have equal length (satisfied if n_obs is dividable by 4 without rest)

for l in range(n_clusters):

    # list to hold individual responses
    x_i = []
    
    for n in range(n_obs):
        
        # Specify linear model
        lin_model = mu + alpha_i[l] + t_j[n]*beta + x_k[n]*theta_i[l] 
    
        # Estimate p
        p = np.exp(lin_model)/(1+np.exp(lin_model))
        
        # Generate x
        np.random.seed(1)
        x = np.random.binomial(n=1, p=p)
        
        x_i.append(x)
    
    X.append(x_i)

X = np.array(X)
X.mean()

True
Wall time: 17.5 ms


0.26666666666666666

In [52]:
%%time

# VECTORIZED VERSION 

# Simulate a dataset X of binary responses

t_j = np.repeat([0,1],n_obs/2) # truth status of statement j / assumed to be 50/50
x_k = np.repeat([0,1,0,1],n_obs/4) # repetition condition (new=0 / old=1) / assumed to be 50/50
print(len(t_j)==len(x_k)) # check if both vectors have equal length (satisfied if n_obs is dividable by 4 without rest)
        
# Specify linear model
lin_model = (mu + alpha_i)[..., np.newaxis] + t_j*beta + x_k*theta_i[..., np.newaxis]

# Estimate p
p = np.exp(lin_model)/(1+np.exp(lin_model))

# Generate x
np.random.seed(1)
x = np.random.binomial(n=1, p=p, size=(n_clusters,n_obs))

x.mean()

True
Wall time: 500 µs


0.43555555555555553

In [65]:
t_j = np.repeat([0,1],n_obs/2) # truth status of statement j 
t_j

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [66]:
x_k = np.repeat([0,1,0,1],n_obs/4) # repetition condition (new=0 / old=1)
x_k

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [60]:
np.random.binomial(n=1, p=.5, size=n_obs)

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0])

In [61]:
np.random.binomial(n=1, p=.5, size=n_obs)

array([1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0])

In [58]:
t_j

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [56]:
x_k

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])