# Bayesian sampler

gonna write this in python with pyro!

# The Bayesian Sampler model is roughly:

$$P_{BS}(A) \sim Beta(\beta + S(A), \beta+F(A))$$

Where $S(A)$ and $F(A)$ are the number of success and failures sampled.

We can rewrite that:

$$P_{BS}(A) \sim Beta(\beta + \pi(A)N, \beta+(1-\pi(A))N)$$


Where $\pi(A)$ is the proportion of successes in the mental simulation with $N$ samples and is a function of the true underlying model probability, $p(A)$. It is distributed:

$$\pi(A) \sim Beta(p(A)*N, (1-p(A))*N)$$

To which we assign a uniform prior (or the multidimensional dirichlet equivalent):

$$p(A) \sim Beta(1,1)$$

In [970]:
%config IPCompleter.use_jedi = False
import torch as t
import pyro

import pyro
import pyro.infer
import pyro.optim
import pyro.distributions as dist
from pyro.infer import Importance, EmpiricalMarginal

import numpy as np
import pandas as pd
import seaborn as sns
import functools

t.set_default_tensor_type(t.DoubleTensor)

In [733]:
## starting with just one P b/c I am struggling w/ python rustiness and filepaths
from dfply import *
import glob
import pandas as pd

## just to get the skeleton of the final data format
df_raw = pd.read_csv("osfstorage-archive/Experiment 2/PrEstExp_001_111218_115935.csv")
df = (df_raw >> 
      group_by(X.querytype) >> 
      summarize(estimate = X.estimate.mean()) >>
      mutate(estimate = X.estimate/100.)
     )
     
## data munging (for later)

# conjdisj_trials = ["AorB","notAorB","AornotB","notAornotB", "AandB", "notAandB", "AandnotB", "notAandnotB"]
# simple_df = df[~df.querytype.isin(conjdisj_trials)]
# conjdisj_df = df[df.querytype.isin(conjdisj_trials)]

In [971]:

trial_funcs = dict({
    "AandB": lambda theta: t.matmul(theta, t.tensor([1.,0.,0.,0.])) ,
    "AandnotB": lambda theta: t.matmul(theta, t.tensor([0.,1.,0.,0.])),
    "notAandB": lambda theta: t.matmul(theta, t.tensor([0.,0.,1.,0.])),
    "notAandnotB": lambda theta: t.matmul(theta, t.tensor([0.,0.,0.,1.])),
    "A":lambda theta: t.matmul(theta, t.tensor([1.,1.,0.,0.])),
    "B":lambda theta: t.matmul(theta, t.tensor([1.,0.,1.,0.])),
    "notA":lambda theta: t.matmul(theta, t.tensor([0.,0.,1.,1.])),
    "notB":lambda theta: t.matmul(theta, t.tensor([0.,1.,0.,1.])),
    "AorB":lambda theta: t.matmul(theta, t.tensor([1.,1.,1.,0.])),
    "AornotB":lambda theta: t.matmul(theta, t.tensor([1.,1.,0.,1.])),
    "notAorB":lambda theta: t.matmul(theta, t.tensor([0.,1.,1.,1.])),
    "notAornotB":lambda theta: t.matmul(theta, t.tensor([0.,1.,0.,1.])),
    
    "AgB": lambda theta: t.div( t.matmul(theta, t.tensor([1.,0.,0.,0.])), t.matmul(theta, t.tensor([1.,0.,1.,0.])) ),
    "notAgB": lambda theta: t.div( t.matmul(theta, t.tensor([0.,0.,1.,0.])), t.matmul(theta, t.tensor([1.,0.,1.,0.])) ),
    "AgnotB": lambda theta: t.div( t.matmul(theta, t.tensor([0.,1.,0.,0.])), t.matmul(theta, t.tensor([0.,1.,0.,1.])) ),
    "notAgnotB": lambda theta: t.div( t.matmul(theta, t.tensor([0.,0.,0.,1.])), t.matmul(theta, t.tensor([0.,1.,0.,1.])) ),
    "BgA": lambda theta: t.div( t.matmul(theta, t.tensor([1.,0.,0.,0.])), t.matmul(theta, t.tensor([1.,1.,0.,0.])) ),
    "notBgA": lambda theta: t.div( t.matmul(theta, t.tensor([0.,1.,0.,0.])), t.matmul(theta, t.tensor([1.,1.,0.,0.])) ),
    "BgnotA": lambda theta: t.div( t.matmul(theta, t.tensor([0.,0.,1.,0.])), t.matmul(theta, t.tensor([0.,0.,1.,1.])) ),
    "notBgnotA": lambda theta: t.div( t.matmul(theta, t.tensor([0.,0.,0.,1.])), t.matmul(theta, t.tensor([0.,0.,1.,1.])) )
})


def dm_probs(trial_data, theta, n_obs):
    ## compute implied subj. probability from latent theta and trial type
    ## this is a vectorized solution: https://bit.ly/2P6mMcD
    p = t.tensor([])
    for i in range(0, n_obs):
        temp = trial_func[trial_data[i]](theta)
        p = t.cat( (p, t.tensor([temp])), 0)

    return p


In [1057]:
def sim_sampling(p, beta, N, k):
    p_bs = p * N / (N + 2.*beta) + beta/(N + 2.*beta)
    return dist.Beta(p_bs*k, (1-p_bs)*k).sample()

n_participants = 4
n_blocks = 3

trial_types = list(df.querytype)

n_trial_types = len(trial_types)

trials = trial_types*n_blocks
blocks = list(np.repeat(np.array(list(range(0, n_blocks))), n_trial_types))

all_participants = list(np.repeat(np.array(list(range(0, n_participants))), n_trial_types*n_blocks))
all_trials = trials*n_participants
all_blocks = blocks*n_participants
all_responses = t.ones(0)

all_thetas = []

for i in range(0, n_participants):
    theta = dist.Dirichlet(t.ones(4)).sample()

    probs = dm_probs(trials, theta, len(trials))
    responses = sim_sampling(probs, 1, 10, 100)
    
    all_thetas.append(theta)
    all_responses = t.cat((all_responses, responses))

# print(len(all_participants))
# print(len(all_trials))
# print(len(all_blocks))
# print(len(all_responses))
# print(len(all_thetas))


sim_data = pd.DataFrame(
    data = {
        "ID": all_participants,
        "querytype": all_trials, 
        "block": all_blocks,
        "response": all_responses
    }
)

sim_data.head()

Unnamed: 0,ID,querytype,block,response
0,0,A,0,0.750657
1,0,AandB,0,0.182357
2,0,AandnotB,0,0.659741
3,0,AgB,0,0.759823
4,0,AgnotB,0,0.730055


In [1089]:
## simulated data

# for each id
# # for each trial type
# # # for each block

# object[id][trial_type][block]
# {"0": {"AandB": {"0": .35, }, ... }, ... }

participant_keys = list(range(0, n_participants))
trial_keys = set(list(sim_data.querytype))
block_keys = list(range(0, n_blocks))

# data_dict = {key: {key: {key: [] for key in block_keys} for key in participant_keys} for key in trial_keys}
data_dict = {key: {key:[] for key in trial_keys} for key in participant_keys}
# data_dict["A"]

# for each participant (they each have a theta)

# grab their data

# for each trial type

# for each block

# predict their probability

# observe their responses in all blocks



In [1061]:


# df_test = sim_data

# need to change things around, feed in data separately for each trial type maybe?
def sort_trials(trial_data, observations):
    output = dict()
    for key in set(trial_data):
        output[key] = t.ones(0)
        
    for i in range(0, len(trial_data)):
        output[trial_data[i]] = t.cat( (output[trial_data[i]], t.tensor([observations[i]])) )
        
    return output


def sort_trials2(trial_data, block_data, participant_data, observations):
    output = dict()
    for key in set(trial_data):
        output[key] = t.ones(0)
        
    for i in range(0, len(trial_data)):
        output[trial_data[i]] = t.cat( (output[trial_data[i]], t.tensor([observations[i]])) )
        
    return output

test_data = sort_trials(sim_data.querytype, sim_data.response)
test_info = {"n_Ps":n_participants, "n_blocks":n_blocks}

In [None]:
def bsampler_multi2(data, info):

    n_obs = len(data["A"])
    n_total_obs = len(data.keys())*n_obs
    n_Ps = info["n_Ps"]
    n_blocks = info["n_blocks"]

    # population level parameters/priors
    beta = pyro.sample("beta", dist.HalfCauchy(.25))
    N = pyro.sample("N", dist.HalfCauchy(5))
    k = pyro.sample("k", dist.HalfCauchy(5)) ## noise, all causes

    # need a theta per person/querytype (yes?)
    thetas = [pyro.sample("theta_{}".format(j), dist.Dirichlet(t.ones(4))) for j in range(0,n_Ps)]
    for subj in range(0, info["n_Ps"]):
        theta = theta[subj]
        s_trials = 

        for trial in data.keys():
            # ok so this is broken, result is the wrong size 
    #         f = t.vmap(trial_func[trial]) # maybe this isn't working?
            f = trial_funcs[trial]
            pi = f(theta).repeat(n_blocks) # now this is the wrong dim, should be n_blocks
            p_bs = pi * N / (N + 2.*beta) + beta/(N + 2.*beta) # this is the wrong dim 

            yhat = pyro.sample("yhat_{}".format(trial), dist.Beta(p_bs*k, (1.-p_bs)*k), obs=data[trial]) # ???
    #         yhat = pyro.sample("yhat_{}".format(trial), dist.Beta(p_bs*k, (1.-p_bs)*k)) # ???
    
    return p_bs

In [766]:
def bsampler(data, info):

    n_obs = len(data["A"])
    n_total_obs = len(data.keys())*n_obs
    n_Ps = info["n_Ps"]
    n_blocks = info["n_blocks"]

    # population level parameters/priors
    beta = pyro.sample("beta", dist.HalfCauchy(.25))
    N = pyro.sample("N", dist.HalfCauchy(5))
    k = pyro.sample("k", dist.HalfCauchy(5)) ## noise, all causes

    # need a theta per person/querytype (yes?)    
    theta = pyro.sample("theta", dist.Dirichlet(t.ones(n_Ps, 4)))

    for trial in data.keys():
        # ok so this is broken, result is the wrong size 
        f = t.vmap(trial_func[trial]) # I think I could actually map the whole thing and get rid of this outer for loop too
        pi = f(theta) # this is the right dim
        p_bs = pi.repeat(n_blocks) * N / (N + 2.*beta) + beta/(N + 2.*beta) # this is the wrong dim   
        pyro.sample("yhat_{}".format(trial), dist.Beta(p_bs*k, (1.-p_bs)*k), obs=data[trial]) # ???
#         yhat = pyro.sample("yhat_{}".format(trial), dist.Beta(p_bs*k, (1.-p_bs)*k)) # ???
    
    return p_bs


In [768]:
from pyro.infer.mcmc import NUTS, MCMC

pyro.clear_param_store()

nuts_kernel = NUTS(bsampler, adapt_step_size=True)
py_mcmc = MCMC(nuts_kernel, num_samples=500, warmup_steps=250)

py_mcmc.run(test_data, test_info)
py_mcmc.summary()

Sample: 100%|██████████| 750/750 [06:33,  1.90it/s, step size=1.14e-01, acc. prob=0.951]


                mean       std    median      5.0%     95.0%     n_eff     r_hat
      beta      0.73      0.90      0.46      0.03      1.51    147.61      1.02
         N      4.06      4.76      2.61      0.07      8.60    151.32      1.02
         k      4.54      0.13      4.54      4.33      4.76   1093.23      1.00
theta[0,0]      0.23      0.02      0.23      0.19      0.27    621.16      1.00
theta[0,1]      0.28      0.03      0.28      0.23      0.33    718.37      1.00
theta[0,2]      0.21      0.02      0.21      0.17      0.24    620.99      1.00
theta[0,3]      0.28      0.03      0.28      0.24      0.32    626.43      1.00
theta[1,0]      0.22      0.02      0.22      0.18      0.26   1166.55      1.00
theta[1,1]      0.29      0.03      0.29      0.25      0.34    493.88      1.00
theta[1,2]      0.20      0.02      0.20      0.17      0.24    555.70      1.00
theta[1,3]      0.28      0.03      0.28      0.24      0.32    389.25      1.01
theta[2,0]      0.22      0




In [762]:
latent_probs

tensor([0.1979, 0.0612, 0.4918, 0.2491])