In [262]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import itertools as it
from itertools import product
from collections import Counter, defaultdict, deque
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats
import copy
import time
sns.set_style('white')
sns.set_context('notebook', font_scale=1.3)

from agents import Agent
from evaluation import get_util
from joblib import Parallel, delayed
from dc_util import *
# from model_utils import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import ipyparallel as ipp 
rc = ipp.Client(profile='default', cluster_id='')
ipp.register_joblib_backend()

The following equation gives the likelihood of a click sequence in this model:

$$ l(C) = \sum_{O \in \mathcal{O}(C)} l(O)\mathbb{P}(C|O)$$

where $C$ is a click sequence, and $\mathcal{O}(C)$ is the set of option sequences that $C$ could parse into. To get the likelihood of an option sequence, we use the following equation:

$$ l(O) = \prod_{o \in O} \mathbb{P}_{DC}(o) $$

$$ \mathbb{P}(C|O) = \prod_{o \in O} \mathbb{P}(i_o|o, s) $$ 

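where $\mathbb{P}_{DC}(o)$ is the probability of a given option in our model, and $\mathbb{P}(i_o|o, s)$ is the probability of the observed instantiation $i_o$ of option $o$ (the concrete clicks that realize it) in state $s$. We assume a generative model that picks a random move with probability $p_r$, and otherwise picks one of the available options under the directed cognition model. This gives us the following equation: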

$$\mathbb{P}_{DC}(o) = (1-p_r)\,s(o) + p_r\,\alpha$$

$s(o)$ is the softmax probability of our option among all available directed cognition options, which, for a given temperature parameter $t = \frac{1}{\beta}$ is equal to:

$$ s(o)= \frac{e^{\beta v(o)}}{\sum_{o'} e^{\beta v(o')}} $$

where $v(o)$ is the value of an option under the directed cognition model. $\alpha$ is the probability that the error process would have generated the clicks of option $o$, i.e. the probability of drawing them one at a time, uniformly and without replacement, from the available clicks:

$$\alpha = \prod_{k = 1}^{\text{length}(o)} \frac{1}{n_{ac}-k+1}$$

where $n_{ac}$ is the number of available clicks.

In [3]:
cost = 1
envs = make_envs(cost,10,None,"increasing",branching=[2,2])
env = envs[0]
click_sequence = []

In [4]:
get_all_options(env)

([(0, 1), (0, 2), (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)],
 [0.58208432559063583,
  -0.23117595092121435,
  0.58208432559063583,
  -0.23117595092121435,
  0.58208432559063583,
  -0.23117595092121435,
  0.58208432559063583,
  -0.23117595092121435],
 [[1, 2], [1, 3], [4, 5], [4, 6]],
 [[3.93168877550093, 15.72675510200372],
  [3.93168877550093, 15.72675510200372],
  [3.93168877550093, 15.72675510200372],
  [3.93168877550093, 15.72675510200372]],
 [0, 0, 0, 0],
 [2, 2, 2, 2])

In [5]:
def option_util(x,sigma):
    """Evaluate ``sigma*pdf(x/sigma) - |x|*cdf(-|x|/sigma)`` for the standard normal.

    Args:
        x: scalar or array; passed in by ``get_all_options`` as a path's
            observed value minus the best observed path value.
        sigma: scale used to standardize ``x``; must be non-zero.

    Returns:
        The value of the expression above, with the same shape as ``x``.
    """
    standard_normal = scipy.stats.norm
    magnitude = np.abs(x)
    density_term = sigma*standard_normal.pdf(x/sigma)
    tail_term = magnitude*standard_normal.cdf(-magnitude/sigma)
    return density_term - tail_term

In [6]:
def get_all_options(env):
    """Enumerate the options available in the current state of ``env``.

    An option is a tuple ``(path_index, n_clicks)`` meaning "observe
    ``n_clicks`` of the unobserved nodes on that path". Two special options are
    appended at the end: ``(-1, 1)`` (any single available action) and
    ``(-99, 1)`` (the terminating action).

    Args:
        env: environment exposing ``paths``, ``_state``, ``cost``,
            ``actions(state)`` and ``term_action``.

    Returns:
        A tuple ``(options, option_insts, option_utils, n_available_clicks)``:
        the list of options, a dict mapping each option to its list of
        instantiations, a numpy array of option utilities aligned with
        ``options``, and the number of actions returned by
        ``env.actions(env._state)``.
    """
    def is_unobserved(node):
        # A node counts as unobserved when its state entry exposes ``sample``;
        # otherwise the entry is treated as an already observed value.
        return hasattr(env._state[node], 'sample')

    path_obs = []    # summed value of the observed nodes on each path
    path_nodes = []  # unobserved nodes on each path
    path_stds = []   # ``.var()`` of each unobserved node (variances, despite the name)
    options = []

    for path_idx, path in enumerate(env.paths):
        hidden = [node for node in path if is_unobserved(node)]
        path_nodes.append(hidden)
        path_stds.append([env._state[node].var() for node in hidden])
        path_obs.append(sum(env._state[node] for node in path if not is_unobserved(node)))
        # One option per possible number of clicks on this path.
        options.extend((path_idx, n_clicks) for n_clicks in range(1, len(hidden) + 1))

    best_observed = np.max(path_obs)

    option_utils = []
    option_insts = dict()
    for option in options:
        path_idx, n_clicks = option
        # Scale: square root of the sum of the n_clicks largest variances on the path.
        largest_variances = np.sort(path_stds[path_idx])[::-1][:n_clicks]
        sigma = np.sqrt(np.sum(largest_variances))
        # NOTE(review): assumes env.cost already carries the sign that makes this a penalty - confirm.
        option_utils.append(option_util(path_obs[path_idx]-best_observed, sigma) + n_clicks*env.cost)
        option_insts[option] = all_option_insts(path_nodes[path_idx], path_stds[path_idx], n_clicks)

    # Single-click option: utility -inf, instantiated by every available action.
    sc_opt = (-1,1)
    options.append(sc_opt)
    option_utils.append(-np.inf)
    option_insts[sc_opt] = [[action] for action in env.actions(env._state)]
    n_available_clicks = len(option_insts[sc_opt])

    # Terminating option: utility 0, instantiated only by the terminating action.
    end_opt = (-99,1)
    options.append(end_opt)
    option_utils.append(0)
    option_insts[end_opt] = [[env.term_action]]

    return options, option_insts, np.array(option_utils), n_available_clicks

In [20]:
def wrap_po(env,click_sequence,t=1,p_rand=0,branching=[3,1,2]):
    """Parse a click sequence into every consistent option sequence and score each parse.

    ``env.term_action`` is appended to ``click_sequence`` before parsing. At
    every parse point a fresh environment is built with ``make_env`` from
    ``env.ground_truth`` and the clicks consumed so far are replayed in it.

    The probability of an option is
    ``(1-p_rand)*softmax(option_utils/t)[option] + p_rand*alpha`` where
    ``alpha`` is 1 for the single-click option ``(-1, 1)`` and 0 otherwise;
    it is divided by the number of instantiations of the option.

    Args:
        env: environment providing ``ground_truth`` and ``term_action``.
        click_sequence: list of clicks, without the terminating action.
        t: softmax temperature.
        p_rand: weight of the random-click component.
        branching: forwarded to ``make_env``; it is not mutated here.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether at least one complete
        parse exists, the list of option sequences, and the likelihood of each.
    """
    memo = dict()

    def parse_options_clean(init_state,pre_acts,click_sequence,t=1,p_err=0.001):
        # Nothing left to explain: exactly one (empty) parse with likelihood 1.
        if click_sequence == []:
            return True, [[]], [1]
        key = (tuple(pre_acts),tuple(click_sequence),t,p_err)
        if key in memo:
            return memo[key]

        # Rebuild the state reached after the clicks that were already parsed.
        envc = make_env(ground_truth=init_state, branching=branching)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False
        options, option_insts, option_utils,n_available_clicks = get_all_options(envc)

        # Softmax over option utilities, computed once per state. Subtracting
        # the maximum first keeps np.exp from overflowing (inf/inf = nan) when
        # t is small; the resulting probabilities are mathematically unchanged.
        shifted = (np.asarray(option_utils, dtype=float) - np.max(option_utils))/t
        weights = np.exp(shifted)
        softmax = weights/np.sum(weights)

        longest_prefix = min(len(envc.paths[0]),len(click_sequence))
        for i,j in product(range(1,longest_prefix+1),range(len(options))):
            option = options[j]
            insts = option_insts[option]
            n_insts = len(insts)
            # Only the single-click option can be produced by the random process here.
            alpha = 1 if option == (-1,1) else 0
            l_opt_seq = (1-p_rand)*softmax[j] + p_rand*alpha
            for inst in insts:
                if np.array_equal(click_sequence[:i],inst):
                    will_done, remaining, rem_likelihoods = (parse_options_clean
                                  (init_state,pre_acts+click_sequence[:i],click_sequence[i:],t,p_rand))
                    done = done or will_done
                    if done:
                        for k in range(len(remaining)):
                            option_seqs.append([option]+remaining[k])
                            likelihoods.append(l_opt_seq*rem_likelihoods[k]/n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods
    return parse_options_clean(env.ground_truth,[],click_sequence+[env.term_action],t,p_rand)

In [21]:
def wrap_po1(env,click_sequence,t=1,p_rand=0,branching=[3,1,2]):
    """Parse a click sequence into option sequences, with a per-click random component.

    ``env.term_action`` is appended to ``click_sequence`` before parsing. At
    every parse point a fresh environment is built with ``make_env`` from
    ``env.ground_truth`` and the clicks consumed so far are replayed in it.

    The probability of an option is
    ``(1-p_rand)*softmax(option_utils/t)[option] + p_rand*alpha`` where
    ``alpha = prod_{k=0}^{len-1} 1/(n_available_clicks-k)`` and ``len`` is the
    option's click count (``option[1]``); it is divided by the number of
    instantiations of the option.

    Args:
        env: environment providing ``ground_truth`` and ``term_action``.
        click_sequence: list of clicks, without the terminating action.
        t: softmax temperature.
        p_rand: weight of the random-click component.
        branching: forwarded to ``make_env``; it is not mutated here.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether at least one complete
        parse exists, the list of option sequences, and the likelihood of each.
    """
    memo = dict()

    def parse_options_clean(init_state,pre_acts,click_sequence,t=1,p_err=0.001):
        # Nothing left to explain: exactly one (empty) parse with likelihood 1.
        if click_sequence == []:
            return True, [[]], [1]
        key = (tuple(pre_acts),tuple(click_sequence),t,p_err)
        if key in memo:
            return memo[key]

        # Rebuild the state reached after the clicks that were already parsed.
        envc = make_env(ground_truth=init_state, branching=branching)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False
        options, option_insts, option_utils,n_available_clicks = get_all_options(envc)

        # Softmax over option utilities, computed once per state. Subtracting
        # the maximum first keeps np.exp from overflowing (inf/inf = nan) when
        # t is small; the resulting probabilities are mathematically unchanged.
        shifted = (np.asarray(option_utils, dtype=float) - np.max(option_utils))/t
        weights = np.exp(shifted)
        softmax = weights/np.sum(weights)

        longest_prefix = min(len(envc.paths[0]),len(click_sequence))
        for i,j in product(range(1,longest_prefix+1),range(len(options))):
            option = options[j]
            insts = option_insts[option]
            n_insts = len(insts)
            # Probability of drawing the option's clicks one by one without replacement.
            alpha = np.prod([1/(n_available_clicks-drawn) for drawn in range(option[1])])
            l_opt_seq = (1-p_rand)*softmax[j] + p_rand*alpha
            for inst in insts:
                if np.array_equal(click_sequence[:i],inst):
                    will_done, remaining, rem_likelihoods = (parse_options_clean
                                  (init_state,pre_acts+click_sequence[:i],click_sequence[i:],t,p_rand))
                    done = done or will_done
                    if done:
                        for k in range(len(remaining)):
                            option_seqs.append([option]+remaining[k])
                            likelihoods.append(l_opt_seq*rem_likelihoods[k]/n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods
    return parse_options_clean(env.ground_truth,[],click_sequence+[env.term_action],t,p_rand)

In [22]:
env, trial = participants[2][6]
a,b,c = wrap_po(env,trial,t=2,p_rand=0.01,branching=[3,1,2])
print(np.log(sum(c)))

-22.4644240463


In [23]:
a,b,c = wrap_po1(env,trial,t=2,p_rand=0.5,branching=[3,1,2])
print(np.log(sum(c)))

-21.24977067


In [24]:
a,b,c = wrap_po(env,trial,t=2,p_rand=1,branching=[3,1,2])
print(np.log(sum(c)))

-21.8590166726


In [25]:
a,b,c = wrap_po1(env,trial,t=2,p_rand=1,branching=[3,1,2])
print(np.log(sum(c)))

-21.084841077


In [26]:
options, option_insts, option_utils,n_available_clicks = get_all_options(env)
trial = option_insts[options[np.argmax(option_utils)]][0]
print(trial)

[1, 2, 3]


In [27]:
a,b,c = wrap_po(env,trial,t=2,p_rand=0.5,branching=[3,1,2])
print(np.log(sum(c)))

-7.672211534


In [28]:
a,b,c = wrap_po1(env,trial,t=2,p_rand=0.5,branching=[3,1,2])
print(np.log(sum(c)))

-7.579381981


In [54]:
def dc_log_likelihood(env, trial, temp, p_error):
    """Log of the summed parse likelihoods that ``wrap_po`` returns for one trial.

    Args:
        env: environment of the trial.
        trial: click sequence of the trial.
        temp: passed to ``wrap_po`` as ``t``.
        p_error: passed to ``wrap_po`` as ``p_rand``.
    """
    parse_result = wrap_po(env,trial,t=temp,p_rand=p_error)
    parse_likelihoods = parse_result[2]
    total_likelihood = np.sum(parse_likelihoods)
    return np.log(total_likelihood)

In [55]:
def dc_log_likelihood1(env, trial, temp, p_error):
    """Log of the summed parse likelihoods that ``wrap_po1`` returns for one trial.

    Args:
        env: environment of the trial.
        trial: click sequence of the trial.
        temp: passed to ``wrap_po1`` as ``t``.
        p_error: passed to ``wrap_po1`` as ``p_rand``.
    """
    parse_result = wrap_po1(env,trial,t=temp,p_rand=p_error)
    parse_likelihoods = parse_result[2]
    total_likelihood = np.sum(parse_likelihoods)
    return np.log(total_likelihood)

In [31]:
p = 1
env,trial = participants[2][6]
print(dc_log_likelihood(env, trial, 1, 1))
print(dc_log_likelihood1(env, trial, 1, 1))

-21.8590166726
-21.084841077


# Modeling

## Experiment 1

In [96]:
def make_env(mu=0, sigma=5, quantization=4, cost=1.00, seed=None, branching=[3,1,2], **kwargs):
    """Build a symmetric ``MouselabEnv`` with a four-valued reward at every non-root depth.

    Args:
        mu: centre of the reward values.
        sigma: spacing of the reward values; they are ``mu + sigma*[-2,-1,1,2]``.
        quantization: accepted but not used by this function.
        cost: forwarded to ``MouselabEnv.new_symmetric``.
        seed: if not None, seeds numpy's global random state first.
        branching: forwarded to ``MouselabEnv.new_symmetric``.
        **kwargs: forwarded to ``MouselabEnv.new_symmetric``.
    """
    if seed is not None:
        np.random.seed(seed)

    def reward(depth):
        # Depth 0 gets a constant reward of 0.
        if not depth > 0:
            return 0.
        offsets = np.array([-2,-1,1,2])
        return Categorical(mu + sigma * offsets)

    return MouselabEnv.new_symmetric(branching, reward, cost=cost, **kwargs)

env = make_env(ground_truth=False)

In [97]:
from analysis_utils import *
VERSION = 'c1.1'
exp_data = get_data(VERSION, '../experiment/data')

pdf = exp_data['participants']
pdf = pdf.loc[pdf.completed].copy()
print(f'{len(pdf)} participants')
complete = list(pdf.index)

def extract(q):
    """Return the click targets stored in ``q['click']['state']['target']`` as a list of ints."""
    targets = q['click']['state']['target']
    return [int(target) for target in targets]

mdf = exp_data['mouselab-mdp'].query('pid == @complete').copy()
mdf['clicks'] = mdf.queries.apply(extract)
mdf['n_clicks'] = mdf.clicks.apply(len)
mdf['thinking'] = mdf['rt'].apply(get(0, default=0))

tdf = mdf.query('block == "test"').copy()
tdf.trial_index -= tdf.trial_index.min()
tdf.trial_index = tdf.trial_index.astype(int)
tdf.trial_id = tdf.trial_id.astype(int)

# pdf['total_time'] = exp_data['survey'].time_elapsed / 60000

pdf['n_clicks'] = tdf.groupby('pid').n_clicks.mean()
pdf['score'] = tdf.groupby('pid').score.mean()
pdf['thinking'] = mdf.groupby('pid').thinking.mean()

60 participants


In [98]:
import json
def excluded_pids():
    """Flag participants to exclude from the analysis.

    A participant is flagged when more than one of their multiple-choice
    responses differs from the expected answers, or when they made no clicks
    at all in the ``train_inspector`` block.

    Reads the globals ``exp_data``, ``complete`` and ``mdf``.

    Returns:
        Boolean Series, True for excluded participants.
    """
    quiz = exp_data['survey-multi-choice'].query('pid == @complete').copy()
    answers = pd.DataFrame(list(quiz.responses), index=quiz.index)
    answer_key = pd.Series(['-$10 to $10', '$1', '1 cent for every $1 you make in the game'])
    n_wrong = (answers != answer_key).sum(axis=1)
    inspector_clicks = mdf.query('block == "train_inspector"').groupby('pid').n_clicks.sum()
    return (n_wrong > 1) | (inspector_clicks == 0)

exclude = excluded_pids()
tdf['exclude'] = list(exclude.loc[tdf.pid])
tdf = tdf.query('~exclude').copy().drop('exclude', axis=1)
print(f'excluding {exclude.sum()} out of {len(exclude)} partipicants')

excluding 9 out of 60 partipicants


In [99]:
def get_env(state_rewards,branching=[3,1,2]):
    """Build an environment whose ground truth is ``state_rewards`` with entry 0 set to 0.

    Note that ``state_rewards`` is modified in place.
    """
    state_rewards[0] = 0
    env_kwargs = dict(ground_truth=state_rewards, branching=branching)
    return make_env(**env_kwargs)
tdf['env'] = tdf.state_rewards.apply(get_env)

In [100]:
tdf.head()

Unnamed: 0,action_times,actions,block,path,queries,rewards,rt,score,simulation_mode,state_rewards,time_elapsed,trial_index,trial_time,trial_id,trial_type,pid,clicks,n_clicks,thinking,env
90,"[11467, 13434, 16274]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[5, -5, -5]","[11465, 1450, 2324]",-6.0,"[None, None, None]","[0, 5, -10, -5, 10, 10, 5, -10, 5, 5, -5, -5, 5]",728336,0,20130.0,8034619116489218048,mouselab-mdp,1,[9],1,11465,<MouselabEnv instance>
91,"[7272, 9200, 10360]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseover': {'state': {'target': [], 'time':...","[10, -5, 10]","[7270, 1410, 654]",13.0,"[None, None, None]","[0, -5, 10, -5, 10, 5, 5, -10, 5, 10, -5, 10, 10]",741664,1,12248.0,38024071126860576,mouselab-mdp,1,"[5, 9]",2,7270,<MouselabEnv instance>
92,"[8883, 10283, 11587]","[right, right, up]",test,"[0, 5, 6, 7]","{'mouseover': {'state': {'target': [], 'time':...","[-10, -5, 5]","[8881, 894, 798]",-12.0,"[None, None, None]","[0, -10, 5, 10, 10, -10, -5, 5, 10, -10, -10, ...",755720,2,12971.0,8607347987074355200,mouselab-mdp,1,"[1, 9]",2,8881,<MouselabEnv instance>
93,"[7585, 9313, 10649]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[5, 10, -10]","[7583, 1215, 818]",3.0,"[None, None, None]","[0, 5, -5, 10, -5, -5, 5, 10, -10, 5, 10, -10, 5]",769472,3,12665.0,4859092002678591488,mouselab-mdp,1,"[1, 5]",2,7583,<MouselabEnv instance>
94,"[3343, 4415, 5295]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseover': {'state': {'target': [], 'time':...","[-5, -10, 10]","[3341, 555, 369]",-5.0,"[None, None, None]","[0, -5, 5, 10, -10, 10, -10, 10, 5, -5, -10, 5...",778857,4,8295.0,4754602433487654912,mouselab-mdp,1,[],0,3341,<MouselabEnv instance>


In [101]:
# Group the test trials by participant: pid -> list of (env, clicks) pairs,
# in the order the rows appear in tdf.
participants = dict()
for i, row in tdf.iterrows():
    participants.setdefault(row['pid'], []).append((row['env'],row['clicks']))

In [102]:
# Baseline: total log-likelihood of every click sequence under a policy that
# picks uniformly among the actions available at each step.
# Log-probabilities are summed directly: multiplying the raw probabilities of
# all of a participant's trials first can underflow to 0 and yield log(0) = -inf.
sm = 0
for p in participants.keys():
    data = participants[p]
    for episode in data:
        # Step through a copy so the env stored in `participants` is not advanced.
        env, trial = copy.deepcopy(episode)
        trial = trial+[env.term_action]
        for a in trial:
            sm -= np.log(len(list(env.actions(env._state))))
            env.step(a)
sm

-17289.756439310626

In [103]:
temp, p_error = 1,1
p=1
data = participants[p]
t = time.process_time()
a = sum(dc_log_likelihood(env, trial, temp, p_error) for env, trial in data)
elapsed =time.process_time() - t 
print(elapsed)
print(a)

0.8267271979999862
-213.277395034


In [104]:
temp, p_error = 1,1
p=1
data = participants[p]
t = time.process_time()
a = sum(dc_log_likelihood1(env, trial, temp, p_error) for env, trial in data)
elapsed =time.process_time() - t 
print(elapsed)
print(a)

0.8449958200000083
-226.996116746


In [105]:
# temp, p_error = 1e-2,.01
# trialses = []
# for p in participants.keys():
#     data = participants[p]
#     trials =[]
#     t = t = time.process_time()
#     for i in range(len(data)):
#         env, trial = data[i]
#         trials.append( dc_log_likelihood(env, trial, temp, p_error))
#     elapsed =time.process_time() - t 
#     trialses.append(trials)
#     print(p,elapsed)
# print(np.sum(trials))

In [106]:
def wrap_sum_ll(p, temp, p_error):
    """Sum ``dc_log_likelihood`` over all trials of participant ``p`` (key into ``participants``)."""
    total = 0
    for env, trial in participants[p]:
        total += dc_log_likelihood(env, trial, temp, p_error)
    return total

In [107]:
def wrap_sum_ll1(p, temp, p_error):
    """Sum ``dc_log_likelihood1`` over all trials of participant ``p`` (key into ``participants``)."""
    total = 0
    for env, trial in participants[p]:
        total += dc_log_likelihood1(env, trial, temp, p_error)
    return total

In [108]:
mles = Parallel(n_jobs=60)(delayed(wrap_sum_ll)(p, 5, 1)
                           for p in participants.keys())

In [109]:
mles1 = Parallel(n_jobs=60)(delayed(wrap_sum_ll1)(p, 5, 1)
                           for p in participants.keys())

In [110]:
np.sum(mles)

-17289.756439310622

In [111]:
np.sum(mles1)

-17793.084867921141

In [93]:
def mle(data):
    """Fit temperature and error probability to one participant's trials.

    Minimizes the negative summed ``dc_log_likelihood`` over ``data`` with
    ``scipy.optimize.minimize``, starting from ``(1, .01)`` and constrained to
    the bounds below.

    Args:
        data: iterable of ``(env, trial)`` pairs.

    Returns:
        dict with keys ``'temp'``, ``'p_error'`` (the fitted parameters) and
        ``'logp'`` (the log-likelihood at the optimum).
    """
    # Imported here so the function does not depend on scipy.optimize having
    # been loaded as a side effect of another import.
    import scipy.optimize

    # You can adjust the temperature bounds if you think the MLE
    # is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-2, 1e2),  # temp
        (0, 0.25)  # p_error
    ]
    def loss(x):
        temp, p_error = x
        return -sum(dc_log_likelihood(env, trial, temp, p_error)
                    for env, trial in data)
    res = scipy.optimize.minimize(loss, (1,.01),bounds = bounds,
                                 options={'maxiter':100,'disp':True})
    temp, p_error = res.x
    return {'temp': temp, 'p_error': p_error, 'logp': -res.fun}

In [94]:
def mle_wrap(p):
    """Print ``p``, fit ``mle`` to ``participants[p]`` and tag the result with the participant id."""
    print(p)
    fit = mle(participants[p])
    return dict(fit, participant=p)

In [118]:
# # mles = dict()
# # for p in participants.keys():
# #     print(p)
# #     data = participants[p]
# #     mles[p] = mle(data)
mles = Parallel(n_jobs=60)(delayed(mle_wrap)(p)
                           for p in participants.keys())

2
3
1
4
6
7
10
11
12
8
14
18
16
15
13
19
25
17
22
24
27
28
26
20
30
29
33
34
36
45
46
49
40
50
39
52
31
37
42
53
48
35
55
61
51
54
47
57
60
56
58


In [119]:
np.save('data/dc_exp1_mles_p2',mles)

In [121]:
mles = np.load('data/dc_exp1_mles_p2.npy')

In [122]:
logps = []
for i in range(len(mles)):
    logps.append(mles[i]['logp'])
np.sum(logps)

-16424.973979672282

In [123]:
temps = [val['temp'] for val in mles]

In [124]:
min(temps)

0.23025944965094047

## Global

In [45]:
participants

{1: [(<mouselab.MouselabEnv at 0x2ae700b9bf28>, [9]),
  (<mouselab.MouselabEnv at 0x2ae700b9b9e8>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2ae700b9b5c0>, [1, 9]),
  (<mouselab.MouselabEnv at 0x2ae70095e630>, [1, 5]),
  (<mouselab.MouselabEnv at 0x2ae70219a278>, []),
  (<mouselab.MouselabEnv at 0x2ae70219a588>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2ae70219aa58>, [1, 9]),
  (<mouselab.MouselabEnv at 0x2ae70219aef0>, [9, 5]),
  (<mouselab.MouselabEnv at 0x2ae7021c83c8>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2ae7021c8860>, [9, 1]),
  (<mouselab.MouselabEnv at 0x2ae7021c8cf8>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2ae7021d31d0>, [9, 5]),
  (<mouselab.MouselabEnv at 0x2ae7021d3668>, [5]),
  (<mouselab.MouselabEnv at 0x2ae7021d3b00>, [9, 5]),
  (<mouselab.MouselabEnv at 0x2ae7021d3f98>, [5]),
  (<mouselab.MouselabEnv at 0x2ae7021dd470>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2ae7021dd908>, [5]),
  (<mouselab.MouselabEnv at 0x2ae7021ddda0>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2ae7021e627

In [46]:
DATA = []
for p in participants.keys():
    DATA += participants[p]

In [47]:
len(DATA)

1530

In [48]:
DATA[:10]

[(<mouselab.MouselabEnv at 0x2ae700b9bf28>, [9]),
 (<mouselab.MouselabEnv at 0x2ae700b9b9e8>, [5, 9]),
 (<mouselab.MouselabEnv at 0x2ae700b9b5c0>, [1, 9]),
 (<mouselab.MouselabEnv at 0x2ae70095e630>, [1, 5]),
 (<mouselab.MouselabEnv at 0x2ae70219a278>, []),
 (<mouselab.MouselabEnv at 0x2ae70219a588>, [5, 9]),
 (<mouselab.MouselabEnv at 0x2ae70219aa58>, [1, 9]),
 (<mouselab.MouselabEnv at 0x2ae70219aef0>, [9, 5]),
 (<mouselab.MouselabEnv at 0x2ae7021c83c8>, [5, 9]),
 (<mouselab.MouselabEnv at 0x2ae7021c8860>, [9, 1])]

In [56]:
def dc_idx_log_likelihood(idx, temp, p_error):
    """Log-likelihood of the trial at position ``idx`` of the global ``DATA`` list.

    Args:
        idx: index into ``DATA``, whose entries are ``(env, trial)`` pairs.
        temp: passed to ``wrap_po`` as ``t``.
        p_error: passed to ``wrap_po`` as ``p_rand``.
    """
    env, trial = DATA[idx]
    parse_result = wrap_po(env,trial,t=temp,p_rand=p_error)
    parse_likelihoods = parse_result[2]
    return np.log(np.sum(parse_likelihoods))

In [57]:
t = time.process_time()
dc_idx_log_likelihood(14*30+6, temp, p_error)
elapsed =time.process_time() - t 
print(elapsed)

0.40658631100000164


In [64]:
t = time.time()
mles = Parallel(n_jobs=200)(delayed(dc_idx_log_likelihood)(data_idx, 5, 1)
                           for data_idx in range(len(DATA)))
elapsed = time.time() - t 
print(elapsed)

25.954020500183105


In [65]:
np.sum(mles)

-17289.756439310622

In [62]:
t = time.time()
mles = Parallel(n_jobs=60)(delayed(wrap_sum_ll)(p, 5, 1)
                           for p in participants.keys())
elapsed = time.time() - t 
print(elapsed)

34.84699845314026


In [63]:
np.sum(mles)

-17289.756439310622

In [69]:
def mle_global():
    """Fit one temperature and error probability to all trials in ``DATA``.

    The loss is the negative sum of ``dc_idx_log_likelihood`` over every index
    of ``DATA``, evaluated in parallel. Whenever a loss evaluation beats the
    value stored in ``data/dc_exp1_result.npy``, ``[temp, p_error, loss]`` is
    written to that file.

    Returns:
        dict with keys ``'temp'``, ``'p_error'`` (the fitted parameters) and
        ``'logp'`` (the log-likelihood at the optimum).
    """
    # Imported here so the function does not depend on scipy.optimize having
    # been loaded as a side effect of another import.
    import scipy.optimize

    # You can adjust the temperature bounds if you think the MLE
    # is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-2, 1e2),  # temp
        (0, 0.25)  # p_error
    ]
    with Parallel(200) as parallel:
        def loss(x):
            temp, p_error = x
            jobs = (delayed(dc_idx_log_likelihood)(data_idx, temp, p_error)
                    for data_idx in range(len(DATA)))
            tot = -sum(parallel(jobs))
            # On a first run there is no checkpoint yet; treat that as
            # "nothing to beat" instead of failing inside the optimizer.
            try:
                best_so_far = np.load('data/dc_exp1_result.npy')[2]
            except FileNotFoundError:
                best_so_far = np.inf
            if best_so_far > tot:
                np.save('data/dc_exp1_result',[temp,p_error,tot])
            return tot
        # The optimizer must run inside the `with` block: that is what lets
        # every loss evaluation reuse the same worker pool.
        res = scipy.optimize.minimize(loss, (1,.01),bounds = bounds,
                                      options={'maxiter':100,'disp':True})
    temp, p_error = res.x
    return {'temp': temp, 'p_error': p_error, 'logp': -res.fun}

In [70]:
results = mle_global()

In [71]:
results

{'logp': -17003.668842039369, 'p_error': 0.25, 'temp': 1.4061493262821758}

## Experiment 2

In [155]:
def wrap_po(env,click_sequence,t=1,p_rand=0,branching=[3,1,2],scaling_factors=[1, 1, 1]):
    """Parse a click sequence into every consistent option sequence and score each parse.

    ``env.term_action`` is appended to ``click_sequence`` before parsing. At
    every parse point a fresh environment is built with ``make_env`` from
    ``env.ground_truth`` (using ``branching`` and ``scaling_factors``) and the
    clicks consumed so far are replayed in it.

    The probability of an option is
    ``(1-p_rand)*softmax(option_utils/t)[option] + p_rand*alpha`` where
    ``alpha`` is 1 for the single-click option ``(-1, 1)`` and 0 otherwise;
    it is divided by the number of instantiations of the option.

    Args:
        env: environment providing ``ground_truth`` and ``term_action``.
        click_sequence: list of clicks, without the terminating action.
        t: softmax temperature.
        p_rand: weight of the random-click component.
        branching: forwarded to ``make_env``; it is not mutated here.
        scaling_factors: forwarded to ``make_env``; it is not mutated here.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether at least one complete
        parse exists, the list of option sequences, and the likelihood of each.
    """
    memo = dict()

    def parse_options_clean(init_state,pre_acts,click_sequence,t=1,p_err=0.001):
        # Nothing left to explain: exactly one (empty) parse with likelihood 1.
        if click_sequence == []:
            return True, [[]], [1]
        key = (tuple(pre_acts),tuple(click_sequence),t,p_err)
        if key in memo:
            return memo[key]

        # Rebuild the state reached after the clicks that were already parsed.
        envc = make_env(ground_truth=init_state, branching=branching,scaling_factors=scaling_factors)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False
        options, option_insts, option_utils,n_available_clicks = get_all_options(envc)

        # Softmax over option utilities, computed once per state. Subtracting
        # the maximum first keeps np.exp from overflowing (inf/inf = nan) when
        # t is small; the resulting probabilities are mathematically unchanged.
        shifted = (np.asarray(option_utils, dtype=float) - np.max(option_utils))/t
        weights = np.exp(shifted)
        softmax = weights/np.sum(weights)

        longest_prefix = min(len(envc.paths[0]),len(click_sequence))
        for i,j in product(range(1,longest_prefix+1),range(len(options))):
            option = options[j]
            insts = option_insts[option]
            n_insts = len(insts)
            # Only the single-click option can be produced by the random process here.
            alpha = 1 if option == (-1,1) else 0
            l_opt_seq = (1-p_rand)*softmax[j] + p_rand*alpha
            for inst in insts:
                if np.array_equal(click_sequence[:i],inst):
                    will_done, remaining, rem_likelihoods = (parse_options_clean
                                  (init_state,pre_acts+click_sequence[:i],click_sequence[i:],t,p_rand))
                    done = done or will_done
                    if done:
                        for k in range(len(remaining)):
                            option_seqs.append([option]+remaining[k])
                            likelihoods.append(l_opt_seq*rem_likelihoods[k]/n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods
    return parse_options_clean(env.ground_truth,[],click_sequence+[env.term_action],t,p_rand)

In [156]:
from mouselab import MouselabEnv
from distributions import Categorical, Normal

def make_env(mu=0, sigma=4, branching=[3,1,2], cost=1.00, scaling_factors=[1, 1, 1], seed=None, **kwargs):
    """Build a symmetric ``MouselabEnv`` whose reward spread depends on depth.

    Args:
        mu: centre of the reward values.
        sigma: base spacing of the reward values.
        branching: forwarded to ``MouselabEnv.new_symmetric``.
        cost: forwarded to ``MouselabEnv.new_symmetric``.
        scaling_factors: multiplier applied to the spacing at each depth;
            depth ``d`` uses ``scaling_factors[d-1]``.
        seed: if not None, seeds numpy's global random state first.
        **kwargs: forwarded to ``MouselabEnv.new_symmetric``.
    """
    if seed is not None:
        np.random.seed(seed)

    def reward(depth):
        # Depth 0 gets a constant reward of 0.
        if not depth > 0:
            return 0.
        offsets = np.array([-2,-1,1,2])
        depth_values = mu + sigma * offsets * scaling_factors[depth-1]
        return Categorical(depth_values).apply(round)

    return MouselabEnv.new_symmetric(branching, reward, cost=cost, **kwargs)

In [157]:
from analysis_utils import *
VERSION = 'c2.1'
exp_data = get_data(VERSION, '../experiment/data')

pdf = exp_data['participants']
pdf = pdf.loc[pdf.completed].set_index('pid', drop=False)
if 'variance' in pdf:
    pdf.variance = pdf.variance.replace(2442, 'decreasing').replace(2424, 'increasing')
else:
    pdf['variance'] = 'constant'

print(f'{len(pdf)} participants')
complete = list(pdf.index)

def extract(q):
    """Return the click targets stored in ``q['click']['state']['target']`` as a list of ints."""
    targets = q['click']['state']['target']
    return [int(target) for target in targets]

mdf = exp_data['mouselab-mdp'].set_index('pid', drop=False)
mdf = mdf.loc[complete]

mdf['clicks'] = mdf.queries.apply(extract)
mdf['n_clicks'] = mdf.clicks.apply(len)
mdf['thinking'] = mdf['rt'].apply(get(0, default=0))
mdf['variance'] = pdf['variance']

tdf = mdf.query('block == "test"').copy()
tdf.trial_index -= tdf.trial_index.min()
tdf.trial_index = tdf.trial_index.astype(int)
tdf.trial_id = tdf.trial_id.astype(int)

pdf['total_time'] = exp_data['survey-text'].time_elapsed / 60000
pdf['n_clicks'] = tdf.groupby('pid').n_clicks.mean()
pdf['score'] = tdf.groupby('pid').score.mean()
pdf['thinking'] = mdf.groupby('pid').thinking.mean()

69 participants


In [158]:
import json
def excluded_pids():
    """Flag participants to exclude from the analysis.

    A participant is flagged when more than one of their multiple-choice
    responses differs from the reference answers of their variance condition,
    or when they made no clicks at all in the ``train_inspector`` block.

    Reads the globals ``exp_data``, ``complete``, ``pdf`` and ``mdf``.

    Returns:
        Boolean Series, True for excluded participants.
    """
    sdf = exp_data['survey-multi-choice'].query('pid == @complete').set_index('pid')
    # One row per participant, one column per quiz question.
    responses = pd.DataFrame(list(sdf.responses), index=sdf.index)
    # Group the responses by each participant's variance condition.
    grp = responses.groupby(lambda pid: pdf.variance[pid])
    # The reference answer for a condition is the most frequent response to each question.
    correct = grp.apply(lambda x: x.mode().iloc[0])
    # Expand the reference answers to one row per participant, then compare element-wise.
    errors = correct.loc[pdf.variance].set_index(pdf.index) != responses
    fail_quiz = errors.sum(1) > 1

    no_click = mdf.query('block == "train_inspector"').groupby('pid').n_clicks.sum() == 0
    return fail_quiz | no_click

excluded = excluded_pids()
tdf = tdf.loc[~excluded]
print(f'excluding {excluded.sum()} out of {len(excluded)} partipicants')

excluding 16 out of 69 partipicants


In [159]:
SCALING = {
    'increasing': [1/2, 1, 6],
    'decreasing': [6, 1, 1/2]
}

In [160]:
def get_env(row):
    """Build the environment for one trial row.

    Uses the row's ``state_rewards`` (entry 0 is set to 0 in place) as ground
    truth and the scaling factors that ``SCALING`` holds for ``row.variance``.
    """
    rewards = row.state_rewards
    rewards[0] = 0
    scaling = SCALING[row.variance]
    return make_env(scaling_factors=scaling, ground_truth=rewards)
tdf['env'] = tdf.apply(get_env, axis=1)

In [161]:
tdf.head()

Unnamed: 0_level_0,action_times,actions,block,path,queries,rewards,rt,score,simulation_mode,state_rewards,...,trial_index,trial_time,trial_id,trial_type,pid,clicks,n_clicks,thinking,variance,env
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,"[14748, 15862, 17187]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[-48, 8, -2]","[14746, 590, 809]",-46.0,"[None, None, None]","[0, -48, 4, -2, 4, -48, -8, -2, 4, -48, 8, -2,...",...,0,18927.0,2045193736830000128,mouselab-mdp,0,"[5, 9, 6, 10]",4,14746,decreasing,<MouselabEnv instance>
0,"[3527, 4321, 5260]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[48, 4, 2]","[3525, 268, 409]",52.0,"[None, None, None]","[0, 24, 4, -2, 4, 24, -8, -2, 4, 48, 4, 2, 2]",...,1,8684.0,-8875376664199999488,mouselab-mdp,0,"[5, 9]",2,3525,decreasing,<MouselabEnv instance>
0,"[4544, 5403, 6581]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseover': {'state': {'target': [], 'time':...","[-24, -8, -4]","[4543, 328, 652]",-38.0,"[None, None, None]","[0, 48, -8, -2, -2, -48, 8, -2, 2, -24, -8, -4...",...,2,9475.0,-7461013785340000256,mouselab-mdp,0,"[5, 9]",2,4543,decreasing,<MouselabEnv instance>
0,"[2789, 3497, 4189]","[right, right, up]",test,"[0, 5, 6, 7]","{'mouseover': {'state': {'target': [], 'time':...","[48, -8, 4]","[2788, 185, 172]",42.0,"[None, None, None]","[0, 24, -4, -2, -4, 48, -8, 4, -4, 24, -8, 4, -4]",...,3,9571.0,8449844250360000512,mouselab-mdp,0,"[5, 9]",2,2788,decreasing,<MouselabEnv instance>
0,"[4085, 4761, 5677]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[48, -4, 4]","[4084, 159, 395]",46.0,"[None, None, None]","[0, 48, -8, 4, 2, -24, -4, 2, -2, 48, -4, 4, 4]",...,4,9995.0,1644612169790000128,mouselab-mdp,0,"[5, 9]",2,4084,decreasing,<MouselabEnv instance>


In [162]:
# Group the test trials by participant: pid -> list of
# (env, clicks, scaling factors) triples, in the order the rows appear in tdf.
participants = dict()
for i, row in tdf.iterrows():
    participants.setdefault(row['pid'], []).append(
        (row['env'],row['clicks'],SCALING[row['variance']]))

In [166]:
env, trial, scaling = participants[0][6]
a,b,c = wrap_po(env,trial,t=2,p_rand=0.01,scaling_factors=scaling)
print(np.log(sum(c)))

-7.36943237423


In [168]:
def dc_scaling_log_likelihood(env, trial, temp, p_error, scaling):
    """Log of the summed parse likelihoods that ``wrap_po`` returns for one trial.

    Args:
        env: environment of the trial.
        trial: click sequence of the trial.
        temp: passed to ``wrap_po`` as ``t``.
        p_error: passed to ``wrap_po`` as ``p_rand``.
        scaling: passed to ``wrap_po`` as ``scaling_factors``.
    """
    parse_result = wrap_po(env,trial,t=temp,p_rand=p_error, scaling_factors = scaling)
    parse_likelihoods = parse_result[2]
    total_likelihood = np.sum(parse_likelihoods)
    return np.log(total_likelihood)

In [188]:
dc_scaling_log_likelihood(env, trial, 0.44168330742195183, 0.25, scaling)

-6.3415850442847486

In [187]:
sum(dc_scaling_log_likelihood(env, trial, 0.44168330742195183, 0.25, scaling)
                    for env,trial,scaling in participants[0])

-197.86969184362766

In [175]:
def wrap_scaling_sum_ll(p, temp, p_error):
    """Sum ``dc_scaling_log_likelihood`` over all trials of participant ``p`` (key into ``participants``)."""
    total = 0
    for env, trial, scaling in participants[p]:
        total += dc_scaling_log_likelihood(env, trial, temp, p_error, scaling)
    return total

In [186]:
wrap_scaling_sum_ll(0,0.44168330742195183,0.25)

-197.86969184362766

In [183]:
def mle(data):
    """Fit temperature and error probability to one participant's trials.

    Minimizes the negative summed ``dc_scaling_log_likelihood`` over ``data``
    with ``scipy.optimize.minimize``, starting from ``(1, .01)`` and
    constrained to the bounds below.

    Args:
        data: iterable of ``(env, trial, scaling)`` triples.

    Returns:
        dict with keys ``'temp'``, ``'p_error'`` (the fitted parameters) and
        ``'logp'`` (the log-likelihood at the optimum).
    """
    # Imported here so the function does not depend on scipy.optimize having
    # been loaded as a side effect of another import.
    import scipy.optimize

    # You can adjust the temperature bounds if you think the MLE
    # is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-1, 1e2),  # temp
        (0, 0.25)  # p_error
    ]
    def loss(x):
        temp, p_error = x
        return -sum(dc_scaling_log_likelihood(env, trial, temp, p_error, scaling)
                    for env, trial, scaling in data)
    res = scipy.optimize.minimize(loss, (1,.01),bounds = bounds,
                                 options={'maxiter':100,'disp':True})
    temp, p_error = res.x
    return {'temp': temp, 'p_error': p_error, 'logp': -res.fun}

In [184]:
def mle_wrap(p):
    """Print ``p``, fit ``mle`` to ``participants[p]`` and tag the result with the participant id."""
    print(p)
    fit = mle(participants[p])
    return dict(fit, participant=p)

In [185]:
[mle_wrap(p)for p in [0]]

0


[{'logp': -197.86969184362766,
  'p_error': 0.25,
  'participant': 0,
  'temp': 0.44168330742195183}]

In [189]:
# mles = dict()
# for p in participants.keys():
#     print(p)
#     data = participants[p]
#     mles[p] = mle(data)
mles = Parallel(n_jobs=60)(delayed(mle_wrap)(p)
                           for p in participants.keys())

0
3
5
9
4
11
13
20
7
8
15
17
19
21
23
27
28
36
30
29
31
32
35
22
24
26
53
44
42
62
38
63
57
41
39
33
54
50
47
61
58
46
56
60
55
65
68
67
66
71
69
72
70


In [190]:
np.save('data/dc_exp2_mles_p4',mles)

In [191]:
mles = np.load('data/dc_exp2_mles_p4.npy')

In [192]:
logps = []
for i in range(len(mles)):
    logps.append(mles[i]['logp'])
np.sum(logps)

-12469.805447303559

In [194]:
temps = [val['temp'] for val in mles]

In [196]:
min(temps)

0.099999999999999645

## Global

In [197]:
participants

{0: [(<mouselab.MouselabEnv at 0x2ae729f1dcf8>, [5, 9, 6, 10], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a4ad518>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a4ad710>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a108dd8>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a1282b0>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae729f3b668>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae729f14ac8>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae747a60358>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72971b0b8>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae701f455f8>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a4c2668>, [9, 5], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae729e5c240>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a11a390>, [5], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae72a13e0b8>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2ae74876e4e0>, [5, 9], [6

In [198]:
DATA = []
for p in participants.keys():
    DATA += participants[p]

In [199]:
len(DATA)

1590

In [200]:
DATA[:10]

[(<mouselab.MouselabEnv at 0x2ae729f1dcf8>, [5, 9, 6, 10], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae72a4ad518>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae72a4ad710>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae72a108dd8>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae72a1282b0>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae729f3b668>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae729f14ac8>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae747a60358>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae72971b0b8>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2ae701f455f8>, [5, 9], [6, 1, 0.5])]

In [201]:
def dc_scaling_idx_log_likelihood(idx, temp, p_error):
    """Log-likelihood of the ``idx``-th pooled trial.

    The trial is looked up in the module-level ``DATA`` list by index instead
    of being passed in directly.

    Args:
        idx: position of the trial in ``DATA``; each entry unpacks to
            ``(env, trial, scaling)``.
        temp: forwarded to ``wrap_po`` as ``t``.
        p_error: forwarded to ``wrap_po`` as ``p_rand``.

    Returns:
        Natural log of the sum of the likelihoods returned by ``wrap_po``.
    """
    env, trial, scaling = DATA[idx]
    _, _, parse_likelihoods = wrap_po(
        env, trial, t=temp, p_rand=p_error, scaling_factors=scaling
    )
    total_likelihood = np.sum(parse_likelihoods)
    return np.log(total_likelihood)

In [207]:
# Time (CPU time of this process) one likelihood evaluation on the first trial
# of participant 0, passing the trial's pieces explicitly.
# NOTE(review): `dc_scaling_log_likelihood` is defined outside this section; the
# positional 2 and 0.01 are presumably temp and p_error -- confirm.
t = time.process_time()
env, trial, scaling = participants[0][0]
print(dc_scaling_log_likelihood(env, trial, 2, 0.01, scaling))
elapsed =time.process_time() - t 
print(elapsed)

-23.3713119788
0.06873739200000273


In [208]:
# Same timing check through the index-based wrapper (trial 0 of DATA,
# temp=2, p_error=0.01); the printed log-likelihood should match the
# direct call on the same trial.
t = time.process_time()
print(dc_scaling_idx_log_likelihood(0, 2, 0.01))
elapsed =time.process_time() - t 
print(elapsed)

-23.3713119788
0.06881609000004119


In [210]:
# Log-likelihood of all click sequences under a uniformly random policy:
# at every step (including the terminating action) each currently available
# action is equally likely.
sm = 0
for p in participants.keys():
    data = participants[p]
    probs = []
    for episode in data:
        # Work on a copy so stepping through the trial does not alter the stored env.
        env, trial, scaling = copy.deepcopy(episode)
        trial = trial+[env.term_action]
        prob = 1
        for a in trial:
            prob*=1/len(list(env.actions(env._state)))
            env.step(a)
        probs.append(prob)
    # Sum the per-trial logs instead of taking the log of the product: the
    # product over a participant's trials can underflow to 0.0 in double
    # precision, which would turn the total into -inf.
    sm+=np.sum(np.log(probs))
sm

-14647.59716944564

In [212]:
# Sanity check, parallelised per trial: with p_error = 1 the model likelihood
# (1 - p_r) * s(o) + p_r * alpha reduces to the random-error term alone, so the
# summed log-likelihood should reproduce the uniform-random baseline.
# Wall-clock time is used because the work happens in worker processes.
t = time.time()
mles = Parallel(n_jobs=200)(delayed(dc_scaling_idx_log_likelihood)(data_idx, 5, 1)
                           for data_idx in range(len(DATA)))
elapsed = time.time() - t 
print(elapsed)

5.3675665855407715


In [213]:
# Total log-likelihood over all trials at p_error = 1; the displayed value
# agrees with the random baseline (about -14647.597).
np.sum(mles)

-14647.597169445642

In [214]:
# Same sanity check, parallelised per participant instead of per trial.
# NOTE(review): `wrap_scaling_sum_ll` is defined outside this section; it
# presumably sums the trial log-likelihoods of participant `p` -- confirm.
t = time.time()
mles = Parallel(n_jobs=60)(delayed(wrap_scaling_sum_ll)(p, 5, 1)
                           for p in participants.keys())
elapsed = time.time() - t 
print(elapsed)

4.095941543579102


In [215]:
# Total over participants; the displayed value again agrees with the random
# baseline (about -14647.597).
np.sum(mles)

-14647.59716944564

In [216]:
def mle_global():
    """Fit a single (temp, p_error) pair to every trial in ``DATA``.

    Minimises the summed negative log-likelihood of all trials with
    ``scipy.optimize.minimize`` under box bounds, starting from
    ``(temp, p_error) = (1, .01)``. Each loss evaluation farms the per-trial
    likelihoods out to one joblib worker pool that stays open for the whole
    optimisation.

    Side effects:
        Whenever a loss evaluation is strictly lower than the loss stored in
        ``data/dc_exp2_result2.npy`` (third entry), the file is overwritten
        with ``[temp, p_error, loss]``. A missing file counts as "no best loss
        yet". NOTE: if the file is pre-seeded, the seed loss must be ``+inf``;
        a ``-inf`` seed can never be beaten, so nothing would be written.

    Returns:
        dict with keys ``'temp'``, ``'p_error'`` (the optimiser's solution)
        and ``'logp'`` (the log-likelihood at that solution).
    """
    # The notebook only imports scipy.stats; import the submodule explicitly
    # rather than relying on it being loaded as a side effect.
    import scipy.optimize

    # You can adjust the temperature bounds if you think the MLE
    # is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-1, 1e2),  # temp
        (0, 0.25)  # p_error
    ]

    def best_saved_loss():
        # Loss stored by the best evaluation so far (any run sharing the file).
        try:
            return np.load('data/dc_exp2_result2.npy')[2]
        except FileNotFoundError:
            return np.inf

    # Keep the optimiser inside the `with` block so that every loss evaluation
    # reuses the same managed worker pool instead of starting a new one.
    with Parallel(200) as parallel:
        def loss(x):
            temp, p_error = x
            jobs = (delayed(dc_scaling_idx_log_likelihood)(data_idx, temp, p_error)
                    for data_idx in range(len(DATA)))
            tot = -sum(parallel(jobs))
            if best_saved_loss() > tot:
                np.save('data/dc_exp2_result2',[temp,p_error,tot])
            return tot

        res = scipy.optimize.minimize(loss, (1,.01),bounds = bounds,
                                      options={'maxiter':100,'disp':True})
    return {'temp': res.x[0], 'p_error': res.x[1], 'logp': -res.fun}

In [217]:
# Seed the checkpoint file that `mle_global` compares against. The third entry
# is the best loss so far, i.e. a negative log-likelihood that is being
# minimised, and `mle_global` only overwrites the file when the new loss is
# smaller. The "nothing found yet" seed must therefore be +inf; with -inf no
# evaluation could ever beat it and no checkpoint would be saved.
np.save('data/dc_exp2_result2',[1,.01,np.inf])
results = mle_global()

In [218]:
# Global fit. Note that in the displayed result p_error equals 0.25, the upper
# bound set in `mle_global`, so the optimum lies on the boundary of the search box.
results

{'logp': -12668.14942530896, 'p_error': 0.25, 'temp': 0.44114349139302822}

## Behavior Analysis

In [263]:
# Per-trial log-likelihoods at temp = 0.1 with an essentially zero error rate
# (p_error = 1e-8), used below to rank trials by how well the model explains them.
# Note: this rebinds `mles` to a list with one log-likelihood per entry of DATA.
mles = Parallel(n_jobs=200)(delayed(dc_scaling_idx_log_likelihood)(data_idx, 0.1, 0.00000001)
                           for data_idx in range(len(DATA)))

In [264]:
# Indices into DATA of the ten trials with the highest log-likelihood
# (argsort is ascending, so reverse before taking the first ten).
examples = np.argsort(mles)[::-1][:10]

In [265]:
# For each selected trial, print the stored (env, clicks, scaling) triple
# followed by the result of `pick_option_moves` on the trial's environment.
for ex in examples:
    print(DATA[ex])
    print(pick_option_moves(DATA[ex][0]))

(<mouselab.MouselabEnv object at 0x2ae700e7e048>, [5, 1, 9, 6, 2, 3], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae701a21470>, [5, 1, 9, 2, 10, 11, 12], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae72ad70710>, [5, 6], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae72abed9b0>, [1, 2], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae72a693a58>, [1, 9, 5, 6], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae72a68c588>, [9, 1, 5, 6], [6, 1, 0.5])
[5]
(<mouselab.MouselabEnv object at 0x2ae701afe8d0>, [5, 9, 1, 2], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae70100eda0>, [5, 1, 9, 10], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae74740ac50>, [5, 9, 1, 2], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2ae72a68ec18>, [5, 1, 9, 10], [6, 1, 0.5])
[5]


In [266]:
# Build one fresh environment with the 'increasing' scaling factors and print
# what `get_all_options` and `pick_option_moves` return for it.
# The loop runs exactly once (range(1)).
for i in range(1):
    env = make_env(scaling_factors=SCALING['increasing'])
    print(get_all_options(env))
    print(pick_option_moves(env))

([(0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3), (4, 1), (4, 2), (4, 3), (5, 1), (5, 2), (5, 3), (-1, 1), (-99, 1)], {(0, 1): [[3]], (0, 2): [[3, 2]], (0, 3): [[3, 2, 1]], (1, 1): [[4]], (1, 2): [[4, 2]], (1, 3): [[4, 2, 1]], (2, 1): [[7]], (2, 2): [[7, 6]], (2, 3): [[7, 6, 5]], (3, 1): [[8]], (3, 2): [[8, 6]], (3, 3): [[8, 6, 5]], (4, 1): [[11]], (4, 2): [[11, 10]], (4, 3): [[11, 10, 9]], (5, 1): [[12]], (5, 2): [[12, 10]], (5, 3): [[12, 10, 9]], (-1, 1): [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13]], (-99, 1): [[13]]}, array([ 14.139,  13.348,  12.399,  14.139,  13.348,  12.399,  14.139,  13.348,  12.399,  14.139,  13.348,  12.399,  14.139,  13.348,  12.399,  14.139,  13.348,  12.399,    -inf,   0.   ]), 13)
[12]


In [267]:
# The ten trials with the lowest log-likelihood (argsort is ascending),
# printed in the same format as the best-explained trials.
examples = np.argsort(mles)[:10]
for ex in examples:
    print(DATA[ex])
    print(pick_option_moves(DATA[ex][0]))

(<mouselab.MouselabEnv object at 0x2ae747633fd0>, [1, 2, 3, 4, 9, 10, 11, 12, 5, 6, 7], [0.5, 1, 6])
[8]
(<mouselab.MouselabEnv object at 0x2ae74722ef98>, [5, 6, 7, 9, 10, 11, 12, 1, 2, 3, 4], [0.5, 1, 6])
[12]
(<mouselab.MouselabEnv object at 0x2ae7473fd3c8>, [5, 6, 8, 7, 9, 10, 11, 12, 1, 2, 3], [0.5, 1, 6])
[7]
(<mouselab.MouselabEnv object at 0x2ae7473fd860>, [5, 9, 10, 11, 12, 1, 2, 3, 4, 6, 7, 8], [0.5, 1, 6])
[3]
(<mouselab.MouselabEnv object at 0x2ae72b053c50>, [5, 6, 8, 7, 9, 10, 11, 12, 1, 2, 3, 4], [0.5, 1, 6])
[7]
(<mouselab.MouselabEnv object at 0x2ae74722eb00>, [5, 6, 7, 9, 10, 11, 12, 1, 2, 3, 4], [0.5, 1, 6])
[4]
(<mouselab.MouselabEnv object at 0x2ae72b056080>, [9, 10, 12, 11, 5, 6, 7], [0.5, 1, 6])
[8]
(<mouselab.MouselabEnv object at 0x2ae74722e668>, [9, 5, 1, 2, 3, 4, 10, 11, 12, 6, 7], [0.5, 1, 6])
[8]
(<mouselab.MouselabEnv object at 0x2ae7476339b0>, [5, 6, 8, 7, 9, 10, 11], [0.5, 1, 6])
[8]
(<mouselab.MouselabEnv object at 0x2ae72b056e48>, [1, 2, 9, 10, 5, 6, 8],

# Scratch

In [26]:
def wrap_po1(env,click_sequence,t=1,p_rand=0,branching=[3,1,2]):
    """Enumerate every parse of a click sequence into options, with likelihoods.

    The terminating action ``env.term_action`` is appended to ``click_sequence``
    and the result is parsed recursively. Sub-results are memoised per call of
    ``wrap_po1``.

    Args:
        env: environment providing ``ground_truth`` and ``term_action``.
        click_sequence: list of clicked actions, without the terminating action.
        t: softmax temperature (option utilities are multiplied by ``1/t``).
        p_rand: probability that an option comes from the random-error process.
        branching: forwarded to ``make_env`` when environments are rebuilt.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether at least one complete
        parse exists, the list of option sequences, and the likelihood of each
        sequence (same order).
    """
    memo = dict()

    def parse_options_clean(init_state,pre_acts,click_sequence,t=1,p_err=0.001):
        # Nothing left to explain: one empty parse with likelihood 1.
        if click_sequence == []:
            return True, [[]], [1]
        key = (tuple(pre_acts),tuple(click_sequence),t,p_err)
        if key in memo:
            return memo[key]

        # Rebuild the environment and replay the clicks already accounted for.
        envc = make_env(ground_truth=init_state, branching=branching)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False
        options, option_insts, option_utils,n_available_clicks = get_all_options(envc)

        # Softmax over all options, computed once per state. Subtracting the
        # maximum before exponentiating avoids exp() overflowing to inf (and
        # the softmax becoming nan) when utility/temperature is large.
        scaled = 1/t*np.asarray(option_utils, dtype=float)
        if scaled.size:
            weights = np.exp(scaled - np.max(scaled))
            softmax = weights/np.sum(weights)
        else:
            softmax = scaled

        longest = min(len(envc.paths[0]),len(click_sequence))
        for i,j in product(range(1,longest+1),range(len(options))):
            option = options[j]
            insts = option_insts[option]
            # Every instantiation of an option is treated as equally likely.
            n_insts = len(insts)
            for inst in insts:
                if np.array_equal(click_sequence[:i],inst):
                    will_done, remaining, rem_likelihoods = parse_options_clean(
                        init_state,pre_acts+click_sequence[:i],click_sequence[i:],t,p_err)
                    done = done or will_done
                    if done:
                        # Probability that the error process produced this
                        # option's clicks, drawing without replacement.
                        alpha = np.prod([1/(n_available_clicks-n+1)
                                         for n in range(1,option[1]+1)])
                        p_option = (1-p_err)*softmax[j] + p_err*alpha
                        for rest, rest_likelihood in zip(remaining, rem_likelihoods):
                            option_seqs.append([option]+rest)
                            likelihoods.append(p_option*rest_likelihood/n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods
    return parse_options_clean(env.ground_truth,[],click_sequence+[env.term_action],t,p_rand)

In [114]:
def dc_log_likelihood1(env, trial, temp, p_error):
    """Log-likelihood of one click sequence, computed with ``wrap_po1``.

    Args:
        env: environment of the trial.
        trial: click sequence of the trial.
        temp: forwarded to ``wrap_po1`` as ``t``.
        p_error: forwarded to ``wrap_po1`` as ``p_rand``.

    Returns:
        Natural log of the sum of the parse likelihoods.
    """
    _, _, parse_likelihoods = wrap_po1(env, trial, t=temp, p_rand=p_error)
    total_likelihood = np.sum(parse_likelihoods)
    return np.log(total_likelihood)

In [21]:
def parse_options_combined(env,click_sequence,temps=np.logspace(-3,1,50),p_errs=np.linspace(0,0.25,25)):
    """Parse a click sequence into options for a whole grid of parameters.

    Every likelihood is an array of shape ``(len(temps), len(p_errs))`` whose
    entry ``[l, m]`` is the likelihood of the option sequence for temperature
    ``temps[l]`` and error probability ``p_errs[m]``.

    Args:
        env: environment in the state before the first click of
            ``click_sequence``; it is deep-copied before being stepped.
        click_sequence: list of clicks still to be explained.
        temps: 1-D sequence of softmax temperatures.
        p_errs: 1-D sequence of random-error probabilities.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether a complete parse exists,
        the option sequences, and one likelihood grid per option sequence
        (same order, same length).
    """
    temps = np.asarray(temps, dtype=float)
    p_errs = np.asarray(p_errs, dtype=float)

    #count things
    n_temps = len(temps)
    n_p_errs = len(p_errs)

    #base case
    if click_sequence == []:
        return True, [[]], [np.ones((n_temps,n_p_errs))]

    #get the info you'll need for parsing
    paths = env.paths
    options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(env)

    option_insts = dict() #list of all possible option instantiations
    for option in options:
        path, obs = option
        option_insts[option] = all_option_insts(path_nodes[path],path_stds[path],obs)

    #set up your return values
    option_seqs = []
    likelihoods = []
    done = False

    #single click options
    sc_opt = (-1,1)
    options.append(sc_opt)
    option_utils.append(-np.inf)
    option_utils = np.array(option_utils)
    option_insts[sc_opt] = [[a] for a in env.actions(env._state)]
    n_available_clicks = len(option_insts[sc_opt])-1

    # Softmax probability of every option at every temperature, shape
    # (n_temps, n_options). The row maximum is subtracted before
    # exponentiating so small temperatures do not overflow exp() into
    # inf/inf = nan. As before, the normaliser leaves out the last column
    # (the single-click option, whose weight exp(-inf) is 0).
    scaled = np.outer(1/temps, option_utils)
    scaled = scaled - np.max(scaled, axis=1, keepdims=True)
    weights = np.exp(scaled)
    softmax = weights/np.sum(weights[:, :-1], axis=1, keepdims=True)

    #parsing
    longest = min(len(paths[0]),len(click_sequence))
    for i,j in product(range(1,longest+1),range(len(options))):
        option = options[j]
        for inst in option_insts[option]:
            if np.array_equal(click_sequence[:i],inst):
                copy_env = copy.deepcopy(env)
                for a in click_sequence[:i]:
                    copy_env._step(a)

                # Pass the grids down so the sub-parse uses the same shape.
                will_done, remaining, rem_likelihoods = parse_options_combined(
                    copy_env,click_sequence[i:],temps,p_errs)
                done = done or will_done

                if done:
                    # Probability that the error process produced this
                    # option's clicks, drawing without replacement.
                    alpha = np.prod([1/(n_available_clicks-n+1)
                                     for n in range(1,option[1]+1)])
                    # Likelihood of this option on the full (temp, p_err) grid.
                    option_grid = ((1-p_errs)[None, :]*softmax[:, j][:, None]
                                   + p_errs[None, :]*alpha)
                    # Exactly one grid per option sequence keeps `likelihoods`
                    # aligned with `option_seqs`.
                    for rest, rest_grid in zip(remaining, rem_likelihoods):
                        option_seqs.append([option]+rest)
                        likelihoods.append(option_grid*rest_grid)
    return done, option_seqs, likelihoods

In [22]:
def wrap_po1(env,click_sequence,t=1,p_rand=0,branching=[3,1,2]):
    """Enumerate every parse of a click sequence into options, with likelihoods.

    This variant expects ``get_all_options`` to return six values and builds
    the option instantiations itself with ``all_option_insts``; the
    single-click option ``(-1, 1)`` and the terminate option ``(-99, 1)`` are
    added here.

    NOTE(review): another cell of this notebook defines a function with the
    same name; whichever cell runs last wins.

    Args:
        env: environment providing ``ground_truth`` and ``term_action``.
        click_sequence: list of clicked actions, without the terminating action.
        t: softmax temperature (option utilities are multiplied by ``1/t``).
        p_rand: probability that an option comes from the random-error process.
        branching: forwarded to ``make_env`` when environments are rebuilt.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether at least one complete
        parse exists, the list of option sequences, and the likelihood of each
        sequence (same order).
    """
    memo = dict()

    def parse_options_clean(init_state,pre_acts,click_sequence,t=1,p_err=0.001):
        # Nothing left to explain: one empty parse with likelihood 1.
        if click_sequence == []:
            return True, [[]], [1]

        key = (tuple(pre_acts),tuple(click_sequence),t,p_err)
        if key in memo:
            return memo[key]

        # Rebuild the environment and replay the clicks already accounted for.
        envc = make_env(ground_truth=init_state,branching=branching)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False

        paths = envc.paths
        options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(envc)

        option_insts = dict() #list of all possible option instantiations
        for option in options:
            path, obs = option
            option_insts[option] = all_option_insts(path_nodes[path],path_stds[path],obs)

        #single click options
        sc_opt = (-1,1)
        options.append(sc_opt)
        option_utils.append(-np.inf)
        option_insts[sc_opt] = [[a] for a in envc.actions(envc._state)]
        n_available_clicks = len(option_insts[sc_opt])

        #end click options
        end_opt = (-99,1)
        options.append(end_opt)
        option_utils.append(0)
        option_insts[end_opt] = [[envc.term_action]]

        option_utils = np.array(option_utils)

        # Softmax over all options, computed once per state. Subtracting the
        # maximum before exponentiating avoids exp() overflowing to inf (and
        # the softmax becoming nan) when utility/temperature is large.
        scaled = 1/t*option_utils
        weights = np.exp(scaled - np.max(scaled))
        softmax = weights/np.sum(weights)

        longest = min(len(paths[0]),len(click_sequence))
        for i,j in product(range(1,longest+1),range(len(options))):
            option = options[j]
            insts = option_insts[option]
            # Every instantiation of an option is treated as equally likely.
            n_insts = len(insts)
            for inst in insts:
                if np.array_equal(click_sequence[:i],inst):
                    will_done, remaining, rem_likelihoods = parse_options_clean(
                        init_state,pre_acts+click_sequence[:i],click_sequence[i:],t,p_err)
                    done = done or will_done

                    if done:
                        # Probability that the error process produced this
                        # option's clicks, drawing without replacement.
                        alpha = np.prod([1/(n_available_clicks-n+1)
                                         for n in range(1,option[1]+1)])
                        p_option = (1-p_err)*softmax[j] + p_err*alpha
                        for rest, rest_likelihood in zip(remaining, rem_likelihoods):
                            option_seqs.append([option]+rest)
                            likelihoods.append(p_option*rest_likelihood*1/n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods
    return parse_options_clean(env.ground_truth,[],click_sequence+[env.term_action],t,p_rand)

In [7]:
def parse_options(env,click_sequence,t=1,p_rand=0.0001):
    """Recursively parse a click sequence into options, with likelihoods.

    This variant expects ``get_all_options`` to return seven values, the
    second being the dict of option instantiations. The single-click option
    ``(-1, 1)`` and the terminate option ``(-99, 1)`` are added here. There is
    no memoisation: the environment is deep-copied and stepped for every
    matching prefix.

    Args:
        env: environment in the state before the first click of
            ``click_sequence``; it is not stepped itself.
        click_sequence: list of actions still to be explained.
        t: softmax temperature (option utilities are multiplied by ``1/t``).
        p_rand: probability that an option comes from the random-error process.

    Returns:
        ``(done, option_seqs, likelihoods)``: whether at least one complete
        parse exists, the list of option sequences, and the likelihood of each
        sequence (same order).
    """
    # Nothing left to explain: one empty parse with likelihood 1.
    if click_sequence == []:
        return True, [[]], [1]
    option_seqs = []
    likelihoods = []
    done = False

    paths = env.paths
    options, option_insts, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(env)

    #single click options
    sc_opt = (-1,1)
    options.append(sc_opt)
    option_utils.append(-np.inf)
    option_insts[sc_opt] = [[a] for a in env.actions(env._state)]
    n_available_clicks = len(option_insts[sc_opt])

    #end click options
    end_opt = (-99,1)
    options.append(end_opt)
    option_utils.append(0)
    option_insts[end_opt] = [[env.term_action]]

    option_utils = np.array(option_utils)

    # Softmax over all options, computed once per state. Subtracting the
    # maximum before exponentiating avoids exp() overflowing to inf (and the
    # softmax becoming nan) when utility/temperature is large.
    scaled = 1/t*option_utils
    weights = np.exp(scaled - np.max(scaled))
    softmax = weights/np.sum(weights)

    longest = min(len(paths[0]),len(click_sequence))
    for i,j in product(range(1,longest+1),range(len(options))):
        option = options[j]
        insts = option_insts[option]
        # Every instantiation of an option is treated as equally likely.
        n_insts = len(insts)
        for inst in insts:
            if np.array_equal(click_sequence[:i],inst):
                # Advance a copy so the caller's environment stays untouched.
                copy_env = copy.deepcopy(env)
                for a in click_sequence[:i]:
                    copy_env._step(a)

                will_done, remaining, rem_likelihoods = parse_options(copy_env,click_sequence[i:],t,p_rand)
                done = done or will_done

                if done:
                    # Probability that the error process produced this
                    # option's clicks, drawing without replacement.
                    alpha = np.prod([1/(n_available_clicks-n+1)
                                     for n in range(1,option[1]+1)])
                    p_option = (1-p_rand)*softmax[j] + p_rand*alpha
                    for rest, rest_likelihood in zip(remaining, rem_likelihoods):
                        option_seqs.append([option]+rest)
                        likelihoods.append(p_option*rest_likelihood*1/n_insts)

    return done, option_seqs, likelihoods