In [125]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import itertools as it
from itertools import product
from collections import Counter, defaultdict, deque
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats
import copy
sns.set_style('white')
sns.set_context('notebook', font_scale=1.3)

from agents import Agent
from evaluation import get_util
from joblib import Parallel, delayed
from dc_util import *
# from model_utils import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [126]:
# import ipyparallel as ipp 
# rc = ipp.Client(profile='default', cluster_id='')
# ipp.register_joblib_backend()

The following equation gives the likelihood of a click sequence in this model:

$$ l(C) = \sum_{O \in \mathcal{O}(C)} l(O)$$

where $C$ is a click sequence, and $\mathcal{O}(C)$ is the set of option sequences that $C$ could parse into. To get the likelihood of an option sequence, we use the following equation:

$$ l(O) = \prod_{o \in O} \mathbb{P}_{DC}(o) $$

where $\mathbb{P}_{DC}(o)$ is the probability of a given option in our model. We assume a generative model that picks a random move with probability $p_r$, or otherwise picks from one of the available options under the directed cognition model. This gives us the following equation: 

$$\mathbb{P}_{DC}(o) = (1-p_r) \cdot s(o) + p_r \cdot \alpha$$

$s(o)$ is the softmax probability of our option among all available directed cognition options, which, for a given temperature parameter $t = \frac{1}{\beta}$ is equal to:

$$ s(o)= \frac{e^{\beta v(o)}}{\sum_{o'} e^{\beta v(o')}} $$

where $v(o)$ is the value of an option under the directed cognition model. We define $\alpha$ as the probability that the clicks making up the option would have been generated by the error process, i.e. by choosing uniformly among the remaining available clicks at each step:

$$\alpha = \prod_{k = 1}^{\text{length}(o)} \frac{1}{n_{ac}-k+1}$$

where $n_{ac}$ is the number of available clicks.

In [127]:
# Build a small set of toy environments used to exercise the option parser.
# NOTE(review): `make_envs` is not defined in this notebook; presumably it comes
# from `from dc_util import *` -- confirm the meaning of its positional arguments there.
cost = 1
envs = make_envs(cost,10,None,"increasing",branching=[2,2])
env = envs[0]
# Empty click sequence to start from.
click_sequence = []

In [172]:
# Display the options, their utilities and the per-path bookkeeping for the current env.
get_all_options(env)

([(0, 1), (0, 2), (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)],
 [0.58208432559063583,
  -0.23117595092121435,
  0.58208432559063583,
  -0.23117595092121435,
  0.58208432559063583,
  -0.23117595092121435,
  0.58208432559063583,
  -0.23117595092121435],
 [[1, 2], [1, 3], [4, 5], [4, 6]],
 [[3.93168877550093, 15.72675510200372],
  [3.93168877550093, 15.72675510200372],
  [3.93168877550093, 15.72675510200372],
  [3.93168877550093, 15.72675510200372]],
 [0, 0, 0, 0],
 [2, 2, 2, 2])

In [129]:
def parse_options(env,click_sequence,t=1,p_rand=0.0001):
    """Enumerate every parse of `click_sequence` into options, with a likelihood for each.

    Starting from the current state of `env`, each prefix of the click sequence
    is compared with every instantiation of every available option. On a match
    the prefix is applied to a deep copy of `env` and the remainder is parsed
    recursively.

    Option labels:
      * `(path, n)`  -- options produced by `get_all_options`;
      * `(-1, 1)`    -- any single available click (utility -inf, so it can only
                        be produced by the random-error process);
      * `(-99, 1)`   -- the terminating action (utility 0).

    Parameters
    ----------
    env : environment exposing `paths`, `_state`, `actions`, `term_action`, `_step`.
    click_sequence : list of clicks still to be explained.
    t : softmax temperature (must be positive).
    p_rand : probability that an option is generated by the random-error process.

    Returns
    -------
    (done, option_seqs, likelihoods)
        `done` is True when at least one complete parse exists; `option_seqs[k]`
        is a list of option labels and `likelihoods[k]` its likelihood.
    """
    # Base case: nothing left to explain -> one empty parse with likelihood 1.
    if click_sequence == []:
        return True, [[]], [1]

    option_seqs = []
    likelihoods = []
    done = False

    paths = env.paths
    # get_all_options returns six values and no instantiations, so the
    # instantiations are built here, as the other parsers in this notebook do.
    options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(env)

    option_insts = dict() #all possible instantiations (click lists) of each option
    for option in options:
        path, obs = option
        option_insts[option] = all_option_insts(path_nodes[path],path_stds[path],obs)

    #single click options
    sc_opt = (-1,1)
    options.append(sc_opt)
    option_utils.append(-np.inf)
    option_insts[sc_opt] = [[a] for a in env.actions(env._state)]
    n_available_clicks = len(option_insts[sc_opt])

    #end click options
    end_opt = (-99,1)
    options.append(end_opt)
    option_utils.append(0)
    option_insts[end_opt] = [[env.term_action]]

    option_utils = np.array(option_utils)

    # Softmax over options, shifted by the maximum utility before exponentiating.
    # The shift leaves the probabilities unchanged but prevents exp() from
    # overflowing (inf/inf = nan) at small temperatures. The maximum is finite
    # because the end option always contributes a utility of 0.
    weights = np.exp((option_utils - np.max(option_utils)) / t)
    softmax = weights / np.sum(weights)

    max_len = min(len(paths[0]),len(click_sequence))
    for i,j in product(range(1,max_len+1),range(len(options))):
        option = options[j]
        insts = option_insts[option]
        n_insts = len(insts)
        for inst in insts:
            if not np.array_equal(click_sequence[:i],inst):
                continue

            # Apply the matched prefix to a copy so that `env` itself is untouched.
            copy_env = copy.deepcopy(env)
            for a in click_sequence[:i]:
                copy_env._step(a)

            will_done, remaining, rem_likelihoods = parse_options(copy_env,click_sequence[i:],t,p_rand)
            done = done or will_done
            if not will_done:
                # An incomplete parse of the remainder yields no sequences.
                continue

            # Probability that the random-error process produces this option's clicks.
            p_random = np.prod([1/(n_available_clicks-m+1) for m in range(1,option[1]+1)])
            p_option = (1-p_rand)*softmax[j] + p_rand*p_random
            for latter, rem_l in zip(remaining, rem_likelihoods):
                option_seqs.append([option]+latter)
                likelihoods.append(p_option*rem_l*1/n_insts)

    return done, option_seqs, likelihoods

In [143]:
def option_util(x,sigma):
    """Return sigma*pdf(x/sigma) - |x|*cdf(-|x|/sigma) for the standard normal.

    `x` is the offset passed by the caller (in `get_all_options`: a path's
    observed value minus the best observed value) and `sigma` the scale.
    """
    distance = np.abs(x)
    density_term = sigma * scipy.stats.norm.pdf(x / sigma)
    tail_term = distance * scipy.stats.norm.cdf(-distance / sigma)
    return density_term - tail_term

In [144]:
def get_all_options(env):
    """Collect the path options available in `env` together with their utilities.

    A node counts as unobserved when its entry in `env._state` has a `sample`
    attribute; every other entry is treated as an observed value.

    Returns
    -------
    options : list of `(path_index, n)` with n = 1..number of unobserved nodes on the path.
    option_utils : one utility per option, computed with `option_util` plus `n * env.cost`.
    path_nodes : per path, its unobserved nodes.
    path_stds : per path, the `.var()` of each unobserved node (i.e. variances).
    path_obs : per path, the sum of its observed values.
    avail_moves : per path, the number of unobserved nodes.
    """
    state = env._state
    paths = env.paths #list of all paths

    path_nodes = [] #the unobserved nodes of each path
    path_stds = [] #the .var() of the unobserved nodes of each path
    path_obs = [] #summed value of the observed nodes in each path
    for path in paths:
        hidden = [node for node in path if hasattr(state[node],'sample')]
        path_nodes.append(hidden)
        path_stds.append([state[node].var() for node in hidden])
        path_obs.append(sum(state[node] for node in path if not hasattr(state[node],'sample')))

    avail_moves = [len(hidden) for hidden in path_nodes] #moves available in each path
    options = [(idx,n_obs)
               for idx in range(len(paths))
               for n_obs in range(1,avail_moves[idx]+1)]

    best_obs = np.max(path_obs)

    option_utils = []
    for idx, n_obs in options:
        # Scale: square root of the summed n_obs largest variances on the path.
        largest = np.sort(path_stds[idx])[::-1][:n_obs]
        scale = np.sqrt(np.sum(largest))
        option_utils.append(option_util(path_obs[idx]-best_obs,scale) + n_obs*env.cost)

    return options, option_utils, path_nodes, path_stds, path_obs, avail_moves

In [194]:
def wrap_po(env,click_sequence,t=1,p_rand=0,branching=[3,1,2]):
    """Parse a click sequence into option sequences and score every parse.

    The terminal action of `env` is appended to `click_sequence`, and the result
    is split in every possible way into consecutive options. Results of the
    recursive parse are memoised per call on (clicks already applied, clicks
    remaining, t, p_err).

    Option labels:
      * `(path, n)`  -- options produced by `get_all_options`;
      * `(-1, 1)`    -- any single available click (utility -inf, so it can only
                        be produced by the random-error process);
      * `(-99, 1)`   -- the terminating action (utility 0).

    Parameters
    ----------
    env : environment providing `ground_truth` and `term_action`; a fresh
        environment is rebuilt from its ground truth with `make_env`.
    click_sequence : list of clicks made in the trial (without the terminal action).
    t : softmax temperature (must be positive).
    p_rand : probability that an option is generated by the random-error process.
    branching : forwarded to `make_env` when rebuilding the environment.

    Returns
    -------
    (done, option_seqs, likelihoods)
        `done` is True when at least one complete parse exists; `option_seqs[k]`
        is a list of option labels and `likelihoods[k]` its likelihood.
    """
    memo = dict()

    def parse_options_clean(init_state,pre_acts,click_sequence,t=1,p_err=0.001):
        # Base case: nothing left to explain -> one empty parse with likelihood 1.
        if click_sequence == []:
            return True, [[]], [1]

        key = (tuple(pre_acts),tuple(click_sequence),t,p_err)
        if key in memo:
            return memo[key]

        # Rebuild the environment and replay the clicks already accounted for.
        envc = make_env(ground_truth=init_state,branching=branching)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False

        paths = envc.paths
        options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(envc)

        option_insts = dict() #all possible instantiations (click lists) of each option
        for option in options:
            path, obs = option
            option_insts[option] = all_option_insts(path_nodes[path],path_stds[path],obs)

        #single click options
        sc_opt = (-1,1)
        options.append(sc_opt)
        option_utils.append(-np.inf)
        option_insts[sc_opt] = [[a] for a in envc.actions(envc._state)]
        n_available_clicks = len(option_insts[sc_opt])

        #end click options
        end_opt = (-99,1)
        options.append(end_opt)
        option_utils.append(0)
        option_insts[end_opt] = [[envc.term_action]]

        option_utils = np.array(option_utils)

        # Softmax over options, computed once per state and shifted by the
        # maximum utility before exponentiating. The shift leaves the
        # probabilities unchanged but prevents exp() from overflowing
        # (inf/inf = nan) at small temperatures. The maximum is finite because
        # the end option always contributes a utility of 0.
        weights = np.exp((option_utils - np.max(option_utils)) / t)
        softmax = weights / np.sum(weights)

        max_len = min(len(paths[0]),len(click_sequence))
        for i,j in product(range(1,max_len+1),range(len(options))):
            option = options[j]
            insts = option_insts[option]
            n_insts = len(insts)
            for inst in insts:
                if not np.array_equal(click_sequence[:i],inst):
                    continue

                # p_err always equals the outer p_rand (it is passed unchanged
                # from the top-level call), so it is used consistently here.
                will_done, remaining, rem_likelihoods = parse_options_clean(
                    init_state,pre_acts+click_sequence[:i],click_sequence[i:],t,p_err)
                done = done or will_done
                if not will_done:
                    # An incomplete parse of the remainder yields no sequences.
                    continue

                # Probability that the random-error process produces this option's clicks.
                p_random = np.prod([1/(n_available_clicks-m+1) for m in range(1,option[1]+1)])
                p_option = (1-p_err)*softmax[j] + p_err*p_random
                for latter, rem_l in zip(remaining, rem_likelihoods):
                    option_seqs.append([option]+latter)
                    likelihoods.append(p_option*rem_l*1/n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods

    return parse_options_clean(env.ground_truth,[],click_sequence+[env.term_action],t,p_rand)

In [195]:
# Scratch: switch to the second toy environment with an empty trial (no clicks).
env = envs[1]
trial = []

In [198]:
# Pick the (env, clicks) pair at index 6 for participant 2.
# NOTE(review): `participants` is only built in a later cell of this notebook,
# so this cell fails on Restart & Run All.
env,trial = participants[2][6]

In [199]:
# a: whether a complete parse exists, b: the option sequences, c: their likelihoods.
a,b,c = wrap_po(env,trial,t=2,p_rand=0.01,branching=[3,1,2])
# Log-likelihood of the trial: log of the summed likelihoods over all parses.
print(np.log(sum(c)))
a,b,c

-22.4594733701


(True,
 [[(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (0, 1),
   (1, 1),
   (-1, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (0, 1),
   (1, 1),
   (-99, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (0, 1),
   (-1, 1),
   (-1, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (0, 1),
   (-1, 1),
   (-99, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (-1, 1),
   (1, 1),
   (-1, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (-1, 1),
   (1, 1),
   (-99, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (-1, 1),
   (-1, 1),
   (-1, 1)],
  [(0, 1),
   (4, 1),
   (2, 1),
   (2, 1),
   (0, 1),
   (4, 1),
   (3, 1),
   (2, 1),
   (-1, 1),
   (-1, 1),
   (-99, 1)

In [200]:
# Log of the summed parse likelihoods from the previous wrap_po call.
print(np.log(sum(c)))

-22.4594733701


In [179]:
# Inspect the ground-truth values stored on the current environment.
env.ground_truth

array([  0,  -5,  10,  -5,  10,   5,   5, -10,   5,  10,  -5,  10,  10])

# Modeling

In [201]:
def make_env(mu=0, sigma=5, quantization=4, cost=1.00, seed=None, branching=[3,1,2], **kwargs):
    """Build a symmetric MouselabEnv whose non-root rewards are four-valued.

    Rewards at depth > 0 are `Categorical(mu + sigma * [-2, -1, 1, 2])`; depth 0
    yields 0. When `seed` is given, NumPy's global RNG is seeded first.
    `quantization` is accepted but not used in this body. Extra keyword
    arguments are forwarded to `MouselabEnv.new_symmetric`.
    """
    if seed is not None:
        np.random.seed(seed)

    def reward(depth):
        if not depth > 0:
            return 0.
        offsets = np.array([-2,-1,1,2])
        return Categorical(mu + sigma * offsets)

    return MouselabEnv.new_symmetric(branching, reward, cost=cost, **kwargs)

env = make_env(ground_truth=False)

In [202]:
from analysis_utils import *
# Load the experiment data for this version.
# NOTE(review): `get_data` is not defined in this notebook; presumably it comes
# from `from analysis_utils import *`.
VERSION = 'c1.1'
exp_data = get_data(VERSION, '../experiment/data')

# Keep only participants flagged as completed; `complete` holds their index values.
pdf = exp_data['participants']
pdf = pdf.loc[pdf.completed].copy()
print(f'{len(pdf)} participants')
complete = list(pdf.index)

def extract(q):
    """Return the click targets stored under q['click']['state']['target'] as a list of ints."""
    targets = q['click']['state']['target']
    return [int(target) for target in targets]

# Trials from completed participants only.
mdf = exp_data['mouselab-mdp'].query('pid == @complete').copy()
# Click targets per trial (list of ints) and how many clicks were made.
mdf['clicks'] = mdf.queries.apply(extract)
mdf['n_clicks'] = mdf.clicks.apply(len)
# NOTE(review): `get` is not defined in this notebook (presumably from the star
# import); it looks like it takes element 0 of `rt`, defaulting to 0 -- confirm.
mdf['thinking'] = mdf['rt'].apply(get(0, default=0))

# Test-block trials, with trial_index re-based to start at 0 and integer ids.
tdf = mdf.query('block == "test"').copy()
tdf.trial_index -= tdf.trial_index.min()
tdf.trial_index = tdf.trial_index.astype(int)
tdf.trial_id = tdf.trial_id.astype(int)

# pdf['total_time'] = exp_data['survey'].time_elapsed / 60000

# Per-participant means. n_clicks and score use test trials only (tdf),
# whereas thinking is averaged over all blocks (mdf).
pdf['n_clicks'] = tdf.groupby('pid').n_clicks.mean()
pdf['score'] = tdf.groupby('pid').score.mean()
pdf['thinking'] = mdf.groupby('pid').thinking.mean()

60 participants


In [203]:
import json
def excluded_pids():
    """Return a boolean Series flagging participants to exclude.

    A participant is flagged when more than one quiz response differs from the
    expected answers, or when they made no clicks in the 'train_inspector' block.
    Reads the notebook-level `exp_data`, `complete` and `mdf`.
    """
    sdf = exp_data['survey-multi-choice'].query('pid == @complete').copy()
    sdf = pd.DataFrame(list(sdf.responses), index=sdf.index)
    correct = pd.Series(['-$10 to $10', '$1', '1 cent for every $1 you make in the game'])
    # NOTE(review): comparing a DataFrame with a Series aligns the Series index
    # (0, 1, 2) with the frame's columns -- assumes the response columns are 0, 1, 2; confirm.
    fail_quiz = (sdf != correct).sum(axis=1) > 1
    no_click = mdf.query('block == "train_inspector"').groupby('pid').n_clicks.sum() == 0
    # NOTE(review): `|` aligns on index; no_click is indexed by pid, so this
    # assumes sdf.index also holds pids -- confirm.
    return fail_quiz | no_click

exclude = excluded_pids()
# Attach the per-participant flag to every test trial, then drop flagged trials.
tdf['exclude'] = list(exclude.loc[tdf.pid])
tdf = tdf.query('~exclude').copy().drop('exclude', axis=1)
print(f'excluding {exclude.sum()} out of {len(exclude)} partipicants')

excluding 9 out of 60 partipicants


In [204]:
def get_env(state_rewards,branching=[3,1,2]):
    """Build an environment whose ground truth is `state_rewards` with entry 0 set to 0.

    The rewards are copied before entry 0 is overwritten, so the caller's
    sequence (here: the list stored in the `state_rewards` column of `tdf`) is
    left untouched.
    """
    rewards = copy.copy(state_rewards)
    rewards[0] = 0
    return make_env(ground_truth=rewards,branching=branching)
# One environment per test trial, built from that trial's recorded rewards.
tdf['env'] = tdf.state_rewards.apply(get_env)

In [205]:
# Preview the first rows of the test-trial frame (now including the `env` column).
tdf.head()

Unnamed: 0,action_times,actions,block,path,queries,rewards,rt,score,simulation_mode,state_rewards,time_elapsed,trial_index,trial_time,trial_id,trial_type,pid,clicks,n_clicks,thinking,env
90,"[11467, 13434, 16274]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[5, -5, -5]","[11465, 1450, 2324]",-6.0,"[None, None, None]","[0, 5, -10, -5, 10, 10, 5, -10, 5, 5, -5, -5, 5]",728336,0,20130.0,8034619116489218048,mouselab-mdp,1,[9],1,11465,<MouselabEnv instance>
91,"[7272, 9200, 10360]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseover': {'state': {'target': [], 'time':...","[10, -5, 10]","[7270, 1410, 654]",13.0,"[None, None, None]","[0, -5, 10, -5, 10, 5, 5, -10, 5, 10, -5, 10, 10]",741664,1,12248.0,38024071126860576,mouselab-mdp,1,"[5, 9]",2,7270,<MouselabEnv instance>
92,"[8883, 10283, 11587]","[right, right, up]",test,"[0, 5, 6, 7]","{'mouseover': {'state': {'target': [], 'time':...","[-10, -5, 5]","[8881, 894, 798]",-12.0,"[None, None, None]","[0, -10, 5, 10, 10, -10, -5, 5, 10, -10, -10, ...",755720,2,12971.0,8607347987074355200,mouselab-mdp,1,"[1, 9]",2,8881,<MouselabEnv instance>
93,"[7585, 9313, 10649]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseover': {'state': {'target': [], 'time':...","[5, 10, -10]","[7583, 1215, 818]",3.0,"[None, None, None]","[0, 5, -5, 10, -5, -5, 5, 10, -10, 5, 10, -10, 5]",769472,3,12665.0,4859092002678591488,mouselab-mdp,1,"[1, 5]",2,7583,<MouselabEnv instance>
94,"[3343, 4415, 5295]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseover': {'state': {'target': [], 'time':...","[-5, -10, 10]","[3341, 555, 369]",-5.0,"[None, None, None]","[0, -5, 5, 10, -10, 10, -10, 10, 5, -5, -10, 5...",778857,4,8295.0,4754602433487654912,mouselab-mdp,1,[],0,3341,<MouselabEnv instance>


In [206]:
# Group the test trials by participant: pid -> list of (env, clicks), in row order.
j = 0
participants = dict()
for row_idx, row in tdf.iterrows():
    trial_record = (row['env'],row['clicks'])
    participants.setdefault(row['pid'], []).append(trial_record)

In [207]:
# Candidate parameter grids: 25 error probabilities in [0.01, 0.25] and
# 50 log-spaced temperatures in [1e-5, 1e1].
# NOTE(review): `temp` is reassigned to a scalar in a later timing cell.
p_errs = np.linspace(0.01,0.25, 25)
temp = np.logspace(-5,1, 50)

In [215]:
def dc_log_likelihood(env, trial, temp, p_error):
    """Log-likelihood of one trial: log of the summed likelihoods over all parses.

    `temp` and `p_error` are passed to `wrap_po` as `t` and `p_rand`.
    """
    _, _, parse_likelihoods = wrap_po(env,trial,t=temp,p_rand=p_error)
    total_likelihood = np.sum(parse_likelihoods)
    return np.log(total_likelihood)

In [217]:
# Spot check: log-likelihood of the trial at index 6 for the first participant.
p = list(participants.keys())[0]
env,trial = participants[p][6]

dc_log_likelihood(env, trial, 1, 0.25)

-9.6839091543431586

In [218]:
import time
# Time 1000 evaluations of the single-trial log-likelihood (CPU time, in seconds).
t = time.process_time()
for i in range(1000):
    dc_log_likelihood(env, trial, 1, 0.01)
elapsed =time.process_time() - t 
print(elapsed)

13.596963642000006


In [219]:
# Time one evaluation of the summed log-likelihood over all of participant p's trials.
# Note: the generator below rebinds the notebook-level names only inside itself;
# `temp` and `p_error` are overwritten at notebook level here.
temp, p_error = 1,.01
data = participants[p]
t = time.process_time()
sum(dc_log_likelihood(env, trial, temp, p_error) for env, trial in data)
elapsed =time.process_time() - t 
print(elapsed)

0.42570432899999844


In [220]:
def mle(data):
    """Fit temperature and error probability to one participant's trials.

    Minimises the negative summed `dc_log_likelihood` over `data` with
    `scipy.optimize.minimize`, starting from (temp=1, p_error=0.01) and
    constrained to the bounds below.

    Parameters
    ----------
    data : iterable of (env, trial) pairs; it is iterated once per loss
        evaluation, so it must be re-iterable (e.g. a list).

    Returns
    -------
    dict with keys 'temp', 'p_error' (the optimiser's solution) and 'logp'
    (the summed log-likelihood at that solution).
    """
    # Imported explicitly: the notebook only imports scipy.stats, which does not
    # guarantee that the scipy.optimize submodule is loaded.
    import scipy.optimize

    # You can adjust the temperature bounds if you think the MLE
    # is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-5, 1e2),  # temp
        (0, .25)  # p_error
    ]
    def loss(x):
        temp, p_error = x
        return -sum(dc_log_likelihood(env, trial, temp, p_error)
                    for env, trial in data)
    res = scipy.optimize.minimize(loss, (1,.01),bounds = bounds)
    temp, p_error = res.x
    return {'temp': temp, 'p_error': p_error, 'logp': -res.fun}

In [221]:
def mle_wrap(p):
    """Fit participant `p` and return the `mle` result tagged with the participant id.

    Reads the notebook-level `participants` mapping. The returned dict has the
    keys produced by `mle` plus 'participant'.
    """
    d = mle(participants[p])
    # This must be an assignment: the previous `d['participant']:p` was a bare
    # annotation and stored nothing.
    d['participant'] = p
    return d

In [222]:
# Fit the first participant only and report the CPU time taken and the fitted values.
mles=dict()
p = list(participants.keys())[0]
t = time.process_time()
mles[p] = mle_wrap(p)
elapsed =time.process_time() - t 
print(elapsed)
print(mles[p])

11.79423328899999
{'temp': 100.0, 'p_error': 0.25, 'logp': -254.29114630683654}


In [89]:
# Summed log-likelihood of `data` at temp=100, p_error=0.25 (the upper bounds used in mle).
sum(dc_log_likelihood(env, trial, 100.0, 0.25) for env, trial in data)

-3472.9150719069462

In [223]:
# Fit every participant sequentially, printing each pid as a progress marker.
# The recorded run of this cell was interrupted (KeyboardInterrupt) after the second pid.
mles = dict()
for p in participants.keys():
    print(p)
    data = participants[p]
    mles[p] = mle(data)
# mles = Parallel(n_jobs=20)(delayed(mle_wrap)(p)
#                            for p in participants.keys())

1
2


KeyboardInterrupt: 

In [None]:
# Persist the fitted parameters. np.save appends '.npy' to the name and stores
# a non-array object such as this dict as a pickled object array.
np.save('dc_mles',mles)

In [95]:
# The saved file holds pickled Python objects (dicts), which np.load refuses to
# read unless allow_pickle=True. Only load files produced by this notebook:
# unpickling untrusted data can execute arbitrary code.
np.load('dc_mles.npy', allow_pickle=True)

array([{'temp': 100.0, 'p_error': 0.25, 'logp': -3472.9150719069462}], dtype=object)

# Scratch

In [21]:
def parse_options_combined(env,click_sequence,temps=np.logspace(-3,1,50),p_errs=np.linspace(0,0.25,25)):
    """Parse a click sequence into options and score every parse on a parameter grid.

    Same recursive parse as `parse_options`, but each likelihood is an array of
    shape `(len(temps), len(p_errs))`: entry `[l, m]` is the likelihood of the
    parse under temperature `temps[l]` and error probability `p_errs[m]`.
    Only `(path, n)` options and the single-click option `(-1, 1)` are
    considered; there is no separate end option here.

    Parameters
    ----------
    env : environment exposing `paths`, `_state`, `actions`, `_step`.
    click_sequence : list of clicks still to be explained.
    temps : 1-D sequence of positive softmax temperatures.
    p_errs : 1-D sequence of random-error probabilities.

    Returns
    -------
    (done, option_seqs, likelihoods)
        `done` is True when at least one complete parse exists; `option_seqs[k]`
        is a list of option labels and `likelihoods[k]` the matching
        `(len(temps), len(p_errs))` array.
    """
    temps = np.asarray(temps, dtype=float)
    p_errs = np.asarray(p_errs, dtype=float)

    #count things
    n_temps = len(temps)
    n_p_errs = len(p_errs)

    #base case
    if click_sequence == []:
        return True, [[]], [np.ones((n_temps,n_p_errs))]

    #get the info you'll need for parsing
    paths = env.paths
    options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(env)

    option_insts = dict() #all possible instantiations (click lists) of each option
    for option in options:
        path, obs = option
        option_insts[option] = all_option_insts(path_nodes[path],path_stds[path],obs)

    #set up your return values
    option_seqs = []
    likelihoods = []
    done = False

    #single click options
    sc_opt = (-1,1)
    options.append(sc_opt)
    option_utils.append(-np.inf)
    option_utils = np.array(option_utils)
    option_insts[sc_opt] = [[a] for a in env.actions(env._state)]
    # NOTE(review): the -1 presumably excludes the terminating action from the
    # count of available clicks -- confirm against env.actions.
    n_available_clicks = len(option_insts[sc_opt])-1

    # softmax[l, j]: softmax probability of option j at temperature temps[l].
    # Only finite utilities take part (the single-click option has utility -inf
    # and therefore probability 0). Utilities are shifted by their maximum
    # before exponentiating so exp() cannot overflow at small temperatures.
    # When no option has a finite utility the softmax part is left at 0 rather
    # than evaluating 0/0.
    softmax = np.zeros((n_temps,len(options)))
    finite = np.isfinite(option_utils)
    if finite.any():
        shifted = option_utils[finite] - np.max(option_utils[finite])
        weights = np.exp(shifted[np.newaxis,:] / temps[:,np.newaxis])
        softmax[:,finite] = weights / weights.sum(axis=1,keepdims=True)

    #parsing
    max_len = min(len(paths[0]),len(click_sequence))
    for i,j in product(range(1,max_len+1),range(len(options))):
        option = options[j]
        for inst in option_insts[option]:
            if not np.array_equal(click_sequence[:i],inst):
                continue

            # Apply the matched prefix to a copy so that `env` itself is untouched.
            copy_env = copy.deepcopy(env)
            for a in click_sequence[:i]:
                copy_env._step(a)

            # Pass the grids through so every level uses the caller's temps/p_errs.
            will_done, remaining, rem_likelihoods = parse_options_combined(
                copy_env,click_sequence[i:],temps,p_errs)
            done = done or will_done
            if not will_done:
                # An incomplete parse of the remainder yields no sequences.
                continue

            # Probability that the random-error process produces this option's clicks.
            p_random = np.prod([1/(n_available_clicks-m+1) for m in range(1,option[1]+1)])
            # Option probability on the whole grid, shape (n_temps, n_p_errs).
            p_option = (softmax[:,j][:,np.newaxis]*(1-p_errs)[np.newaxis,:]
                        + (p_errs*p_random)[np.newaxis,:])
            # NOTE(review): unlike parse_options / wrap_po, this is not divided
            # by the number of instantiations of the option -- confirm intent.
            for latter, rem_l in zip(remaining, rem_likelihoods):
                # Exactly one likelihood array per option sequence.
                option_seqs.append([option]+latter)
                likelihoods.append(p_option*rem_l)
    return done, option_seqs, likelihoods