In [22]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import itertools as it
from itertools import product
from collections import Counter, defaultdict, deque
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats
import copy
import time
sns.set_style('white')
sns.set_context('notebook', font_scale=1.3)

from agents import Agent
from evaluation import get_util
from joblib import Parallel, delayed
from dc_util import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
import sys
sys.version

'3.5.4 |Anaconda custom (64-bit)| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [24]:
import ipyparallel as ipp 
# Connect to an already-running ipyparallel cluster using the default profile.
rc = ipp.Client(profile='default', cluster_id='')
# Make ipyparallel available as a joblib backend.
# NOTE(review): none of the Parallel(...) calls visible in this notebook select
# that backend explicitly — confirm whether it is actually being used.
ipp.register_joblib_backend()

The following equation gives the likelihood of a click sequence in this model:

$$ l(C) = \sum_{O \in \mathcal{O}(C)} l(O)\mathbb{P}(C|O)$$

where $C$ is a click sequence, and $\mathcal{O}(C)$ is the set of option sequences that $C$ could parse into. To get the likelihood of an option sequence, we use the following equation:

$$ l(O) = \prod_{o \in O} \mathbb{P}_{DC}(o) $$

$$ \mathbb{P}(C|O) = \prod_{o \in O} \mathbb{P}(i_o|o, s) $$ 

where $\mathbb{P}_{DC}(o)$ is the probability of a given option in our model, and $\mathbb{P}(i_o|o, s)$ is the probability of the particular click instantiation $i_o$ of option $o$ in state $s$. We assume a generative model that, with probability $p_r$, picks a random move and otherwise picks one of the available options under the directed cognition model. This gives us the following equation:

$$\mathbb{P}_{DC}(o) = (1-p_r)\, s(o) + p_r\, \alpha$$

$s(o)$ is the softmax probability of our option among all available directed cognition options, which, for a given temperature parameter $t = \frac{1}{\beta}$, is equal to:

$$ s(o)= \frac{e^{\beta v(o)}}{\sum_{o'} e^{\beta v(o')}} $$

where $v(o)$ is the value of an option using the directed cognition model. We define $\alpha$ as the probability that an option in the sequence would have been generated by the error process, and it is defined as:

$$\alpha = \prod_{k = 1}^{\text{length}(o)} \frac{1}{n_{ac}-k+1}$$

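where $n_{ac}$ is the number of available clicks. Note that the implementation of `wrap_po` below does not evaluate this product: it sets $\alpha = 1$ for the random-click option `(-1, 1)` and $\alpha = 0$ for every other option, and divides each option's probability by its number of instantiations.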

In [4]:
def wrap_po(env,click_sequence,t=1,p_rand=0,branching=[3,1,2]):
    """Parse a click sequence into directed-cognition options and score every parse.

    The terminal action ``env.term_action`` is appended to ``click_sequence``
    and every way of splitting the result into consecutive option
    instantiations (as reported by ``get_all_options``) is enumerated.

    Parameters
    ----------
    env : environment exposing ``ground_truth`` and ``term_action``.
    click_sequence : list of clicks made in the trial (without the terminal action).
    t : softmax temperature applied to the option utilities.
    p_rand : weight of the random-move component of the mixture.
    branching : branching structure forwarded to ``make_env``.

    Returns
    -------
    (done, option_seqs, likelihoods)
        ``done`` is True when at least one complete parse exists,
        ``option_seqs`` lists the option sequence of each parse and
        ``likelihoods`` holds the matching likelihood of each parse.

    Notes
    -----
    The error term uses ``alpha = 1`` for the random option ``(-1, 1)`` and
    ``0`` otherwise; each option probability is divided by the number of
    instantiations of that option.
    """
    init_state = env.ground_truth
    # t, p_rand and init_state are fixed for the whole call, so the memo only
    # needs to be keyed on the clicks already replayed and those still to parse.
    memo = dict()

    def option_probabilities(option_utils):
        # Max-shifted softmax: identical in exact arithmetic to
        # exp(u/t) / sum(exp(u/t)) but does not overflow to inf/inf = nan
        # when u/t is large (e.g. small temperatures).
        utils = np.asarray(option_utils, dtype=float)
        weights = np.exp((utils - np.max(utils)) / t)
        return weights / np.sum(weights)

    def parse_options_clean(pre_acts, clicks):
        if clicks == []:
            # Nothing left to explain: one empty parse with likelihood 1.
            return True, [[]], [1]
        key = (tuple(pre_acts), tuple(clicks))
        if key in memo:
            return memo[key]

        # Rebuild the environment and replay the clicks made so far.
        envc = make_env(ground_truth=init_state, branching=branching)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False
        options, option_insts, option_utils, _n_available_clicks = get_all_options(envc)
        softmax = None  # computed once per state, on the first matching option
        max_len = min(len(envc.paths[0]), len(clicks))

        for i, j in product(range(1, max_len + 1), range(len(options))):
            option = options[j]
            insts = option_insts[option]
            n_insts = len(insts)
            for inst in insts:
                if not np.array_equal(clicks[:i], inst):
                    continue
                will_done, remaining, rem_likelihoods = parse_options_clean(
                    pre_acts + clicks[:i], clicks[i:])
                if not will_done:
                    # A failed suffix contributes no parses.
                    continue
                done = True
                if softmax is None:
                    softmax = option_probabilities(option_utils)
                alpha = 1 if option == (-1, 1) else 0
                l_option = (1 - p_rand) * softmax[j] + p_rand * alpha
                for k in range(len(remaining)):
                    option_seqs.append([option] + remaining[k])
                    likelihoods.append(l_option * rem_likelihoods[k] / n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods

    return parse_options_clean([], click_sequence + [env.term_action])

In [5]:
def dc_log_likelihood(env, trial, temp, p_error):
    """Log-likelihood of one trial: log of the summed likelihoods of all its parses."""
    parse_result = wrap_po(env, trial, t=temp, p_rand=p_error)
    total_likelihood = np.sum(parse_result[2])
    return np.log(total_likelihood)

# Modeling

## Experiment 1

In [6]:
def make_env(mu=0, sigma=5, quantization=4, cost=1.00, seed=None, branching=[3,1,2], **kwargs):
    """Build a symmetric MouselabEnv for Experiment 1.

    Nodes at depth > 0 get a Categorical reward over ``mu + sigma * [-2, -1, 1, 2]``;
    the root (depth 0) has reward 0. ``seed``, when given, seeds numpy's global RNG.
    ``quantization`` is accepted but not used. Extra keyword arguments are
    forwarded to ``MouselabEnv.new_symmetric``.
    """
    if seed is not None:
        np.random.seed(seed)

    def reward(depth):
        if not depth > 0:
            return 0.
        outcomes = np.array([-2, -1, 1, 2])
        return Categorical(mu + sigma * outcomes)

    return MouselabEnv.new_symmetric(branching, reward, cost=cost, **kwargs)

# Default Experiment-1 environment.
# NOTE(review): ground_truth=False is forwarded to MouselabEnv.new_symmetric;
# presumably it means "no fixed reward realisation" — confirm against MouselabEnv.
env = make_env(ground_truth=False)

In [7]:
from analysis_utils import *
# Experiment version tag passed to get_data together with the data directory.
VERSION = 'c1.1'
exp_data = get_data(VERSION, '../experiment/data')

# Keep only participants flagged as having completed the experiment.
pdf = exp_data['participants']
pdf = pdf.loc[pdf.completed].copy()
# print(f'{len(pdf)} participants')
# Index values of the completed participants; used below to filter on `pid`
# (assumes the participants frame is indexed by pid — TODO confirm).
complete = list(pdf.index)

def extract(q):
    """Return the click targets recorded in a trial's query log as a list of ints."""
    targets = q['click']['state']['target']
    return [int(node) for node in targets]

# All mouselab trials of completed participants, with derived columns.
mdf = exp_data['mouselab-mdp'].query('pid == @complete').copy()
mdf['clicks'] = mdf.queries.apply(extract)  # clicked node ids per trial
mdf['n_clicks'] = mdf.clicks.apply(len)
# `get` comes from a star import; presumably get(0, default=0) returns the
# first element of `rt` or 0 when it is missing — TODO confirm.
mdf['thinking'] = mdf['rt'].apply(get(0, default=0))

# Test-block trials only, with trial_index re-based to start at 0.
tdf = mdf.query('block == "test"').copy()
tdf.trial_index -= tdf.trial_index.min()
tdf.trial_index = tdf.trial_index.astype(int)
tdf.trial_id = tdf.trial_id.astype(int)

# pdf['total_time'] = exp_data['survey'].time_elapsed / 60000

# Per-participant means; n_clicks and score use test trials only, thinking
# uses all of the participant's trials in mdf.
pdf['n_clicks'] = tdf.groupby('pid').n_clicks.mean()
pdf['score'] = tdf.groupby('pid').score.mean()
pdf['thinking'] = mdf.groupby('pid').thinking.mean()

Error importing rpy2


In [8]:
import json
def excluded_pids():
    """Return a boolean Series flagging participants to exclude.

    A participant is flagged when more than one quiz response differs from
    `correct`, or when they made no clicks at all in the "train_inspector"
    block.
    """
    sdf = exp_data['survey-multi-choice'].query('pid == @complete').copy()
    # One column per quiz question, one row per survey entry.
    sdf = pd.DataFrame(list(sdf.responses), index=sdf.index)
    correct = pd.Series(['-$10 to $10', '$1', '1 cent for every $1 you make in the game'])
    # Count mismatching answers per row; more than one mismatch fails the quiz.
    fail_quiz = (sdf != correct).sum(axis=1) > 1
    no_click = mdf.query('block == "train_inspector"').groupby('pid').n_clicks.sum() == 0
    # NOTE(review): fail_quiz keeps the survey frame's index while no_click is
    # indexed by pid; the `|` only aligns correctly if those coincide — confirm.
    return fail_quiz | no_click

exclude = excluded_pids()
# Look up each trial's exclusion flag by its pid, then drop the flagged trials
# and remove the helper column again.
tdf['exclude'] = list(exclude.loc[tdf.pid])
tdf = tdf.query('~exclude').copy().drop('exclude', axis=1)
# print(f'excluding {exclude.sum()} out of {len(exclude)} partipicants')

In [9]:
def get_env(state_rewards,branching=[3,1,2]):
    """Build the environment for one trial from its recorded state rewards.

    The reward of the root node (index 0) is forced to 0. The caller's
    ``state_rewards`` is copied first, so the DataFrame cell it came from is
    not modified as a side effect.
    """
    rewards = list(state_rewards)
    rewards[0] = 0
    return make_env(ground_truth=rewards,branching=branching)
# One environment per test trial, built from that trial's recorded rewards.
tdf['env'] = tdf.state_rewards.apply(get_env)

In [10]:
tdf.head()

Unnamed: 0,action_times,actions,block,path,queries,rewards,rt,score,simulation_mode,state_rewards,time_elapsed,trial_index,trial_time,trial_id,trial_type,pid,clicks,n_clicks,thinking,env
90,"[11467, 13434, 16274]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseout': {'edge': {'time': [], 'target': [...","[5, -5, -5]","[11465, 1450, 2324]",-6.0,"[None, None, None]","[0, 5, -10, -5, 10, 10, 5, -10, 5, 5, -5, -5, 5]",728336,0,20130.0,8034619116489218048,mouselab-mdp,1,[9],1,11465,<MouselabEnv instance>
91,"[7272, 9200, 10360]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseout': {'edge': {'time': [], 'target': [...","[10, -5, 10]","[7270, 1410, 654]",13.0,"[None, None, None]","[0, -5, 10, -5, 10, 5, 5, -10, 5, 10, -5, 10, 10]",741664,1,12248.0,38024071126860576,mouselab-mdp,1,"[5, 9]",2,7270,<MouselabEnv instance>
92,"[8883, 10283, 11587]","[right, right, up]",test,"[0, 5, 6, 7]","{'mouseout': {'edge': {'time': [], 'target': [...","[-10, -5, 5]","[8881, 894, 798]",-12.0,"[None, None, None]","[0, -10, 5, 10, 10, -10, -5, 5, 10, -10, -10, ...",755720,2,12971.0,8607347987074355200,mouselab-mdp,1,"[1, 9]",2,8881,<MouselabEnv instance>
93,"[7585, 9313, 10649]","[left, left, up]",test,"[0, 9, 10, 11]","{'mouseout': {'edge': {'time': [], 'target': [...","[5, 10, -10]","[7583, 1215, 818]",3.0,"[None, None, None]","[0, 5, -5, 10, -5, -5, 5, 10, -10, 5, 10, -10, 5]",769472,3,12665.0,4859092002678591488,mouselab-mdp,1,"[1, 5]",2,7583,<MouselabEnv instance>
94,"[3343, 4415, 5295]","[left, left, down]",test,"[0, 9, 10, 12]","{'mouseout': {'edge': {'time': [], 'target': [...","[-5, -10, 10]","[3341, 555, 369]",-5.0,"[None, None, None]","[0, -5, 5, 10, -10, 10, -10, 10, 5, -5, -10, 5...",778857,4,8295.0,4754602433487654912,mouselab-mdp,1,[],0,3341,<MouselabEnv instance>


In [11]:
j = 0
# Group the test trials by participant: pid -> list of (env, clicks) pairs,
# in the row order of tdf.
participants = dict()
for i, row in tdf.iterrows():
    participants.setdefault(row['pid'], []).append((row['env'], row['clicks']))

In [12]:
# Baseline: total log-likelihood of the data under a uniformly random policy
# (each action, including termination, drawn uniformly from the actions
# available in the current state).
# Log-probabilities are accumulated directly; multiplying the per-trial
# probabilities first can underflow to 0 (log -> -inf) for participants with
# many clicks.
sm = 0
for p in participants.keys():
    for episode in participants[p]:
        # Work on a copy so that stepping does not alter the stored environment.
        env, trial = copy.deepcopy(episode)
        for a in trial + [env.term_action]:
            sm -= np.log(len(list(env.actions(env._state))))
            env.step(a)
sm

-17289.756439310626

In [16]:
def wrap_sum_ll(p, temp, p_error):
    """Sum the DC log-likelihood over all trials of participant `p`."""
    total = 0
    for env, trial in participants[p]:
        total += dc_log_likelihood(env, trial, temp, p_error)
    return total

In [17]:
# Sanity check: with p_error = 1 the softmax term has zero weight, so the
# summed log-likelihood should match the random-policy baseline computed above.
mles = Parallel(n_jobs=60)(delayed(wrap_sum_ll)(p, 5, 1)
                           for p in participants.keys())

In [18]:
np.sum(mles)

-17289.756439310622

In [19]:
def mle(data):
    """Fit temperature and error probability to one participant's trials.

    Parameters
    ----------
    data : iterable of (env, trial) pairs.

    Returns
    -------
    dict with keys 'temp', 'p_error' (the fitted parameters) and 'logp'
    (the log-likelihood at the optimum).
    """
    # Imported explicitly: the notebook only imports scipy.stats, so relying
    # on the `scipy.optimize` attribute being present is fragile.
    from scipy.optimize import minimize

# You can adjust the temperature bounds if you think the MLE
# is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-2, 1e2),  # temp
        (0, 0.25)  # p_error
    ]
    def loss(x):
        # Negative summed log-likelihood of all trials.
        temp, p_error = x
        return -sum(dc_log_likelihood(env, trial, temp, p_error)
                    for env, trial in data)
    res = minimize(loss, (1,.01), bounds=bounds,
                   options={'maxiter':100,'disp':True})
    return {'temp': res.x[0], 'p_error': res.x[1], 'logp': -res.fun}

In [20]:
def mle_wrap(p):
    """Fit participant `p` and tag the result dict with the participant id."""
    print(p)
    fit = mle(participants[p])
    fit.update(participant=p)
    return fit

In [21]:
# Fit every participant independently; each job returns a dict with the
# fitted 'temp', 'p_error', 'logp' and the 'participant' id.
mles = Parallel(n_jobs=60)(delayed(mle_wrap)(p)
                           for p in participants.keys())

1
3
7
6
8
4
2
13
15
10
14
11
17
18
12
19
25
16
20
26
27
22
30
29
24
28
31
33
34
35
36
37
39
40
45
42
46
47
48
49
51
50
52
53
54
55
56
61
57
58
60


In [23]:
# Persist the per-participant fits. `mles` is a list of dicts, so numpy stores
# it as a pickled object array.
np.save('data/dc_exp1_mles_p3',mles)

In [24]:
# The saved fits are an object array of dicts; recent NumPy versions refuse to
# load those unless pickling is explicitly allowed. Only load files you
# produced yourself this way — unpickling untrusted data can execute code.
mles = np.load('data/dc_exp1_mles_p3.npy', allow_pickle=True)

In [25]:
# Total log-likelihood across the individually fitted participants.
logps = [fit['logp'] for fit in mles]
np.sum(logps)

-16424.973979672282

In [26]:
temps = [val['temp'] for val in mles]

In [27]:
min(temps)

0.23025944965094047

## Global

In [28]:
participants

{1: [(<mouselab.MouselabEnv at 0x2b2e2ac8dc18>, [9]),
  (<mouselab.MouselabEnv at 0x2b2e2ac8f630>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2b2e2ac8fb38>, [1, 9]),
  (<mouselab.MouselabEnv at 0x2b2e2ac3d5f8>, [1, 5]),
  (<mouselab.MouselabEnv at 0x2b2e3105e710>, []),
  (<mouselab.MouselabEnv at 0x2b2e3105ec18>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2b2e310820f0>, [1, 9]),
  (<mouselab.MouselabEnv at 0x2b2e31082588>, [9, 5]),
  (<mouselab.MouselabEnv at 0x2b2e31082a20>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2b2e31082eb8>, [9, 1]),
  (<mouselab.MouselabEnv at 0x2b2e3108d390>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2b2e3108d828>, [9, 5]),
  (<mouselab.MouselabEnv at 0x2b2e3108dcc0>, [5]),
  (<mouselab.MouselabEnv at 0x2b2e31096198>, [9, 5]),
  (<mouselab.MouselabEnv at 0x2b2e31096630>, [5]),
  (<mouselab.MouselabEnv at 0x2b2e31096ac8>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2b2e31096f60>, [5]),
  (<mouselab.MouselabEnv at 0x2b2e3109f438>, [5, 9]),
  (<mouselab.MouselabEnv at 0x2b2e3109f8d

In [29]:
# Flatten all participants' trials into one list, in participant order.
DATA = [episode for p in participants.keys() for episode in participants[p]]

In [30]:
len(DATA)

1530

In [31]:
DATA[:10]

[(<mouselab.MouselabEnv at 0x2b2e2ac8dc18>, [9]),
 (<mouselab.MouselabEnv at 0x2b2e2ac8f630>, [5, 9]),
 (<mouselab.MouselabEnv at 0x2b2e2ac8fb38>, [1, 9]),
 (<mouselab.MouselabEnv at 0x2b2e2ac3d5f8>, [1, 5]),
 (<mouselab.MouselabEnv at 0x2b2e3105e710>, []),
 (<mouselab.MouselabEnv at 0x2b2e3105ec18>, [5, 9]),
 (<mouselab.MouselabEnv at 0x2b2e310820f0>, [1, 9]),
 (<mouselab.MouselabEnv at 0x2b2e31082588>, [9, 5]),
 (<mouselab.MouselabEnv at 0x2b2e31082a20>, [5, 9]),
 (<mouselab.MouselabEnv at 0x2b2e31082eb8>, [9, 1])]

In [32]:
def dc_idx_log_likelihood(idx, temp, p_error):
    """Log-likelihood of the trial stored at position `idx` of the global DATA list."""
    episode = DATA[idx]
    parse_result = wrap_po(episode[0], episode[1], t=temp, p_rand=p_error)
    return np.log(np.sum(parse_result[2]))

In [37]:
# Time one full pass over all trials (one job per trial). p_error = 1 again
# reduces the model to the random policy, so the sum can be checked against
# the baseline.
t = time.time()
mles = Parallel(n_jobs=200)(delayed(dc_idx_log_likelihood)(data_idx, 5, 1)
                           for data_idx in range(len(DATA)))
elapsed = time.time() - t 
print(elapsed)

26.081668615341187


In [38]:
np.sum(mles)

-17289.756439310622

In [41]:
def mle_global():
    """Fit a single (temperature, p_error) pair to all trials in DATA.

    Each loss evaluation computes the per-trial log-likelihoods in parallel.

    Returns
    -------
    dict with keys 'temp', 'p_error' and 'logp' (log-likelihood at the optimum).
    """
    # Imported explicitly: the notebook only imports scipy.stats.
    from scipy.optimize import minimize

# You can adjust the temperature bounds if you think the MLE
# is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-2, 1e2),  # temp
        (0, 0.25)  # p_error
    ]
    # The optimiser must run inside the `with` block: once the block exits the
    # managed worker pool is shut down, so calling `loss` afterwards would not
    # reuse it.
    with Parallel(200) as parallel:
        def loss(x):
            temp, p_error = x
            jobs = (delayed(dc_idx_log_likelihood)(data_idx, temp, p_error)
                    for data_idx in range(len(DATA)))
            return -sum(parallel(jobs))
        res = minimize(loss, (1,.01), bounds=bounds,
                       options={'maxiter':100,'disp':True})
    return {'temp': res.x[0], 'p_error': res.x[1], 'logp': -res.fun}

In [42]:
results = mle_global()

In [44]:
results

{'logp': -17003.66884203937, 'p_error': 0.25, 'temp': 1.4061493262821758}

## Experiment 2

In [25]:
def wrap_po(env,click_sequence,t=1,p_rand=0,branching=[3,1,2],scaling_factors=[1, 1, 1]):
    """Parse a click sequence into directed-cognition options and score every parse.

    The terminal action ``env.term_action`` is appended to ``click_sequence``
    and every way of splitting the result into consecutive option
    instantiations (as reported by ``get_all_options``) is enumerated.

    Parameters
    ----------
    env : environment exposing ``ground_truth`` and ``term_action``.
    click_sequence : list of clicks made in the trial (without the terminal action).
    t : softmax temperature applied to the option utilities.
    p_rand : weight of the random-move component of the mixture.
    branching : branching structure forwarded to ``make_env``.
    scaling_factors : per-depth reward scaling forwarded to ``make_env``.

    Returns
    -------
    (done, option_seqs, likelihoods)
        ``done`` is True when at least one complete parse exists,
        ``option_seqs`` lists the option sequence of each parse and
        ``likelihoods`` holds the matching likelihood of each parse.

    Notes
    -----
    The error term uses ``alpha = 1`` for the random option ``(-1, 1)`` and
    ``0`` otherwise; each option probability is divided by the number of
    instantiations of that option.
    """
    init_state = env.ground_truth
    # t, p_rand and init_state are fixed for the whole call, so the memo only
    # needs to be keyed on the clicks already replayed and those still to parse.
    memo = dict()

    def option_probabilities(option_utils):
        # Max-shifted softmax: identical in exact arithmetic to
        # exp(u/t) / sum(exp(u/t)) but does not overflow to inf/inf = nan
        # when u/t is large (e.g. small temperatures or scaled rewards).
        utils = np.asarray(option_utils, dtype=float)
        weights = np.exp((utils - np.max(utils)) / t)
        return weights / np.sum(weights)

    def parse_options_clean(pre_acts, clicks):
        if clicks == []:
            # Nothing left to explain: one empty parse with likelihood 1.
            return True, [[]], [1]
        key = (tuple(pre_acts), tuple(clicks))
        if key in memo:
            return memo[key]

        # Rebuild the environment and replay the clicks made so far.
        envc = make_env(ground_truth=init_state, branching=branching,
                        scaling_factors=scaling_factors)
        envc.reset()
        for a in pre_acts:
            envc._step(a)

        option_seqs = []
        likelihoods = []
        done = False
        options, option_insts, option_utils, _n_available_clicks = get_all_options(envc)
        softmax = None  # computed once per state, on the first matching option
        max_len = min(len(envc.paths[0]), len(clicks))

        for i, j in product(range(1, max_len + 1), range(len(options))):
            option = options[j]
            insts = option_insts[option]
            n_insts = len(insts)
            for inst in insts:
                if not np.array_equal(clicks[:i], inst):
                    continue
                will_done, remaining, rem_likelihoods = parse_options_clean(
                    pre_acts + clicks[:i], clicks[i:])
                if not will_done:
                    # A failed suffix contributes no parses.
                    continue
                done = True
                if softmax is None:
                    softmax = option_probabilities(option_utils)
                alpha = 1 if option == (-1, 1) else 0
                l_option = (1 - p_rand) * softmax[j] + p_rand * alpha
                for k in range(len(remaining)):
                    option_seqs.append([option] + remaining[k])
                    likelihoods.append(l_option * rem_likelihoods[k] / n_insts)

        memo[key] = done, option_seqs, likelihoods
        return done, option_seqs, likelihoods

    return parse_options_clean([], click_sequence + [env.term_action])

In [26]:
from mouselab import MouselabEnv
from distributions import Categorical, Normal

def make_env(mu=0, sigma=4, branching=[3,1,2], cost=1.00, scaling_factors=[1, 1, 1], seed=None, **kwargs):
    """Build a symmetric MouselabEnv whose reward spread depends on depth.

    Nodes at depth d > 0 get a Categorical reward over
    ``mu + sigma * [-2, -1, 1, 2] * scaling_factors[d-1]`` with ``round``
    applied; the root (depth 0) has reward 0. ``seed``, when given, seeds
    numpy's global RNG. Extra keyword arguments are forwarded to
    ``MouselabEnv.new_symmetric``.
    """
    if seed is not None:
        np.random.seed(seed)

    def reward(depth):
        if not depth > 0:
            return 0.
        outcomes = np.array([-2, -1, 1, 2])
        scaled = mu + sigma * outcomes * scaling_factors[depth - 1]
        return Categorical(scaled).apply(round)

    return MouselabEnv.new_symmetric(branching, reward, cost=cost, **kwargs)

In [27]:
from analysis_utils import *
# Experiment version tag passed to get_data together with the data directory.
VERSION = 'c2.1'
exp_data = get_data(VERSION, '../experiment/data')

# Completed participants, indexed by pid (the pid column is kept as well).
pdf = exp_data['participants']
pdf = pdf.loc[pdf.completed].set_index('pid', drop=False)
# Translate the raw variance codes into condition names; datasets without a
# variance column are treated as a single 'constant' condition.
if 'variance' in pdf:
    pdf.variance = pdf.variance.replace('24_4_2', 'decreasing').replace('2_4_24', 'increasing')
else:
    pdf['variance'] = 'constant'

# print(f'{len(pdf)} participants')
complete = list(pdf.index)

def extract(q):
    """Return the click targets recorded in a trial's query log as a list of ints."""
    click_targets = q['click']['state']['target']
    return [int(target) for target in click_targets]

# All mouselab trials, indexed by pid and restricted to completed participants.
mdf = exp_data['mouselab-mdp'].set_index('pid', drop=False)
# mdf = exp_data['mouselab-mdp']
mdf = mdf.loc[complete]

mdf['clicks'] = mdf.queries.apply(extract)  # clicked node ids per trial
mdf['n_clicks'] = mdf.clicks.apply(len)
# `get` comes from a star import; presumably get(0, default=0) returns the
# first element of `rt` or 0 when it is missing — TODO confirm.
mdf['thinking'] = mdf['rt'].apply(get(0, default=0))
# Aligned on the pid index: every trial inherits its participant's condition.
mdf['variance'] = pdf['variance']

# Test-block trials only, with trial_index re-based to start at 0.
tdf = mdf.query('block == "test"').copy()
tdf.trial_index -= tdf.trial_index.min()
tdf.trial_index = tdf.trial_index.astype(int)
tdf.trial_id = tdf.trial_id.astype(int)

# time_elapsed is divided by 60000 (presumably milliseconds -> minutes — TODO confirm).
pdf['total_time'] = exp_data['survey-text'].time_elapsed / 60000
pdf['n_clicks'] = tdf.groupby('pid').n_clicks.mean()
pdf['score'] = tdf.groupby('pid').score.mean()
pdf['thinking'] = mdf.groupby('pid').thinking.mean()

Defaulting to column but this will raise an ambiguity error in a future version
Defaulting to column but this will raise an ambiguity error in a future version
Defaulting to column but this will raise an ambiguity error in a future version


In [28]:
import json
def excluded_pids():
    """Return a boolean Series flagging participants to exclude.

    A participant is flagged when more than one quiz response differs from
    the modal response of their variance condition, or when they made no
    clicks at all in the "train_inspector" block.
    """
    sdf = exp_data['survey-multi-choice'].query('pid == @complete').set_index('pid')
    # One column per quiz question, one row per participant.
    responses = pd.DataFrame(list(sdf.responses), index=sdf.index)
    # The "correct" answers are not hard-coded: they are taken to be the most
    # common response to each question within each variance condition.
    grp = responses.groupby(lambda pid: pdf.variance[pid])
    correct = grp.apply(lambda x: x.mode().iloc[0])
    # Expand the per-condition answers to one row per participant and compare.
    errors = correct.loc[pdf.variance].set_index(pdf.index) != responses
    fail_quiz = errors.sum(1) > 1

    no_click = mdf.query('block == "train_inspector"').groupby('pid').n_clicks.sum() == 0
    return fail_quiz | no_click

excluded = excluded_pids()
# tdf is indexed by pid, so the boolean mask selects the trials of every
# participant that is not excluded.
tdf = tdf.loc[~excluded]
# print(f'excluding {excluded.sum()} out of {len(excluded)} partipicants')

Defaulting to column but this will raise an ambiguity error in a future version
  # Remove the CWD from sys.path while we load stuff.


In [29]:
# Reward scaling factors for each variance condition; make_env indexes them
# with depth-1, so entry 0 applies to nodes at depth 1, and so on.
SCALING = {
    'increasing': [1/2, 1, 6],
    'decreasing': [6, 1, 1/2]
}

In [30]:
def get_env(row):
    """Build the environment for one trial row.

    Uses the row's recorded state rewards as ground truth (with the root
    reward at index 0 forced to 0) and the scaling factors of the row's
    variance condition. The rewards are copied first, so the DataFrame cell
    they came from is not modified as a side effect.
    """
    rewards = list(row.state_rewards)
    rewards[0] = 0
    return make_env(scaling_factors=SCALING[row.variance],
                    ground_truth=rewards)
# One environment per test trial, built row by row.
tdf['env'] = tdf.apply(get_env, axis=1)

In [31]:
tdf.head()

Unnamed: 0_level_0,action_times,actions,block,path,queries,rewards,rt,score,simulation_mode,state_rewards,...,trial_index,trial_time,trial_id,trial_type,pid,clicks,n_clicks,thinking,variance,env
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,"[14748, 15862, 17187]","[left, left, up]",test,"[0, 9, 10, 11]","{'click': {'edge': {'time': [], 'target': []},...","[-48, 8, -2]","[14746, 590, 809]",-46.0,"[None, None, None]","[0, -48, 4, -2, 4, -48, -8, -2, 4, -48, 8, -2,...",...,0,18927.0,2045193736830000128,mouselab-mdp,0,"[5, 9, 6, 10]",4,14746,decreasing,<MouselabEnv instance>
0,"[3527, 4321, 5260]","[left, left, up]",test,"[0, 9, 10, 11]","{'click': {'edge': {'time': [], 'target': []},...","[48, 4, 2]","[3525, 268, 409]",52.0,"[None, None, None]","[0, 24, 4, -2, 4, 24, -8, -2, 4, 48, 4, 2, 2]",...,1,8684.0,-8875376664199999488,mouselab-mdp,0,"[5, 9]",2,3525,decreasing,<MouselabEnv instance>
0,"[4544, 5403, 6581]","[left, left, down]",test,"[0, 9, 10, 12]","{'click': {'edge': {'time': [], 'target': []},...","[-24, -8, -4]","[4543, 328, 652]",-38.0,"[None, None, None]","[0, 48, -8, -2, -2, -48, 8, -2, 2, -24, -8, -4...",...,2,9475.0,-7461013785340000256,mouselab-mdp,0,"[5, 9]",2,4543,decreasing,<MouselabEnv instance>
0,"[2789, 3497, 4189]","[right, right, up]",test,"[0, 5, 6, 7]","{'click': {'edge': {'time': [], 'target': []},...","[48, -8, 4]","[2788, 185, 172]",42.0,"[None, None, None]","[0, 24, -4, -2, -4, 48, -8, 4, -4, 24, -8, 4, -4]",...,3,9571.0,8449844250360000512,mouselab-mdp,0,"[5, 9]",2,2788,decreasing,<MouselabEnv instance>
0,"[4085, 4761, 5677]","[left, left, up]",test,"[0, 9, 10, 11]","{'click': {'edge': {'time': [], 'target': []},...","[48, -4, 4]","[4084, 159, 395]",46.0,"[None, None, None]","[0, 48, -8, 4, 2, -24, -4, 2, -2, 48, -4, 4, 4]",...,4,9995.0,1644612169790000128,mouselab-mdp,0,"[5, 9]",2,4084,decreasing,<MouselabEnv instance>


In [32]:
j = 0
# Group the test trials by participant:
# pid -> list of (env, clicks, scaling factors) triples, in the row order of tdf.
participants = dict()
for i, row in tdf.iterrows():
    episode = (row['env'], row['clicks'], SCALING[row['variance']])
    participants.setdefault(row['pid'], []).append(episode)

In [33]:
# Spot check on one trial (participant 0, seventh trial): parse it and print
# the log of the summed parse likelihoods.
env, trial, scaling = participants[0][6]
a,b,c = wrap_po(env,trial,t=2,p_rand=0.01,scaling_factors=scaling)
print(np.log(sum(c)))

-7.369432374229789


In [34]:
def dc_scaling_log_likelihood(env, trial, temp, p_error, scaling):
    """Log-likelihood of one trial under the given per-depth reward scaling."""
    parse_result = wrap_po(env, trial, t=temp, p_rand=p_error, scaling_factors=scaling)
    total_likelihood = np.sum(parse_result[2])
    return np.log(total_likelihood)

In [35]:
dc_scaling_log_likelihood(env, trial, 0.44168330742195183, 0.25, scaling)

-6.341585044284749

In [36]:
# Summed log-likelihood of participant 0's trials at temp = 0.4416..., p_error = 0.25.
sum(dc_scaling_log_likelihood(env, trial, 0.44168330742195183, 0.25, scaling)
                    for env,trial,scaling in participants[0])

-197.86969184362766

In [37]:
def wrap_scaling_sum_ll(p, temp, p_error):
    """Sum the scaled DC log-likelihood over all trials of participant `p`."""
    total = 0
    for env, trial, scaling in participants[p]:
        total += dc_scaling_log_likelihood(env, trial, temp, p_error, scaling)
    return total

In [38]:
wrap_scaling_sum_ll(0,0.44168330742195183,0.25)

-197.86969184362766

In [119]:
def mle(data):
    """Fit temperature and error probability to one participant's trials.

    Parameters
    ----------
    data : iterable of (env, trial, scaling) triples.

    Returns
    -------
    dict with keys 'temp', 'p_error' (the fitted parameters) and 'logp'
    (the log-likelihood at the optimum).
    """
    # Imported explicitly: the notebook only imports scipy.stats, so relying
    # on the `scipy.optimize` attribute being present is fragile.
    from scipy.optimize import minimize

# You can adjust the temperature bounds if you think the MLE
# is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-1, 1e2),  # temp
        (0, 0.25)  # p_error
    ]
    def loss(x):
        # Negative summed log-likelihood of all trials.
        temp, p_error = x
        return -sum(dc_scaling_log_likelihood(env, trial, temp, p_error, scaling)
                    for env, trial, scaling in data)
    res = minimize(loss, (1,.01), bounds=bounds,
                   options={'maxiter':100,'disp':True})
    return {'temp': res.x[0], 'p_error': res.x[1], 'logp': -res.fun}

In [120]:
def mle_wrap(p):
    """Fit participant `p` and tag the result dict with the participant id."""
    print(p)
    fit = mle(participants[p])
    fit.update(participant=p)
    return fit

In [121]:
[mle_wrap(p)for p in [0]]

0


[{'logp': -197.86969184362766,
  'p_error': 0.25,
  'participant': 0,
  'temp': 0.44168330742195183}]

In [122]:
# Sequential version, kept for reference:
# mles = dict()
# for p in participants.keys():
#     print(p)
#     data = participants[p]
#     mles[p] = mle(data)
# Parallel version: fit every participant independently, one job each.
mles = Parallel(n_jobs=60)(delayed(mle_wrap)(p)
                           for p in participants.keys())

0
7
3
4
5
15
9
11
13
8
19
22
20
24
17
21
23
28
29
30
26
27
31
41
33
32
35
38
36
46
39
44
47
53
61
56
50
55
54
57
60
58
63
67
42
65
69
66
62
71
72
68
70


In [123]:
# Persist the per-participant fits. `mles` is a list of dicts, so numpy stores
# it as a pickled object array.
np.save('data/dc_exp2_mles_p5',mles)

In [124]:
# The saved fits are an object array of dicts; recent NumPy versions refuse to
# load those unless pickling is explicitly allowed. Only load files you
# produced yourself this way — unpickling untrusted data can execute code.
mles = np.load('data/dc_exp2_mles_p5.npy', allow_pickle=True)

In [125]:
# Total log-likelihood across the individually fitted participants.
logps = [fit['logp'] for fit in mles]
np.sum(logps)

-12469.805447303559

In [126]:
temps = [val['temp'] for val in mles]

In [127]:
min(temps)

0.09999999999999964

## Global

In [39]:
participants

{0: [(<mouselab.MouselabEnv at 0x2b1f5f6ace80>, [5, 9, 6, 10], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f5f5b8b00>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f5f5f3e10>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f5f6bcb38>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f86b6ab00>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f5f6bc128>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f86b6ada0>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c4e0f0>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c23f60>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c23780>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c232e8>, [9, 5], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c230b8>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c12a20>, [5], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c12630>, [5, 9], [6, 1, 0.5]),
  (<mouselab.MouselabEnv at 0x2b1f52c12198>, [5, 9], [6

In [40]:
# Flatten all participants' trials into one list, in participant order.
DATA = [episode for p in participants.keys() for episode in participants[p]]

In [41]:
len(DATA)

1590

In [42]:
DATA[:10]

[(<mouselab.MouselabEnv at 0x2b1f5f6ace80>, [5, 9, 6, 10], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f5f5b8b00>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f5f5f3e10>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f5f6bcb38>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f86b6ab00>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f5f6bc128>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f86b6ada0>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f52c4e0f0>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f52c23f60>, [5, 9], [6, 1, 0.5]),
 (<mouselab.MouselabEnv at 0x2b1f52c23780>, [5, 9], [6, 1, 0.5])]

In [43]:
def dc_scaling_idx_log_likelihood(idx, temp, p_error):
    """Log-likelihood of the trial stored at position `idx` of the global DATA list."""
    episode = DATA[idx]
    parse_result = wrap_po(episode[0], episode[1], t=temp, p_rand=p_error,
                           scaling_factors=episode[2])
    return np.log(np.sum(parse_result[2]))

In [44]:
# CPU time of a single-trial likelihood evaluation, called with the trial's
# data passed in directly.
t = time.process_time()
env, trial, scaling = participants[0][0]
print(dc_scaling_log_likelihood(env, trial, 2, 0.01, scaling))
elapsed =time.process_time() - t 
print(elapsed)

-23.37131197884898
0.0663213560000031


In [45]:
# Same trial evaluated through the index-based wrapper; the printed value
# should match the direct call.
t = time.process_time()
print(dc_scaling_idx_log_likelihood(0, 2, 0.01))
elapsed =time.process_time() - t 
print(elapsed)

-23.37131197884898
0.06538293100000203


In [46]:
# Baseline: total log-likelihood of the data under a uniformly random policy
# (each action, including termination, drawn uniformly from the actions
# available in the current state).
# Log-probabilities are accumulated directly; multiplying the per-trial
# probabilities first can underflow to 0 (log -> -inf) for participants with
# many clicks.
sm = 0
for p in participants.keys():
    for episode in participants[p]:
        # Work on a copy so that stepping does not alter the stored environment.
        env, trial, scaling = copy.deepcopy(episode)
        for a in trial + [env.term_action]:
            sm -= np.log(len(list(env.actions(env._state))))
            env.step(a)
sm

-14647.59716944564

In [47]:
# Time one full pass with one job per trial. With p_error = 1 the softmax term
# has zero weight, so the sum should match the random-policy baseline.
t = time.time()
mles = Parallel(n_jobs=200)(delayed(dc_scaling_idx_log_likelihood)(data_idx, 5, 1)
                           for data_idx in range(len(DATA)))
elapsed = time.time() - t 
print(elapsed)

4.349771976470947


In [48]:
np.sum(mles)

-14647.597169445642

In [49]:
# Same check with one job per participant instead of one job per trial.
t = time.time()
mles = Parallel(n_jobs=60)(delayed(wrap_scaling_sum_ll)(p, 5, 1)
                           for p in participants.keys())
elapsed = time.time() - t 
print(elapsed)

3.881887197494507


In [50]:
np.sum(mles)

-14647.59716944564

In [51]:
def mle_global(checkpoint_path='data/dc_exp2_result2.npy'):
    """Fit a single (temperature, p_error) pair to all trials in DATA.

    Each loss evaluation computes the per-trial log-likelihoods in parallel.
    Whenever an evaluation beats the loss stored in ``checkpoint_path``, the
    triple ``[temp, p_error, loss]`` is written there, so the best point seen
    so far survives an interrupted run.

    Parameters
    ----------
    checkpoint_path : path of the ``.npy`` checkpoint file. A missing file is
        treated as "no best value yet".

    Returns
    -------
    dict with keys 'temp', 'p_error' and 'logp' (log-likelihood at the optimum).
    """
    # Imported explicitly: the notebook only imports scipy.stats.
    from scipy.optimize import minimize

# You can adjust the temperature bounds if you think the MLE
# is not in the bounds below. Don't change the p_error bounds.
    bounds = [
        (1e-1, 1e2),  # temp
        (0, 0.25)  # p_error
    ]

    def best_loss_on_disk():
        # Smallest loss recorded so far; +inf when nothing has been saved yet.
        try:
            return np.load(checkpoint_path)[2]
        except FileNotFoundError:
            return np.inf

    # The optimiser must run inside the `with` block: once the block exits the
    # managed worker pool is shut down, so calling `loss` afterwards would not
    # reuse it.
    with Parallel(200) as parallel:
        def loss(x):
            temp, p_error = x
            jobs = (delayed(dc_scaling_idx_log_likelihood)(data_idx, temp, p_error)
                    for data_idx in range(len(DATA)))
            tot = -sum(parallel(jobs))
            if best_loss_on_disk() > tot:
                np.save(checkpoint_path, [temp, p_error, tot])
            return tot
        res = minimize(loss, (1,.01), bounds=bounds,
                       options={'maxiter':100,'disp':True})
    return {'temp': res.x[0], 'p_error': res.x[1], 'logp': -res.fun}

In [53]:
# Reset the checkpoint that mle_global() reads and updates
# ('data/dc_exp2_result2.npy'). The stored value is a loss to be minimised, so
# it is seeded with +inf: the first evaluated loss then always replaces it.
np.save('data/dc_exp2_result2',[1,.01,np.inf])
results = mle_global()

In [54]:
results

{'logp': -12668.14942530896, 'p_error': 0.25, 'temp': 0.4411434913930282}

## Behavior Analysis

In [55]:
# Per-trial log-likelihoods at temp = 0.1 with an almost-zero error
# probability, used below to rank the trials.
mles = Parallel(n_jobs=200)(delayed(dc_scaling_idx_log_likelihood)(data_idx, 0.1, 0.00000001)
                           for data_idx in range(len(DATA)))

In [56]:
# Indices of the 10 trials with the highest log-likelihood.
examples = np.argsort(mles)[::-1][:10]

In [57]:
# Show each selected trial next to the output of pick_option_moves for its
# environment (presumably the clicks the DC model itself would choose —
# TODO confirm against dc_util).
for ex in examples:
    print(DATA[ex])
    print(pick_option_moves(DATA[ex][0]))

(<mouselab.MouselabEnv object at 0x2b1f528ff240>, [5, 1, 9, 6, 2, 3], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2b1f869b3fd0>, [5, 1, 9, 2, 10, 11, 12], [6, 1, 0.5])
[1]
(<mouselab.MouselabEnv object at 0x2b1f867992b0>, [5, 6], [6, 1, 0.5])
[1]
(<mouselab.MouselabEnv object at 0x2b1f867a2550>, [1, 2], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2b1f51fb9898>, [1, 9, 5, 6], [6, 1, 0.5])
[9]
(<mouselab.MouselabEnv object at 0x2b1f51fbe198>, [9, 1, 5, 6], [6, 1, 0.5])
[5]
(<mouselab.MouselabEnv object at 0x2b1f528f4358>, [5, 9, 1, 2], [6, 1, 0.5])
[1]
(<mouselab.MouselabEnv object at 0x2b1f51b34940>, [5, 1, 9, 10], [6, 1, 0.5])
[5]
(<mouselab.MouselabEnv object at 0x2b1f525317f0>, [5, 9, 1, 2], [6, 1, 0.5])
[5]
(<mouselab.MouselabEnv object at 0x2b1f51fcc048>, [5, 1, 9, 10], [6, 1, 0.5])
[5]


In [58]:
# Inspect a fresh environment of the 'increasing' condition: print what
# get_all_options and pick_option_moves return for it.
for i in range(1):
    env = make_env(scaling_factors=SCALING['increasing'])
    print(get_all_options(env))
    print(pick_option_moves(env))

([(0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3), (4, 1), (4, 2), (4, 3), (5, 1), (5, 2), (5, 3), (-1, 1), (-99, 1)], {(0, 1): [[3]], (1, 2): [[4, 2]], (3, 2): [[8, 6]], (1, 3): [[4, 2, 1]], (3, 3): [[8, 6, 5]], (5, 2): [[12, 10]], (3, 1): [[8]], (-1, 1): [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13]], (2, 1): [[7]], (1, 1): [[4]], (-99, 1): [[13]], (2, 3): [[7, 6, 5]], (4, 3): [[11, 10, 9]], (2, 2): [[7, 6]], (5, 1): [[12]], (4, 2): [[11, 10]], (0, 3): [[3, 2, 1]], (4, 1): [[11]], (0, 2): [[3, 2]], (5, 3): [[12, 10, 9]]}, array([14.139, 13.348, 12.399, 14.139, 13.348, 12.399, 14.139, 13.348, 12.399, 14.139, 13.348, 12.399, 14.139, 13.348, 12.399, 14.139, 13.348, 12.399,   -inf,  0.   ]), 13)
[3]


In [59]:
# The 10 trials with the lowest log-likelihood, printed the same way.
examples = np.argsort(mles)[:10]
for ex in examples:
    print(DATA[ex])
    print(pick_option_moves(DATA[ex][0]))

(<mouselab.MouselabEnv object at 0x2b1f521a8f28>, [1, 2, 3, 4, 9, 10, 11, 12, 5, 6, 7], [0.5, 1, 6])
[12]
(<mouselab.MouselabEnv object at 0x2b1f521c5b38>, [5, 6, 7, 9, 10, 11, 12, 1, 2, 3, 4], [0.5, 1, 6])
[12]
(<mouselab.MouselabEnv object at 0x2b1f5251cf28>, [5, 6, 8, 7, 9, 10, 11, 12, 1, 2, 3], [0.5, 1, 6])
[7]
(<mouselab.MouselabEnv object at 0x2b1f5251f240>, [5, 9, 10, 11, 12, 1, 2, 3, 4, 6, 7, 8], [0.5, 1, 6])
[7]
(<mouselab.MouselabEnv object at 0x2b1f521a07f0>, [5, 6, 8, 7, 9, 10, 11, 12, 1, 2, 3, 4], [0.5, 1, 6])
[12]
(<mouselab.MouselabEnv object at 0x2b1f521c56a0>, [5, 6, 7, 9, 10, 11, 12, 1, 2, 3, 4], [0.5, 1, 6])
[7]
(<mouselab.MouselabEnv object at 0x2b1f521b3be0>, [9, 10, 12, 11, 5, 6, 7], [0.5, 1, 6])
[8]
(<mouselab.MouselabEnv object at 0x2b1f521c5208>, [9, 5, 1, 2, 3, 4, 10, 11, 12, 6, 7], [0.5, 1, 6])
[8]
(<mouselab.MouselabEnv object at 0x2b1f5237f400>, [5, 6, 8, 7, 9, 10, 11], [0.5, 1, 6])
[4]
(<mouselab.MouselabEnv object at 0x2b1f521ae9e8>, [1, 2, 9, 10, 5, 6, 8