In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import itertools as it
from collections import Counter, defaultdict, deque
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats
import itertools
sns.set_style('white')
sns.set_context('notebook', font_scale=1.3)

from agents import Agent
from evaluation import get_util
# from model_utils import *

In [2]:
from mouselab import MouselabEnv
from distributions import Categorical, Normal

def make_envs(cost=1.00, n=100, seed=None,variance_structure="constant_high",branching=[4,1,2]):
    if seed is not None:
        np.random.seed(seed)
    
    depth = len(branching)
    
    if variance_structure is "constant_high":
        sigmas = np.concatenate( (np.array([0]),20*np.ones(depth)))
    if variance_structure is "increasing":
        sigmas = [0, 2, 4, 20]
    if variance_structure is "decreasing":
        sigmas = [0,20,4,2]
    if variance_structure is "constant_low":
        sigmas = np.concatenate( (np.array([0]),3*np.ones(depth)))
        
    def reward(depth):
        if depth > 0:
            return Normal(0, sigmas[depth]).to_discrete(6)
        return 0.

    envs = [MouselabEnv.new_symmetric(branching, reward)
            for _ in range(n)]
    for env in envs:
        env.cost=-cost
    
    return envs

In [3]:
cost = 1
envs = make_envs(cost,10,None,"increasing",branching=[2,2])
env = envs[0]

In [4]:
env.paths

[[1, 2], [1, 3], [4, 5], [4, 6]]

In [5]:
env._state

(0, Cat, Cat, Cat, Cat, Cat, Cat)

In [6]:
env.tree

[[1, 4], [2, 3], [], [], [5, 6], [], []]

In [7]:
# fancy initialization function, but doesn't seem necessary
# simpler version built into the class
def build_path(r):   
    if env.tree[r] == []:
        return ([[]], [0])
    paths = []
    path_moves = []
    for n in env.tree[r]:
        new_paths, new_path_moves = build_path(n)
        for i in range(len(new_paths)):
            new_paths[i].insert(0,n)
            new_move = int(hasattr(env._state[n],'sample'))
            path_moves.append(new_path_moves[i]+new_move)
            paths.append(new_paths[i])
    return (paths,path_moves)

In [8]:
def option_util(x,sigma):
    return sigma*scipy.stats.norm.pdf(x/sigma) - np.abs(x)*scipy.stats.norm.cdf(-np.abs(x)/sigma)

In [9]:
def get_all_options(env):
    paths = env.paths #list of all paths
    avail_moves = [0,]*len(paths) #list of moves available in each path
    path_obs = [] #value of observed nodes in each path
    path_nodes = [] #the unobserved nodes of each path
    path_stds = [] #the std deviation of the unobserved nodes of each path
    
    options = [] #list of all options
    option_utils = [] #list of the utility of each option
    
    for i in range(len(paths)):
        stds = []
        nodes = []
        obs = 0
        
        for node in paths[i]:
            if hasattr(env._state[node],'sample'):
                stds.append(env._state[node].var())
                nodes.append(node)
                avail_moves[i] += 1
            else:
                obs += env._state[node]
                
        path_obs.append(obs)
        path_stds.append(stds)
        path_nodes.append(nodes)
        
        for j in range(avail_moves[i]):
            options.append((i,j+1))
    max_obs = np.max(path_obs)
    
    for option in options:
        path, obs = option
        option_utils.append(option_util(path_obs[path]-max_obs,np.sqrt(np.sum(path_stds[path][:obs]))) + obs*env.cost)
    
    return options, option_utils, path_nodes, path_stds, path_obs, avail_moves

In [10]:
def pick_option_moves(env):
    options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(env)
    
    #c is for chosen
    cpath, cobs = options[np.random.choice(np.arange(len(options))[option_utils == np.max(option_utils)])]
    cpath_stds = np.array(path_stds[cpath])[:cobs]
    cpath_nodes = np.array(path_nodes[cpath])[:cobs]
    b = np.random.random(cpath_nodes.size)
    
    return cpath_nodes[np.lexsort((b,cpath_stds))]

In [11]:
import time
t = time.process_time()
for i in range(1000):
#     print(get_options(env))
    pick_option_moves(env)
elapsed =time.process_time() - t 
print(elapsed)

8.827519306000001


In [12]:
def all_option_insts(path_nodes,path_stds,n_obs):
    insts = [[]]
    n_remaining_obs = n_obs

    vals, inverse, count = np.unique(path_stds, return_inverse=True,
                                  return_counts=True)
    rows, cols = np.where(inverse == np.arange(len(vals))[:, np.newaxis])
    _, inverse_rows = np.unique(rows, return_index=True)
    res = np.split(cols, inverse_rows[1:])

    for i in range(len(res)):
        new_insts = []

        n_new_nodes = len(res[-i-1])
        if n_new_nodes < n_remaining_obs:
            n_remaining_obs -= n_new_nodes
        else:
            n_new_nodes = n_remaining_obs
            n_remaining_obs = 0  

        for new_nodes in itertools.permutations(res[-i-1],n_new_nodes):
            for inst in insts:
                new_insts.append(inst + list(np.array(path_nodes)[list(new_nodes)]))
        insts = new_insts
        if n_remaining_obs == 0:
            break
            
    return insts

In [13]:
path_stds= [10.00,10.00,9,8.0,8,7,7,6]
path_nodes = np.arange(len(path_stds))
all_option_insts(path_nodes,path_stds,6)

[[0, 1, 2, 3, 4, 5],
 [1, 0, 2, 3, 4, 5],
 [0, 1, 2, 4, 3, 5],
 [1, 0, 2, 4, 3, 5],
 [0, 1, 2, 3, 4, 6],
 [1, 0, 2, 3, 4, 6],
 [0, 1, 2, 4, 3, 6],
 [1, 0, 2, 4, 3, 6]]

In [18]:
#reorganize using modularity
#try nested function (helper)
def parse_options(env,click_sequence,t=1,p_rand=0.0001):
    if click_sequence == []:
        return True, [[]], [1]
    option_insts = dict() #list of all possible option instantiations
    option_seqs = []
    likelihoods = []
    done = False
    
    paths = env.paths
    options, option_utils, path_nodes, path_stds, path_obs, avail_moves = get_all_options(env)
    
    for option in options:
        path, obs = option
        option_insts[option] = all_option_insts(path_nodes[path],path_stds[path],obs)
        
    #single click options
    sc_opt = (-1,1)
    options.append(sc_opt)
    option_utils.append(-np.inf)
    option_insts[sc_opt] = [[a] for a in env.actions(env._state)]
        
    for i in range(1,min(len(paths[0]),len(click_sequence))+1):  
        for j in range(len(options)):
            option = options[j]
            for inst in option_insts[option]:            
                if np.array_equal(click_sequence[:i],inst):
                    
                    copy_env = copy.deepcopy(env)                   
                    for a in click_sequence[:i]:
                        copy_env._step(a)
                        
                    will_done, remaining, rem_likelihoods = parse_options(copy_env,click_sequence[i:])
                    done = done or will_done
                    
                    if done:
                        for k in range(len(remaining)): 
                            latter = remaining[k]
                            option_seqs.append([option]+latter)
                            l_opt_seq = (1-p_rand)*np.exp(1/t*option_utils[j])/np.sum(1/t*np.exp(option_utils))*rem_likelihoods[k]
                            l_opt_seq += p_rand*1/len(options)
                            likelihoods.append(l_opt_seq)
    
    return done, option_seqs, likelihoods

In [19]:
cost = 1
envs = make_envs(cost,10,None,"increasing",branching=[2,2])
env = envs[0]

In [20]:
list(env.actions(env._state))

[1, 2, 3, 4, 5, 6, 7]

In [21]:
click_seq = [4,5,1,3]
done,option_seqs,likelihoods = parse_options(env,click_seq)
option_seqs

NameError: name 'copy' is not defined

In [312]:
np.exp(np.log(np.sum(likelihoods))/len(click_seq))

0.081649658092772609

In [294]:
def dc_model_ave(env,click_seq,t=1,p_rand=0.0001):
    done,option_seqs,likelihoods = parse_options(env,click_seq,t,p_rand)
    return np.exp(np.log(np.sum(likelihoods))/len(click_seq))

In [295]:
dc_model_ave(env,click_seq)

0.3690103614353254