In [None]:
import importlib

import matplotlib.pyplot as plt
import numpy as np
rng = np.random.default_rng(seed=12345)
from tqdm import tqdm

import agents
import users
import utils

# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel. The trailing `;` keeps the notebook
# from echoing the returned module object as cell output.
importlib.reload(agents);
importlib.reload(users);
importlib.reload(utils);

In [None]:
# rewrite user model
class UserModel():
    """Symmetric-noise user model over a discrete set of targets.

    Only the number of targets is stored; the emission probabilities are
    computed on demand from particle states and candidate actions.
    """

    def __init__(self, num_targets):
        self.num_targets = num_targets

    def emission_probability(self, user_state, app_action):
        """
        Vectorized probability of each of the two possible emissions.

        user_state (np.array): (num_particles, num_state_dims); the first
            column is used as the flip probability and the last column as
            the target.
        app_action (np.array): (num_plans, num_timesteps)

        Returns an array of shape (2, num_particles, num_plans, num_timesteps).
        Entries are 0.5 wherever the action is (numerically) equal to the
        target; elsewhere the two slices along the first axis sum to one.
        """
        # Add two trailing axes so particle values broadcast against (plan, timestep).
        flip = user_state[:, 0][:, np.newaxis, np.newaxis]
        goal = user_state[:, -1][:, np.newaxis, np.newaxis]

        same = np.isclose(goal, app_action)   # (particle x plan x timestep)
        below = goal < app_action
        differs = 1 - same
        not_below = 1 - below

        # The same flip probability is used on both sides of the target.
        out = np.empty(shape=(2,) + below.shape)
        out[0] = 0.5 * same + differs * ((1 - flip) * below + flip * not_below)
        out[1] = 0.5 * same + differs * (flip * below + (1 - flip) * not_below)
        return out

In [None]:
# Number of targets handed to the user model for the interactive session (2**6 = 64).
num_targets = 2**6

# Simulated user with a fixed target and identical f0 / f1 values; it shares
# the notebook-wide `rng`.
user = users.UserSimulator(target=4, f0=0.1, f1=0.1, rng=rng)
# Agent under test. Note that the model is `users.UserModel` from the project
# module, not the `UserModel` class defined in this notebook.
agent = agents.AInfAgentContinuous(model=users.UserModel(num_targets), 
                                   rng=rng,
                                   max_f=0.5,
                                   init_particles=None, #beliefs[-1],
                                   num_particles=1_000,
                                   planning_horizon=1,
                                   weight_info_gain=1.,
                                   select_optimal_plan=True)

# Session histories. `agent.reset()` provides the first system output; the
# step cell appends one entry per interaction to each of these lists.
oo_system = [agent.reset()]
beliefs = [agent.copy_particles()]
pragmatic_values = [agent.pragmatic.copy()]
info_gains = [agent.info_gain.copy()]
oo_user = []

# Show the particle set right after the reset.
agent.show_particles()

In [None]:
# Advance the interaction by one step and record it. Every run of this cell
# appends another step to the session histories, so it is not idempotent.
oo_user.append(user.step(oo_system[-1]))
oo_system.append(agent.step(oo_user[-1]))
beliefs.append(agent.copy_particles())
pragmatic_values.append(agent.pragmatic.copy())
info_gains.append(agent.info_gain.copy())

fig, axes = plt.subplots(2, 3, figsize=(3*8, 2*6) )

# Panel [0][0]: the particle set.
ax = axes[0][0]
agent.show_particles(ax=ax)
ax.set_title('Particles in state spaces');
ax.set_xlabel('target')
ax.set_ylabel('f0 = f1')
# TODO: add colorbar for particle weight

# Panel [0][1]: marginal belief over the flip probability, plotted at bin centres.
hist, limits = agent.marginal_q_flip(bins=40)
ax = axes[0][1]
ax.plot((limits[:-1] + limits[1:])/2, hist)
# The dashed line marks the simulated user's flip probability, so it is
# labelled as such (it was previously labelled 'user target').
ax.plot([user.f0, user.f0], ax.get_ylim(), 'k--', label='user f0')
ax.set_title('Marginal belief over symmetric flip probabilities.');
ax.set_xlabel('P( f0 ), where f0=f1')
ax.legend()

# Panel [0][2]: marginal belief over targets, with the true target marked.
ax = axes[0][2]
ax.plot(agent.marginal_q_target(agent.particles))
ax.plot([user.target, user.target], ax.get_ylim(), 'k--', label='user target')
ax.set_title('Marginal belief over targets.');
ax.legend()

# Panel [1][0]: information gain from the latest agent step; the pragmatic
# value and -EFE curves are currently disabled.
# axes[1][0].plot(agent.pragmatic, label='pragmatic')
axes[1][0].plot(agent.info_gain, label='info gain')
# axes[1][0].plot(agent.nefe, label='-EFE')
ylim = axes[1][0].get_ylim()
axes[1][0].plot([user.target, user.target], ylim, 'k--', label='user target')
axes[1][0].legend()
# Panel [1][1]: the interaction so far. Panel [1][2] is left empty.
utils.plot_interaction_sequence(oo_user, oo_system, user.target, ax=axes[1][1])

In [None]:
# Summary figure for the interactive session: three stacked panels.
fig, axes = plt.subplots(3, 1, figsize=(16, 3*6))
# Top panel: user and system outputs recorded so far, with the user's target.
utils.plot_interaction_sequence(oo_user, oo_system, user.target, ax=axes[0])
# Bottom panel: one target marginal per stored belief snapshot.
q_target = np.asarray([agent.marginal_q_target(particles=belief) for belief in beliefs])
utils.plot_target_belief_distribution_sequence(q_target, user.target, ax=axes[2])
# 12 edges give 11 bins of width 0.05 whose centres run from 0 to 0.5,
# which matches the max_f=0.5 used when the session agent was created.
bins = np.linspace(-0.025, 0.525, 12)
# Middle panel: one flip-probability histogram per stored belief snapshot
# (element [0] of the value returned by marginal_q_flip).
p_flip = np.asarray([agent.marginal_q_flip(particles=belief, bins=bins)[0] for belief in beliefs])
utils.plot_f_belief_distribution_sequence(p_flip, user.f0, bins, ax=axes[1])

In [None]:
def run_experiment_ainf(num_targets,
                        target,
                        f0,
                        f1,
                        max_f,
                        max_num_steps,
                        num_particles,
                        diffusion_scale_f,
                        diffusion_scale_target,
                        use_pragmatic_value,
                        weight_info_gain,
                        init_particles=None):
    """Run one episode of a simulated user interacting with the agent.

    A `users.UserSimulator` and an `agents.AInfAgentContinuous` are created
    from the arguments (both share the notebook-level `rng`), the agent is
    reset, and user and agent then alternate for `max_num_steps` steps.

    Parameters
    ----------
    num_targets : passed to `users.UserModel` for the agent's model.
    target, f0, f1 : passed to `users.UserSimulator`.
    max_f, num_particles, diffusion_scale_f, diffusion_scale_target,
    use_pragmatic_value, weight_info_gain, init_particles :
        passed unchanged to `agents.AInfAgentContinuous`; the planning
        horizon is fixed to 1 and `select_optimal_plan` to True.
    max_num_steps : number of user/agent exchanges to simulate.

    Returns
    -------
    (oo_system, beliefs, pragmatic_values, info_gains, oo_user) : tuple of lists
        `oo_system`, `beliefs`, `pragmatic_values` and `info_gains` hold
        `max_num_steps + 1` entries (the state after `reset()` plus one per
        step); `oo_user` holds `max_num_steps` entries.
    """
    user = users.UserSimulator(target=target, f0=f0, f1=f1, rng=rng)

    agent = agents.AInfAgentContinuous(model=users.UserModel(num_targets),
                                       rng=rng,
                                       max_f=max_f,
                                       init_particles=init_particles,
                                       num_particles=num_particles,
                                       diffusion_scale_f=diffusion_scale_f,
                                       diffusion_scale_target=diffusion_scale_target,
                                       planning_horizon=1,
                                       weight_info_gain=weight_info_gain,
                                       use_pragmatic_value=use_pragmatic_value,
                                       select_optimal_plan=True)

    # Histories start with the agent's state right after the reset.
    oo_system = [agent.reset()]
    beliefs = [agent.copy_particles()]
    pragmatic_values = [agent.pragmatic.copy()]
    info_gains = [agent.info_gain.copy()]
    oo_user = []

    for _ in range(max_num_steps):
        # The user answers the latest system output, then the agent reacts to it.
        oo_user.append(user.step(oo_system[-1]))
        oo_system.append(agent.step(oo_user[-1]))
        # Snapshot the agent's state after every step.
        beliefs.append(agent.copy_particles())
        pragmatic_values.append(agent.pragmatic.copy())
        info_gains.append(agent.info_gain.copy())

    return oo_system, beliefs, pragmatic_values, info_gains, oo_user

In [None]:
# Single experiment with 2**k targets, followed by the same three summary
# panels as used for the interactive session.
k = 5
num_targets = 2**k
config = {
    # Derived from k so the target count and the particle count below cannot
    # drift apart when k is changed (this was previously hard-coded to 2**5).
    'num_targets': num_targets,
    'target': 4,
    'f0': 0.2,
    'f1': 0.2,
    'max_f': 0.5,
    'max_num_steps': 100,
    'use_pragmatic_value': True,
    'weight_info_gain': 1.,
    'init_particles': None,
    'num_particles': 40 * num_targets,
    'diffusion_scale_f': 0.005,
    'diffusion_scale_target': 0}

oo_system, beliefs, pragmatic_values, info_gains, oo_user = run_experiment_ainf(**config)

fig, axes = plt.subplots(3, 1, figsize=(16, 3*6))
utils.plot_interaction_sequence(oo_user, oo_system, config['target'], ax=axes[0])
# NOTE(review): `agent` here is the notebook-level agent from the interactive
# session cell (created with 2**6 targets), not the agent that
# run_experiment_ainf built for this config. Confirm that marginal_q_target /
# marginal_q_flip depend only on the `particles` argument; otherwise these
# marginals are computed against the wrong model.
q_target = np.asarray([agent.marginal_q_target(particles=belief) for belief in beliefs])
utils.plot_target_belief_distribution_sequence(q_target, config['target'], ax=axes[2])
# 12 edges spanning slightly beyond [0, max_f], i.e. 11 bins centred on
# 0, 0.1*max_f, ..., max_f.
bins = np.linspace(-0.05, 1.05, 12) * config['max_f']
p_flip = np.asarray([agent.marginal_q_flip(particles=belief, bins=bins)[0] for belief in beliefs])
utils.plot_f_belief_distribution_sequence(p_flip, config['f0'], bins, ax=axes[1])

# axes[1].set_title('Marginal belief over flip probabilities Q( f ).');
# bins = np.linspace(0, 1, 12)
# p_flip = np.asarray([agent.marginal_q_flip(particles=belief, bins=bins)[0] for belief in beliefs])
# axes[1].imshow(p_flip.T, aspect='auto', origin='lower')
# axes[1].set(yticks=np.arange(11), yticklabels=[f'{x:.2}' for x in np.linspace(0,1, 11)]);
# axes[1].set_xlim(axes[0].get_xlim());

# axes[2].set_title('Marginal belief over targets Q( target ).');
# p_target = np.asarray([agent.marginal_q_target(particles=belief) for belief in beliefs])
# axes[2].imshow(p_target.T, aspect='auto', origin='lower')
# axes[2].set(yticks=np.arange(agent.model.num_targets)[::5]);
# axes[2].set_xlim(axes[0].get_xlim());

In [None]:
# experiment 1

def run_experiment_series(mode, polarity):
    """Run the full grid of experiments for one mode/polarity and save the log.

    Every combination of target (0 .. num_targets-1), user error rate f0
    (0.1, 0.2, 0.3) and repetition (10 per combination) is one condition.
    For each condition `num_episodes` episodes are run back to back with
    `run_experiment_ainf`; from the second episode on, the agent is
    initialised with the last belief of the previous episode.

    Parameters
    ----------
    mode : {"symmetric", "fully_biased"}
        "symmetric" uses f1 = f0, "fully_biased" uses f1 = 0.
    polarity : {"unknown_polarity", "known_polarity"}
        Selects the agent's `max_f`: 1.0 for unknown, 0.5 for known polarity.

    Raises
    ------
    ValueError
        If `mode` or `polarity` is not one of the values listed above.

    Side effects
    ------------
    Prints a header and the first config, and writes the results with
    `np.save` to a file named
    ``ainf_{num_targets}_{num_particles}_{mode}_{polarity}_{use_pragmatic_value}_{num_episodes}``
    (numpy appends ``.npy``). The payload is a list holding one dict, so it
    is stored as a pickled object array. Returns None.
    """
    print("-------------------")
    print(mode, polarity)
    print("-------------------")

    # Fail fast on a mistyped argument instead of printing a message and then
    # crashing later on an unrelated TypeError caused by a None placeholder.
    max_f_by_polarity = {"unknown_polarity": 1., "known_polarity": 0.5}
    if polarity not in max_f_by_polarity:
        raise ValueError(f"unknown polarity: {polarity!r}")
    if mode not in ("symmetric", "fully_biased"):
        raise ValueError(f"unknown mode: {mode!r}")
    max_f = max_f_by_polarity[polarity]

    num_episodes = 5
    use_pragmatic_value = False
    weight_info_gain = 1.

    # targets
    k = 5 # k=5: 32 symbols, k=8: 256 symbols
    num_targets = 2**k
    target = np.arange(num_targets)

    # particles
    num_particles = 100 * num_targets
    diffusion_scale_f = 0.001 # 0.005
    diffusion_scale_target = 0 # 0.1

    # user error rates
    # (An earlier variant swept np.linspace(0, max_f, 11)[1:-1]; it was always
    # overwritten by the fixed values below, so only those are kept.)
    f0 = np.array([0.1, 0.2, 0.3])

    # repetitions
    num_repetitions = 10
    repetition = np.arange(num_repetitions)

    # Full factorial design, flattened to one entry per condition.
    target, f0, repetition = np.meshgrid(target, f0, repetition)
    target = target.reshape(-1)
    f0 = f0.reshape(-1)
    repetition = repetition.reshape(-1)
    num_conditions = len(target)

    if mode == "symmetric":
        f1 = f0
    else:  # "fully_biased" (validated above)
        f1 = np.zeros_like(f0)

    conditions = {'target': target, 'f0': f0, 'f1': f1}
    log = []
    for i in tqdm(range(num_conditions)):
        config = {'num_targets': num_targets,
                  'target': target[i],
                  'f0': f0[i],
                  'f1': f1[i],
                  'max_f': max_f,
                  'num_particles': num_particles,
                  'diffusion_scale_f': diffusion_scale_f,
                  'diffusion_scale_target': diffusion_scale_target,
                  'use_pragmatic_value': use_pragmatic_value,
                  'weight_info_gain': weight_info_gain,
                  'max_num_steps': 20*k}

        if i == 0:
            print("first config")
            print(config)

        # Collect each episode's arrays and join them once at the end.
        # Seeding the accumulation with an empty list only works when the
        # per-episode arrays are 1-D (np.concatenate requires equal ndim) and
        # re-copies the whole history on every episode.
        system_chunks, belief_chunks, user_chunks = [], [], []
        for j in range(num_episodes):
            if j > 0:
                # Continue from the final belief of the previous episode.
                config['init_particles'] = belief_chunks[-1][-1]

            _oo_system, _beliefs, _pragmatic_values, _info_gains, _oo_user = run_experiment_ainf(**config)
            system_chunks.append(np.asarray(_oo_system))
            belief_chunks.append(np.asarray(_beliefs))
            user_chunks.append(np.asarray(_oo_user))

        oo_system = np.concatenate(system_chunks, axis=0)
        beliefs = np.concatenate(belief_chunks, axis=0)
        oo_user = np.concatenate(user_chunks, axis=0)

        result = {'oo_system': oo_system, 'beliefs': beliefs, 'oo_user': oo_user}
        config['num_episodes'] = num_episodes
        # Do not store the (large) carried-over particle set in the logged config.
        config['init_particles'] = None
        log.append({'config': config, 'result': result})

    np.save(f'ainf_{num_targets}_{num_particles}_{mode}_{polarity}_{use_pragmatic_value}_{num_episodes}', [{'conditions': conditions, 'experiments': log}])
    print("DONE")

# Driver for the experiment series. `itertools.product` enumerates every
# (mode, polarity) pair from the two lists.
import itertools
# Full sweep over both modes and both polarities (currently disabled):
#for mode, polarity in itertools.product(["symmetric", "fully_biased"], ["unknown_polarity", "known_polarity"]):
# Active run: only the symmetric / known-polarity combination.
for mode, polarity in itertools.product(["symmetric"], ["known_polarity"]):
    run_experiment_series(mode, polarity)

In [None]:
# Debug plot of the agent's latest value terms.
# The original cell referenced `self`, which only exists inside a method and
# raises NameError at notebook top level; it now reads the same attributes
# from the notebook-level `agent`.
plt.plot(agent.pragmatic, label='new')
# plt.plot(agent.pragmatic_old, label='old')
plt.plot(agent.info_gain, label='IG')
# NOTE(review): assumes the agent exposes a `nefe` attribute (it is only
# referenced in commented-out code elsewhere in this notebook) - confirm.
plt.plot(agent.nefe, label='nefe')
plt.legend()