In [1]:
%load_ext jupyter_black
%matplotlib inline

# Numerical backend used by the rest of the notebook: "torch" selects PyTorch;
# any other value makes the backend-setup cell fall through to its JAX branch.
backend = "torch"

A brief tutorial on how to use the `agents.Discrete` class from pybefit to define a computational model of behaviour.

In [2]:
import numpy as np
import torch
import jax.numpy as jnp
from jax import nn, random

# Make float32 the default floating-point dtype for newly created torch tensors.
torch.set_default_dtype(torch.float32)

# Bind backend-agnostic aliases (tensor, LONG, one_hot, sigmoid) and a categorical
# sampler class so that the cells below run unchanged on either backend.
if backend != "torch":
    # Any value other than "torch" selects the JAX backend.
    tensor, LONG = jnp, jnp.int32
    one_hot, sigmoid = nn.one_hot, nn.sigmoid

    class Cat:
        """Categorical sampler that carries its own JAX PRNG key."""

        def __init__(self, seed=0):
            self.key = random.PRNGKey(seed)

        def __call__(self, logits):
            # Split the stored key so that every call samples with a fresh subkey.
            self.key, subkey = random.split(self.key)
            return random.categorical(subkey, logits)

else:
    tensor, LONG = torch, torch.long
    one_hot, sigmoid = torch.nn.functional.one_hot, torch.sigmoid

    class Cat:
        """Categorical sampler built on torch.distributions."""

        def __init__(self, seed=0):
            # NOTE: this seeds torch's global random number generator.
            torch.manual_seed(seed)

        def __call__(self, logits):
            return torch.distributions.Categorical(logits=logits).sample()


# Sampler instance shared by the agents defined below.
categorical = Cat(seed=0)



In [3]:
# One would access this class using 'from pybefit.agents import Discrete'.
# Note that each of the methods has to be implemented for simulations and inference to be possible when using pybefit classes.


class Discrete:
    """Base class for an agent with a discrete and finite number of actions.

    The constructor only stores the experiment and task dimensions. All other
    members are placeholders that raise ``NotImplementedError`` and are meant
    to be overridden by a concrete agent.
    """

    def __init__(self, runs, blocks, trials, na, ns, no):
        # Experiment layout:
        #   runs -> number of independent runs of the experiment (agents/subjects)
        #   nb   -> number of experimental blocks
        #   nt   -> number of trials in each block
        self.runs, self.nb, self.nt = runs, blocks, trials

        # Task dimensions: number of actions, states and outcomes.
        self.na, self.ns, self.no = na, ns, no

    @property
    def num_params(self):
        """Number of free model parameters (to be provided by subclasses)."""
        raise NotImplementedError

    def set_parameters(self, *args, **kwargs):
        """Set the free model parameters (to be provided by subclasses)."""
        raise NotImplementedError

    def update_beliefs(self, block, trial, *args, **kwargs):
        """Update beliefs about hidden states from sensory stimuli and action outcomes."""
        raise NotImplementedError

    def planning(self, block, trial, *args, **kwargs):
        """Compute choice probabilities for the given block and trial."""
        raise NotImplementedError

    def sample_responses(self, block, trial, *args, **kwargs):
        """Generate responses from the response probabilities."""
        raise NotImplementedError

We will implement a simple agent, based on the upper confidence bound (UCB) model, that solves a static multi-armed bandit task.

In [4]:
class MABUCB(Discrete):
    """UCB-style agent for a static multi-armed bandit with binary outcomes.

    The agent keeps one value estimate (``q``) and one choice counter
    (``count``) per run and arm. Its three free parameters are a learning
    rate, an exploration strength and a response-noise scale.
    """

    def __init__(self, runs=1, blocks=1, trials=1, num_arms=2):
        # Bernoulli bandit: the number of actions and states both equal the
        # number of arms, and every arm has two possible outcomes (0, 1).
        super().__init__(runs, blocks, trials, num_arms, num_arms, 2)

    @property
    def num_params(self):
        """Number of free parameters: learning rate, exploration strength, response noise."""
        return 3

    def set_parameters(self, z):
        """Transform unconstrained parameters and reset the agent's state.

        ``z`` is indexed along its last axis at positions 0, 1 and 2.
        Presumably its leading shape is ``(runs,)`` so that the parameters
        broadcast against the ``(runs, na)`` state -- verify against the caller.
        """
        # sigmoid keeps the learning rate in (0, 1); exp keeps the other two positive
        self.lr = sigmoid(z[..., 0])
        self.c = tensor.exp(z[..., 1])
        self.beta = tensor.exp(z[..., 2])

        # fresh value estimates and choice counts, one entry per (run, arm)
        state_shape = (self.runs, self.na)
        self.q = tensor.zeros(state_shape)
        self.count = tensor.zeros(state_shape)

    def update_beliefs(self, block, trial, response_outcome):
        """Update value estimates and choice counts from ``[responses, outcomes]``."""
        # 0/1 mask with a one at the chosen arm and zeros at all other arms
        chosen = one_hot(response_outcome[0], self.na)

        # append a trailing axis to the outcomes and expand them to the shape of q
        outcome = tensor.broadcast_to(response_outcome[1][..., None], self.q.shape)

        # per-arm step size that shrinks as the arm's choice count grows
        step = self.lr[..., None] / (self.count + 1)

        # the mask restricts the delta-rule update to the chosen arm, i.e.
        # q[chosen] += step * (outcome - q[chosen])
        self.q += step * chosen * (outcome - self.q)
        self.count += chosen

    def planning(self, block, trial, *args, **kwargs):
        """Return response logits: scaled value estimates plus an exploration bonus."""
        log_time = tensor.log(trial + tensor.ones(1))
        # 1e-6 avoids a division by zero for arms that have not been chosen yet
        bonus = tensor.sqrt(log_time / (self.count + 1e-6))
        ucb = self.q + self.c[..., None] * bonus
        return self.beta[..., None] * ucb

    def sample_responses(self, block, trial, logits, *args, **kwargs):
        """Sample one response per run from the categorical distribution given by ``logits``."""
        return categorical(logits)

Next, we will define a task environment for the multi-armed bandit.

In [5]:
# As with the agent implementation, we start with a base Task class, which one would import using 'from pybefit.task import Task'.
class Task:
    """Base class for a behavioural task.

    Stores the number of subjects, blocks and trials. ``get_offer`` returns
    ``None`` by default, while ``update_environment`` must be overridden.
    """

    def __init__(self, nsub, blocks, trials):
        self.nsub = nsub  # number of subjects
        self.blocks = blocks  # number of experimental blocks
        self.trials = trials  # number of trials

    def get_offer(self, block, trial, *args, **kwargs):
        """Define the offer (a unique stimulus) for a block/trial pair; no offer by default."""
        return

    def update_environment(self, block, trial, *args, **kwargs):
        """Generate the stimuli for the task's current block and trial."""
        raise NotImplementedError


class MABTask(Task):
    """Multi-armed bandit task that replays pre-generated outcomes.

    ``outcomes`` must be four-dimensional; its first three axes are read as
    (blocks, trials, subjects) and its last axis is indexed by the responses.
    """

    def __init__(self, outcomes):
        n_blocks, n_trials, n_subjects, _ = outcomes.shape
        super().__init__(n_subjects, n_blocks, n_trials)
        self.outcomes = outcomes

    def update_environment(self, block, trial, responses):
        """Return, for every subject, the stored outcome of the response it gave."""
        # pair subject i with responses[i] when indexing the last two axes
        subject_index = list(range(self.nsub))
        return self.outcomes[block, trial, subject_index, responses]


blocks = 10
trials = 20
num_arms = 3
num_subjects = 100

# Seed NumPy's global random number generator before the first draw so that
# "Restart Kernel -> Run All" always produces the same task (and the same
# values for every later np.random call in the notebook).
NUMPY_SEED = 0
np.random.seed(NUMPY_SEED)

# One probability vector over arms per (block, subject); the trial axis has
# length one, so the same probabilities are broadcast over all trials of a block.
probs = np.random.dirichlet(np.ones(num_arms), size=(blocks, 1, num_subjects))

# Binary outcomes for every (block, trial, subject, arm). Multiplying by
# `tensor.ones(1)` is used here to turn the NumPy array into a backend array.
outcomes = tensor.ones(1) * np.random.binomial(
    1, probs, size=(blocks, trials, num_subjects, num_arms)
).astype(np.float32)

print(outcomes.dtype, outcomes.shape)

mab_task = MABTask(outcomes)
# Sanity check: outcomes of the first block and trial when every subject picks arm 0.
mab_task.update_environment(0, 0, tensor.zeros(num_subjects, dtype=LONG))

float32 (10, 20, 100, 3)


Array([1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0.,
       0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1.,
       0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
       0., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0.],      dtype=float32)

In [6]:
# One agent "run" per simulated subject.
mab_agent = MABUCB(
    runs=num_subjects,
    blocks=blocks,
    trials=trials,
    num_arms=num_arms,
)

# Draw unconstrained parameters from a standard normal, one row per subject,
# and convert them to a backend array.
param_shape = (num_subjects, mab_agent.num_params)
z = tensor.ones(1) * np.random.standard_normal(size=param_shape).astype(np.float32)

mab_agent.set_parameters(z)

# Run a single time step (block 0, trial 0): plan -> respond -> observe -> learn.
logits = mab_agent.planning(0, 0)
actions = mab_agent.sample_responses(0, 0, logits)
obs = mab_task.update_environment(0, 0, actions)
mab_agent.update_beliefs(0, 0, [actions, obs])