In [1]:
%load_ext jupyter_black
%matplotlib inline

# Numerical backend: "torch" selects PyTorch; any other value falls through to the JAX branch.
backend = "torch"

A brief tutorial on how to use the `agents.Discrete` class from pybefit to define a computational model of behaviour.

In [2]:
import numpy as np
import torch
import jax.numpy as jnp
from jax import nn, random

torch.set_default_dtype(torch.float32)

# Seed NumPy's global generator too: the bandit outcomes and the agent
# parameters in later cells are drawn with np.random.*, so seeding only
# torch / jax leaves the notebook non-reproducible across kernel restarts.
np.random.seed(0)

if backend == "torch":
    # PyTorch backend: bind the backend-agnostic names used by the agent and
    # task code to their torch implementations.
    tensor = torch
    LONG = torch.long
    one_hot = torch.nn.functional.one_hot
    sigmoid = torch.sigmoid
    torch.manual_seed(seed=0)

    class Cat(object):
        """Categorical sampler that draws from torch's global random generator."""

        def __call__(self, logits, *args):
            """Sample category indices from unnormalised log-probabilities.

            Extra positional arguments (e.g. a JAX PRNG key) are accepted and
            ignored so that both backends can be called the same way.
            """
            dist = torch.distributions.Categorical(logits=logits)
            return dist.sample()

else:
    # Any other value of `backend` selects the JAX implementations.
    tensor = jnp
    LONG = jnp.int32
    one_hot = nn.one_hot
    sigmoid = nn.sigmoid

    class Cat(object):
        """Categorical sampler that draws with an explicit JAX PRNG key."""

        def __call__(self, logits, key):
            """Sample category indices from `logits` using PRNG `key`."""
            return random.categorical(key, logits)


# The JAX key is created for either backend; the torch sampler simply ignores it.
rng_key = random.PRNGKey(0)
categorical = Cat()



In [3]:
# One would access this class using 'from pybefit.agents import Discrete'.
# Note that each of the methods has to be implemented for simulations and inference to be possible with pybefit classes.


class Discrete:
    """Base class for agents choosing among a finite, discrete set of actions.

    Every property and method below is a placeholder that raises
    ``NotImplementedError``; concrete agents are expected to override them.
    """

    def __init__(self, runs, blocks, trials, na, ns, no):
        # Experiment layout.
        self.runs = runs  # independent runs of the experiment (agents/subjects)
        self.nb, self.nt = blocks, trials  # number of blocks, trials per block

        # Sizes of the discrete spaces the agent works with.
        self.na, self.ns, self.no = na, ns, no  # actions, states, outcomes

    @property
    def num_params(self):
        """Number of free model parameters."""
        raise NotImplementedError

    @property
    def get_beliefs(self):
        """Tuple of beliefs, i.e. the internal dynamical state of the model.

        Only used for numpyro/jax based models.
        """
        raise NotImplementedError

    def set_parameters(self, *args, **kwargs):
        """Assign values to the free model parameters."""
        raise NotImplementedError

    def update_beliefs(self, block, trial, *args, **kwargs):
        """Revise beliefs about hidden states from sensory stimuli and action outcomes."""
        raise NotImplementedError

    def planning(self, block, trial, *args, **kwargs):
        """Compute the choice probabilities for the given block and trial."""
        raise NotImplementedError

    def sample_responses(self, block, trial, *args, **kwargs):
        """Draw responses according to the response probability."""
        raise NotImplementedError

We will implement a simple agent that solves a static multi-armed bandit task using the upper confidence bound (UCB) model.

In [4]:
class UCBAgent(Discrete):
    """UCB-style agent for a Bernoulli multi-armed bandit.

    Beliefs are the pair ``(q, count)``: per-arm q values and per-arm response
    counts, both allocated with shape ``(runs, na)`` in ``set_parameters``.
    """

    def __init__(self, runs=1, blocks=1, trials=1, num_arms=2):
        # Bernoulli bandit: one action and one state per arm, and two possible
        # outcomes (0, 1) for each arm.
        super().__init__(runs, blocks, trials, na=num_arms, ns=num_arms, no=2)

    @property
    def num_params(self):
        """Three free parameters: learning rate, exploration strength, response noise."""
        return 3

    @property
    def get_beliefs(self):
        """Current beliefs as the tuple ``(q, count)``."""
        return self.q, self.count

    def set_parameters(self, z):
        """Map unconstrained parameters `z` to model parameters and reset beliefs.

        The last axis of `z` is read at indices 0, 1 and 2.
        """
        self.lr = sigmoid(z[..., 0])  # learning rate, squashed by a sigmoid
        self.c = tensor.exp(z[..., 1])  # exploration strength, positive
        self.beta = tensor.exp(z[..., 2])  # response noise, positive

        belief_shape = (self.runs, self.na)
        self.q = tensor.zeros(belief_shape)  # q values
        self.count = tensor.zeros(belief_shape)  # response count

    def update_beliefs(self, block, trial, response_outcome, **kwargs):
        """Update q values and response counts from one (responses, outcomes) pair.

        `response_outcome[0]` holds the chosen arms and `response_outcome[1]`
        the observed outcomes. Beliefs may be supplied through the ``beliefs``
        keyword; otherwise the ones stored on the agent are used. The updated
        beliefs are stored on the agent and returned.
        """
        q, count = kwargs.pop("beliefs", self.get_beliefs)

        # Encode responses as a zero/one array: one for the chosen arm, zero
        # for all other arms.
        chosen = one_hot(response_outcome[0], self.na)

        # Append a trailing axis to the outcomes so they broadcast against the
        # per-arm arrays.
        outcome = response_outcome[1][..., None]

        step_size = self.lr[..., None] / (count + 1)

        # Equivalent to q[..., response] += step_size * (outcome - q[..., response]).
        self.q = q + step_size * chosen * (outcome - q)
        self.count = count + chosen

        return self.get_beliefs

    def planning(self, block, trial, **kwargs):
        """Return response logits: q values plus a scaled exploration bonus."""
        q, count = kwargs.pop("beliefs", self.get_beliefs)

        # Trial index counted across blocks.
        t = block * self.nt + trial

        # The small constant keeps the ratio finite for arms with zero count.
        bonus = tensor.sqrt(tensor.log(t + tensor.ones(1)) / (count + 1e-2))
        logits = q + self.c[..., None] * bonus
        return self.beta[..., None] * logits

    def sample_responses(self, block, trial, logits, **kwargs):
        """Sample responses from `logits`, forwarding the optional ``key`` keyword."""
        return categorical(logits, kwargs.pop("key", None))

Next, we will define a task environment for the multi-armed bandit.

In [5]:
# As with the agent implementation we start with a base Task class which is imported as "from pybefit.task import Task"
class Task:
    """Base class for an experimental task (the agent's environment)."""

    def __init__(self, nsub, blocks, trials):
        self.nsub = nsub  # number of subjects
        self.blocks = blocks  # number of experimental blocks
        self.trials = trials  # number of trials

    def get_offer(self, block, trial, *args, **kwargs):
        """Offer for the current (block, trial) pair that defines a unique stimulus.

        The base implementation provides no offer and returns ``None``.
        """
        return None

    def update_environment(self, block, trial, *args, **kwargs):
        """Generate the stimuli for the task's current block and trial.

        Must be overridden by subclasses.
        """
        raise NotImplementedError


class MABTask(Task):
    """Multi-armed bandit task backed by pre-generated outcomes for every arm."""

    def __init__(self, outcomes):
        # The four axes of `outcomes` are read as (blocks, trials, subjects, arms).
        n_blocks, n_trials, n_subjects, n_arms = outcomes.shape
        super().__init__(n_subjects, n_blocks, n_trials)
        self.outcomes = outcomes
        self.num_arms = n_arms

    def update_environment(self, block, trial, responses):
        """Return the outcome of the arm each subject chose in this block and trial."""
        current = self.outcomes[block, trial]
        # One-hot mask over arms: keeps the chosen arm's outcome, zeroes the rest.
        chosen = one_hot(responses, self.num_arms)
        return tensor.sum(current * chosen, -1)


# Experiment dimensions.
num_subjects, num_arms = 100, 3
blocks, trials = 10, 20

# One vector of per-arm reward probabilities for every subject.
probs = np.random.dirichlet(np.ones(num_arms), size=num_subjects)

# Bernoulli outcomes for every block, trial, subject and arm; multiplying by
# tensor.ones(1) hands the float32 NumPy array over to the selected backend.
outcomes = tensor.ones(1) * np.random.binomial(
    n=1, p=probs, size=(blocks, trials, num_subjects, num_arms)
).astype(np.float32)

print(outcomes.dtype, outcomes.shape)

mab_task = MABTask(outcomes)

# Sanity check: outcomes when every subject picks arm 0 on the first trial.
mab_task.update_environment(
    block=0, trial=0, responses=tensor.zeros(num_subjects, dtype=LONG)
)

torch.float32 torch.Size([10, 20, 100, 3])


tensor([1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
        0., 1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1.,
        0., 1., 1., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
mab_agent = UCBAgent(
    runs=num_subjects,
    blocks=blocks,
    trials=trials,
    num_arms=num_arms,
)

# Unconstrained parameters: one row per subject, one column per model parameter.
z = tensor.ones(1) * np.random.standard_normal(
    size=(num_subjects, mab_agent.num_params)
).astype(np.float32)

mab_agent.set_parameters(z)

# Run a single time step: plan -> respond -> observe -> update.
logits = mab_agent.planning(block=0, trial=0)
actions = mab_agent.sample_responses(block=0, trial=0, logits=logits, key=rng_key)
obs = mab_task.update_environment(block=0, trial=0, responses=actions)
beliefs = mab_agent.get_beliefs
new_beliefs = mab_agent.update_beliefs(
    block=0, trial=0, response_outcome=[actions, obs], beliefs=beliefs
)