In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../") # go to parent dir

In [3]:
import gym
from gym import spaces
import numpy as np
import sciunit
import scipy

In [4]:
from src.ldmunit.models import decision_making, associative_learning
from src.ldmunit.models.utils import loglike, train_with_obs, simulate

In [5]:
from bandit import BanditEnv, BanditAssociateEnv

## Decision Making

In [6]:
# Shared settings for the decision-making cells below: the first two values are
# handed to every model constructor, the last one to simulate().
n_actions, n_obs, n_trials = 2, 3, 100

In [7]:
# Smoke test for RandomRespondModel: generate data by letting the model
# interact with a bandit environment, score that data under the same model,
# then hand it to train_with_obs.
print("testing for random responding")
paras = {'bias': 0.8, 'action_bias': 1}

model = decision_making.RandomRespondModel(n_actions, n_obs, paras)
# NOTE(review): the two lists look like per-arm reward probabilities and
# reward magnitudes -- confirm against BanditEnv.
env = BanditEnv([0.3, 0.7], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for random responding
The log-likelihood:    -43.109
[0.8 1. ]


In [8]:
# Smoke test for NWSLSModel (noisy win-stay-lose-shift): generate data by
# letting the model interact with a bandit environment, score that data under
# the same model, then hand it to train_with_obs.
print("testing for noisy-win-stay-lose-shift")
paras = {'epsilon': 0.8}

model = decision_making.NWSLSModel(n_actions, n_obs, paras)
# NOTE(review): the two lists look like per-arm reward probabilities and
# reward magnitudes -- confirm against BanditEnv.
env = BanditEnv([0.3, 0.7], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for noisy-win-stay-lose-shift
The log-likelihood:    -71.538
[0.8]


In [9]:
# Smoke test for RWCKModel: generate data by letting the model interact with a
# bandit environment, score that data under the same model, then hand it to
# train_with_obs.
print("testing for RWCK")
paras = {'w0': 0.1, 'alpha': 0.5, 'alpha_c': 0.5, 'beta': 0.5, 'beta_c': 0.5}

model = decision_making.RWCKModel(n_actions, n_obs, paras)
# NOTE(review): the two lists look like per-arm reward probabilities and
# reward magnitudes -- confirm against BanditEnv.
env = BanditEnv([0.3, 0.7], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for RWCK
The log-likelihood:     -72.59
[0.1 0.5 0.5 0.5 0.5]


In [64]:
from src.ldmunit.capabilities import Interactive

class MultiMeta(type):
    """Metaclass producing a container class around several instances of one class.

    The class namespace must contain the key ``'single_cls'``. The ``bases``
    argument given at class creation is not used: the generated class inherits
    from ``single_cls.__bases__`` instead. Instances of the generated class
    keep a list ``models`` and expose ``predict``, ``reset``, ``update`` and
    ``act``, each taking an index as first argument and forwarding the
    remaining arguments to the same-named method of ``models[idx]``.
    """

    def __new__(cls, name, bases, dct):
        wrapped_cls = dct['single_cls']
        multi_cls = super().__new__(cls, name, wrapped_cls.__bases__, dct)

        def multi_init(self, param_list, *args, **kwargs):
            # One wrapped instance is built per entry of param_list.
            # NOTE(review): the entries themselves are not passed on to the
            # wrapped constructor -- every instance is built from the same
            # *args/**kwargs. Confirm whether each entry should be forwarded.
            self.models = []
            self.models.extend(wrapped_cls(*args, **kwargs) for _ in param_list)

        def make_forwarder(method_name):
            # Build a method that dispatches to the model selected by idx.
            def forward(self, idx, *args, **kwargs):
                target = getattr(self.models[idx], method_name)
                return target(*args, **kwargs)
            forward.__name__ = 'multi_' + method_name
            return forward

        # Installed after class creation so they override anything in dct.
        multi_cls.__init__ = multi_init
        for method_name in ('predict', 'reset', 'update', 'act'):
            setattr(multi_cls, method_name, make_forwarder(method_name))

        return multi_cls

def multi_from_single(single_cls, multi_cls_name):
    """Create a class named ``multi_cls_name`` through ``MultiMeta`` that wraps ``single_cls``.

    Parameters
    ----------
    single_cls : type
        Class stored in the new class namespace under ``'single_cls'``.
    multi_cls_name : str
        Name given to the generated class.

    Returns
    -------
    type
        The class object returned by ``MultiMeta``.
    """
    namespace = dict(single_cls=single_cls)
    no_bases = tuple()
    return MultiMeta(multi_cls_name, no_bases, namespace)


# Build one independent parameter dict per sub-model; `[{...}] * 10` would put
# ten references to the same dict in the list.
param_list = [{'epsilon': 0.5} for _ in range(10)]
MultiA = multi_from_single(decision_making.NWSLSModel, 'MultiNWSLS')
# Pass the parameter dicts that were just built rather than an unrelated
# range(10); both have ten entries, so the number of sub-models is unchanged.
mA = MultiA(param_list=param_list, n_actions=5, n_obs=5)

## Associative learning

In [10]:
# Shared settings for the associative-learning cells below: the first value is
# handed to every model constructor, the second one to simulate().
n_obs, n_trials = 3, 100

In [11]:
# Smoke test for RwNormModel: generate data by letting the model interact with
# an associative bandit environment, score that data under the same model,
# then hand it to train_with_obs.
print("testing for RwNormModel")
paras = {'w0': 0.1, 'alpha': 0.5, 'sigma': 0.5, 'b0': 0.5, 'b1': 0.5}

model = associative_learning.RwNormModel(n_obs, paras)
# NOTE(review): meaning of the two list arguments is not visible here --
# confirm against BanditAssociateEnv.
env = BanditAssociateEnv([0.3, 0.7, 0.8], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for RwNormModel
The log-likelihood:    -91.849
[0.1 0.5 0.5 0.5 0.5]


In [12]:
# Smoke test for KrwNormModel: generate data by letting the model interact
# with an associative bandit environment, score that data under the same
# model, then hand it to train_with_obs.
print("testing for KrwNormModel")
paras = {'w0': 0.1, 'alpha': 0.5, 'sigma': 0.5, 'b0': 0.5, 'b1': 0.5, 'logSigmaWInit': 0.23, 'logTauSq': 0.32,
        'logSigmaRSq': 0.34}

model = associative_learning.KrwNormModel(n_obs, paras)
# NOTE(review): meaning of the two list arguments is not visible here --
# confirm against BanditAssociateEnv.
env = BanditAssociateEnv([0.3, 0.7, 0.8], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for KrwNormModel
The log-likelihood:    -142.21
[0.1  0.5  0.5  0.5  0.5  0.23 0.32 0.34]


In [13]:
# Smoke test for LSSPDModel: generate data by letting the model interact with
# an associative bandit environment, score that data under the same model,
# then hand it to train_with_obs.
print("testing for lsspd")
paras = {'w0': 0.1, 'alpha': 0.5, 'sigma': 0.5, 'b0': 0.5, 'b1': 0.5, 'eta': 0.23, 'kappa': 0.32,
        'mix_coef': 0.34}

model = associative_learning.LSSPDModel(n_obs, paras)
# NOTE(review): meaning of the two list arguments is not visible here --
# confirm against BanditAssociateEnv.
env = BanditAssociateEnv([0.3, 0.7, 0.8], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for lsspd
The log-likelihood:    -52.967
[0.1  0.5  0.5  0.5  0.5  0.23 0.32 0.34]


In [22]:
# Smoke test for BetaBinomialModel: generate data by letting the model
# interact with an associative bandit environment, score that data under the
# same model, then hand it to train_with_obs.
print("testing for beta binomial")
paras = {'b0': 0.5, 'b1': 0.5, 'mix_coef': 0.34}

model = associative_learning.BetaBinomialModel(n_obs, paras)
# NOTE(review): meaning of the two list arguments is not visible here --
# confirm against BanditAssociateEnv.
env = BanditAssociateEnv([0.3, 0.7, 0.8], [1,1])
stimuli, rewards, actions = simulate(env, model, n_trials)
# NOTE(review): the recorded run of this cell printed -inf here, yet the fit
# below reported success with the starting parameters unchanged -- verify the
# BetaBinomialModel likelihood before trusting this result.
a = loglike(model, stimuli, rewards, actions)
print("The log-likelihood: {:10.5}".format(a))

# train_with_obs presumably fits the model parameters to the recorded data;
# its result exposes .success and .x.
opt = train_with_obs(model, stimuli, rewards, actions)
if opt.success:
    print(opt.x)
else:
    # Report a failed fit explicitly instead of silently printing nothing.
    print("Optimization failed: {}".format(getattr(opt, 'message', opt)))

testing for beta binomial
The log-likelihood:       -inf
[0.5  0.5  0.34]
