In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from sciunit import TestSuite

## Import decision-making models

In [3]:
from src.ldmunit.models.rw import RWModel
from src.ldmunit.models.nwsls import NWSLSModel

In [4]:
# Candidate models to compare; each constructor is handed a one-element list
# holding a dict of that model's parameters.
rw_init_paras = [{'alpha': 0.5, 'beta': 0.5}]
nwsls_init_paras = [{'epsilon': 0.3}]

dm_rw = RWModel(rw_init_paras)
dm_nwsls = NWSLSModel(nwsls_init_paras)

## Set up the environment for the two-armed bandit test

In [5]:
from bandit import BanditTwoArmedHighLowFixed
env_0 = BanditTwoArmedHighLowFixed()

In [6]:
env_0.p_dist

[0.8, 0.2]

## Simulate data

In [7]:
n_trials = 1000

In [75]:
obs_nwsls_0 = dm_nwsls.stimulate_exp(env_0, [{'epsilon': 0.3}, {'epsilon': 0.4}], n_trials)

In [76]:
obs_rw_0 = dm_rw.stimulate_exp(env_0, [{'alpha': 0.5, 'beta': 2.5}] * 2, n_trials)

## Set up test suite

In [77]:
from src.ldmunit.tests import AICTest, BICTest, NLLTest

In [78]:
# One test per criterion class, each constructed from the RW observations,
# then bundled so both models can be judged in a single call.
aictest, bictest, nlltest = (
    criterion(obs_rw_0) for criterion in (AICTest, BICTest, NLLTest)
)
test_suite = TestSuite([aictest, bictest, nlltest])

In [79]:
score_matrix = test_suite.judge([dm_nwsls, dm_rw])

In [80]:
score_matrix

Unnamed: 0,AICTest,BICTest,NLLTest
NWSLSModel,652.207,649.594,648.207
RWModel,573.493,570.88,569.493


In [81]:
# Same three criteria as the RW suite, this time constructed from the
# NWSLS observations.
aictest_, bictest_, nlltest_ = (
    criterion(obs_nwsls_0) for criterion in (AICTest, BICTest, NLLTest)
)
test_suite_ = TestSuite([aictest_, bictest_, nlltest_])

In [82]:
score_matrix_ = test_suite_.judge([dm_nwsls, dm_rw])

In [35]:
score_matrix_

Unnamed: 0,AICTest,BICTest,NLLTest
NWSLSModel,842.748,840.748,840.748
RWModel,621.187,619.187,619.187


In [None]:
dm_rw.train_with_observations

In [None]:
n_trials = 100
n_features = 4
n_subjects = 2

In [None]:
# The list holds the *same* dict object twice (presumably one entry per
# subject — TODO confirm); mutating one entry therefore changes both.
paras = [{'alpha': 0.5, 'b0': 0.5, 'b1': 0.5, 'sigma': 1, 'w0': 0}] * 2
print(paras)
# NOTE(review): RwNormNativeModel is not imported in any cell visible here —
# confirm which module provides it before running on a fresh kernel.
model = RwNormNativeModel(paras)

In [None]:
def stimulate_data(n_trials, n_features, n_subjects, seed=123):
    """Generate random placeholder stimuli, actions and rewards per subject.

    All draws come from a private ``RandomState`` seeded with ``seed``, so the
    call is reproducible *without* reseeding NumPy's global generator (doing
    that would silently alter every later ``np.random`` draw in the kernel).
    The generated values are the ones the legacy global stream produces
    immediately after ``np.random.seed(seed)``.

    Parameters
    ----------
    n_trials : int
        Number of trials generated for each subject.
    n_features : int
        Number of stimulus features per trial.
    n_subjects : int
        Number of subjects; every returned list has this many entries.
    seed : int or None, optional
        Seed of the private generator. ``None`` yields a fresh,
        non-reproducible stream.

    Returns
    -------
    stimuli : list of ndarray
        One ``(n_trials, n_features)`` float array per subject, uniform on [0, 1).
    actions : list of ndarray
        One ``(n_trials,)`` float array per subject, uniform on [0, 1).
    rewards : list of ndarray
        One ``(n_trials,)`` integer array per subject, drawn from {0, 1, 2}.
    """
    import numpy as np

    rng = np.random.RandomState(seed)
    stimuli, actions, rewards = [], [], []

    # The per-subject draw order (stimuli, actions, rewards) determines the
    # generated numbers; reordering these lines changes the data.
    for _ in range(n_subjects):
        stimuli.append(rng.random_sample((n_trials, n_features)))
        actions.append(rng.random_sample((n_trials,)))
        rewards.append(rng.choice([0, 1, 2], n_trials))

    return stimuli, actions, rewards

stimuli, actions, rewards = stimulate_data(n_trials, n_features, n_subjects)

In [None]:
actions[0].shape

In [None]:
f = model.produce_loglikelihood(stimuli, rewards)
print(f(actions))

In [None]:
def foo(x, y, z):
    """Return ``x + y + z``, evaluated left to right with the operands' own ``+``."""
    return x + y + z


def fun(x):
    """Call ``foo`` on the first two items of ``x`` with ``z`` fixed to 2.

    Defined with ``def`` rather than as a lambda bound to a name, so the
    function carries its real name in tracebacks and reprs.
    """
    return foo(x[0], x[1], 2)

In [None]:
fun([3,4])

In [None]:
list(paras[0].keys())

In [None]:
from src.ldmunit.models.rw import RWModel
from bandit import BanditTwoArmedHighLowFixed

env = BanditTwoArmedHighLowFixed()
model = RWModel([{'alpha': 0.5, 'beta': 0.5}])

In [None]:
model.paras

In [None]:
stimuli[0][0]

In [None]:
# NOTE(review): this call passes positional scalars (10, 0, 0.5, 1), whereas the
# earlier stimulate_exp calls in this notebook pass (env, [param dicts], n_trials)
# and bind a single result — confirm which signature the current RWModel exposes.
a, r = model.stimulate_exp(env, 10, 0, 0.5, 1)
print(a)
print(r)

In [None]:
import numpy as np

# Toy sample: tally how often each distinct value occurs.
x = np.array([1, 1, 1, 2, 2, 2, 5, 25, 1, 1])
unique, counts = np.unique(x, return_counts=True)

# One (value, count) row per distinct value.
a = np.column_stack((unique, counts))
a.shape

In [None]:
actions[0][0]

In [None]:
stimuli[0][0]

In [None]:
rewards[0][0]

In [None]:
def stimulate_sl_RW(env, alpha, beta, n_trials, w0=0):
    """Simulate a delta-rule (RW) learner with softmax choice on a bandit env.

    The stimulus/observation is the same on every trial, so a single value
    estimate per action is learned. On each trial an action is drawn from a
    softmax over ``beta * Q``, the environment is stepped, and the chosen
    action's estimate is moved toward the reward by a fraction ``alpha``.

    Earlier revisions sampled actions uniformly from the action space, which
    left ``beta`` and the learned values without any effect on behaviour.

    Parameters
    ----------
    env : object
        Must expose ``action_space.n``, ``_step(action)`` returning a 4-tuple
        whose second item is the reward, and ``close()``.
    alpha : float
        Learning rate of the value update.
    beta : float
        Softmax inverse temperature; ``0`` gives uniform random choice, larger
        values make choice greedier.
    n_trials : int
        Number of trials to simulate.
    w0 : float, optional
        Initial value estimate shared by all actions.

    Returns
    -------
    a : ndarray of int, shape (n_trials,)
        Chosen action on each trial.
    r : ndarray of int, shape (n_trials,)
        Reward received on each trial.

    Notes
    -----
    Actions are drawn with ``np.random.choice``, so ``np.random.seed`` controls
    reproducibility. ``env`` is closed before returning.
    """
    n_actions = env.action_space.n

    # Float value estimates, all starting at w0.
    Q = np.full(n_actions, w0, dtype=float)

    a = np.empty(n_trials, dtype=int)
    # NOTE(review): rewards are stored as integers, so a fractional reward
    # would be truncated here — confirm all target envs emit integer rewards.
    r = np.empty(n_trials, dtype=int)

    for t in range(n_trials):
        # Softmax over beta-scaled values. Subtracting the maximum keeps
        # exp() from overflowing and does not change the probabilities.
        logits = beta * Q
        logits -= logits.max()
        probs = np.exp(logits)
        probs /= probs.sum()
        action = int(np.random.choice(n_actions, p=probs))

        _, reward, _, _ = env._step(action)

        # Delta rule: move the chosen action's estimate toward the reward.
        Q[action] += alpha * (reward - Q[action])

        a[t], r[t] = action, reward

    env.close()

    return a, r

In [None]:
stimulate_sl_RW(env, 0.5, 0.5, 10)

In [None]:
from gym import spaces
a = spaces.box.Box(-1.0, 1.0, (2,))
t = a.sample()

In [None]:
from bandit import BanditTwoArmedIndependentUniform
env = BanditTwoArmedIndependentUniform()