In [1]:
#import autograd.numpy as np
#from autograd import jacobian
import numpy as np
from scipy.optimize import minimize
import random
import time
import tensorflow_probability as tfp



In [2]:
class simulate_RLWM():
    """ This class includes the function for fitting a simulation of the RLWM task.
    It can also be used to train the model with the behavioral data. 
    
    Args:
    
    Alpha (float): learning rate bounded from 0 to 1
    K (int): working memory capacity
    prior (): working memory prior weight
    beta (int): inverse temperature average value is 5 . Fixed to 100
    epsilon (float): noise bounded from 0 to 1
    phi (float): decay bounded from 0 to 1
    pers (float): neglect of negative values bounded from 0 to 1
    T (int): number of trials per block
    actions (int): number of possible responses
    reward (dict): keys are the stimulus, values are the correct action response
    
    """
    def __init__(self, alpha, K, prior, beta, epsilon, phi, pers, T, actions,reward):
        self.alpha = alpha
        self.pers = pers
        self.beta = beta
        self.epsilon = epsilon
        self.phi = phi
        self.prior = prior
        self.K = K
        
        self.T = T
        self.reward = reward
        self.actions = actions
        
        self.Wwm = self.prior*(np.min([1, self.K/len(self.reward.keys())]))
        self.neg_alpha = (1-self.pers)*self.alpha
        
        self.correct = []
        self.key_answer = []
        self.stimulus = []
        self.outcome = []
        self.Q = np.ones((len(self.reward.keys()), self.actions))/self.actions
        self.W = np.ones((len(self.reward.keys()), self.actions))/self.actions
        self.W0 = self.W.copy()
    def fit(self):
        """This function will train the model
        """
        for i in range(0, self.T):
            if i >=1:
                
                self.W = self.W + self.phi*(self.W0-self.W)


            set_size = len(self.reward.keys())
            s = np.random.randint(0, set_size)
            self.stimulus.append(s)
 
            Prl = np.exp(self.beta * self.Q[s, :])
            Prl = Prl/np.sum(Prl)
            
            Pwm = np.exp(self.beta * self.W[s, :])
            Pwm = Pwm/np.sum(Pwm)
            
            Pall = self.Wwm*Pwm + (1-self.Wwm)*Prl
            Pall = (1-self.epsilon)*Pall + self.epsilon*(1/self.actions)
            a = random.choices(list(range(0, self.actions)), weights=Pall)[0]
            self.key_answer.append(a)
            correct = self.reward[s]
            if correct == a:
                r = 1
            else:
                r = 0
            self.outcome.append(r)
            if r==0:
                self.Q[s,a] = self.Q[s,a] + self.neg_alpha*(r-self.Q[s,a])
               
            else:
                self.Q[s, a] = self.Q[s, a] + self.alpha*(r-self.Q[s, a])
            
            
            self.W[s,a] = r
            self.correct.append(r)
        
    def get_delay(self):
        if len(self.stimulus)==0:
            raise NameError("model not yet fit, please fit model")
        else:
            self.delay = []
            for j, i in enumerate(self.stimulus):
                idx = np.where(np.array(self.stimulus)==i)[0]
                if j <= np.min(idx):
                    self.delay.append(0)
                else:
                    biggest_idx = np.max(np.where(idx < j)[0])
                    self.delay.append(j-idx[biggest_idx])
        
    def get_iteration(self):
        if len(self.correct) == 0:
            print("model not yet fit please fit model")
        else:
            self.iteration = []
            for j, i in enumerate(self.stimulus):
                idx = np.where(np.array(self.stimulus)==i)[0]
                if j <= np.min(idx):
                    self.iteration.append(0)
                else:
                    self.iteration.append(len(np.where(idx<j)[0]))
    def get_reward_history():
        if len(self.correct)==0:
            print("model not yet fit, please fit model")
        else:
            pass
        

In [3]:
""" 

Args:

Alpha (float): learning rate bounded from 0 to 1
K (int): working memory capacity
prior (): working memory prior weight
beta (int): inverse temperature average value is 5 . Fixed to 100
epsilon (float): noise bounded from 0 to 1
phi (float): decay bounded from 0 to 1
pers (float): neglect of negative values bounded from 0 to 1
T (int): number of trials per block
actions (array): all actions chosen per block
reward (array): all rewards recieved per block

"""

def likelihood_RLWM(actions, rewards, stimulus, set_size, K, alpha, prior, epsilon, phi, pers):
    """This function calculate the probability 
    """
    beta = 100
    action_options = 3
    T = len(actions)
    Wwm = prior*(np.min([1, K/set_size]))
    neg_alpha = (1-pers)*alpha
    Q = np.ones((6, action_options))/action_options
    W = np.ones((6, action_options))/action_options
    W0 = W.copy()
    choiceProb = []
    
    for a, r, s in zip(actions, rewards, stimulus):
      

        W = W + phi*(W0-W)



        Prl = np.exp(beta * Q[s, :])
        Prl = Prl/np.sum(Prl)

        Pwm = np.exp(beta * W[s, :])
        Pwm = Pwm/np.sum(Pwm)

        Pall = Wwm*Pwm + (1-Wwm)*Prl
        Pall = (1-epsilon)*Pall + epsilon*(1/action_options)

        choiceProb.append(Pall[a])

        if r==0:
            Q[s,a] = Q[s,a] + neg_alpha*(r-Q[s,a])

        else:
            Q[s, a] = Q[s, a] + alpha*(r-Q[s, a])


        W[s,a] = r
    NegLL = -np.sum(np.log(choiceProb))
    return NegLL





In [4]:
fun = lambda x, Data : likelihood_RLWM(Data[0], Data[1], Data[2], set_size, K, x[0], x[1], x[2], x[3], x[4])
bnds = ((0, 1), (0, 1), (0,1), (0, 1), (0,1))
x0 = np.random.rand(20, 5)


In [None]:
tfp.optimizer.lbfgs_minimize?

In [14]:
x0[0]

array([0.65673268, 0.77894822, 0.87241084, 0.82105534, 0.60987192])

In [5]:
alpha = 0.01
beta= 50
epsilon =0.1
phi=0.1
pers=0.1
T=110
actions = 3
K=4
set_size=6
prior=0.8
rewards = {0:1, 1:0, 2:1, 3:2, 4:2, 5:0}
# alpha, prior, epsilon, phi, pers
# 0.01, 0.8, 0.1, 0.1, 0.1

In [6]:
action = []
reward = []
stimulus = []

for i in range(10):
    model = simulate_RLWM(alpha, K, prior, beta, epsilon, phi, pers, T, actions, rewards)
    model.fit()
    
    action.append(model.key_answer)
    reward.append(model.outcome)
    stimulus.append(model.stimulus)
data = np.array([np.concatenate(action), np.concatenate(reward), np.concatenate(stimulus)])
print(data.shape)

(3, 1100)


In [34]:
start = time.time()
times = []
results = []
for x in x0:
    t0 = time.time()
    result = minimize(fun, x, args=data, method= 'L-BFGS-B', tol=None, bounds=bnds)#, jac=jacobian)
    times.append(time.time()-t0)
    results.append(result.x)
print("--- %s seconds ---" % (np.mean(times)))
print("--- %s seconds ---" % (time.time() - start))
#     print(f' Number of iterations: {result.nit}')
#     print(f'output result: {result.x}')

--- 2.9943212985992433 seconds ---
--- 59.88690114021301 seconds ---


In [35]:
result

      fun: 687.9425768313781
 hess_inv: <5x5 LbfgsInvHessProduct with dtype=float64>
      jac: array([-4.54747349e-05,  9.09494702e-04, -4.20641300e-03, -5.00222083e-04,
        7.95807860e-05])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 90
      nit: 12
     njev: 15
   status: 0
  success: True
        x: array([0.81511519, 0.44806349, 0.1709512 , 0.55485761, 0.56752301])

In [40]:
start = time.time()
times = []
results = []
for x in x0:
    t0 = time.time()
    result = minimize(fun, x, args=data, method= 'L-BFGS-B', tol=1e-2, bounds=bnds)#, jac=jacobian)
    times.append(time.time()-t0)
    results.append(result.x)
print("--- %s seconds ---" % (np.mean(times)))
print("--- %s seconds ---" % (time.time() - start))
# print(f' Number of iterations: {result.nit}')
# print(f'output result: {result.x}')

--- 0.7605793476104736 seconds ---
--- 15.21233582496643 seconds ---


In [41]:
result

      fun: 706.4466802835186
 hess_inv: <5x5 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 2.27373674e-05, -3.35664936e+00,  6.55214990e+01, -7.26814961e+01,
       -4.54747349e-05])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 24
      nit: 2
     njev: 4
   status: 0
  success: True
        x: array([0.81513508, 0.49029891, 0.31509891, 0.14621098, 0.56748422])

In [28]:
start = time.time()
times = []
results = []
for x in x0:
    t0 = time.time()
    result = minimize(fun, x, args=data, method= 'Powell', tol=1e-5, bounds=bnds)#, jac=jacobian)
    times.append(time.time()-t0)
    results.append(result.x)
print("--- %s seconds ---" % (np.mean(times)))
print("--- %s seconds ---" % (time.time() - start))
# print("--- %s seconds ---" % (time.time() - t0))
# print(f' Number of iterations: {result.nit}')
# print(f'output result: {result.x}')

  Prl = np.exp(beta * Q[s, :])
  Prl = Prl/np.sum(Prl)
  NegLL = -np.sum(np.log(choiceProb))


--- 12.389606165885926 seconds ---
--- 247.79245018959045 seconds ---


In [29]:
start = time.time()
times = []
results = []
for x in x0:
    t0 = time.time()
    result = minimize(fun, x, args=data, method= 'SLSQP', tol=1e-5, bounds=bnds)#, jac=jacobian)
    times.append(time.time()-t0)
    results.append(result.x)
print("--- %s seconds ---" % (np.mean(times)))
print("--- %s seconds ---" % (time.time() - start))
# print("--- %s seconds ---" % (time.time() - t0))
# print(f' Number of iterations: {result.nit}')
# print(f'output result: {result.x}')
# SLSQP

--- 2.5953839778900147 seconds ---
--- 51.90800619125366 seconds ---


In [31]:
result

     fun: 687.9425803780821
     jac: array([-4.57763672e-05,  2.88696289e-02,  7.69729614e-02,  5.59692383e-02,
        9.15527344e-05])
 message: 'Optimization terminated successfully'
    nfev: 92
     nit: 14
    njev: 14
  status: 0
 success: True
       x: array([0.81439332, 0.44807697, 0.17094469, 0.55498084, 0.56908962])

In [30]:
import os
os.system('say "your program has finished"')

0

In [None]:
action = []
reward = []
stimulus = []
# alpha, prior, epsilon, phi, pers
# 0.01, 0.8, 0.1, 0.1, 0.1
all_x = []
for i in range(20):
    real_x = np.random.rand(5)
    all_x.append(real_x)
    for i in range(10):
        model = simulate_RLWM(real_x[0], K, real_x[1], beta, real_x[2], real_x[3], real_x[4], T, actions, rewards)
        model.fit()

        action.append(model.key_answer)
        reward.append(model.outcome)
        stimulus.append(model.stimulus)
    data = np.array([np.concatenate(action), np.concatenate(reward), np.concatenate(stimulus)])
    print(data.shape)

    times = []
    results = []
    for x in x0:
        t0 = time.time()
        result = minimize(fun, x, args=data, method= 'L-BFGS-B', tol=1e-5, bounds=bnds)#, jac=jacobian)
        times.append(time.time()-t0)
        results.append(result.x)
    print("--- %s seconds ---" % (np.mean(times)))
