In [4]:
#!/usr/local/bin/env python
from __future__ import division
import numpy as np
from numpy import array
from numpy.random import sample as rs
from numpy import newaxis as na
import pandas as pd 
from scipy.stats import distributions
from scipy.stats import sem
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from radd import multirace
import os 
from collections import OrderedDict as odict
import itertools

sns.set(style='darkgrid', context='paper', font_scale=1.4)

temporal_dynamics = lambda p, t: np.cosh(p['xb'][:, na] * t)
updateQ = lambda q, winner, r, A: q[winner][-1] + A*(r - q[winner][-1])
boltzmann_choiceP = lambda q, name, B: np.exp(B*q[name][-1])/np.sum([np.exp(B*q[k][-1]) for k in q.keys()])

basedir = os.path.expanduser('~')
cards = pd.read_csv(basedir + "/Dropbox/CoAx/Projects/IGTob/IGTCards.csv")
cards_scaled=cards.copy()*.01
cards_normed = (cards-cards.mean().mean())/cards.std().mean()

In [None]:
class Environment(object):
    
    def __init__(self, feedback=None):
        
        self.feedback = feedback
        
    def set_environment(self, cards, nblocks=2):
            
        self.cards = cards
        self.nblocks = nblocks
        self.trials = np.array([cards.index.values]*nblocks).flatten()
        self.ntrials = len(self.trials)
        
        self.nalt = len(cards.iloc[1:].columns)
        self.names = np.sort(cards.columns.values)
        
        self.qdict={k:[0] for k in self.names}
        self.choice_prob={k:[1./self.nalt] for k in self.names}
    

In [None]:
class multirace(object):
    
    def __init__(self, p, pc_map={'vd': ['vd_a', 'vd_b', 'vd_c', 'vd_d'], 'vi': ['vi_a', 'vi_b', 'vi_c', 'vi_d']}):
        self.pcopy = deepcopy(p)
        self.p = deepcopy(p)
        self.pc_map=pc_map
        
        self.temporal_dynamics = lambda p, t: np.cosh(p['xb'][:, na] * t)
        self.updateQ = lambda q, winner, r, A: q[winner][-1] + A*(r - q[winner][-1])
        self.choiceP = lambda q, name, B: np.exp(B*q[name][-1])/np.sum([np.exp(B*q[k][-1]) for k in q.keys()])
        
        
        
    def simulate_race(self, dt=.001, si=.01, tb=2.5):

        nresp = len(self.pc_map.values()[0])
        p = vectorize_params(self.p, pc_map=self.pc_map, nresp=nresp)

        dx=np.sqrt(si*dt)
        Pd = 0.5*(1 + self.p['vd']*dx/si)
        Pi = 0.5*(1 + self.p['vi']*dx/si)

        Tex = np.ceil((tb-self.p['tr'])/dt).astype(int)
        xtb = temporal_dynamics(self.p, np.cumsum([dt]*Tex.max()))

        direct = np.where((rs((nresp, Tex.max())).T < Pd),dx,-dx).T
        indirect = np.where((rs((nresp, Tex.max())).T < Pi),dx,-dx).T
        execution = xtb[0] * np.cumsum(direct-indirect, axis=1)
        return execution

    
    def reweight_drift(self, alt_i, cp_delta, a_pos, a_neg):
        """ update direct & indirect drift-rates for multirace winner """
        vd_exp = self.p['vd'][alt_i]
        vi_exp = self.p['vi'][alt_i]
        
        self.p['vi'][alt_i] = self.p['vd'][alt_i] + (vd_exp*a_pos * cp_delta)
        self.p['vi'][alt_i] = self.p['vi'][alt_i] + (vi_exp*a_neg * -cp_delta)
        

    
    def analyze_multiresponse(execution, qdict={}, vals=[], names=[], a_pos=.06, a_neg=.06,  dt=.001, beta=5, choice_prob={}):
        """analyze multi-race execution processes"""
        p = self.p
        nsteps_to_rt = np.argmax((execution.T>=p['a']).T, axis=1)
        rts = p['tr'] + nsteps_to_rt*dt

        # set non responses to 999
        rts[rts==p['tr'][0]]=999
        if np.all(rts==999):
            # if no response occurs, increase exponential bias (up to 3.0)
            if p['xb'] <= 3.0:
                p['xb']=p['xb']*1.005
            return np.nan, rts, execution, p, qdict, choice_prob

        # get accumulator with fastest RT (winner) in each cond
        winner = np.argmin(rts)

        # get rt of winner in each cond
        winrt = rts[winner]

        # slice all traces at time the winner crossed boundary
        traces = [execution[i, :nsteps_to_rt[winner]] for i in xrange(len(rts))]

        reward = vals[winner]
        winner_name = names[winner]
        loser_names = names[names!=winner_name]

        # update action value
        qval = qdict[names[winner]][-1]
        if reward>=qval:
            alpha=a_pos
        else:
            alpha=a_neg

        Qt = self.updateQ(qdict, winner_name, reward, alpha)
        qdict[winner_name].append(Qt)
        for lname in loser_names:
            qdict[lname].append(qdict[lname][-1])

        for alt_i, name in enumerate(names):
            cp_old = choice_prob[name][-1]
            # update choice probability using boltzmann eq. w/ inv. temp beta
            cp_new = choiceP(qdict, name, beta)
            choice_prob[name].append(cp_new)
            # calc. change in choice probability for alt_i
            cp_delta = cp_new - cp_old
            # update direct & indirect drift-rates with cp_delta
            p = reweight_drift(p, alt_i, cp_delta, a_pos, a_neg)
        
        return winner, rts, traces, p, qdict, choice_prob

    

In [31]:
class Agent(object):
    
    def __init__(self, ap=.1, an=.1, b=5, cards=None, niter=10):
        
        self.updateQ = lambda q, winner, r, A: q[winner][-1] + A*(r - q[winner][-1])
        self.updateP = lambda q, name, b: np.exp(b*q[name][-1])/np.sum([np.exp(b*q[k][-1]) for k in q.keys()])
        
        if cards is not None:
            self.set_environment(cards)    
        if any(np.size(v)>1 for v in [ap, an, b]):
            self.track_params(niter=niter, ap=ap, an=an, b=b)
        else:
            self.set_params(ap=ap, an=an, b=b)
    
    
    def set_params(self, ap, an, b):
        self.ap = ap
        self.an = an
        self.b = b
        self.choices = []
    
    
    def track_params(self, niter=10, ap=.1, an=.1, b=5, ):
        
        blks = np.arange(niter)
        param_names = ['ap', 'an', 'b']
        param_values = [ap, an, b]
        
        for i, pvalue in enumerate(param_values):
            if not hasattr(pvalue, '__iter__'):
                param_values[i] = [pvalue]
        
        apos, aneg, beta = param_values
        perm_param_values = list(itertools.product(apos, aneg, beta))
        
        nsets = len(perm_param_values)
        blocksdf = pd.DataFrame(perm_param_values, columns=param_names, index=np.arange(nsets))
        
        self.blocksdf = pd.concat([blocksdf]*niter)
        self.blocksdf.reset_index(inplace=True)
        self.blocksdf.rename(columns={'index':'block'}, inplace=True)
        self.blocksdf['P']=0
        self.blocksdf['Q']=0
    
    
    def iter_params(self):
        
        for i in self.blocksdf.index.values:
            ap, an, b = self.blocksdf.loc[i, ['ap', 'an', 'b']].values
            self.set_params(ap=ap, an=an, b=b)
            
            P, Q = self.simulate_task(return_scores=True)
            self.blocksdf.loc[i, 'P'] = P
            self.blocksdf.loc[i, 'Q'] = Q
            
            
    def set_environment(self, cards, nblocks=2):
            
        self.cards = cards
        self.nblocks = nblocks
        self.trials = np.array([cards.index.values]*nblocks).flatten()
        self.ntrials = len(self.trials)
        
        self.nalt = len(cards.iloc[1:].columns)
        self.names = np.sort(cards.columns.values)
        
    
    def simulate_task(self, return_scores=False):
        
        self.qdict={k:[0] for k in self.names}
        self.choice_prob={k:[1./self.nalt] for k in self.names}
        
        for t in self.trials:
            rew_vals = self.cards.iloc[t, :].values
            qvals = np.array([self.qdict[name][-1] for name in self.names])
            pvals = np.array([self.choice_prob[name][-1] for name in self.names])
            winner = np.random.choice(np.arange(self.nalt), p=pvals)
            wname = self.names[winner]
            
            r = rew_vals[winner]
            q = qvals[winner]
            rpe = r - q
            
            if rpe>0:
                alpha=self.ap
            else:
                alpha=self.an
            
            Qup = q + (alpha * rpe)
            
            self.qdict[wname].append(Qup)
            self.choice_prob[wname].append(self.updateP(self.qdict, wname, self.b))
            
            for loser in self.names[self.names!=wname]:
                self.qdict[loser].append(self.qdict[loser][-1])
                self.choice_prob[loser].append(self.updateP(self.qdict, loser, self.b))
            
            self.choices.append(winner)
        
        if return_scores:
            return self.igt_scores()
    
    def igt_scores(self):
        
        ch = np.asarray(self.choices)
        A = ch[ch==0].size
        B = ch[ch==1].size
        C = ch[ch==2].size
        D = ch[ch==3].size
        
        # payoff (P) score
        P = (C+D) - (A+B)
        # sensitivity (Q) score
        Q = (B+D) - (A+C)
        
        return [P, Q]

    def plot_summary(self, titles=['Order of Choices','Number of Choices per Card', 'Change in Q(card)',
        'Change in P(card)']):
        
        
        f, axes = plt.subplots(2, 2, figsize=(10,8))
        a1, a2, a3, a4 = axes.flatten()
        
        name_labels = [n.upper() for n in self.names]
        
        a1.plot(self.choices, lw=0, marker='o')
        a1.set_ylim(-.5, 3.5); a1.set_yticks(np.arange(self.nalt))
        a1.set_yticklabels(name_labels)
        
        a2.hist(np.asarray(self.choices))
        a2.set_xticks(np.arange(self.nalt))
        a2.set_xticklabels(name_labels)
        
        for i, n in enumerate(self.names):
            a3.plot(self.qdict[n], label=name_labels[i])
            a4.plot(self.choice_prob[n], label=name_labels[i])
        
        a3.legend(loc=0)
        a4.legend(loc=0)
        
        for i, ax in enumerate(axes.flatten()):
            ax.set_title(titles[i])
        
        f.subplots_adjust(hspace=.35)

In [47]:
agx=Agent(ap=.1, an=.1, b=.001, cards=cards)
#agx.iter_params()

In [48]:
agx.simulate_task()

In [49]:
agx.plot_summary()

In [46]:
agx.plot_summary()

In [25]:
agx.plot_summary()

In [556]:
n = 3
alphas = np.linspace(.01, .5, n)
betas = np.linspace(.01, .5, n)
agx=Agent(ap=alphas, an=alphas.mean(), b=betas, cards=cards)
agx.iter_params()

In [570]:
pmu = blocksdf.groupby('block').mean().P
pstd = blocksdf.groupby('block').std().P

qmu = blocksdf.groupby('block').mean().Q
qstd = blocksdf.groupby('block').std().Q

In [573]:
blocksdf.groupby('block').mean()

Unnamed: 0_level_0,ap,an,b,P,Q
block,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.01,0.255,0.01,49.2,-18.4
1,0.01,0.255,0.255,81.6,-61.2
2,0.01,0.255,0.5,85.4,-74.8
3,0.255,0.255,0.01,42.0,-1.0
4,0.255,0.255,0.255,84.0,-77.2
5,0.255,0.255,0.5,82.8,-78.6
6,0.5,0.255,0.01,34.2,1.0
7,0.5,0.255,0.255,85.6,-73.0
8,0.5,0.255,0.5,83.4,-77.2


In [569]:
blocksdf= agx.blocksdf.sort(['ap', 'b'])

  if __name__ == '__main__':


In [None]:
agx.

In [543]:
agx.blocksdf.rename(columns={'index':'block'}, inplace=True)

In [560]:
len(agx.blocksdf.index)

90

In [526]:
agx.blocksdf['bk'] = np.array([np.arange(10)]*270).flatten()

In [None]:
agx.track_params()

In [424]:
nblocks = 25
Pmu, Qmu = [], []
Pse, Qse = [], []
betas = np.linspace(0.01, 0.5, 100)
#agents = [Agent(a_pos=.2, a_neg=.2, beta=i, cards=cards) for i in betas]
agentx=Agent(a_pos=.1, a_neg=.1, beta=.5, cards=cards)
for i, a in enumerate(agents):
    blocks = [a.simulate_task(return_scores=True) for i in range(nblocks)]
    
    Pmu.append(np.mean([PQ[0] for PQ in blocks]))
    Pse.append(sem([PQ[0] for PQ in blocks]))
    
    Qmu.append(np.mean([PQ[1] for PQ in blocks]))
    Qse.append(sem([PQ[1] for PQ in blocks]))
    
#Ps, Qs = [np.asarray(l) for l in [Pse, Qse]]

#idx=Agent(a_pos=.1, a_neg=.1, beta=.5, cards=cards)
#idx.set_environment(cards=cards, nblocks=5)
#idx.simulate_task()
#idx.calc_igt_scores()

In [None]:
agx.

In [562]:
index = agx.blocksdf.ap
columns = agx.blocksdf.b
len(index)

90

In [565]:
agx.blocksdf.P.size#reshape(90,90)

90

In [563]:
pd.DataFrame(agx.blocksdf.P.reshape(90,90), index = agx.blocksdf.ap, columns = agx.blocksdf.b)

ValueError: total size of new array must be unchanged

In [557]:
sns.heatmap(data = np.vstack([agx.ap.values, agx.b.values]))

AttributeError: 'numpy.float64' object has no attribute 'values'

In [571]:
plt.errorbar(np.arange(len(pmu)), pmu, yerr=pstd)
#plt.errorbar(betas, qmu, yerr=qstd)

<Container object of 3 artists>

In [407]:
plt.plot(betas, Ps)
plt.plot(betas, Qs)

[<matplotlib.lines.Line2D at 0x7f712d01ed10>]

In [405]:
plt.plot(betas, Ps)
plt.plot(betas, Qs)

[<matplotlib.lines.Line2D at 0x7f712c005410>]

In [414]:
idx.plot_summary()

In [259]:
for n in idx.names:
    plt.plot(idx.qdict[n], label=n)
plt.gca().legend()

<matplotlib.legend.Legend at 0x7f712f494b10>

In [None]:
from random import random
from itertools import takewhile

def accumulate(iterator):
    """Returns a cumulative sum of the elements.
    accumulate([1, 2, 3, 4, 5]) --> 1 3 6 10 15"""
    current = 0
    for value in iterator:
        current += value
        yield current

def weightedChoice(weights, objects):
    """Return a random item from objects, with the weighting defined by weights 
    (which must sum to 1)."""
    limit = random()
    return objects[sum(takewhile(bool, (value < limit for value in accumulate(weights))))]

In [None]:
def run_trials(cards, nblocks=2, a_pos=.01, a_neg=.01, beta=2):
    """simulate series of trials with learning
    Arguments:
        p (dict): parameter dictionary
        cards (DataFrame): pandas DF (ntrials x nalt) with choice outcome vaulues
    Returns:
        choices (list): choice made on each trial
        rts (dict): rt for each trial (winner rt)
        all_traces (list): execution process traces truncated to length of winner
        qdict (dict): sequence of Q-value updates for each alt
    """

    trials = cards.append([cards]*(nblocks-1)).reset_index()
    trials.rename(columns={'index':'t'}, inplace=True)
    ntrials=len(trials)
    choices, all_traces = [], []
    names = np.sort(cards.columns.values)
    rts={k:[] for k in names}
    qdict={k:[0] for k in names}
    choice_prob={k:[.25] for k in names}
    
    for i in xrange(ntrials):
        vals = trials.iloc[i, 1:].values
        if i==0:
            winner = np.random.randint(0,len(vals))
            rew_t = vals[winner]
            Qt = updateQ(qdict, names[winner], rew_t, a_pos)
            qdict[winner_name].append(Qt)
            
            loser_names = names[names!=names[winner]]
            for lname in loser_names:
                qdict[lname].append(qdict[lname][-1])
            
        else:
            
            
            

In [50]:
def analyze_multiresponse(execution, p, qdict={}, vals=[], names=[], a_pos=.06, a_neg=.06,  dt=.001, beta=5, choice_prob={}):
        """analyze multi-race execution processes"""

        nsteps_to_rt = np.argmax((execution.T>=p['a']).T, axis=1)
        rts = p['tr'] + nsteps_to_rt*dt

        # set non responses to 999
        rts[rts==p['tr'][0]]=999
        if np.all(rts==999):
            # if no response occurs, increase exponential bias (up to 3.0)
            if p['xb'] <= 3.0:
                p['xb']=p['xb']*1.005
            return np.nan, rts, execution, p, qdict, choice_prob

        # get accumulator with fastest RT (winner) in each cond
        winner = np.argmin(rts)

        # get rt of winner in each cond
        winrt = rts[winner]

        # slice all traces at time the winner crossed boundary
        traces = [execution[i, :nsteps_to_rt[winner]] for i in xrange(len(rts))]

        reward = vals[winner]
        winner_name = names[winner]
        loser_names = names[names!=winner_name]

        # update action value
        qval = qdict[names[winner]][-1]
        if reward>=qval:
            alpha=a_pos
        else:
            alpha=a_neg

        Qt = updateQ(qdict, winner_name, reward, alpha)
        qdict[winner_name].append(Qt)
        for lname in loser_names:
            qdict[lname].append(qdict[lname][-1])

        for alt_i, name in enumerate(names):
            cp_old = choice_prob[name][-1]
            # update choice probability using boltzmann eq. w/ inv. temp beta
            cp_new = boltzmann_choiceP(qdict, name, beta)
            choice_prob[name].append(cp_new)
            # calc. change in choice probability for alt_i
            cp_delta = cp_new - cp_old
            # update direct & indirect drift-rates with cp_delta
            p = reweight_drift(p, alt_i, cp_delta, a_pos, a_neg)


        return winner, rts, traces, p, qdict, choice_prob

In [None]:

    rew_t = rew_vals[winner]
    winner_name = self.names[winner]

    loser_names = self.names[self.names!=self.names[winner]]
    for lname in loser_names:
        self.qdict[lname].append(self.qdict[lname][-1])

    if rew_t >= self.qdict[winner_name][-1]:
        alpha = self.a_pos
    else:
        alpha = self.a_neg

    Qt = self.updateQ(self.qdict, winner_name, rew_t, alpha)
    self.qdict[winner_name].append(Qt)

    else:  
            
                
            #if np.all(qvals == qvals[0]):
            #    winner = np.random.choice(names)
            #else:
            #    winner = qvals.argmax()
            
