#Fit two step task with an associative algorithm

In [1]:
import os
import numpy as np
import glob
import csv
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats, optimize
from pandas import DataFrame, Series
import seaborn as sns
import random as rd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import scipy.stats
import patsy
from scipy.optimize import minimize
from scipy.optimize import basinhopping
from sklearn import linear_model
import multiprocessing
import random
from scipy.stats import norm
from scipy.stats import beta
##Code for analysis of fMRI experiment

In [158]:
ntrials = 200
alpha = .2

#initialize data structures
objects = ['a1','a2','b1','b2','c1','c2']
states = ['a','b','c','terminal']
actions = ['1','2']

#Initialize transition, rewards, values matrics
transitions = {}
rewards = {}
V = {}
associations = {}
for s in states:
    transitions[s] = {}
    rewards[s] = {}
    for a in actions:
        transitions[s][a] = {}
        rewards[s][a] = 0
for o in objects:
    V[o] = 0
    associations[o] = {}

for o1 in associations:
    for o2 in objects:
        if o1 != o2: #avoid self associations
            associations[o1][o2] = 0

#fill in transition probs
for s1 in states:
    for a in actions:
        for s2 in states:
            transitions[s1][a][s2] = 0         
transitions['b']['1']['terminal'] = 1
transitions['b']['2']['terminal'] = 1
transitions['c']['1']['terminal'] = 1
transitions['c']['2']['terminal'] = 1
transitions['a']['1']['b'] = .7
transitions['a']['1']['c'] = .3
transitions['a']['2']['b'] = .3
transitions['a']['2']['c'] = .7

#set up reward probs
rewards['b']['1'] = .6
rewards['b']['2'] = .4
rewards['c']['1'] = .4
rewards['c']['2'] = .6

In [126]:
#gradually shift reward probabilitites to encourage learning
def update_rewards(rewards):
    for s in ['b','c']: #only update end states
        for a in actions:
            shift = np.random.normal(0,.025)
            if (rewards[s][a] + shift > .75) or (rewards[s][a] + shift < .25): #reflecting boundaries
                rewards[s][a] = rewards[s][a] - shift
            else:
                rewards[s][a] = rewards[s][a] + shift
    return rewards

In [127]:
def get_reward(state,action,rewards):
    return scipy.stats.bernoulli.rvs(rewards[state][action])

In [128]:
def next_state(state,action):
    probs = map(lambda x: transitions[state][action][x], states)
    return np.random.choice(a=states,p=probs)

In [129]:
def get_action(state):
    return actions[0]

In [163]:
def update_associations(state,new_state,action,associations):
    if new_state != 'terminal':
        for a in actions:
            associations[state + action][new_state + a] = associations[state + action][new_state + a] + 1
            associations[new_state + a][state + action] = associations[state + action][new_state + a] #make symmetric
    
    #normalize associations
    total_strength = 0
    for o1 in objects:
        for o2 in objects:
            if o1 != o2:
                total_strength += associations[o1][o2]
    for o1 in objects:
        for o2 in objects:
            if o1 != o2:
                associations[o1][o2] = associations[o1][o2] / (total_strength*1.0)
    return associations

In [164]:
def update_value(rew,state,action,V,associations):
    delta = rew - V[state+action]
    V[state+action] += alpha*delta
    
    #percolate value one step back, weighted by the strength of association
    for o in associations[state+action]:
        delta = rew - V[o]
        V[o] = V[o] + associations[state+action][o] * alpha* delta
    
    for o1 in associations[state+action]:
        for o2 in associations[o1]:
            if o1 != o2:
                delta = rew - V[o2]
                V[o2] = V[o2] + associations[state+action][o] * associations[o1][o2]* alpha* delta
    return V

In [179]:
#run trial
def take_step(state,rewards,associations):
    if state == 'terminal': #end state
        rewards = update_rewards(rewards)
        return rewards
    
    #do standard MDP stuff
    action = get_action(state)
    new_state = next_state(state,action)
    rew = get_reward(state,action,rewards)

    #update values and associations
    associations = update_associations(state,new_state,action,associations) #update associations
    value = update_value(rew,state,action,V,associations)
    
    take_step(new_state,rewards,associations)
    
    return V
take_step('a',rewards,associations)

{'a1': 0.09482371585115376,
 'a2': 0.0,
 'b1': 0.3664965207688938,
 'b2': 0.0,
 'c1': 0.31561346899170006,
 'c2': 0.0}