# Replicating fig 2 graphs with the curves of the RL model with different richness and the foraging model for different thresholds

I just realized that I shouldn't have calculated discriminability, richness, and delta reward from the values perceived by the human (I should have used the word "agent", but it's too late for that); I should have used the actual environment values. So that's what we're going to do now.

## Import the data we've been using to try to replicate it with a function

In [1]:
import random
import numpy as np
import pandas as pd
from scipy import interpolate


In [None]:

# Load the two digitised reward-probability traces. Option 1 is red, option 2 is blue.
df_red, df_blue = map(
    pd.read_csv,
    ('../path_trace_foraging_red.csv', '../path_trace_foraging_blue.csv'),
)

# Wrap each trace in a callable p(trial): linear interpolation between the sampled trials,
# extrapolating for trials outside the sampled range.
p_red, p_blue = (
    interpolate.interp1d(trace['trial'], trace['p'], kind='linear', fill_value='extrapolate')
    for trace in (df_red, df_blue)
)


# Simulation

In [2]:
# num_episodes = 10000
max_steps = 300  # trials per simulated participant
k = 2            # number of options (0 = red, 1 = blue)
n_humans = 400   # number of simulated participants

# Reward probability of each option, per trial and per participant (filled by the simulation)
p_red_human = np.empty((max_steps, n_humans))
p_blue_human = np.empty_like(p_red_human)

# Per-participant copies of the learned values and of the choices made
humans_perceived_values = np.empty((2, max_steps, n_humans))
humans_choices = np.empty((max_steps, n_humans))

# Scratch buffers for the participant currently being simulated; NaN until written
chosen_options = np.full(max_steps, np.nan)
rewards = np.full(max_steps, np.nan)

# A Q-table would normally have shape (n_states, k) and be updated in place, discarding
# earlier values. There are no states here, and the whole history is needed for plotting,
# so one column per trial is kept instead.
q_table = np.zeros((k, max_steps))

# The paper says participants were told at the beginning whether the decks were good, bad,
# or mediocre, so the RL algorithm could receive the same information through the bias
# (the first Q-value of each deck, e.g. q_table[0,0] = 1 and q_table[1,0] = 0).
# Interestingly, the paper does not do that: both options start at 0.5.
q_table[:, 0] = 0.5

# v = np.zeros((n_decks,max_pulls))
def choose_option(time, beta, human, rng):
    """Pick an option with a logistic (two-option softmax) rule on the learned values.

    The probability of picking red (option 0) is the logistic function of ``beta`` times
    the difference between the module-level ``q_table`` values of red and blue at ``time``.

    Returns:
        A tuple ``(chosen, probability, unchosen)``: the chosen option index, the entry of
        ``p_red_human`` / ``p_blue_human`` for that option at ``[time, human]``, and the
        index of the option that was not chosen.
    """
    value_gap = q_table[0, time] - q_table[1, time]
    p_choose_red = 1 / (1 + np.exp(-beta * value_gap))
    picked_red = rng.uniform(0, 1) < p_choose_red
    if not picked_red:
        return 1, p_blue_human[time, human], 0
    return 0, p_red_human[time, human], 1
    


In [3]:
def get_reward(probability, rng):
    """Draw a binary reward: 1 with chance ``probability``, otherwise 0.

    ``rng`` must provide ``uniform(low, high)``; exactly one draw is consumed per call.
    """
    draw = rng.uniform(0, 1)
    return 1 if draw < probability else 0


In [10]:
alpha = 0.65       # learning rate of the value update
beta = 3           # inverse temperature of the choice rule
hazard_rate = 0.1  # per-trial chance that an option's reward probability moves by +/-0.1
rng = np.random.default_rng()

for human in range(n_humans):
    # Every participant starts with both reward probabilities at 0.5.
    p_red_human[0, human] = p_blue_human[0, human] = 0.5

    for step in range(max_steps):
        chosen_option, option_prob_value, unchosen_option = choose_option(step, beta, human, rng)
        reward = get_reward(option_prob_value, rng)
        rewards[step] = reward
        chosen_options[step] = chosen_option

        # The last trial has no successor column to write into.
        if step + 1 == max_steps:
            break

        # Carry both values forward, then move only the chosen one towards the reward.
        q_table[:, step + 1] = q_table[:, step]
        q_table[chosen_option, step + 1] += alpha * (reward - q_table[chosen_option, step])

        # Random-walk the environment: red first, then blue, each clipped to [0, 1].
        # The uniform draw is made before the (conditional) direction draw.
        for true_p in (p_red_human, p_blue_human):
            delta_p = rng.choice([-0.1, 0.1]) if rng.uniform(0, 1) < hazard_rate else 0
            true_p[step + 1, human] = np.clip(true_p[step, human] + delta_p, 0, 1.0)

    # q_table and chosen_options are reused by the next participant, so store copies now.
    humans_perceived_values[:, :, human] = q_table
    humans_choices[:, human] = chosen_options

In [12]:
# Richness and discriminability of the *learned* values currently held in q_table,
# plotted against each other (one marker per trial).
local_richness = np.abs(q_table[0] + q_table[1])
local_discriminability = np.abs(q_table[0] - q_table[1]) / local_richness

fig = go.Figure()
fig.add_scatter(
    x=local_richness,
    y=local_discriminability,
    name="A",
    mode='markers',
    opacity=1,
    line=dict(color="#C3BD64"),
)
fig.show()

In [11]:
# Overview figures for participants 30 and 0: both are built first, then shown in order.
fig1, fig2 = (
    create_fig_of_RL_experiment(
        humans_choices[:, who],
        humans_perceived_values[:, :, who],
        p_red_human[:, who],
        p_blue_human[:, who],
    )
    for who in (30, 0)
)
fig1.show()
fig2.show()

In [7]:
import plotly.graph_objects as go
def create_fig_of_RL_experiment(chosen_options, q_table, p_red_human, p_blue_human, p_red=None, p_blue=None):
    """Plot one participant's reward probabilities, learned values and derived measures.

    Args:
        chosen_options: per-trial choice, 0 for red and 1 for blue.
        q_table: array indexed as ``[option, trial]`` holding the learned values.
        p_red_human: per-trial reward probability of the red option.
        p_blue_human: per-trial reward probability of the blue option.
        p_red: optional callable mapping trial numbers to a fixed reference trace for
            red; its output is divided by 100 before plotting. ``None`` (or an empty
            list, the former default) skips the trace.
        p_blue: same as ``p_red`` for the blue option.

    Returns:
        The assembled ``go.Figure``.
    """
    def _is_given(curve):
        # An empty list used to be the "not provided" sentinel; keep honouring it so
        # existing callers that pass [] explicitly behave as before.
        return curve is not None and not (isinstance(curve, list) and not curve)

    chosen_options = np.asarray(chosen_options)
    # Float arrays so the masked division below can write into a float buffer.
    p_red_human = np.asarray(p_red_human, dtype=float)
    p_blue_human = np.asarray(p_blue_human, dtype=float)

    fig = go.Figure()

    # 1-based trial axis, sized from the data instead of being fixed at 300 trials.
    n_trials = len(chosen_options)
    x = np.linspace(1, n_trials, n_trials)

    # Optional fixed reference traces (dotted)
    if _is_given(p_red):
        fig.add_trace(go.Scatter(x=x, y=p_red(x) / 100.0, name="Option 1 Fixed", mode='lines',
                                 opacity=0.5, line=dict(color="#C37364", dash='dot')))
    if _is_given(p_blue):
        fig.add_trace(go.Scatter(x=x, y=p_blue(x) / 100.0, name="Option 2 Fixed", mode='lines',
                                 opacity=0.5, line=dict(color="#136EAC", dash='dot')))

    # Reward probabilities this participant actually experienced
    fig.add_trace(go.Scatter(x=x, y=p_red_human, name="Option 1 Ideal", mode='lines',
                             opacity=0.5, line={"color": "#C37364"}))
    fig.add_trace(go.Scatter(x=x, y=p_blue_human, name="Option 2 Ideal", mode='lines',
                             opacity=0.5, line={"color": "#136EAC"}))

    # Values learned by the simple RL model
    fig.add_trace(go.Scatter(x=x, y=q_table[0, :], name="Option 1 Perceived", mode='lines',
                             opacity=1, line={"color": "#C37364"}))
    fig.add_trace(go.Scatter(x=x, y=q_table[1, :], name="Option 2 Perceived", mode='lines',
                             opacity=1, line={"color": "#136EAC"}))

    # Delta reward: absolute difference between the two reward probabilities
    delta_reward = np.abs(p_red_human - p_blue_human)
    fig.add_trace(go.Scatter(x=x, y=delta_reward, name="Delta Reward", mode='lines',
                             opacity=1, line={"color": "#BC14B9"}))

    # Richness: sum of the two reward probabilities
    richness = p_red_human + p_blue_human
    fig.add_trace(go.Scatter(x=x, y=richness, name="Richness", mode='lines',
                             opacity=1, line={"color": "#BCAE14"}))

    # Discriminability: delta reward relative to richness, defined as 0 where richness is 0
    discriminability = np.divide(
        delta_reward,
        richness,
        where=richness != 0,
        out=np.zeros_like(richness)
    )
    fig.add_trace(go.Scatter(x=x, y=discriminability, name="Discriminability", mode='lines',
                             opacity=1, line={"color": "#4414BC"}))

    # Rows of markers just above 1 showing which option was chosen on each trial
    chosen_options_y_red = np.ones(n_trials) * 1.075
    chosen_options_y_blue = np.ones(n_trials) * 1.05
    chosen_options_x = np.arange(1, n_trials + 1)

    # Red markers: trials on which option 0 (red) was chosen
    picked_red = chosen_options == 0
    fig.add_trace(go.Scatter(
        x=chosen_options_x[picked_red],
        y=chosen_options_y_red[picked_red],
        mode='markers',
        marker=dict(color='#C37364', size=6),
        name='Chosen option: Red'
    ))

    # Blue markers: trials on which option 1 (blue) was chosen
    picked_blue = chosen_options == 1
    fig.add_trace(go.Scatter(
        x=chosen_options_x[picked_blue],
        y=chosen_options_y_blue[picked_blue],
        mode='markers',
        marker=dict(color='#136EAC', size=6),
        name='Chosen option: Blue'
    ))

    # fig.update_layout(yaxis=dict(range=[0, 1.15]))  # Adjust y-axis to fit dots
    return fig

# Now, let's try to recreate the graph:

In [14]:
# Environment-based measures for every trial and participant, computed on the whole
# (max_steps, n_humans) arrays at once.
humans_richness = p_red_human + p_blue_human
humans_delta_reward = np.abs(p_red_human - p_blue_human)

# Discriminability is delta reward relative to richness; it is set to 0 where richness is 0.
humans_discriminability = np.divide(
    humans_delta_reward,
    humans_richness,
    out=np.zeros_like(humans_delta_reward),
    where=humans_richness != 0,
)

In [15]:
# Discriminability against richness, one trace per participant (first 50 participants)
fig_richness_discriminability2 = go.Figure()
for human in range(50):
    fig_richness_discriminability2.add_scatter(
        x=humans_richness[:, human],
        y=humans_discriminability[:, human],
        name="A",
        mode='markers',
        opacity=1,
        line=dict(color="#C3BD64"),
    )
# fig_richness_discriminability2.update_layout(yaxis=dict(range=[0, 2]),xaxis=dict(range=[0,2]))  # Adjust y-axis to fit dots
fig_richness_discriminability2.show()

In [16]:
# Switch indicator per trial and participant: 1.0 where delta reward is below the
# threshold, 0.0 otherwise. The last axis indexes the thresholds 0.05, 0.1, 0.2, 0.4.
switch_per_human = np.empty((max_steps, n_humans, 4))
for level, cutoff in enumerate((0.05, 0.1, 0.2, 0.4)):
    switch_per_human[:, :, level] = humans_delta_reward < cutoff

In [17]:
# Number of switch trials of participant 1 under each of the four thresholds
print(*(sum(switch_per_human[:, 1, level]) for level in range(4)))

13.0 21.0 35.0 136.0


In [18]:
# Overview of participant 1 with the switches of the largest threshold (column 3).
fig1 = create_fig_of_RL_experiment_with_switch(
    chosen_options=humans_choices[:, 1],
    q_table=humans_perceived_values[:, :, 1],
    p_red=p_red,
    p_blue=p_blue,
    switches=switch_per_human[:, 1, 3],
    delta_reward=humans_delta_reward[:, 1],
)
fig1.show()

NameError: name 'create_fig_of_RL_experiment_with_switch' is not defined

In [19]:
# Average switch indicator (largest threshold) of participant 1, binned by discriminability
hist = go.Figure()
hist.add_trace(go.Histogram(
    x=humans_discriminability[:, 1],
    y=switch_per_human[:, 1, 3],
    nbinsx=10,
    histfunc='avg',
))
hist.show()

# Same average, binned by richness
hist2 = go.Figure()
hist2.add_trace(go.Histogram(
    x=humans_richness[:, 1],
    y=switch_per_human[:, 1, 3],
    nbinsx=10,
    histfunc='avg',
))
hist2.show()

In [None]:

# Average p(switch) per discriminability bin, for a single participant
thresholds = [0.05, 0.1, 0.2, 0.4]  # Thresholds for switching
human = 1

bins = np.linspace(0, 1.049, 11)
bin_centers = (bins[:-1] + bins[1:]) / 2

# Bin membership does not depend on the threshold, so it is computed once.
digitized = np.digitize(humans_discriminability[:, human], bins)

binned_switch = []
for i, t in enumerate(thresholds):
    bin_means = []
    for j in range(1, len(bins)):
        in_bin = digitized == j
        # Empty bins become NaN so they show up as gaps rather than zeros.
        bin_means.append(switch_per_human[in_bin, human, i].mean() if in_bin.any() else np.nan)
    binned_switch.append(bin_means)

colors = ['goldenrod', 'darkorange', 'lightseagreen', 'teal']
fig = go.Figure()
for t, color, means in zip(thresholds, colors, binned_switch):
    fig.add_trace(go.Scatter(
        x=bin_centers,
        y=means,
        mode='lines+markers',
        name=f'Δ reward < {t}',
        line={'color': color},
        marker={'size': 6},
    ))

fig.update_layout(
    title='Compare-Alternatives Model: p(switch) vs Discriminability',
    xaxis_title='Discriminability',
    yaxis_title='p(switch)',
    legend_title='Thresholds',
    template='plotly_white',
)

fig.show()

In [None]:

# Bin richness for plotting average p(switch), for a single participant
bins = np.linspace(0, 2, 11)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
binned_switch = []

thresholds = [0.05, 0.1, 0.2, 0.4]  # Thresholds for switching
human = 1

# np.digitize assigns a value equal to the last edge to index len(bins), i.e. outside
# every bin. Richness is exactly 2.0 when both reward probabilities sit at their cap of
# 1.0, so that index is folded back into the last bin instead of being dropped.
# Bin membership does not depend on the threshold, so it is computed once.
digitized = np.minimum(np.digitize(humans_richness[:, human], bins), len(bins) - 1)

for i, t in enumerate(thresholds):
    # Empty bins become NaN so they show up as gaps rather than zeros.
    bin_means = [switch_per_human[digitized == j, human, i].mean() if np.any(digitized == j) else np.nan
                 for j in range(1, len(bins))]
    binned_switch.append(bin_means)
fig = go.Figure()

colors = ['goldenrod', 'darkorange', 'lightseagreen', 'teal']
for i, t in enumerate(thresholds):
    fig.add_trace(go.Scatter(
        x=bin_centers,
        y=binned_switch[i],
        mode='lines+markers',
        name=f'Δ reward < {t}',
        line=dict(color=colors[i]),
        marker=dict(size=6)
    ))

fig.update_layout(
    title='Compare-Alternatives Model: p(switch) vs richness',
    xaxis_title='richness',
    yaxis_title='p(switch)',
    legend_title='Thresholds',
    template='plotly_white'
)

fig.show()

# Now trying with all the values

In [20]:
# Average p(switch) per discriminability bin, pooled over all participants
thresholds = [0.05, 0.1, 0.2, 0.4]  # Thresholds for switching

bins = np.linspace(-0.05, 1.05, 12)
bin_centers = (bins[:-1] + bins[1:]) / 2

# Bin membership (one index per trial and participant) does not depend on the
# threshold, so it is computed once.
digitized = np.digitize(humans_discriminability[:, :], bins)

binned_switch = []
for i, t in enumerate(thresholds):
    bin_means = []
    for j in range(1, len(bins)):
        in_bin = digitized == j
        # Empty bins become NaN so they show up as gaps rather than zeros.
        bin_means.append(switch_per_human[in_bin, i].mean() if in_bin.any() else np.nan)
    binned_switch.append(bin_means)

colors = ['goldenrod', 'darkorange', 'lightseagreen', 'teal']
fig = go.Figure()
for t, color, means in zip(thresholds, colors, binned_switch):
    fig.add_trace(go.Scatter(
        x=bin_centers,
        y=means,
        mode='lines+markers',
        name=f'Δ reward < {t}',
        line={'color': color},
        marker={'size': 6},
    ))

fig.update_layout(
    title='Compare-Alternatives Model: p(switch) vs Discriminability',
    xaxis_title='Discriminability',
    yaxis_title='p(switch)',
    legend_title='Thresholds',
    template='plotly_white',
)

fig.show()

In [21]:

# Bin richness for plotting average p(switch), pooled over all participants
bins = np.linspace(0, 2, 10)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
binned_switch = []

thresholds = [0.05, 0.1, 0.2, 0.4]  # Thresholds for switching

# np.digitize assigns a value equal to the last edge to index len(bins), i.e. outside
# every bin. Richness is exactly 2.0 when both reward probabilities sit at their cap of
# 1.0, so that index is folded back into the last bin instead of being dropped.
# Bin membership does not depend on the threshold, so it is computed once.
digitized = np.minimum(np.digitize(humans_richness[:, :], bins), len(bins) - 1)

for i, t in enumerate(thresholds):
    # Empty bins become NaN so they show up as gaps rather than zeros.
    bin_means = [switch_per_human[digitized == j, i].mean() if np.any(digitized == j) else np.nan
                 for j in range(1, len(bins))]
    binned_switch.append(bin_means)
fig = go.Figure()

colors = ['goldenrod', 'darkorange', 'lightseagreen', 'teal']
for i, t in enumerate(thresholds):
    fig.add_trace(go.Scatter(
        x=bin_centers,
        y=binned_switch[i],
        mode='lines+markers',
        name=f'Δ reward < {t}',
        line=dict(color=colors[i]),
        marker=dict(size=6)
    ))

fig.update_layout(
    title='Compare-Alternatives Model: p(switch) vs richness',
    xaxis_title='richness',
    yaxis_title='p(switch)',
    legend_title='Thresholds',
    template='plotly_white'
)

fig.show()

In [None]:
# Sanity check: collapsing the (trial, participant) axes keeps the pairing intact, so the
# flattened switch column can be un-flattened back to the original slice.
flat_switch = switch_per_human.reshape(-1, 4)
print(switch_per_human.shape)
print(flat_switch.shape)
print(np.array_equal(flat_switch[:, 0].reshape(300, 400), switch_per_human[:, :, 0]))

(300, 400, 4)
(120000, 4)
True


In [156]:
# Average switch indicator (largest threshold, column 3) pooled over every trial and
# participant, binned by discriminability.
pooled_switch = switch_per_human.reshape(-1, 4)[:, 3]
hist = go.Figure()
hist.add_trace(go.Histogram(
    x=humans_discriminability.flatten(),
    y=pooled_switch,
    nbinsx=10,
    histfunc='avg',
))
hist.show()

# Same average, binned by richness with explicit 0.2-wide bins from 0 to 2.
bins = np.linspace(0, 2, 11)
hist2 = go.Figure()
hist2.add_trace(go.Histogram(
    x=humans_richness.flatten(),
    y=pooled_switch,
    xbins=dict(start=0, end=2, size=0.2),
    histfunc='avg',
))
hist2.show()

In [148]:
# Scratch check: an elementwise comparison on a 3-D array yields a boolean array of the
# same shape. The values run from -10 to 16.
a = np.arange(-10, 17).reshape(3, 3, 3)
a < 0


array([[[ True,  True,  True],
        [ True,  True,  True],
        [ True,  True,  True]],

       [[ True, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False]]])

In [117]:
# Discriminability of the *learned* values: absolute difference of the two options'
# values divided by their sum, per trial and participant.
my_discriminability = (
    np.abs(humans_perceived_values[0] - humans_perceived_values[1])
    / (humans_perceived_values[0] + humans_perceived_values[1])
)

In [116]:
# Scratch check: elementwise true division of two equal-length sequences
np.array([1, 2, 3]) / np.array([4, 5, 6])

array([0.25, 0.4 , 0.5 ])

In [None]:
# Learned-value discriminability against environment richness, one trace per
# participant (first 30 participants)
fig_p_switch_discriminability2 = go.Figure()
for human in range(30):
    fig_p_switch_discriminability2.add_scatter(
        x=humans_richness[:, human],
        y=my_discriminability[:, human],
        name="A",
        mode='markers',
        opacity=1,
        line=dict(color="#C3BD64"),
    )
fig_p_switch_discriminability2.show()

In [114]:
# Fraction of each participant's trials whose learned-value discriminability is below
# each threshold (0.05, 0.1, 0.2, 0.4). The per-participant fraction is repeated along
# the trial axis so the array lines up with the per-trial arrays.
p_switch_per_human = np.empty((max_steps, n_humans, 4))
for level, cutoff in enumerate((0.05, 0.1, 0.2, 0.4)):
    p_switch_per_human[:, :, level] = (my_discriminability < cutoff).mean(axis=0)

In [None]:
# p(switch) against richness for each of the four thresholds (first participant only).
# The traces go into the figure created here; previously they were appended to, and shown
# from, fig_p_switch_discriminability2, leaving fig_p_switch empty.
fig_p_switch = go.Figure()
for p in range(4):
    for human in range(1):
        fig_p_switch.add_trace(go.Scatter(x=humans_richness[:,human],y=p_switch_per_human[:,human,p],
                                          name="A", mode='markers', opacity=1, line={"color":"#C3BD64"}))
fig_p_switch.show()

In [47]:

# Signed difference between the learned values of option 0 and option 1 over the first
# 100, 200 and 300 trials held in q_table.
delta_reward_probability1 = q_table[0,:100]-q_table[1,:100]
delta_reward_probability2 = q_table[0,:200]-q_table[1,:200]
delta_reward_probability3 = q_table[0,:300]-q_table[1,:300]
# NOTE(review): `get_discriminability` and `numbers1` are not defined in the visible part
# of this notebook, and all three calls receive the same argument — presumably each was
# meant to use its own window; confirm before relying on these values.
discriminability1 = get_discriminability(numbers1)
discriminability2 = get_discriminability(numbers1)
discriminability3 = get_discriminability(numbers1)
# Sum of both options' learned values over the same three windows (one scalar each).
richness1 = np.sum(q_table[0,:100]+q_table[1,:100])
richness2 = np.sum(q_table[0,:200]+q_table[1,:200])
richness3 = np.sum(q_table[0,:300]+q_table[1,:300])

In [29]:
# Scratch call of the helper on the first 100 learned values of option 1.
# NOTE(review): get_discriminability is not defined in the visible part of this notebook.
get_discriminability(q_table[1,:100])

0.016267211474063712

In [33]:
# Scratch call of the helper on a tiny hand-made list.
# NOTE(review): get_discriminability is not defined in the visible part of this notebook.
l = [0, 1, 1]
display(get_discriminability(l))

0.5

2

In [65]:
import plotly.graph_objects as go
def create_fig_of_RL_experiment_with_switch(chosen_options, q_table, p_red, p_blue, switches, delta_reward):
    """Plot one participant's run together with the trials flagged as switches.

    Args:
        chosen_options: per-trial choice, 0 for red and 1 for blue.
        q_table: array indexed as ``[option, trial]`` holding the learned values.
        p_red: callable mapping trial numbers to the red reference trace; its output is
            divided by 100 before plotting.
        p_blue: same as ``p_red`` for the blue option.
        switches: per-trial indicator; trials where it equals 1 get a switch marker.
        delta_reward: per-trial delta reward, drawn as its own line.

    Returns:
        The assembled ``go.Figure`` with its y-axis fixed to [0, 1.25].
    """
    # Arrays are required for the boolean masks below; lists are accepted too.
    chosen_options = np.asarray(chosen_options)
    switches = np.asarray(switches)

    fig = go.Figure()

    # 1-based trial axis, sized from the data instead of being fixed at 300 trials.
    n_trials = len(chosen_options)
    x = np.linspace(1, n_trials, n_trials)

    # Reference probability traces
    fig.add_trace(go.Scatter(x=x, y=p_red(x) / 100.0, name="Option 1 Ideal", mode='lines',
                             opacity=0.5, line={"color": "#C37364"}))
    fig.add_trace(go.Scatter(x=x, y=p_blue(x) / 100.0, name="Option 2 Ideal", mode='lines',
                             opacity=0.5, line={"color": "#136EAC"}))

    # Values learned by the simple RL model
    fig.add_trace(go.Scatter(x=x, y=q_table[0, :], name="Option 1 Perceived", mode='lines',
                             opacity=1, line={"color": "#C37364"}))
    fig.add_trace(go.Scatter(x=x, y=q_table[1, :], name="Option 2 Perceived", mode='lines',
                             opacity=1, line={"color": "#136EAC"}))

    fig.add_trace(go.Scatter(x=x, y=delta_reward, name="Delta Reward", mode='lines',
                             opacity=1, line={"color": "#E1C83C"}))

    # Three rows of markers just above 1: switch trials on top, then the trials on which
    # red (option 0) was chosen, then the trials on which blue (option 1) was chosen.
    chosen_options_x = np.arange(1, n_trials + 1)
    marker_rows = (
        (switches == 1, 1.1, "#E1C83C", 'Switch occurred'),
        (chosen_options == 0, 1.075, '#C37364', 'Chosen option: Red'),
        (chosen_options == 1, 1.05, '#136EAC', 'Chosen option: Blue'),
    )
    for mask, height, color, label in marker_rows:
        fig.add_trace(go.Scatter(
            x=chosen_options_x[mask],
            y=(np.ones(n_trials) * height)[mask],
            mode='markers',
            marker=dict(color=color, size=6),
            name=label
        ))

    fig.update_layout(yaxis=dict(range=[0, 1.25]))  # Adjust y-axis to fit dots
    return fig