# RL Extension 1: different learning rates

In [1]:
import gymnasium as gym
import random
import numpy as np
import pandas as pd
from scipy import interpolate

# Load the two per-trial probability traces: option 1 is red, option 2 is blue.
df_red = pd.read_csv('../path_trace_foraging_red.csv')
df_blue = pd.read_csv('../path_trace_foraging_blue.csv')

# Turn each trace into a callable p(trial): linear interpolation between the
# tabulated 'trial' -> 'p' points, extrapolating outside the tabulated range.
p_red, p_blue = (
    interpolate.interp1d(trace['trial'], trace['p'], kind='linear', fill_value='extrapolate')
    for trace in (df_red, df_blue)
)


In [2]:
# num_episodes = 10000
# Size of the experiment: number of trials in the run and number of options.
max_steps = 300
k = 2

# Per-trial logs, filled with NaN until a trial writes its own entry.
chosen_options_wl = np.full(max_steps, np.nan)
rewards_wl = np.full(max_steps, np.nan)

# Value table: one row per option, one column per trial. Every column starts
# at zero except the first, where both options are valued at 0.5.
q_table_wl = np.zeros((k, max_steps))
q_table_wl[:, 0] = 0.5

# v = np.zeros((n_decks,max_pulls))
def choose_option(time, beta, rng, q_table=None, p_red_fn=None, p_blue_fn=None):
    """Choose one of the two options for a single trial.

    The probability of choosing option 0 (red) is a logistic function of the
    value difference between the two options at column ``time``:
    ``1 / (1 + exp(-beta * (Q_red - Q_blue)))``.

    Parameters
    ----------
    time : int
        Column of the value table to read; also passed unchanged to the
        probability function of the chosen option.
    beta : float
        Multiplier on the value difference (larger values make the choice
        more deterministic).
    rng : object
        Anything with a ``uniform(low, high)`` method, e.g. a
        ``numpy.random.Generator``. Exactly one draw is consumed.
    q_table : array-like of shape (2, n_trials), optional
        Value table to read. Defaults to the notebook-level ``q_table_wl``.
    p_red_fn, p_blue_fn : callable, optional
        Map a trial index to the reward probability of the red / blue option.
        Default to the notebook-level ``p_red`` / ``p_blue``.

    Returns
    -------
    tuple
        ``(chosen_index, chosen_probability, unchosen_index)`` where the
        indices are 0 (red) or 1 (blue) and ``chosen_probability`` is the
        chosen option's probability function evaluated at ``time`` and divided
        by 100 (presumably the traces are stored as percentages -- confirm
        against the CSV files).
    """
    # Fall back to the notebook globals so existing three-argument calls
    # behave exactly as before.
    q_table = q_table_wl if q_table is None else q_table
    p_red_fn = p_red if p_red_fn is None else p_red_fn
    p_blue_fn = p_blue if p_blue_fn is None else p_blue_fn

    value_gap = q_table[0, time] - q_table[1, time]
    p_choose_red = 1 / (1 + np.exp(-beta * value_gap))

    # Only the chosen option's probability function is evaluated.
    if rng.uniform(0, 1) < p_choose_red:
        return 0, p_red_fn(time) / 100.0, 1
    return 1, p_blue_fn(time) / 100.0, 0
    


In [6]:
def get_reward(probability, rng):
    """Sample a binary reward.

    Draws one value from ``rng.uniform(0, 1)`` and returns 1 when that draw is
    strictly below ``probability``, otherwise 0.
    """
    draw = rng.uniform(0, 1)
    return 1 if draw < probability else 0


In [None]:
# Run one simulated session of the two-option task with separate learning
# rates for rewarded ("win") and unrewarded ("lose") trials.
#
# NOTE: this cell calls choose_option, get_reward and
# create_fig_of_RL_experiment. The plotting helper is defined in a cell that
# appears further down in this notebook, so that cell must be run first.
# NOTE(review): the loop passes 0-based step indices to choose_option, while
# the plotting helper evaluates the probability functions on trials 1..n --
# confirm which convention the CSV 'trial' column uses.

# Model parameters.
alpha_win = 0.65   # learning rate applied after a rewarded trial
alpha_lose = 0.4   # learning rate applied after an unrewarded trial
beta_wl = 3        # multiplier on the value difference in choose_option
seed_wl = 0        # fixed seed so the run is reproducible; change for another sample path
rng_wl = np.random.default_rng(seed_wl)

# Restore the starting state in place, so re-running this cell starts from
# scratch and the "before" figure never shows a previous run's results.
chosen_options_wl[:] = np.nan
rewards_wl[:] = np.nan
q_table_wl[:] = 0.0
q_table_wl[:, 0] = 0.5

fig_before_wl = create_fig_of_RL_experiment(chosen_options_wl, q_table_wl, p_red, p_blue)

for step in range(max_steps):
    chosen_option, option_prob_value, unchosen_option = choose_option(step, beta_wl, rng_wl)
    reward = get_reward(option_prob_value, rng_wl)
    rewards_wl[step] = reward
    chosen_options_wl[step] = chosen_option

    if step + 1 == max_steps:
        break  # the last trial has no following column to write into

    # Move the chosen option's value towards the reward, with a learning rate
    # that depends on the outcome; the unchosen option's value is carried over.
    alpha = alpha_win if reward == 1 else alpha_lose
    q_chosen = q_table_wl[chosen_option, step]
    q_table_wl[chosen_option, step + 1] = q_chosen + alpha * (reward - q_chosen)
    q_table_wl[unchosen_option, step + 1] = q_table_wl[unchosen_option, step]

fig_after_wl = create_fig_of_RL_experiment(chosen_options_wl, q_table_wl, p_red, p_blue)
fig_after_wl.show()
fig_before_wl.show()

In [4]:
import plotly.graph_objects as go
def create_fig_of_RL_experiment(chosen_options, q_table, p_red, p_blue):
    """Plot true reward probabilities, learned values and per-trial choices.

    Parameters
    ----------
    chosen_options : array-like of length n_choices
        Per-trial choice code: 0 for option 1 (red), 1 for option 2 (blue).
        Any other value (e.g. NaN) produces no marker for that trial.
    q_table : array-like of shape (2, n_trials)
        Row 0 holds the values plotted for option 1, row 1 those for option 2.
        The x-axis runs over trials 1..n_trials, taken from this table's
        second dimension.
    p_red, p_blue : callable
        Called with the array of trial numbers; the result is divided by 100
        before plotting.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with two "Ideal" lines, two "Perceived" lines and two marker
        rows (above y = 1) showing which option was chosen on each trial.
    """
    # Accept plain lists as well as arrays; the boolean masks below need
    # element-wise comparison.
    chosen_options = np.asarray(chosen_options)
    q_table = np.asarray(q_table)

    fig = go.Figure()

    # Trial axis follows the table width instead of a fixed 300 trials.
    n_trials = q_table.shape[1]
    x = np.linspace(1, n_trials, n_trials)

    # True reward probabilities (semi-transparent lines).
    fig.add_trace(go.Scatter(x=x, y=p_red(x) / 100.0, name="Option 1 Ideal", mode='lines', opacity=0.5, line={"color": "#C37364"}))
    fig.add_trace(go.Scatter(x=x, y=p_blue(x) / 100.0, name="Option 2 Ideal", mode='lines', opacity=0.5, line={"color": "#136EAC"}))

    # Values held in q_table (opaque lines, same colours).
    fig.add_trace(go.Scatter(x=x, y=q_table[0, :], name="Option 1 Perceived", mode='lines', opacity=1, line={"color": "#C37364"}))
    fig.add_trace(go.Scatter(x=x, y=q_table[1, :], name="Option 2 Perceived", mode='lines', opacity=1, line={"color": "#136EAC"}))

    # Choice markers: one dot per trial, drawn just above y = 1 in the colour
    # of the option that was chosen (these show choices, not reward outcomes).
    trial_numbers = np.arange(1, len(chosen_options) + 1)
    marker_rows = (
        (0, 1.075, '#C37364', 'Chosen option: Red'),
        (1, 1.05, '#136EAC', 'Chosen option: Blue'),
    )
    for option_code, y_level, colour, label in marker_rows:
        picked = chosen_options == option_code
        fig.add_trace(go.Scatter(
            x=trial_numbers[picked],
            y=np.full(int(picked.sum()), y_level),
            mode='markers',
            marker=dict(color=colour, size=6),
            name=label
        ))

    fig.update_layout(
        xaxis=dict(title=dict(text="Trial")),
        # Upper limit above 1 leaves room for the choice markers.
        yaxis=dict(range=[0, 1.15], title=dict(text="Reward probability / learned value")),
    )
    return fig