In [1]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import plotly.express as px
from primacy import get_primacy_effect
import plotly.io as pio
# Select "vscode" as the default Plotly renderer used when figures are shown.
pio.renderers.default = "vscode"


In [2]:
# settings
# Directory prefix used when figures are written to disk.
fig_folder = "../results/2010/figs"

# Player label -> path of the CSV file holding that player's data.
# The labels are reused as the "Player" values in the combined frames.
players_dataset = {
    "Human": "../data/2010/processed/experience_est.csv",
    "AI Summary 20": "../results/2010/data/memory_20.csv",
    "AI Full History": "../results/2010/data/memory_inf.csv",
}

In [3]:
# read data: load every player's CSV into a DataFrame keyed by player label
raw_data = {}
for player_label, csv_path in players_dataset.items():
    # Print the file before reading it so a failing read is easy to attribute.
    print(f"{player_label}: {csv_path}")
    raw_data[player_label] = pd.read_csv(csv_path)
    print(raw_data[player_label].shape)

Human: ../data/2010/processed/experience_est.csv
(120000, 12)
AI Summary 20: ../results/2010/data/memory_20.csv
(120000, 13)
AI Full History: ../results/2010/data/memory_inf.csv
(120000, 13)


In [5]:
# aggregate risky choice over problems
# Columns whose unique combination defines one group (one problem).
problem_keys = ["Problem", "Phigh", "Medium", "High", "Low"]

# For each player: mean of "Choice" within each group, returned as a flat
# frame with the grouping columns restored as regular columns.
agg_per_problem = {
    player: player_df.groupby(problem_keys)["Choice"].mean().reset_index()
    for player, player_df in raw_data.items()
}

In [6]:
# stack the per-player aggregates into one long frame with a "Player" column
agg_per_problem_concat = (
    pd.concat(agg_per_problem, keys=agg_per_problem.keys(), names=["Player"])
    .reset_index()
    # "level_1" is the unnamed inner index level produced by the concat
    .drop("level_1", axis=1)
)

In [7]:
# plot scatter of P(risky) vs P(high): one facet per player, each with an OLS trendline
def _format_p_value(p_value):
    """Format a p-value as '<mantissa>·10<sup><exponent></sup>' for Plotly text.

    The value is rounded to one significant digit in scientific notation and
    the exponent is rendered as an HTML superscript. Values without an
    exponent part (e.g. nan or inf) are returned as their plain text form.
    """
    mantissa, separator, exponent = f"{p_value:.0e}".partition("e")
    if not separator:
        return mantissa
    # int() drops the sign/zero padding of "-05" / "+00" and handles any exponent width.
    return f"{mantissa}·10<sup>{int(exponent)}</sup>"


fig = px.scatter(
    agg_per_problem_concat,
    x="Phigh",
    y="Choice",
    color="Medium",
    trendline="ols",
    facet_col="Player",
    trendline_color_override="black",
    # final figure size (previously set here and then overridden via update_layout)
    width=400*2.9,
    height=400,
    labels={"Choice": "Pr(Risky)", "Phigh": "Pr(High)"}
)
# both axes span [0, 1] in every facet
fig.update_xaxes(range=[0, 1])
fig.update_yaxes(range=[0, 1])
# dashed line style on every trace (this is what makes the trendlines dashed)
fig.update_traces(line_dash="dash")

results = px.get_trendline_results(fig)
print(results)
for i, stat in enumerate(results["px_fit_results"]):
    # annotate each facet with the slope, intercept, fit quality and slope p-value;
    # params[0] is used as the intercept and params[1] as the slope
    intercept, slope = stat.params[0], stat.params[1]
    x = 0.5  # the arrow points at the trendline at the middle of the x range
    y = x * slope + intercept
    p_value_slope_str = _format_p_value(stat.pvalues[1])
    # NOTE(review): the value shown after "R^2=" is the *adjusted* R^2 (rsquared_adj).
    fig.add_annotation(
        x=x, y=y,
        text=f"~{slope:.2f}*x + {intercept:.2f}<br>R^2={stat.rsquared_adj:.2f}<br>p-value={p_value_slope_str}",
        # assumes the fit results are listed in the same order as the facet columns
        row=1, col=i+1,
        yshift=0, showarrow=True, arrowhead=2, ay=-100
    )


fig.show()
fig.write_image(f"{fig_folder}/p_risky_vs_phigh_and_player.png")

            Player                                     px_fit_results
0            Human  <statsmodels.regression.linear_model.Regressio...
1    AI Summary 20  <statsmodels.regression.linear_model.Regressio...
2  AI Full History  <statsmodels.regression.linear_model.Regressio...


In [8]:
# pairwise comparison of the per-problem risky-choice rate between players
# Wide layout: one row per problem, one "Choice" column per player.
agg_per_problem_merged = (
    agg_per_problem_concat
    .pivot(index=problem_keys, columns=["Player"], values="Choice")
    .reset_index()
)
fig = px.scatter_matrix(
    agg_per_problem_merged,
    dimensions=agg_per_problem.keys(),
    color="Medium",
    width=500,
    height=500,
)
# keep only the lower triangle: hide the diagonal panels and the upper half
fig.update_traces(diagonal_visible=False, showupperhalf=False)

# dashed y = x reference line in each remaining panel; the axis pairs are
# hard-coded for the three-player layout
equality_line_style = dict(color="black", width=1, dash="dash")
for x_axis, y_axis in [('x2', 'y3'), ('x1', 'y2'), ('x1', 'y3')]:
    fig.add_shape(
        type="line",
        xref=x_axis,
        yref=y_axis,
        x0=0,
        x1=1,
        y0=0,
        y1=1,
        line=equality_line_style,
    )

fig.show()
fig.write_image(f"{fig_folder}/p_risky_pairwise_players.png")


In [None]:
# hypothesis test of dependence between risky-choice probability and the sign of Medium, per player
from scipy.stats import ttest_ind  # not used in this cell; import kept for compatibility
# non-parametric rank test
from scipy.stats import mannwhitneyu

significance_level = 0.05  # p-values below this get a "*" marker on the plot

# Run the test once per player and keep the p-value, so that the printed
# numbers and the significance markers below always come from the same test.
p_values = {}
for player, df in agg_per_problem.items():
    print(f"\n\n{player}")
    choice_medium_positive = df.query("Medium > 0")["Choice"]
    choice_medium_negative = df.query("Medium < 0")["Choice"]
    test = mannwhitneyu(choice_medium_positive, choice_medium_negative)
    # effect size reported as the difference of group means
    effect_size = choice_medium_positive.mean() - choice_medium_negative.mean()
    p_values[player] = test.pvalue
    print(test.pvalue, effect_size)

# visualize the effect above
# NOTE(review): rows with Medium == 0 (if any) fall in the False group here,
# while the test above leaves them out of both samples.
agg_per_problem_concat["Medium Positive"] = agg_per_problem_concat["Medium"] > 0
fig = px.box(agg_per_problem_concat, x="Player", y="Choice", color="Medium Positive", width=500, height=500, labels={"Choice": "Pr(Risky)"}, color_discrete_map={True: "red", False: "blue"})
# make y range [0,1]
fig.update_yaxes(range=[0, 1])

# add a "*" above a player's pair of boxes when the p-value is significant
bracket_line = dict(color="black", width=1)
bracket_y = 0.95          # height of the horizontal bracket line
bracket_half_width = 0.2  # horizontal reach of the bracket on each side of the player
bracket_tick = 0.02       # half-height of the vertical end caps
for i, (player, p_value) in enumerate(p_values.items()):
    if p_value < significance_level:
        # x=i addresses the i-th player category on the x axis
        fig.add_annotation(x=i, y=0.97, text="*", showarrow=False, font_size=15)
        # small horizontal line between the two boxes, under the asterisk
        fig.add_shape(type="line", x0=i-bracket_half_width, x1=i+bracket_half_width, y0=bracket_y, y1=bracket_y, line=bracket_line)
        # both ends of the line are closed with a short vertical cap
        fig.add_shape(type="line", x0=i-bracket_half_width, x1=i-bracket_half_width, y0=bracket_y-bracket_tick, y1=bracket_y+bracket_tick, line=bracket_line)
        fig.add_shape(type="line", x0=i+bracket_half_width, x1=i+bracket_half_width, y0=bracket_y-bracket_tick, y1=bracket_y+bracket_tick, line=bracket_line)

fig.show()
fig.write_image(f"{fig_folder}/risky_vs_medium_sign.png")
    



Human
0.008350272464876544 0.12019196428571421


AI Summary 20
0.8939085104221585 -0.056736607142857165


AI Full History
5.039387081116553e-09 -0.22298214285714285


In [12]:
# measure primacy effect for every player and plot the players side by side
primacy_effect = {}
for name, df in raw_data.items():
    print(f"{name}: {df.shape}")
    # get_primacy_effect is a project helper; the plot below relies on its result
    # having "Trial", "Choice" and "First Risky High" columns
    primacy_effect[name] = get_primacy_effect(df)

# stack the per-player results into one long frame with a "Player" column
primacy_effect_concat = pd.concat(primacy_effect, keys=primacy_effect.keys(), names=["Player"]).reset_index().drop("level_1", axis=1)
fig = px.line(
    primacy_effect_concat,
    x="Trial",
    y="Choice",
    color="First Risky High",
    facet_col="Player",
    width=800,
    height=500,
    # the figure is faceted over all players, so the title must not name only "Human"
    title="Primacy Effect Human vs. LLM",
    color_discrete_map={True: "blue", False: "red"},
    labels={"Choice": "P(Risky)", "Trial": "Trial"},
)
fig.show()
fig.write_image(f"{fig_folder}/primacy_effect_across_players.png")

Human: (120000, 12)
AI Summary 20: (120000, 13)
AI Full History: (120000, 13)


# TODO:
1. memory cutoff of 20 trials WITHOUT summarization
2. correct logging of summarization so that we can correlate the trace with summarization
3. recency: P(risky | high) - P(risky | low)

In [42]:
# recency - for each risky choice compute the probability the next choice is risky conditioned on the previous choice payoff was high or low
from recency import get_mean_recency
recency = {}
for name, df in raw_data.items():
    print(f"{name}: {df.shape}")
    # get_mean_recency is a project helper; the plot below relies on its result having
    # "p risky given previous low", "p risky given previous high" and "Medium" columns
    recency[name] = get_mean_recency(df)
# stack the per-player results into one long frame with a "Player" column
recency_concat = pd.concat(recency, keys=recency.keys(), names=["Player"]).reset_index().drop("level_1", axis=1)
fig = px.scatter(
    recency_concat,
    x="p risky given previous low",
    y="p risky given previous high",
    color="Medium",
    facet_col="Player",
    width=800,
    height=400,
    title="Recency Effect Human vs. LLM",
    labels={"p risky given previous low": "P(Risky|Previous Low)", "p risky given previous high": "P(Risky|Previous High)"},
)

# add a dashed diagonal (y = x) to every facet.
# There is one facet column per player, so count players rather than traces:
# the number of traces in fig.data only equals the number of facets when each
# facet holds exactly one trace.
n_facet_cols = recency_concat["Player"].nunique()
for facet_col_idx in range(1, n_facet_cols + 1):
    fig.add_shape(type="line", x0=0, x1=1, y0=0, y1=1, line=dict(color="black", width=1, dash="dash"), row=1, col=facet_col_idx)

fig.show()
fig.write_image(f"{fig_folder}/recency_effect_across_players.png")


Human: (120000, 12)
AI Summary 20: (120000, 13)
AI Full History: (120000, 13)


In [52]:
# Box plot of P(Risky) per player, split by the previous risky outcome.
# NOTE: the rename below changes the column names of recency_concat in place.
short_column_names = {
    "p risky given previous low": "Previous Low",
    "p risky given previous high": "Previous High",
}
recency_concat.rename(columns=short_column_names, inplace=True)

# Long format: one row per (player, problem, previous outcome).
recency_concat_melted = recency_concat.melt(
    id_vars=["Player", "Medium", "Problem"],
    value_vars=["Previous Low", "Previous High"],
    var_name="Previous Risky Outcome",
    value_name="P(Risky)",
)
fig = px.box(
    recency_concat_melted,
    x="Player",
    y="P(Risky)",
    color="Previous Risky Outcome",
    width=800,
    height=400,
    color_discrete_map={"Previous Low": "red", "Previous High": "blue"},
)
fig.show()
fig.write_image(f"{fig_folder}/recency_effect_across_players_boxplot.png")
