In [45]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import scipy.io
from joblib import Memory
import plotly.express as px


In [46]:
# Disk-backed joblib cache used by the `@memory.cache` loaders in this notebook;
# location="." places the cache under the current working directory.
memory = Memory(location=".", verbose=0)


@memory.cache
def load_mat(filename):
    """Load a MATLAB ``.mat`` file into a DataFrame with named columns.

    The file is expected to hold a ``data`` matrix and a ``titles`` struct
    whose field names are the column names and whose field values are the
    1-based positions of those columns in ``data``.

    Parameters
    ----------
    filename : str
        Path of the ``.mat`` file, passed straight to ``scipy.io.loadmat``.

    Returns
    -------
    pandas.DataFrame
        ``data`` with its columns labelled according to ``titles``.
    """
    contents = scipy.io.loadmat(filename)
    titles = contents["titles"]
    field_names = pd.Series(list(titles.dtype.names))
    # Field i of the struct stores the (1-based) column position of name i.
    positions = pd.Series([int(entry[0][0]) for entry in titles[0][0]]) - 1
    # Invert the mapping: afterwards element j is the field whose column is j.
    by_position = positions.argsort()
    return pd.DataFrame(contents["data"], columns=field_names[by_position])


# Load the 2008 competition and estimation data sets as DataFrames.
comp = load_mat("../artifacts/2008/competition_data.mat")
est = load_mat("../artifacts/2008/estimation_data.mat")

In [47]:
# Inspect the column names of the estimation data.
est.columns

Index(['Id', 'Problem', 'Trial', 'Order', 'High', 'Phigh', 'Low', 'Medium',
       'Choice', 'Payoff'],
      dtype='object')

In [67]:
# Mean choice for every problem / payoff configuration
est_agg = (
    est.groupby(["Problem", "Phigh", "Medium", "High", "Low"])["Choice"]
    .mean()
    .reset_index()
)

# Mean choice against Phigh, coloured by Medium, with an OLS trendline
fig = px.scatter(est_agg, x="Phigh", y="Choice", color="Medium", trendline="ols")

# Common bounds so that both axes can be drawn over the same range
axis_min = min(est_agg["Phigh"].min(), est_agg["Choice"].min())
axis_max = max(est_agg["Phigh"].max(), est_agg["Choice"].max())

# Fixed 500x500 canvas without margins; axes locked 1:1 on the shared range
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    margin={"l": 0, "r": 0, "b": 0, "t": 0, "pad": 4},
    xaxis={"scaleanchor": "y", "scaleratio": 1, "range": [axis_min, axis_max]},
    yaxis={"scaleanchor": "x", "scaleratio": 1, "range": [axis_min, axis_max]},
)

fig.show()

# Print the regression summary of the first (only) fitted trendline
results = px.get_trendline_results(fig)
print(results["px_fit_results"].values[0].summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.399
Method:                 Least Squares   F-statistic:                     40.21
Date:                Wed, 05 Mar 2025   Prob (F-statistic):           3.71e-08
Time:                        18:45:30   Log-Likelihood:                 34.996
No. Observations:                  60   AIC:                            -65.99
Df Residuals:                      58   BIC:                            -61.80
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2384      0.030      7.827      0.0

In [49]:
import pandas as pd
from glob import glob

# Directory of LLM result CSVs; passed to load_llm_results() below.
folder = "../results/2008/memory_inf"


@memory.cache
def load_llm_results(folder="../results/2008"):
    """Read every ``*.csv`` directly inside ``folder`` into one DataFrame.

    Files are read in sorted filename order so that the row order of the
    result is reproducible; ``glob`` returns matches in arbitrary order.

    Note: because of ``@memory.cache`` the result is memoized for a given
    ``folder`` argument, so CSV files added to the folder afterwards are not
    picked up until the joblib cache is cleared.

    Parameters
    ----------
    folder : str
        Directory searched (non-recursively) for CSV files.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all CSV files; each file keeps its own index.

    Raises
    ------
    ValueError
        If ``folder`` contains no CSV file.
    """
    filenames = sorted(glob(f"{folder}/*.csv"))
    if not filenames:
        # pd.concat([]) would also raise ValueError, but without naming the folder.
        raise ValueError(f"No CSV files found in {folder!r}")
    return pd.concat([pd.read_csv(filename) for filename in filenames])


# Load (or fetch from the joblib cache) all LLM result rows found in `folder`.
df_llm = load_llm_results(folder)

In [56]:
# Mean LLM choice for every problem configuration
df_llm_agg = df_llm.groupby(["Problem", "Phigh", "Medium"], as_index=False)[
    "Choice"
].mean()

In [57]:
# Tag both aggregates with their source and stack them for a faceted plot
est_agg["player"] = "human"
df_llm_agg["player"] = "llm"
combined = pd.concat([est_agg, df_llm_agg])

fig = px.scatter(
    combined,
    x="Phigh",
    y="Choice",
    color="Medium",
    facet_col="player",
    trendline="ols",
    trendline_color_override="red",
    labels={"Choice": "P(Risky)", "Phigh": "Pr(High)"},
    width=800,
    height=500,
)

# Fitted trendline models; the annotations below assume they are ordered
# like the facets (first fit -> first facet, second fit -> second facet).
results = px.get_trendline_results(fig)
stats0 = results["px_fit_results"].values[0]
stats1 = results["px_fit_results"].values[1]

# Add the slope and intercept of each trendline to the plot
label0 = f"Prisky ~ {stats0.params[1]:.2f}*Phigh + {stats0.params[0]:.2f}"
label1 = f"Prisky ~ {stats1.params[1]:.2f}*Phigh + {stats1.params[0]:.2f}"
fig.add_annotation(x=0.5, y=0.5, text=label0)
fig.add_annotation(x=0.5, y=0.5, text=label1, row=1, col=2)

fig.show()

In [59]:
# Human mean choice (x-axis) against LLM mean choice (y-axis), one point per merged row
merged = pd.merge(
    left=est_agg,
    right=df_llm_agg,
    on=["Problem", "Phigh", "Medium"],
    suffixes=("_human", "_llm"),
)
fig = px.scatter(
    merged,
    x="Choice_human",
    y="Choice_llm",
    labels={"Choice_human": "Human Prisky", "Choice_llm": "LLM Prisky"},
    trendline="ols",
    trendline_color_override="red",
    width=500,
    height=500,
)
# Square canvas with both axes locked 1:1 on [0, 1]
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    xaxis={"scaleanchor": "y", "scaleratio": 1, "range": [0, 1]},
    yaxis={"scaleanchor": "x", "scaleratio": 1, "range": [0, 1]},
)
# Annotate the plot with the slope and intercept of the OLS trendline
results = px.get_trendline_results(fig)
stats = results["px_fit_results"].values[0]
fig.add_annotation(
    x=0.5,
    y=0.5,
    text=f"LLM Prisky ~ {stats.params[1]:.2f}*Human Prisky + {stats.params[0]:.2f}",
)
fig.show()

In [58]:
import pandas as pd
import numpy as np
import plotly.express as px
import scipy.odr as odr

# Pair the human and LLM mean choices row by row on the problem keys
merged = pd.merge(
    left=est_agg,
    right=df_llm_agg,
    on=["Problem", "Phigh", "Medium"],
    suffixes=("_human", "_llm"),
)

# Plain arrays for the regression: human on x, LLM on y
x, y = merged["Choice_human"].values, merged["Choice_llm"].values

# Define TLS (Deming regression) model
def linear_func(B, x):
    """Evaluate the straight line ``B[0] * x + B[1]``.

    Parameters
    ----------
    B : sequence
        Line parameters; ``B[0]`` is the slope and ``B[1]`` the intercept.
    x : scalar or array-like supporting arithmetic
        Point(s) at which the line is evaluated.

    Returns
    -------
    Same kind as ``x``: the line evaluated at ``x``.
    """
    slope, intercept = B[0], B[1]
    return slope * x + intercept

# Create ODR model: wrap the line function and the observations for scipy.odr.
# No weights are passed to odr.Data, so x and y are treated alike.
model = odr.Model(linear_func)
data = odr.Data(x, y)

# Set initial guess from OLS.
# np.polyfit(..., 1) returns [slope, intercept] (highest power first), which is
# the same parameter order that linear_func expects in B.
beta0 = np.polyfit(x, y, 1)  # OLS-based initial guess
odr_instance = odr.ODR(data, model, beta0=beta0)
odr_result = odr_instance.run()

# Extract TLS regression parameters (same [slope, intercept] order as beta0)
slope, intercept = odr_result.beta

# Create scatter plot of LLM mean choice against human mean choice
fig = px.scatter(
    merged,
    x="Choice_human",
    y="Choice_llm",
    width=500,
    height=500,
    labels={"Choice_human": "Human Pr(Risky)", "Choice_llm": "LLM Pr(Risky)"},
)

# Endpoints of the fitted TLS line over the observed x range.
# Note: y_min / y_max are the line's values at x_min / x_max, not data bounds.
x_min, x_max = x.min(), x.max()
y_min, y_max = slope * x_min + intercept, slope * x_max + intercept

# Add TLS regression line using just two points
fig.add_trace(px.line(x=[x_min, x_max], y=[y_min, y_max]).data[0])
fig.data[-1].line.color = "red"

# Add annotation for regression equation at the midpoint of the line
fig.add_annotation(
    x=0.5 * (x_min + x_max),
    y=0.5 * (y_min + y_max),
    text=f"LLM Pr(Risky) ~ {slope:.2f} * Human Pr(Risky) + {intercept:.2f}",
)

# One shared axis range that contains [0, 1], every data point and both line
# endpoints. Using the line endpoints as the y range (as before) hid points
# lying above/below the ends of the line and reversed the axis for a negative
# slope.
range_lo = float(min(0.0, x.min(), y.min(), y_min, y_max))
range_hi = float(max(1.0, x.max(), y.max(), y_min, y_max))

# Set square aspect ratio with identical ranges on both axes
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    xaxis=dict(scaleanchor="y", scaleratio=1, range=[range_lo, range_hi]),
    yaxis=dict(scaleanchor="x", scaleratio=1, range=[range_lo, range_hi]),
)

fig.show()


In [54]:
# measure primacy effect

from primacy import get_primacy_effect

# Primacy tables for both player types, tagged and stacked for a faceted plot
primacy_effect = get_primacy_effect(est)
llm_primacy_effect = get_primacy_effect(df_llm)
primacy_effect["player"] = "human"
llm_primacy_effect["player"] = "llm"
llm_primacy_concat = pd.concat([primacy_effect, llm_primacy_effect])

# One facet per player; the title names both because both are shown
# (it previously read "Primacy Effect Human").
fig = px.line(
    llm_primacy_concat,
    x="Trial",
    y="Choice",
    color="First Risky High",
    facet_col="player",
    width=800,
    height=500,
    title="Primacy Effect Human vs. LLM",
    color_discrete_map={True: "blue", False: "red"},
    labels={"Choice": "P(Risky)", "Trial": "Trial"},
)
fig.show()

# TODO:
1. Run with a memory cutoff of 20 trials WITHOUT summarization.
2. Fix the logging of summarization so that each trace can be correlated with its summarization.
3. Recency: P(risky | previous payoff high) - P(risky | previous payoff low).

In [60]:
# recency - for each risky choice compute the probability the next choice is risky conditioned on the previous choice payoff was high or low
def get_recency(df):
    """Rate of choosing risky again after a risky choice, split by its payoff.

    For every row whose ``Choice`` is 1 (risky), the choice in the following
    row is looked up and averaged separately for rows where ``Payoff`` equals
    ``High`` and rows where it does not. "Following" means the next row of
    ``df``; rows are assumed to already be in trial order, which is not
    enforced here.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Choice``, ``Payoff`` and ``High``.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``"p risky given previous low"`` and
        ``"p risky given previous high"``; a value is NaN when that case
        never occurs in ``df``.
    """
    chose_risky = df["Choice"] == 1
    got_high = df["Payoff"] == df["High"]
    following_choice = df["Choice"].shift(-1)
    # Restrict to risky rows, then average the following choice per outcome;
    # reindex guarantees both outcomes are present, in low/high order.
    rates = (
        following_choice[chose_risky]
        .groupby(got_high[chose_risky])
        .mean()
        .reindex([False, True], fill_value=np.nan)
    )
    recency_df = rates.to_frame().T
    recency_df.columns = ["p risky given previous low", "p risky given previous high"]
    return recency_df

def get_mean_recency(df):
    """Average the recency rates per problem and attach the problem parameters.

    ``get_recency`` is applied to every (``Problem``, ``Id``) group, the
    results are averaged within each ``Problem``, and a ``"recency diff"``
    column (rate after high minus rate after low) is added.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Problem``, ``Id``, ``Phigh``, ``Medium``, ``High``,
        ``Low`` plus the columns required by ``get_recency``.

    Returns
    -------
    pandas.DataFrame
        The distinct ``Problem``/``Phigh``/``Medium``/``High``/``Low`` rows of
        ``df`` merged on ``Problem`` with the two mean rates and their
        difference.
    """
    per_player = df.groupby(["Problem", "Id"]).apply(get_recency, include_groups=False)
    per_problem = per_player.groupby("Problem").mean()
    per_problem["recency diff"] = (
        per_problem["p risky given previous high"]
        - per_problem["p risky given previous low"]
    )
    problem_params = df[["Problem", "Phigh", "Medium", "High", "Low"]].drop_duplicates()
    return problem_params.merge(per_problem, on="Problem")

# Per-problem recency tables for both player types, tagged and stacked
recency_df = get_mean_recency(est)
recency_df["player"] = "human"
llm_recency_df = get_mean_recency(df_llm)
llm_recency_df["player"] = "llm"
recency_concat = pd.concat([recency_df, llm_recency_df])

fig = px.scatter(
    recency_concat,
    x="p risky given previous low",
    y="p risky given previous high",
    color="Medium",
    facet_col="player",
    title="Recency Effect Human vs. LLM",
    labels={
        "p risky given previous low": "P(Risky|Previous Low)",
        "p risky given previous high": "P(Risky|Previous High)",
    },
    width=800,
    height=500,
)
# Draw the identity line (y = x) on each of the two facets
for facet_index in (1, 2):
    fig.add_trace(px.line(x=[0, 1], y=[0, 1]).data[0], row=1, col=facet_index)

fig.show()

# Distribution of the recency difference for each player type
fig = px.box(
    recency_concat,
    x="player",
    y="recency diff",
    color="player",
    title="Recency Effect Difference Human vs. LLM",
    labels={"recency diff": "Recency Effect Difference", "player": "Player"},
    width=400,
)
fig.show()

# LLM recency difference against human recency difference, matched on Problem
recency_merge = pd.merge(
    recency_df,
    llm_recency_df,
    on=["Problem"],
    suffixes=("_human", "_llm"),
)
fig = px.scatter(
    recency_merge,
    x="recency diff_human",
    y="recency diff_llm",
    labels={
        "recency diff_human": "Human Recency Effect",
        "recency diff_llm": "LLM Recency Effect",
    },
    width=500,
    height=500,
)
fig.show()

In [None]:
# risky vs medium
# NOTE(review): the column is labelled "High Positive" but the test is
# High > 1 rather than High > 0 — confirm the threshold is intended.
est_agg["High Positive, Medium Negative"] = (est_agg["High"] > 1) & (est_agg["Medium"] < 0)
# Standard deviation of the two-outcome risky prospect (High with probability
# Phigh, Low otherwise): sqrt(p * (1 - p)) * |High - Low|.
# The previous expression subtracted Medium**2 (the safe payoff) instead of the
# squared mean of the risky prospect, which is not a variance and can be
# negative under the square root.
est_agg["Risky Std"] = (
    np.sqrt(est_agg["Phigh"] * (1 - est_agg["Phigh"]))
    * (est_agg["High"] - est_agg["Low"]).abs()
)
px.box(est_agg, x="High Positive, Medium Negative", y="Choice")

In [74]:
# Medium against High per aggregated row; High is mapped to both the y-axis and the colour.
px.scatter(est_agg, x="Medium", y="High", color="High")

In [None]:
# Display the aggregated human data, including the columns added to it in earlier cells.
est_agg