In [None]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import scipy.io
from joblib import Memory
import plotly.express as px


In [2]:
# Path (relative to this notebook) of the 2010 data file read in the next cell.
filename = '../artifacts/2010/clicking_comp_dat.txt'

In [3]:
# Fields are separated by runs of whitespace, hence the regex separator.
df = pd.read_csv(filename, sep=r'\s+')

In [7]:
# Disk-backed memoization (joblib) for the loader functions in this notebook;
# the cache store is rooted in the current working directory.
memory = Memory(location='.', verbose=0)
@memory.cache
def load_mat(filename):
    """Read a MATLAB ``.mat`` file into a DataFrame with named columns.

    The file must contain a ``data`` matrix and a ``titles`` struct. The field
    names of ``titles`` become the column names; each field's value is read as
    the (apparently 1-based) position of that column inside ``data``.
    """
    contents = scipy.io.loadmat(filename)
    titles = contents['titles']
    field_names = pd.Series(list(titles.dtype.names))
    # Position stored for each field, shifted down by one to be 0-based.
    positions = pd.Series([int(entry[0][0]) for entry in titles[0][0]]) - 1
    # Invert the mapping: afterwards entry i is the index of the field that
    # labels data column i.
    field_for_column = positions.argsort()
    return pd.DataFrame(contents['data'], columns=field_names[field_for_column])
# Load the 2008 competition and estimation data sets (memoized through memory.cache).
comp = load_mat('../artifacts/2008/competition_data.mat')
est = load_mat('../artifacts/2008/estimation_data.mat')

In [5]:
# Inspect the column names that load_mat assigned to the estimation data.
est.columns

Index(['Id', 'Problem', 'Trial', 'Order', 'High', 'Phigh', 'Low', 'Medium',
       'Choice', 'Payoff'],
      dtype='object')

In [28]:
# Mean Choice per problem; Phigh and Medium are grouping keys so they stay as columns.
est_agg = (
    est
    .groupby(['Problem', 'Phigh', 'Medium'])['Choice']
    .mean()
    .reset_index()
)

# Scatter of mean Choice against Phigh, coloured by Medium, with an OLS trendline.
fig = px.scatter(
    est_agg,
    x='Phigh',
    y='Choice',
    color='Medium',
    trendline='ols',
)

# Both axes share one [min, max] span taken across the two plotted columns.
axis_min = min(est_agg[col].min() for col in ("Phigh", "Choice"))
axis_max = max(est_agg[col].max() for col in ("Phigh", "Choice"))

# Fixed 500x500 canvas with a 1:1 data-unit ratio between the axes.
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0, 'pad': 4},
    xaxis={
        'scaleanchor': "y",
        'scaleratio': 1,
        'range': [axis_min, axis_max],
    },
    yaxis={
        'scaleanchor': "x",
        'scaleratio': 1,
        'range': [axis_min, axis_max],
    },
)

fig.show()

# Recover the fitted trendline model from the figure and print its summary table.
results = px.get_trendline_results(fig)
print(results['px_fit_results'].iloc[0].summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.399
Method:                 Least Squares   F-statistic:                     40.21
Date:                Sun, 02 Mar 2025   Prob (F-statistic):           3.71e-08
Time:                        17:49:52   Log-Likelihood:                 34.996
No. Observations:                  60   AIC:                            -65.99
Df Residuals:                      58   BIC:                            -61.80
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2384      0.030      7.827      0.0

In [29]:
import pandas as pd
from glob import glob
# Directory (relative to this notebook) that holds the LLM result CSV files.
folder = '../results/2008'
@memory.cache
def load_llm_results(folder = '../results/2008'):
    """Concatenate every CSV file found directly in ``folder`` into one DataFrame.

    Files are read in sorted path order so the row order of the result is
    reproducible; ``glob`` on its own returns matches in an arbitrary,
    filesystem-dependent order.

    Parameters
    ----------
    folder : str
        Directory searched (non-recursively) for ``*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        Rows of all files stacked in file order. Each file keeps its own row
        index, so index labels may repeat across files.

    Raises
    ------
    ValueError
        If ``folder`` contains no ``*.csv`` file.

    Notes
    -----
    The result is memoized by ``memory.cache``, which keys on the value of
    ``folder``: CSV files added to the directory later are not picked up until
    the cache is cleared.
    """
    csv_paths = sorted(glob(f'{folder}/*.csv'))
    if not csv_paths:
        # pd.concat([]) would raise ValueError too, but without saying where we looked.
        raise ValueError(f'No CSV files found in {folder!r}')
    return pd.concat([pd.read_csv(path) for path in csv_paths])
# Read all LLM result CSVs from the configured folder (memoized through memory.cache).
df_llm = load_llm_results(folder)


In [26]:

# Mean Choice per problem for the LLM runs; Phigh and Medium stay as columns.
df_llm_agg = (
    df_llm
    .groupby(['Problem', 'Phigh', 'Medium'])['Choice']
    .mean()
    .reset_index()
)

# Scatter of mean Choice against Phigh, coloured by Medium, with an OLS trendline.
fig = px.scatter(
    df_llm_agg,
    x='Phigh',
    y='Choice',
    color='Medium',
    trendline="ols",
)

# Both axes share one [min, max] span taken across the two plotted columns.
axis_min = min(df_llm_agg[col].min() for col in ("Phigh", "Choice"))
axis_max = max(df_llm_agg[col].max() for col in ("Phigh", "Choice"))

# Fixed 500x500 canvas with a 1:1 data-unit ratio between the axes.
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0, 'pad': 4},
    xaxis={
        'scaleanchor': "y",
        'scaleratio': 1,
        'range': [axis_min, axis_max],
    },
    yaxis={
        'scaleanchor': "x",
        'scaleratio': 1,
        'range': [axis_min, axis_max],
    },
)

fig.show()

# Recover the fitted trendline model from the figure and print its summary table.
results = px.get_trendline_results(fig)
print(results['px_fit_results'].iloc[0].summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.214
Model:                            OLS   Adj. R-squared:                  0.200
Method:                 Least Squares   F-statistic:                     15.77
Date:                Sun, 02 Mar 2025   Prob (F-statistic):           0.000200
Time:                        17:49:03   Log-Likelihood:                 7.3973
No. Observations:                  60   AIC:                            -10.79
Df Residuals:                      58   BIC:                            -6.606
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2077      0.048      4.305      0.0

In [68]:
# Tag each aggregate with its source and stack them for a side-by-side comparison.
# NOTE: the 'player' column is written onto est_agg / df_llm_agg themselves.
est_agg['player'] = 'human'
df_llm_agg['player'] = 'llm'
combined = pd.concat([est_agg, df_llm_agg])

# One facet per player, each with its own OLS trendline drawn in red.
fig = px.scatter(
    combined,
    x='Phigh',
    y='Choice',
    facet_col='player',
    trendline="ols",
    trendline_color_override='red',
    width=800,
    height=500,
)

# The code assumes fit 0 belongs to the first facet and fit 1 to the second.
results = px.get_trendline_results(fig)
stats0 = results['px_fit_results'].iloc[0]
stats1 = results['px_fit_results'].iloc[1]

# Annotate each panel with the slope (params[1]) and intercept (params[0]) of its trendline.
fig.add_annotation(
    x=0.5,
    y=0.5,
    text=f"Prisky ~ {stats0.params[1]:.2f}*Phigh + {stats0.params[0]:.2f}",
)
fig.add_annotation(
    x=0.5,
    y=0.5,
    text=f"Prisky ~ {stats1.params[1]:.2f}*Phigh + {stats1.params[0]:.2f}",
    row=1,
    col=2,
)

fig.show()


In [None]:
# Human (x-axis) versus LLM (y-axis) mean Choice, one point per problem.
# NOTE(review): the join matches Phigh and Medium by exact equality; confirm both
# sources store identical values so that no problem is silently dropped.
merged = est_agg.merge(
    df_llm_agg,
    on=['Problem', 'Phigh', 'Medium'],
    suffixes=('_human', '_llm'),
)
fig = px.scatter(
    merged,
    x='Choice_human',
    y='Choice_llm',
    trendline='ols',
    width=500,
    height=500,
    labels={'Choice_human': 'Human Prisky', 'Choice_llm': 'LLM Prisky'},
)

# Fixed 500x500 canvas, 1:1 axis ratio, both axes pinned to [0, 1].
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0, 'pad': 4},
    xaxis={
        'scaleanchor': "y",
        'scaleratio': 1,
        'range': [0, 1],
    },
    yaxis={
        'scaleanchor': "x",
        'scaleratio': 1,
        'range': [0, 1],
    },
)

# Annotate the plot with the slope (params[1]), intercept (params[0]) and R^2 of the trendline.
results = px.get_trendline_results(fig)
stats = results['px_fit_results'].iloc[0]
fig.add_annotation(
    x=0.5,
    y=0.5,
    text=f"LLM Prisky ~ {stats.params[1]:.2f}*Human Prisky + {stats.params[0]:.2f} R^2={stats.rsquared:.2f}",
)
fig.show()

In [49]:
# Show the fitted coefficients of the two facet trendlines side by side.
stats0.params, stats1.params

(array([0.23839974, 0.28441038]), array([0.20769914, 0.28217784]))

In [50]:
# Full regression summary for stats0 (the first trendline fit).
stats0.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.409
Model:,OLS,Adj. R-squared:,0.399
Method:,Least Squares,F-statistic:,40.21
Date:,"Sun, 02 Mar 2025",Prob (F-statistic):,3.71e-08
Time:,17:58:54,Log-Likelihood:,34.996
No. Observations:,60,AIC:,-65.99
Df Residuals:,58,BIC:,-61.8
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.2384,0.030,7.827,0.000,0.177,0.299
x1,0.2844,0.045,6.341,0.000,0.195,0.374

0,1,2,3
Omnibus:,31.619,Durbin-Watson:,2.065
Prob(Omnibus):,0.0,Jarque-Bera (JB):,113.006
Skew:,1.368,Prob(JB):,2.89e-25
Kurtosis:,9.142,Cond. No.,3.4
