# Effects of Competition on the Illusion of Control

In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
import numpy as np
import pandas as pd
from scipy import stats

### Reading & Preparing Data

In [2]:
def _list_entries(path, directories):
    """Return the entries of ``path`` that are (or are not) directories, sorted by name."""
    # os.scandir yields entries in arbitrary order; sorting makes the row order
    # of the combined frame reproducible across machines and file systems.
    with os.scandir(path) as entries:
        return sorted(
            (entry for entry in entries if entry.is_dir() == directories),
            key=lambda entry: entry.name,
        )


def _is_selected(name, allowed):
    """Return True when ``allowed`` is None/empty (no filter) or contains ``name``."""
    return allowed is None or len(allowed) == 0 or name in allowed


def read_results(results_folder, run_id_list, experiment_id_list, model_id_list):
    """Load every iteration CSV found under ``results_folder`` into one DataFrame.

    The folder is walked as ``<run_id>/<experiment_id>/<model_id>/<file>``.
    Non-directory entries at the run, experiment and model levels are ignored,
    as are sub-directories at the file level. Every remaining file is read
    with ``pd.read_csv``.

    Args:
        results_folder: Root directory holding one sub-directory per run.
        run_id_list: Run directory names to keep; empty or None keeps all.
        experiment_id_list: Experiment directory names to keep; empty or None
            keeps all.
        model_id_list: Model directory names to keep; empty or None keeps all.

    Returns:
        The concatenation of all matching files, visited in name order, with
        each file's own row index preserved. Columns whose name contains
        'unnamed' (case-insensitive) are dropped, and the columns ``run_id``,
        ``experiment_id``, ``model_id`` and ``iteration_ts`` are added.
        An empty DataFrame when no file matches.

    Raises:
        IndexError: If a file name has no ``_`` in the part before its first
            ``.``, because the iteration timestamp is taken from the second
            ``_``-separated token of that part.
    """
    frames = []

    for run in _list_entries(results_folder, directories=True):
        if not _is_selected(run.name, run_id_list):
            continue

        for experiment in _list_entries(run.path, directories=True):
            if not _is_selected(experiment.name, experiment_id_list):
                continue

            for model in _list_entries(experiment.path, directories=True):
                if not _is_selected(model.name, model_id_list):
                    continue

                for iteration in _list_entries(model.path, directories=False):
                    # DirEntry.name is used instead of splitting the path on
                    # '/' so the code also works with other path separators.
                    iteration_ts = iteration.name.split('.')[0].split('_')[1]

                    i_df = pd.read_csv(iteration.path)
                    unnamed = i_df.columns[
                        i_df.columns.str.contains('unnamed', case=False)
                    ]
                    i_df = i_df.drop(unnamed, axis=1)

                    i_df['run_id'] = run.name
                    i_df['experiment_id'] = experiment.name
                    i_df['model_id'] = model.name
                    i_df['iteration_ts'] = iteration_ts

                    frames.append(i_df)

    # Concatenate once at the end: concatenating inside the loop re-copies the
    # accumulated rows for every file.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [6]:
# Restrict loading to one run / experiment / model.
run_id_list = ['competition_run']
experiment_id_list = ['competition']
model_id_list = ['gpt4omini']

# RESULTS_FOLDER is read from the environment (load_dotenv() was called at the
# top of the notebook). Fail with a clear message instead of the TypeError
# that '.' + None would raise when the variable is missing.
results_folder = os.getenv('RESULTS_FOLDER')
if results_folder is None:
    raise RuntimeError(
        'RESULTS_FOLDER is not set; define it in the environment or in the .env file'
    )

# NOTE(review): the value is prefixed with '.', so it presumably starts with a
# path separator and is meant relative to the working directory - confirm.
results_df = read_results('.' + results_folder, run_id_list, experiment_id_list, model_id_list)

In [7]:
# Count the NaN values in each column of the loaded results.
results_df.isna().sum()

model_id              0
model_name            0
model_provider        0
participant_gender    0
bet_1_raw             0
bet_1                 0
bet_2_raw             0
bet_2                 0
bet_3_raw             0
bet_3                 0
bet_4_raw             0
bet_4                 0
condition_id          0
condition_title       0
experiment_id         0
experiment_title      0
run_id                0
iteration_ts          0
dtype: int64

In [8]:
# Keep the iteration identifiers, the participant attribute, the condition and
# the four bets, ordered by run / experiment / model / iteration.
id_columns = ['run_id', 'experiment_id', 'model_id', 'iteration_ts']
bet_columns = ['bet_1', 'bet_2', 'bet_3', 'bet_4']
selected_columns = id_columns + ['participant_gender', 'condition_id'] + bet_columns

df = results_df[selected_columns].sort_values(by=id_columns)

# Per-row average of the four bets.
bet_total = df['bet_1'] + df['bet_2'] + df['bet_3'] + df['bet_4']
df['bet_avg'] = 0.25 * bet_total

### Analysis

In [13]:
def build_pivot_table(
    df: pd.DataFrame,
    row_group_columns: list[str],
    column_group_columns: list[str],
    metric_columns: list[str],
    aggregate_funcs: 'list[str] | None' = None,
) -> pd.DataFrame:
    """Pivot ``df`` and aggregate the metric columns, rounded to one decimal.

    Args:
        df: Source data.
        row_group_columns: Columns whose values form the rows of the table.
        column_group_columns: Columns whose values form the table columns.
        metric_columns: Columns to aggregate.
        aggregate_funcs: Aggregation function names passed to
            ``DataFrame.pivot_table`` as ``aggfunc``; defaults to ``['mean']``.

    Returns:
        The pivot table produced by ``DataFrame.pivot_table`` with every value
        rounded to one decimal place.
    """
    # A None sentinel replaces the former mutable list default, which would be
    # one object shared by every call.
    if aggregate_funcs is None:
        aggregate_funcs = ['mean']

    table = df.pivot_table(
        index=row_group_columns,
        columns=column_group_columns,
        values=metric_columns,
        aggfunc=aggregate_funcs,
    )
    return table.round(1)

In [27]:
def build_experiment_table(
    df: pd.DataFrame,
    group_columns: list[str],
    condition_column: str,
    condition_control: str,
    metric_columns: list[str],
) -> pd.DataFrame:
    """Compare every non-control condition against the control, per group.

    For each combination of ``group_columns`` values and each condition other
    than ``condition_control``, the mean of every metric is compared with the
    mean of the control condition in the same group.

    Args:
        df: Source data, one row per observation.
        group_columns: Columns that define the groups within which conditions
            are compared.
        condition_column: Column holding the condition of each row.
        condition_control: Value of ``condition_column`` used as the baseline.
        metric_columns: Numeric columns to compare.

    Returns:
        One row per (group, non-control condition) containing the group
        columns, the condition, and for each metric ``m``:
        ``m_control`` (control mean), ``m_value`` (condition mean),
        ``m_difference`` (value - control), ``m_r_diff`` (difference divided
        by the control mean, NaN when that mean is 0), ``m_t_stat`` and
        ``m_pvalue`` from ``stats.ttest_ind_from_stats`` with
        ``equal_var=False``. The test columns are NaN when either side has
        fewer than two observations or a standard deviation that is not
        positive. When a group has no control rows, all control-dependent
        columns are NaN. An empty DataFrame is returned when there are no
        non-control rows.
    """
    grouped = df.groupby(list(group_columns) + [condition_column])

    agg = grouped[metric_columns].agg(['mean', 'std', 'count'])
    # Flatten the (metric, statistic) column MultiIndex into 'metric_stat'.
    agg.columns = [
        f'{m}_{stat}' for m, stat in agg.columns.to_flat_index()
    ]
    agg = agg.reset_index()

    control = agg[agg[condition_column] == condition_control]
    test = agg[agg[condition_column] != condition_control]

    # Index the control statistics by group key so that a group without a
    # control condition is detected instead of raising KeyError.
    control_by_group = {
        tuple(rec[c] for c in group_columns): rec
        for rec in control.to_dict('records')
    }

    results = []

    for row in test.to_dict('records'):
        group_key = tuple(row[c] for c in group_columns)
        base_row = control_by_group.get(group_key)

        out_row = {col: row[col] for col in group_columns}
        out_row[condition_column] = row[condition_column]

        for m in metric_columns:
            if base_row is None:
                # No control rows in this group: nothing to compare against.
                mean_control, std_control, n_control = np.nan, np.nan, 0
            else:
                mean_control = base_row[f'{m}_mean']
                std_control = base_row[f'{m}_std']
                n_control = base_row[f'{m}_count']

            mean_test = row[f'{m}_mean']
            std_test = row[f'{m}_std']
            n_test = row[f'{m}_count']

            diff = mean_test - mean_control
            r_diff = diff / mean_control if mean_control != 0 else np.nan

            # The test needs at least two observations and a positive spread
            # on both sides; comparisons with a NaN std evaluate to False.
            if n_control > 1 and n_test > 1 and std_control > 0 and std_test > 0:
                t_stat, pvalue = stats.ttest_ind_from_stats(
                    mean1=mean_test, std1=std_test, nobs1=n_test,
                    mean2=mean_control, std2=std_control, nobs2=n_control,
                    equal_var=False,
                )
            else:
                t_stat, pvalue = np.nan, np.nan

            # Write the output columns for this metric.
            out_row[f'{m}_control'] = mean_control
            out_row[f'{m}_value'] = mean_test
            out_row[f'{m}_difference'] = diff
            out_row[f'{m}_r_diff'] = r_diff
            out_row[f'{m}_t_stat'] = t_stat
            out_row[f'{m}_pvalue'] = pvalue

        results.append(out_row)

    return pd.DataFrame(results)

In [28]:
# Columns identifying a single experiment iteration; used as grouping keys in
# the tables below.
experiment_group_columns = ['run_id', 'experiment_id', 'model_id', 'iteration_ts']
# Participant attribute column kept in df.
slice_group_columns = ['participant_gender']
# Column holding the experimental condition of each row.
condition_group_column = 'condition_id'
# Metrics to report: the first bet and the average of the four bets.
metric_columns = ['bet_1', 'bet_avg']

In [29]:
# Mean of each metric per iteration (rows) and condition (columns), rounded to
# one decimal place.
build_pivot_table(
    df=df,
    row_group_columns=experiment_group_columns,
    column_group_columns=[condition_group_column],
    metric_columns=metric_columns
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,bet_1,bet_1,bet_avg,bet_avg
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,condition_id,dapper,schnook,dapper,schnook
run_id,experiment_id,model_id,iteration_ts,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
competition_run,competition,gpt4omini,1768073253,11.9,23.9,13.1,16.1
competition_run,competition,gpt4omini,1768073361,10.0,23.3,13.5,16.5
competition_run,competition,gpt4omini,1768073472,11.7,20.6,13.9,15.3
competition_run,competition,gpt4omini,1768073579,11.9,22.5,13.1,16.7
competition_run,competition,gpt4omini,1768073694,11.1,22.2,12.6,15.9
competition_run,competition,gpt4omini,1768073805,10.7,23.3,12.9,16.6
competition_run,competition,gpt4omini,1768073910,10.9,22.5,13.1,16.7
competition_run,competition,gpt4omini,1768074027,11.4,22.2,13.1,16.7
competition_run,competition,gpt4omini,1768074129,11.4,22.5,13.4,16.4
competition_run,competition,gpt4omini,1768074230,15.0,23.1,13.9,16.5


In [30]:
# Per iteration, compare every condition other than 'dapper' against the
# 'dapper' control: means, absolute and relative difference, t statistic and
# p-value for each metric.
build_experiment_table(
    df=df,
    group_columns=experiment_group_columns,
    condition_column=condition_group_column,
    condition_control='dapper',
    metric_columns=metric_columns
)

Unnamed: 0,run_id,experiment_id,model_id,iteration_ts,condition_id,bet_1_control,bet_1_value,bet_1_difference,bet_1_r_diff,bet_1_t_stat,bet_1_pvalue,bet_avg_control,bet_avg_value,bet_avg_difference,bet_avg_r_diff,bet_avg_t_stat,bet_avg_pvalue
0,competition_run,competition,gpt4omini,1768073253,schnook,11.944444,23.888889,11.944444,1.0,7.708415,1.430849e-08,13.055556,16.111111,3.055556,0.234043,4.452696,0.0001110623
1,competition_run,competition,gpt4omini,1768073361,schnook,10.0,23.333333,13.333333,1.333333,7.567242,1.971081e-08,13.472222,16.458333,2.986111,0.221649,4.570089,6.203066e-05
2,competition_run,competition,gpt4omini,1768073472,schnook,11.666667,20.555556,8.888889,0.761905,4.321818,0.0001327581,13.888889,15.277778,1.388889,0.1,1.712026,0.09609755
3,competition_run,competition,gpt4omini,1768073579,schnook,11.944444,22.5,10.555556,0.883721,6.262113,5.600362e-07,13.055556,16.666667,3.611111,0.276596,4.566905,0.0001019444
4,competition_run,competition,gpt4omini,1768073694,schnook,11.111111,22.222222,11.111111,1.0,6.920067,6.55532e-08,12.638889,15.902778,3.263889,0.258242,5.126326,2.133789e-05
5,competition_run,competition,gpt4omini,1768073805,schnook,10.666667,23.333333,12.666667,1.1875,7.853559,4.30841e-09,12.944444,16.597222,3.652778,0.282189,7.265924,4.219867e-08
6,competition_run,competition,gpt4omini,1768073910,schnook,10.944444,22.5,11.555556,1.055838,7.035705,4.359375e-08,13.083333,16.666667,3.583333,0.273885,6.017735,8.914159e-07
7,competition_run,competition,gpt4omini,1768074027,schnook,11.388889,22.222222,10.833333,0.95122,5.467402,4.689785e-06,13.055556,16.666667,3.611111,0.276596,6.108329,6.967848e-07
8,competition_run,competition,gpt4omini,1768074129,schnook,11.388889,22.5,11.111111,0.97561,6.137844,6.462616e-07,13.402778,16.388889,2.986111,0.222798,4.419925,9.785239e-05
9,competition_run,competition,gpt4omini,1768074230,schnook,15.0,23.055556,8.055556,0.537037,3.897873,0.0005102682,13.888889,16.527778,2.638889,0.19,3.713625,0.0007510591
