In [16]:
import pandas as pd
import numpy as np
from ast import literal_eval
from scipy.stats import kruskal, iqr
from collections import defaultdict
import matplotlib.pyplot as plt

In [17]:
# --- input files and constants used by the rest of the notebook ---
# per-situation LQR scores (read into df_lqr; the 'situation' and 'lqr_score' columns are used below)
lqr_file = "../../data/qualitative_data/lqr_all_situations.csv"
# participant action log (read into df_data; reduced to one row per pp_id in df_pps)
pp_data_file = '../../data/experimental_data/experiment_actions.csv'
# model runs (read into df_model_scores; the 'model' and 'performance' columns are used below)
model_score_file = "../../data/qualitative_data/all_model_runs_on_situations_canonical.csv"
# experiment conditions (read into df_conditions; 'initial_endogenous', 'goal_id' and 'conditions' are used below)
conditions_file = "../../data/experimental_data/experiment_conditions.csv"
# NOTE(review): presumably the expected number of participants; it is not referenced
# by any cell visible in this notebook section — confirm whether it is still needed.
n_pps = 111

In [18]:
# Load every input table, then reduce the action log to one row per participant.
df_data = pd.read_csv(pp_data_file)
df_lqr = pd.read_csv(lqr_file)
df_model_scores = pd.read_csv(model_score_file)
df_conditions = pd.read_csv(conditions_file)

# For each participant keep the row carrying that participant's largest 'id'.
# NOTE(review): presumably the largest id is the participant's final logged action — confirm.
last_row_labels = df_data.groupby('pp_id')['id'].idxmax()
df_pps = df_data.loc[last_row_labels]

In [19]:
# Inspect the raw initial states as loaded from the CSV: each entry is text
# holding a list of floats, which is why it is parsed with literal_eval in the next cell.
df_conditions["initial_endogenous"]

0      [241.0, -127.0, -192.0, 113.0, -224.0]
1        [95.0, -108.0, -140.0, 62.0, -173.0]
2         [-84.0, 219.0, 125.0, -84.0, 132.0]
3             [-88.0, 3.0, 58.0, 82.0, 231.0]
4             [-32.0, 39.0, 75.0, 175.0, 6.0]
                        ...                  
135        [-40.0, -43.0, -35.0, 199.0, 67.0]
136       [-184.0, 235.0, 38.0, -237.0, 65.0]
137      [-122.0, 71.0, -217.0, 131.0, 165.0]
138         [106.0, 40.0, -10.0, -191.0, 3.0]
139          [156.0, 99.0, 38.0, -50.0, 56.0]
Name: initial_endogenous, Length: 140, dtype: object

In [20]:
# Normalise each initial state from its float-list text form (e.g. "[241.0, -127.0]")
# to an int-list text form (e.g. "[241, -127]") so it can serve as a join key.
# NOTE(review): presumably this is the format used by df_lqr['situation'] — confirm.
df_conditions['initial_endogenous'] = df_conditions['initial_endogenous'].apply(
    lambda x: str([int(y) for y in literal_eval(x)])
)

# drop all but the situation and lqr score columns
# NOTE(review): the left merge keeps every df_lqr row, but would duplicate rows if a
# situation matched several conditions; the positional lookup df_lqr.loc[condition % 30]
# used later relies on the row order staying intact — confirm the keys are unique.
columns_to_keep = ['situation', 'lqr_score']
df_lqr = df_lqr.merge(
    df_conditions, how='left', left_on='situation', right_on='initial_endogenous'
)[columns_to_keep]

# dataframe with one row per participant.
# groupby(...).idxmax() on the whole frame returns row *index labels* for every column,
# not the rows themselves, so df_by_pp['condition'] would hold row labels instead of
# condition ids. Select the actual row with the largest 'id' per participant instead
# (the same selection used to build df_pps).
df_by_pp = df_data.loc[df_data.groupby('pp_id')['id'].idxmax()]

In [21]:
# Tally participants per condition group: a condition whose value modulo 30 is
# below 10 counts as "easy"; every other row counts as "informative".
condition_mod = df_by_pp['condition'] % 30
n_easy = int((condition_mod < 10).sum())
# everything not counted as easy is counted as informative
n_info = len(condition_mod) - n_easy

In [22]:
# number of participants counted in the easy condition
n_easy

37

In [23]:
# median human performance: median of 'final_goal_distance' over df_pps,
# which holds one row per participant
df_pps['final_goal_distance'].median()

116.24646231176244

In [24]:
# how many participants finished with a goal distance below 100 points
finished_within_100 = df_pps['final_goal_distance'] < 100
int(finished_within_100.sum())

44

In [25]:
# root cost = square root of each participant's total cost
df_pps['root_cost'] = np.sqrt(df_pps['total_cost'])

Get the mean and median costs and root costs for each model type

In [26]:
# Collect, per agent type, the list of scores to summarise.
costs = defaultdict(list)
for total_cost, condition_id in zip(df_pps['total_cost'], df_pps['condition']):
    # human score: root of the participant's total cost
    costs['human'].append(np.sqrt(total_cost))
    # LQR score for the same situation, looked up by row label (condition mod 30).
    # NOTE(review): assumes df_lqr's integer row labels line up with condition % 30 — confirm.
    lqr_row = df_lqr.loc[int(condition_id) % 30]
    costs['lqr'].append(lqr_row['lqr_score'])

# Means get skewed by a few outliers; medians are the more meaningful summary.
avg_costs = {agent: np.mean(values) for agent, values in costs.items()}
med_costs = {agent: np.median(values) for agent, values in costs.items()}

# Add one entry per model, summarising its 'performance' column.
for agent_type in df_model_scores["model"].drop_duplicates():
    model_performance = df_model_scores.loc[df_model_scores["model"] == agent_type, "performance"]
    avg_costs[agent_type] = model_performance.mean()
    med_costs[agent_type] = model_performance.median()

In [27]:
# mean score per agent type (human root cost, LQR score, and each model's performance)
avg_costs

{'human': 1033.664207131002,
 'lqr': 6.706318696972033,
 'null_model_2': 629.6316683697174,
 'hill_climbing': 166.41983635879137,
 'sparse_max_discrete': 144.8840354815775,
 'sparse_lqr': 143.5613097157767,
 'sparse_max_continuous': 357.4669374598366,
 'null_model_1': 611.7676810539906}

In [28]:
# median score per agent type (human root cost, LQR score, and each model's performance)
med_costs

{'human': 116.42950656942597,
 'lqr': 6.374889832582812,
 'null_model_2': 505.3941210937507,
 'hill_climbing': 138.05166333818937,
 'sparse_max_discrete': 119.36807647547414,
 'sparse_lqr': 83.78617356356702,
 'sparse_max_continuous': 263.7716172144445,
 'null_model_1': 492.8503046075971}

In [14]:
# Attach the condition labels (easy, informative) to the participant data by matching
# each participant's 'condition' against the 'goal_id' column of the conditions table.
df_pp_condition = df_pps.merge(df_conditions, left_on='condition', right_on='goal_id')
# compute the root costs on the merged frame
df_pp_condition['root_cost'] = df_pp_condition['total_cost'].apply(np.sqrt)

# Print the mean, then the median, root cost: first for the informative group, then for easy.
for condition_label in ('informative', 'easy'):
    group_root_costs = df_pp_condition.loc[
        df_pp_condition['conditions'] == condition_label, 'root_cost'
    ]
    print(group_root_costs.mean())
    print(group_root_costs.median())

241.03226556653763
116.42950656942597
2440.5859034079267
117.29339106392572


In [15]:
# Label each participant's condition: "informative" when (condition mod 30) >= 10, otherwise "easy".
is_informative = df_pps['condition'] % 30 >= 10
df_pps['condition_name'] = is_informative.map({True: "informative", False: "easy"})

Compare the median human score between easy and informative conditions

In [16]:
# Split the participants' root costs into the two condition groups.
easy_mask = df_pps['condition_name'] == 'easy'
informative_mask = df_pps['condition_name'] == 'informative'
easy_costs = df_pps.loc[easy_mask, 'root_cost']
informative_costs = df_pps.loc[informative_mask, 'root_cost']

In [17]:
# report the median root cost of each condition group
print(f"easy median: {easy_costs.median()}")
print(f"informative median: {informative_costs.median()}")

easy median: 117.29339106392572
informative median: 116.42950656942597


In [18]:
# Kruskal-Wallis H-test comparing the root costs of the easy and informative groups
kruskal(easy_costs, informative_costs)

KruskalResult(statistic=0.0006036217304199454, pvalue=0.9803989740043368)