In [None]:
import pandas as pd
import numpy as np
from ast import literal_eval
from scipy.stats import kruskal, iqr
from collections import defaultdict
import matplotlib.pyplot as plt

In [None]:
# Input CSV locations, written as relative paths.
pp_data_file = "../../data/experimental_data/experiment_actions.csv"
conditions_file = "../../data/experimental_data/experiment_conditions.csv"
lqr_file = "../../data/qualitative_data/lqr_all_situations.csv"
model_score_file = "../../data/input_cost_analysis/all_model_runs_on_situations_exo=0.01.csv"

# Presumably the number of participants in the experiment -- TODO confirm.
n_pps = 111

In [None]:
# Raw experiment log.
df_data = pd.read_csv(pp_data_file)
# One row per participant: the row holding the largest 'id' within each 'pp_id'
# (presumably the participant's final recorded action -- TODO confirm).
df_pps = df_data.loc[df_data.groupby('pp_id')['id'].idxmax()]
df_lqr = pd.read_csv(lqr_file)
df_model_scores = pd.read_csv(model_score_file)
# Same file as df_model_scores: take an independent copy instead of parsing the CSV twice.
df_scores = df_model_scores.copy()
df_conditions = pd.read_csv(conditions_file)

In [None]:
def _as_int_list_string(raw_state):
    """Parse a list literal, cast every element to int, and return str() of the resulting list."""
    return str([int(value) for value in literal_eval(raw_state)])


# Rewrite 'initial_endogenous' as the string form of an integer list so it can be
# joined against df_lqr['situation'] (assumes 'situation' uses the same format -- TODO confirm).
df_conditions['initial_endogenous'] = df_conditions['initial_endogenous'].map(_as_int_list_string)

columns_to_keep = ['situation', 'lqr_score']
df_lqr = pd.merge(
    df_lqr,
    df_conditions,
    how='left',
    left_on='situation',
    right_on='initial_endogenous',
)
# Discard the columns brought in by the merge; only the situation and its LQR score are kept.
df_lqr = df_lqr[columns_to_keep]

In [None]:
# One row per participant: the row with the largest 'id' within each 'pp_id'.
# groupby(...).idxmax() on its own returns, for every column, the *index label* of
# that column's maximum, so df_by_pp['condition'] would contain row labels instead
# of condition ids (and idxmax can fail on non-numeric columns). Use the labels of
# the max-'id' rows to select the actual rows instead.
df_by_pp = df_data.loc[df_data.groupby('pp_id')['id'].idxmax()]

In [None]:
# A condition counts as "easy" when (condition % 30) < 10; every other value is
# counted as "informative".
n_easy = sum(1 for c in df_by_pp['condition'] if c % 30 < 10)
n_info = len(df_by_pp['condition']) - n_easy

In [None]:
# Number of entries counted as "easy" (condition % 30 < 10).
n_easy

In [None]:
# Median final goal distance across participants, ignoring outliers
# (rows whose final_goal_distance is 1000 or more).
within_outlier_cutoff = df_pps['final_goal_distance'] < 1000
df_pps.loc[within_outlier_cutoff, 'final_goal_distance'].median()

In [None]:
# How many participants finished less than 100 points away from the goal.
int((df_pps['final_goal_distance'] < 100).sum())

In [None]:
# Square root of each participant's total cost, stored as a new column.
df_pps['root_cost'] = np.sqrt(df_pps['total_cost'])

Compute the mean and median cost for each agent type (human participants, LQR, and each model). Human costs are the square root of `total_cost`.

In [None]:
# Agent types whose scores are read from df_model_scores.
model_agent_types = (
    "sparse_max_discrete",
    "sparse_max_continuous",
    "hill_climbing",
    "sparse_lqr",
    "null_model_2",
)

# costs maps an agent type to the list of costs collected for it.
costs = defaultdict(list)

# Per-participant entries: the participant's own root cost, plus the LQR score
# looked up for that participant's condition.
for total_cost, raw_condition in zip(df_pps['total_cost'], df_pps['condition']):
    costs['human'].append(np.sqrt(total_cost))
    condition = int(raw_condition)
    # NOTE(review): this assumes the row labelled (condition % 30) in df_lqr is the
    # situation belonging to that condition -- confirm against the merge above.
    costs['lqr'].append(df_lqr.loc[condition % 30]['lqr_score'])

# Model scores do not depend on the participant, so collect them once rather than
# re-appending the same values on every loop iteration. The mean and median are
# the same as for the repeated list (up to floating-point rounding in the mean).
for agent_type in model_agent_types:
    is_agent = df_model_scores["model"] == agent_type
    costs[agent_type].extend(df_model_scores.loc[is_agent, "performance"])

avg_costs = {name: np.mean(values) for name, values in costs.items()}
med_costs = {name: np.median(values) for name, values in costs.items()}

In [None]:
# Mean cost per agent type.
avg_costs

In [None]:
# Median cost per agent type.
med_costs

In [None]:
# Median LQR score over participants. The per-agent lists live in `costs`;
# `root_costs` is never defined in this notebook and raised NameError on a fresh run.
np.median(costs['lqr'])

In [None]:
# Attach the condition metadata to each participant row by matching the
# participant's 'condition' to the conditions table's 'goal_id' (inner join).
df_pp_condition = pd.merge(
    df_pps,
    df_conditions,
    how='inner',
    left_on='condition',
    right_on='goal_id',
)

In [None]:
# Root cost per participant; rows with a root cost of 500 or more are dropped.
df_pp_condition['root_cost'] = np.sqrt(df_pp_condition['total_cost'])
df_pp_condition = df_pp_condition[df_pp_condition['root_cost'] < 500]

# Print the mean, then the median, root cost for each condition group
# (informative first, then easy).
for condition_label in ('informative', 'easy'):
    group_root_costs = df_pp_condition.loc[
        df_pp_condition['conditions'] == condition_label, 'root_cost'
    ]
    print(group_root_costs.mean())
    print(group_root_costs.median())

In [None]:
def _condition_name(condition):
    """Return "informative" when condition % 30 is at least 10, otherwise "easy"."""
    if condition % 30 >= 10:
        return "informative"
    return "easy"


# Label every participant row with the name of its condition group.
df_pps['condition_name'] = df_pps['condition'].map(_condition_name)

Compare the median human score between easy and informative conditions

In [None]:
# Root costs of the participants in each condition group.
is_easy = df_pps['condition_name'] == 'easy'
is_informative = df_pps['condition_name'] == 'informative'
easy_costs = df_pps.loc[is_easy, 'root_cost']
informative_costs = df_pps.loc[is_informative, 'root_cost']

In [None]:
# Median root cost in the easy group.
easy_costs.median()

In [None]:
# Median root cost in the informative group.
informative_costs.median()

In [None]:
# Kruskal-Wallis H-test comparing root costs between the easy and informative groups.
kruskal(easy_costs, informative_costs)

In [None]:
# Interquartile range of root cost over all rows of df_pps.
iqr(df_pps['root_cost'])