In [1]:
import json
import glob
import pandas as pd
from scipy import stats

map_dir = 'map 4'
path = './data/'+map_dir+'/data-of-baselines.json'

In [2]:
#load data from the json file
with open(path) as train_file:
    data = json.load(train_file)

In [3]:
participants_df = pd.DataFrame.from_dict(data['all-games'], orient='index')

path = 'data/'+map_dir+'/workers' # use your path
all_files = glob.glob(path + "/*.csv")

li = []
for filename in all_files:
    fromOne = pd.read_csv(filename, index_col=None, header=0)
    fromOne['filename'] = filename
    li.append(fromOne)

workers = pd.concat(li, axis=0, ignore_index=True)
workers = workers.set_index('Answer.surveycode')
workers.index = workers.index.map(lambda code: '-' + code[:-3])
# print(workers.index[workers.index.duplicated()])
workers = workers[~workers.index.duplicated(keep='first')]
participants_df['WorkerId'] = workers['WorkerId']
participants_df = participants_df.dropna(subset = ['WorkerId'])

participants_df = participants_df[~participants_df.duplicated('WorkerId', keep='first')]

participants_df.head()

Unnamed: 0,additional_comments,behavior,birth_year,collaborative_value,computer_score,computer_value,education,gender,human_score,predictable_value,selfishly_value,wisely_value,WorkerId
-Mrxq3qHzIBT4S2E8ooK,,closest,1977,6,1.0,7,BA,male,0.99,4,5,7,A1M64R8HPYNNZA
-Mrxq7Ag3t6EAUpVxo_C,,closest,1964,6,0.95,7,High School,female,0.8,4,4,6,A2DYXKOHDDY9EG
-Mrxq8C23gWU1NXTc8Hg,,TSP,1983,6,0.95,6,Master's degree,male,0.94,4,2,5,A1YBJN6WT3X8B5
-Mrxq8hPTtLOqELhgh7y,,closest,1985,6,1.0,7,High School,male,0.95,6,2,7,A1KZ21TSAYUHO4
-Mrxq9Qr5wmMfAV2wlbE,,farthest,1973,6,0.8,4,Ph.D,male,0.89,4,2,4,ANBWJZYU2A68T


In [4]:
participants_df.loc[: ,'human_score'] = pd.to_numeric(participants_df['human_score'], errors='coerce')
participants_df.loc[: ,'computer_score'] = pd.to_numeric(participants_df['computer_score'], errors='coerce')
participants_df.loc[: ,'collaborative_value'] = pd.to_numeric(participants_df['collaborative_value'], errors='coerce')
participants_df.loc[: ,'predictable_value'] = pd.to_numeric(participants_df['predictable_value'], errors='coerce')
participants_df.loc[: ,'selfishly_value'] = pd.to_numeric(participants_df['selfishly_value'], errors='coerce')
participants_df.loc[: ,'wisely_value'] = pd.to_numeric(participants_df['wisely_value'], errors='coerce')

In [5]:
participants_df.groupby('behavior')[['computer_score', 'human_score', 'collaborative_value', 'predictable_value', 'selfishly_value', 'wisely_value']].mean()

Unnamed: 0_level_0,computer_score,human_score,collaborative_value,predictable_value,selfishly_value,wisely_value
behavior,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TSP,0.911765,0.86,5.411765,5.058824,3.529412,5.705882
closest,0.884783,0.91087,5.217391,5.304348,2.956522,5.304348
farthest,0.515882,0.61,5.058824,5.0,4.823529,4.823529
random,0.412857,0.487143,2.642857,4.642857,5.571429,3.5
selfish,1.219091,0.421818,3.636364,5.454545,6.363636,4.090909


In [6]:
to_exel = participants_df[['computer_score', 'human_score', 'behavior']]

table_computerScore = pd.DataFrame(index=to_exel.index, columns=to_exel.groupby('behavior').count().index)
for agent in table_computerScore.columns:
    table_computerScore[agent] = to_exel[to_exel['behavior'] == agent]['computer_score']

computerScore_result = pd.concat([table_computerScore[agent].dropna().reset_index().drop('index', axis=1) for agent in table_computerScore.columns], axis=1)

table_humanScore = pd.DataFrame(index=to_exel.index, columns=to_exel.groupby('behavior').count().index)
for agent in table_humanScore.columns:
    table_humanScore[agent] = to_exel[to_exel['behavior'] == agent]['human_score']

humanScore_result = pd.concat([table_humanScore[agent].dropna().reset_index().drop('index', axis=1) for agent in table_humanScore.columns], axis=1)

with pd.ExcelWriter('./data/'+map_dir+'/agent_scores.xlsx') as writer:
    computerScore_result.to_excel(writer, sheet_name='computer_score')
    humanScore_result.to_excel(writer, sheet_name='human_score')

In [7]:
sarl = participants_df[(participants_df['behavior'] == 'sarl ddqn distribution')]
selfish = participants_df[(participants_df['behavior'] == 'selfish')]
# homogeneity
stats.ttest_ind(sarl['computer_score'], selfish['computer_score'])

Ttest_indResult(statistic=-7.099035243242335, pvalue=4.707824095947435e-08)