In [1]:
import json
import glob
import pandas as pd
from scipy import stats

# Experiment folder (under ./data/) whose results this notebook analyses.
map_dir = 'map 5'
# JSON export that the loading cell reads.
path = f'./data/{map_dir}/data-of-baselines-ddqn-sarl.json'

In [2]:
# Load the experiment export from the JSON file at `path`.
# The encoding is passed explicitly because open()'s default depends on the
# platform/locale, while JSON text is expected to be UTF-8.
with open(path, encoding='utf-8') as train_file:
    data = json.load(train_file)

In [3]:
# One row per entry of data['all-games'], indexed by that entry's key.
participants_df = pd.DataFrame.from_dict(data['all-games'], orient='index')

# Kept under its own name so the JSON `path` used by the loading cell is not
# overwritten (re-running that cell would otherwise try to open a directory).
workers_dir = 'data/'+map_dir+'/workers' # use your path
# glob order is filesystem-dependent; sort so that the keep='first'
# de-duplication below selects the same rows on every run.
all_files = sorted(glob.glob(workers_dir + "/*.csv"))
if not all_files:
    raise FileNotFoundError('no worker CSV files found in ' + workers_dir)

li = []
for filename in all_files:
    fromOne = pd.read_csv(filename, index_col=None, header=0)
    fromOne['filename'] = filename  # remember which CSV each row came from
    li.append(fromOne)

workers = pd.concat(li, axis=0, ignore_index=True)
# Rows without a survey code cannot be matched to a game, and the slicing
# in the lambda below would raise on NaN.
workers = workers.dropna(subset=['Answer.surveycode'])
workers = workers.set_index('Answer.surveycode')
# Derive the lookup key from the survey code: drop its last three characters
# and prefix '-' so it lines up with the participants_df index.
workers.index = workers.index.map(lambda code: '-' + code[:-3])
workers = workers[~workers.index.duplicated(keep='first')]

# Index-aligned assignment: games with no matching worker row get NaN here
# and are removed on the next line.
participants_df['WorkerId'] = workers['WorkerId']
participants_df = participants_df.dropna(subset = ['WorkerId'])

# Keep only the first row per worker; .copy() gives later cells an independent
# frame to modify instead of a filtered view of the previous one.
participants_df = participants_df[~participants_df.duplicated('WorkerId', keep='first')].copy()

participants_df.head()

Unnamed: 0,behavior,additional_comments,birth_year,collaborative_value,computer_score,computer_value,education,gender,human_score,predictable_value,selfishly_value,wisely_value,WorkerId
-Ms8apzf274KAnw8anBV,TSP,NICE STUDY.,1981,5,0.3,6,BA,female,1.2,5,5,4,ATZ6BKELCGF6H
-Ms8areDl12-5B5lSAX5,TSP,"I used to play in a league with that blue guy,...",1980,2,0.9,6,BA,male,0.9,6,2,3,A37MQE3V7PAGMB
-Ms8at7zSAJbq6RGJXMI,TSP,,1978,4,0.45,7,Ph.D,male,0.5,3,2,4,A2ZPSPA0R8KSL8
-Ms8av2Z5M2XEIGMqjsx,TSP,,1972,6,0.7,4,BA,female,0.78,7,7,6,A3SJTDFJ03P5LJ
-Ms8avJTdP2GRxWwV6VG,TSP,It was too hard to move your piece,1995,6,0.3,7,BA,female,0.48,4,1,6,A38LW34F26WBJY


In [4]:
# Convert the score and questionnaire columns to numeric dtypes; entries that
# cannot be parsed become NaN (errors='coerce').
# assign() replaces each column outright. Writing through `.loc[:, col] = ...`
# instead sets values into the existing column, which on pandas >= 2.0 can keep
# the old (e.g. object) dtype, and warns when the frame is a filtered slice.
numeric_columns = [
    'human_score',
    'computer_score',
    'collaborative_value',
    'predictable_value',
    'selfishly_value',
    'wisely_value',
]
participants_df = participants_df.assign(**{
    column: pd.to_numeric(participants_df[column], errors='coerce')
    for column in numeric_columns
})

In [5]:
# Per-behavior mean of the two scores and the four questionnaire ratings.
(
    participants_df
    .groupby('behavior')[[
        'computer_score',
        'human_score',
        'collaborative_value',
        'predictable_value',
        'selfishly_value',
        'wisely_value',
    ]]
    .mean()
)

Unnamed: 0_level_0,computer_score,human_score,collaborative_value,predictable_value,selfishly_value,wisely_value
behavior,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TSP,0.68,0.762,4.96,4.92,3.24,4.88
closest,0.216667,0.561667,5.0,5.166667,4.666667,5.0
ddqn distribution,0.982941,-0.188824,1.764706,5.882353,6.411765,2.941176
farthest,0.650909,0.34,4.909091,4.909091,3.727273,5.454545
random,-0.0025,-0.06,3.625,5.0,6.375,4.125
sarl ddqn distribution,1.156522,0.715217,3.913043,4.478261,5.086957,4.956522
selfish,1.088421,-0.228421,2.842105,4.421053,5.526316,3.894737


In [6]:
def scores_by_agent(scores, score_column):
    """Lay out one score column as a table with one column per behavior.

    Parameters
    ----------
    scores : pd.DataFrame
        Frame containing a 'behavior' column and `score_column`.
    score_column : str
        Name of the column whose values are spread across behaviors.

    Returns
    -------
    pd.DataFrame
        One column per distinct behavior (in groupby order). Each column holds
        that behavior's non-missing values renumbered from 0; columns with
        fewer values are padded with NaN by the column-wise concat.
    """
    return pd.concat(
        {
            agent: group[score_column].dropna().reset_index(drop=True)
            for agent, group in scores.groupby('behavior')
        },
        axis=1,
    )


# Only the two scores and the behavior label are exported.
to_exel = participants_df[['computer_score', 'human_score', 'behavior']]

computerScore_result = scores_by_agent(to_exel, 'computer_score')
humanScore_result = scores_by_agent(to_exel, 'human_score')

# One workbook with a sheet per score type.
with pd.ExcelWriter('./data/'+map_dir+'/agent_scores.xlsx') as writer:
    computerScore_result.to_excel(writer, sheet_name='computer_score')
    humanScore_result.to_excel(writer, sheet_name='human_score')

In [7]:
# Games played against the two agents being compared.
sarl = participants_df[(participants_df['behavior'] == 'sarl ddqn distribution')]
selfish = participants_df[(participants_df['behavior'] == 'selfish')]
# Independent two-sample t-test on the computer score of the two groups.
# Missing scores (possible after the errors='coerce' conversion) are dropped
# first; otherwise ttest_ind propagates NaN into the statistic and p-value.
# NOTE(review): the earlier comment here read "homogeneity" - presumably a
# reminder that ttest_ind assumes equal variances by default; confirm.
stats.ttest_ind(sarl['computer_score'].dropna(), selfish['computer_score'].dropna())

Ttest_indResult(statistic=2.154092120673532, pvalue=0.03731212710491181)