In [1]:
import json
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Maps included in the analysis. To study a single map, narrow this list
# (for example to ['map 4']).
map_dirs = ['map 3', 'map 4', 'map 5']

# JSON export for each map, relative to the working directory. Later cells read
# the 'all-games' and 'humanModel' sections from these files.
paths = dict(zip(
    ('map 3', 'map 4', 'map 5'),
    (
        './data/map 3/data-of-baselines-ddqn-sarl.json',
        './data/map 4/data-of-baselines.json',
        './data/map 5/data-of-baselines-ddqn-sarl-new.json',
    ),
))


# Combine all maps together:

In [2]:
# Not filled by this cell; kept so that any code expecting the name still finds it.
raw_df_state_to_action = pd.DataFrame()

# One frame per map, concatenated once after the loop (instead of growing
# participants_df inside the loop starting from an empty frame).
per_map_frames = []
for map_dir in map_dirs:
    # Load this map's exported game data from its JSON file
    json_path = paths[map_dir]
    with open(json_path) as train_file:
        data = json.load(train_file)

    # One row per key of data['all-games']
    participants_df_temp = pd.DataFrame.from_dict(data['all-games'], orient='index')

    path = 'data/'+map_dir+'/workers' # use your path
    # glob returns files in arbitrary order; sorting makes the keep='first'
    # de-duplication below pick the same rows on every machine
    all_files = sorted(glob.glob(path + "/*.csv"))

    li = []
    for filename in all_files:
        fromOne = pd.read_csv(filename, index_col=None, header=0)
        fromOne['filename'] = filename
        li.append(fromOne)

    workers = pd.concat(li, axis=0, ignore_index=True)
    workers = workers.set_index('Answer.surveycode')
    # Rewrite each survey code as '-' + code without its last three characters,
    # so it can be matched against the index of participants_df_temp
    workers.index = workers.index.map(lambda code: '-' + code[:-3])
    workers = workers[~workers.index.duplicated(keep='first')]
    # Index-aligned assignment: games with no matching survey code get NaN and are dropped
    participants_df_temp['WorkerId'] = workers['WorkerId']
    participants_df_temp = participants_df_temp.dropna(subset=['WorkerId'])

    if map_dir == 'map 4':
        # duplicate selfish to ddqn and sarl
        # NOTE(review): the copies keep the selfish rows' WorkerId, so a later
        # duplicated('WorkerId', keep='first') filter discards them again — confirm intended.
        selfish_df = participants_df_temp[participants_df_temp['behavior'] == 'selfish']
        ddqn_df = selfish_df.copy()
        ddqn_df['behavior'] = 'ddqn distribution'
        sarl_df = selfish_df.copy()
        sarl_df['behavior'] = 'sarl ddqn distribution'
        participants_df_temp = pd.concat([participants_df_temp, ddqn_df, sarl_df])

    per_map_frames.append(participants_df_temp)

# sort=False keeps the columns in order of first appearance and avoids the
# "non-concatenation axis is not aligned" FutureWarning of older pandas.
participants_df = pd.concat(per_map_frames, sort=False) if per_map_frames else pd.DataFrame()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [3]:
# Rows whose WorkerId already appeared in an earlier row (computed once, used twice)
is_repeat_worker = participants_df.duplicated('WorkerId', keep='first')
index_to_drop = participants_df[is_repeat_worker].index
print(len(index_to_drop))

# Keep only the first row per WorkerId. The explicit copy makes the result an
# independent frame, so the column assignments in later cells cannot trigger
# SettingWithCopyWarning.
participants_df = participants_df[~is_repeat_worker].copy()

514


Number of unique participants (each worker is counted once, keeping their first survey response):

In [4]:
# Row count of the de-duplicated frame
participants_df.shape[0]

369

In [5]:
# Columns to convert to numbers; values that cannot be parsed become NaN
# (errors='coerce').
numeric_columns = [
    'human_score',
    'computer_score',
    'collaborative_value',
    'predictable_value',
    'selfishly_value',
    'wisely_value',
    'computer_value',
]
for column_name in numeric_columns:
    # Plain column assignment replaces the column together with its dtype.
    # `df.loc[:, col] = ...` writes into the existing column instead and can
    # leave it as object dtype on pandas >= 2.0, which breaks the means below.
    participants_df[column_name] = pd.to_numeric(participants_df[column_name], errors='coerce')

In [6]:
# Mean of each listed column per behavior
summary_columns = [
    'computer_score',
    'human_score',
    'collaborative_value',
    'predictable_value',
    'selfishly_value',
    'wisely_value',
    'computer_value',
]
participants_df.groupby('behavior')[summary_columns].mean()

Unnamed: 0_level_0,computer_score,human_score,collaborative_value,predictable_value,selfishly_value,wisely_value,computer_value
behavior,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TSP,0.822727,0.878333,5.212121,5.121212,2.863636,5.378788,5.575758
closest,0.741791,0.825821,4.850746,5.059701,3.298507,4.746269,6.0
ddqn distribution,1.029667,-0.065333,2.5,5.266667,6.066667,3.533333,5.233333
farthest,0.449672,0.510164,3.803279,4.344262,4.393443,3.819672,5.47541
random,0.257581,0.262903,2.516129,4.080645,5.403226,3.032258,5.919355
sarl ddqn distribution,1.175714,0.74,3.714286,4.571429,5.0,4.914286,5.6
selfish,1.078333,0.076667,2.520833,5.375,6.208333,3.791667,5.854167


In [7]:
# Number of rows per behavior value
participants_df.loc[:, 'behavior'].value_counts()

closest                   67
TSP                       66
random                    62
farthest                  61
selfish                   48
sarl ddqn distribution    35
ddqn distribution         30
Name: behavior, dtype: int64

In [8]:
# Number of rows per gender value
participants_df.loc[:, 'gender'].value_counts()

male      226
female    143
Name: gender, dtype: int64

In [9]:
# Number of rows per education value
participants_df.loc[:, 'education'].value_counts()

BA                 194
High School        105
Master's degree     66
Ph.D                 4
Name: education, dtype: int64

# Build stay table

In [10]:
# Accumulates the per-map stay tables built inside the loop
c_h_stays = pd.DataFrame()
for map_dir in map_dirs:
    path = paths[map_dir]
    # Load this map's exported game data from its JSON file
    with open(path) as train_file:
        data = json.load(train_file)

    # One row per key of data['humanModel']; column 0 is discarded.
    # NOTE(review): the remaining columns are presumably consecutive game steps — confirm.
    raw_df_state_to_action_temp = pd.DataFrame.from_dict(data['humanModel'], orient='index')
    raw_df_state_to_action_temp = raw_df_state_to_action_temp.drop(0, axis=1)

    # One row per key of data['all-games']
    participants_df_temp = pd.DataFrame.from_dict(data['all-games'], orient='index')

    path = 'data/'+map_dir+'/workers' # use your path
    # glob returns files in arbitrary order; sorting makes the keep='first'
    # de-duplication below pick the same rows on every machine
    all_files = sorted(glob.glob(path + "/*.csv"))

    li = []
    for filename in all_files:
        fromOne = pd.read_csv(filename, index_col=None, header=0)
        fromOne['filename'] = filename
        li.append(fromOne)

    workers = pd.concat(li, axis=0, ignore_index=True)
    workers = workers.set_index('Answer.surveycode')
    # Rewrite each survey code as '-' + code without its last three characters,
    # so it can be matched against the index of participants_df_temp
    workers.index = workers.index.map(lambda code: '-' + code[:-3])
    workers = workers[~workers.index.duplicated(keep='first')]
    # Index-aligned assignment: games with no matching survey code get NaN and are dropped
    participants_df_temp['WorkerId'] = workers['WorkerId']
    participants_df_temp = participants_df_temp.dropna(subset = ['WorkerId'])

    if map_dir == 'map 4':
        # duplicate selfish to ddqn and sarl
        # NOTE(review): the copies keep the selfish rows' WorkerId, so the
        # keep='first' filter just below discards them again — confirm intended.
        selfish_df = participants_df_temp[participants_df_temp['behavior'] == 'selfish']
        ddqn_df = selfish_df.copy()
        ddqn_df['behavior'] = 'ddqn distribution'
        sarl_df = selfish_df.copy()
        sarl_df['behavior'] = 'sarl ddqn distribution'
        participants_df_temp = pd.concat([participants_df_temp, ddqn_df, sarl_df])

    # Keep only the first row per WorkerId (mask computed once, used twice)
    is_repeat_worker = participants_df_temp.duplicated('WorkerId', keep='first')
    index_to_drop = participants_df_temp[is_repeat_worker].index
    print(len(index_to_drop))

    participants_df_temp = participants_df_temp[~is_repeat_worker]

    # Drop every humanModel row whose key has no surviving participant row
    null_index = raw_df_state_to_action_temp.index.difference(participants_df_temp.index, sort=False)
    raw_df_state_to_action_finish_game = raw_df_state_to_action_temp.drop(null_index)

    # Move the game key from the index into a regular 'index' column
    participants_df_temp = participants_df_temp.reset_index()

    def extractState(cell):
        """Convert one raw step record into a tuple of six float arrays.

        `cell` must provide cell['state'] with at least six array-like items.
        Items 0-5 are converted with np.array(...).astype(float) and returned
        in order; the original code names them board, human trace, computer
        trace, human awards, computer awards and all awards.

        Missing steps (None or NaN) are returned unchanged.
        """
        # NaN is the only value that is not equal to itself
        if cell is None or cell != cell:
            return cell
        return tuple(np.array(cell['state'][layer]).astype(float) for layer in range(6))

    # Run extractState over every cell, one column at a time, keeping the
    # original column order
    state_dim_6_temp = pd.DataFrame(
        {
            step_label: raw_df_state_to_action_finish_game[step_label].apply(extractState)
            for step_label in raw_df_state_to_action_finish_game.columns
        },
        columns=raw_df_state_to_action_finish_game.columns,
    )

    # Extra all-None column labelled (number of columns + 1); countStays stops
    # at the first None it meets, so this guarantees it terminates
    state_dim_6_temp[len(state_dim_6_temp.columns) + 1] = None

    # Move the game key from the index into a regular 'index' column
    state_dim_6_temp = state_dim_6_temp.reset_index()

    def countStays(row, agent):
        """Count the steps of one game in which `agent` stayed in place.

        `row` is indexed by step label. Labels 2, 3, ... are visited in order
        (label 1 is skipped) until the first missing step: None, NaN, or a
        label that does not exist. A step is a stay when the value 0.9 does
        not occur in the agent's trace, which is item 2 of the step tuple for
        agent == 'computer' and item 1 for any other agent value ('human').

        Always returns an int, including for rows without a None terminator.
        """
        trace_position = 2 if agent == 'computer' else 1
        stay_count = 0
        for i in range(2, len(row) + 1):
            try:
                cell = row[i]
            except (KeyError, IndexError):
                break  # ran past the last recorded step
            # None or NaN marks the end of the game
            if cell is None or (isinstance(cell, float) and cell != cell):
                break
            if 0.9 not in cell[trace_position]:
                stay_count += 1
        return stay_count
    
    # One output row per game row of state_dim_6_temp
    c_h_stays_temp = pd.DataFrame(index=state_dim_6_temp.index, columns=[])
    computer_stays = []
    human_stays = []
    for _, row in state_dim_6_temp.iterrows():
        computer_stays.append(countStays(row, 'computer'))
        human_stays.append(countStays(row, 'human'))

    c_h_stays_temp['computer_stays'] = computer_stays
    c_h_stays_temp['human_stays'] = human_stays

    # Attach the participant attributes by game key, not by row position:
    # state_dim_6_temp follows the order of data['humanModel'] while
    # participants_df_temp follows data['all-games'], and the two need not
    # have the same order or length. Both frames carry the key in 'index'.
    participant_info = participants_df_temp[
        ~participants_df_temp.duplicated('index', keep='first')
    ].set_index('index')
    game_keys = state_dim_6_temp['index']
    for attribute in ['behavior', 'gender', 'education']:
        c_h_stays_temp[attribute] = participant_info[attribute].reindex(game_keys).values

    # Row labels restart at 0 for every map because the index is not reset here
    c_h_stays = pd.concat([c_h_stays, c_h_stays_temp])

89
192
190


In [11]:
# Combined per-game stay counts for all maps. The row labels restart at 0 for
# each map because the per-map frames were concatenated without ignore_index.
c_h_stays

Unnamed: 0,computer_stays,human_stays,behavior,gender,education
0,2,0,closest,male,High School
1,160,131,selfish,female,BA
2,0,0,TSP,male,BA
3,2,0,farthest,female,Master's degree
4,1,1,farthest,male,High School
...,...,...,...,...,...
136,3,2,farthest,male,BA
137,14,2,random,female,High School
138,19,1,random,male,BA
139,1,0,farthest,male,BA


#### Average number of stays per agent:

In [12]:
# Select the stay counts explicitly: gender and education hold strings, and
# pandas >= 2.0 raises on them instead of silently dropping them from the mean.
c_h_stays.groupby('behavior')[['computer_stays', 'human_stays']].mean()

Unnamed: 0_level_0,computer_stays,human_stays
behavior,Unnamed: 1_level_1,Unnamed: 2_level_1
TSP,0.944444,2.611111
closest,0.887324,3.112676
ddqn distribution,42.366667,8.8
farthest,3.930556,2.305556
random,9.819444,4.930556
sarl ddqn distribution,13.210526,1.210526
selfish,41.210526,9.105263


#### Average number of stays by education:

In [13]:
# Mean stay counts of the 'selfish' games per education level. The stay columns
# are selected explicitly because the frame also holds string columns, which
# pandas >= 2.0 refuses to average.
c_h_stays[c_h_stays['behavior'] == 'selfish'].groupby('education')[['computer_stays', 'human_stays']].mean()

Unnamed: 0_level_0,computer_stays,human_stays
education,Unnamed: 1_level_1,Unnamed: 2_level_1
BA,44.055556,12.25
High School,30.444444,1.333333
Master's degree,40.75,5.5


#### Average score by education:

In [14]:
# Mean scores of the 'selfish' rows per education level
(
    participants_df
    .loc[participants_df['behavior'] == 'selfish', ['computer_score', 'human_score', 'education']]
    .groupby('education')
    .mean()
)

Unnamed: 0_level_0,computer_score,human_score
education,Unnamed: 1_level_1,Unnamed: 2_level_1
BA,1.049677,0.07129
High School,1.18625,0.245
Master's degree,1.081111,-0.054444


#### Average number of stays by gender:

In [15]:
# Mean stay counts of the 'selfish' games per gender. The stay columns are
# selected explicitly because the frame also holds string columns, which
# pandas >= 2.0 refuses to average.
c_h_stays[c_h_stays['behavior'] == 'selfish'].groupby('gender')[['computer_stays', 'human_stays']].mean()

Unnamed: 0_level_0,computer_stays,human_stays
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
female,42.384615,8.653846
male,40.225806,9.483871


#### Average score by gender:

In [16]:
# Mean scores of the 'selfish' rows per gender
(
    participants_df
    .loc[participants_df['behavior'] == 'selfish', ['computer_score', 'human_score', 'gender']]
    .groupby('gender')
    .mean()
)

Unnamed: 0_level_0,computer_score,human_score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
female,1.060952,-0.015238
male,1.091852,0.148148


## Agents score divided by gender

#### Per agent:

In [17]:
# Pick the score columns before averaging: taking the mean of every column
# first is wasted work and fails on the non-numeric columns under pandas >= 2.0.
participants_df.groupby(['behavior', 'gender'])[['computer_score', 'human_score']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,computer_score,human_score
behavior,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
TSP,female,0.822857,0.863429
TSP,male,0.822581,0.895161
closest,female,0.7525,0.758
closest,male,0.737234,0.854681
ddqn distribution,female,0.9725,-0.075833
ddqn distribution,male,1.067778,-0.058333
farthest,female,0.417,0.487
farthest,male,0.46561,0.521463
random,female,0.286364,0.183182
random,male,0.24175,0.30675


## Agents score divided by education:

#### Per agent:

In [18]:
# Pick the score columns before averaging: taking the mean of every column
# first is wasted work and fails on the non-numeric columns under pandas >= 2.0.
participants_df.groupby(['behavior', 'education'])[['computer_score', 'human_score']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,computer_score,human_score
behavior,education,Unnamed: 2_level_1,Unnamed: 3_level_1
TSP,BA,0.82,0.903
TSP,High School,0.894,0.93
TSP,Master's degree,0.69,0.728
TSP,Ph.D,0.45,0.35
closest,BA,0.741176,0.780294
closest,High School,0.779412,0.892353
closest,Master's degree,0.703125,0.851875
ddqn distribution,BA,1.035333,-0.144
ddqn distribution,High School,1.118333,0.1
ddqn distribution,Master's degree,0.961111,-0.044444
