In [1]:
import json
import glob
import pandas as pd
import numpy as np
import os

# Relative path of the JSON export that the next cell opens and parses.
path = './data/data-of-baselines.json'

In [2]:
# Load the JSON export into `data`; later cells read its 'humanModel' and
# 'all-games' entries.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII survey comments.
with open(path, encoding='utf-8') as data_file:
    data = json.load(data_file)

# Survey results:

Load the worker IDs and search for duplicate workers

In [3]:
# One row per game code, one column per entry of data['humanModel']; the
# column labelled 0 is discarded.
raw_df_state_to_action = (
    pd.DataFrame.from_dict(data['humanModel'], orient='index')
    .drop(0, axis=1)
)

# One row per game code holding the survey answers and final scores.
participants_df = pd.DataFrame.from_dict(data['all-games'], orient='index')

# A dedicated name is used here: reassigning `path` would overwrite the JSON
# path that the loading cell reads, breaking a re-run of that cell.
workers_dir = 'data/workers' # use your path
# glob returns files in arbitrary order; sorting makes the keep='first'
# de-duplication below reproducible across machines and runs.
all_files = sorted(glob.glob(workers_dir + "/*.csv"))
if not all_files:
    # pd.concat([]) would fail with a much less helpful message.
    raise FileNotFoundError(f"no worker CSV files found in {workers_dir!r}")

li = []
for filename in all_files:
    fromOne = pd.read_csv(filename, index_col=None, header=0)
    fromOne['filename'] = filename  # remember which batch file the row came from
    li.append(fromOne)

workers = pd.concat(li, axis=0, ignore_index=True)
workers = workers.set_index('Answer.surveycode')
# Turn each survey code into a participants_df index label: strip its last
# three characters and prepend '-'.
workers.index = workers.index.map(lambda code: '-' + code[:-3])
# print(workers.index[workers.index.duplicated()])
workers = workers[~workers.index.duplicated(keep='first')]

# Index-aligned assignment: games without a matching survey code get NaN and
# are removed by the dropna that follows.
participants_df['WorkerId'] = workers['WorkerId']
participants_df = participants_df.dropna(subset = ['WorkerId'])
participants_df

Unnamed: 0,behavior,additional_comments,birth_year,collaborative_value,computer_score,computer_value,education,gender,human_score,predictable_value,selfishly_value,wisely_value,WorkerId
-N2gl9wHXMJoXRg5OyiP,follow_stag,,1980,4,31.0,7,BA,male,36.0,6,1,4,A6Y7SZU9L301W
-N2gnbwu9gs6PgpScg-B,follow_stag,,1994,7,19.0,7,BA,female,36.0,4,1,4,A3RHJEMZ4EGY2U
-N2gpqCiV6iClR2iJfDD,follow_stag,good,1975,5,8.0,4,Master's degree,male,13.0,6,4,5,APIUR4WD44BQ1
-N2gzRD6MoUAWRj3PaOV,follow_stag,no,1988,5,7.0,1,BA,male,8.0,6,6,5,AJ9IY4IHOGB8
-N2h6FFFrGGqFAUIRi6W,follow_stag,good,1988,6,0.0,6,BA,female,6.0,6,6,6,A2JPO11US5Q4H4


In [4]:
# Show which columns participants_df currently has.
participants_df.columns

Index(['behavior', 'additional_comments', 'birth_year', 'collaborative_value',
       'computer_score', 'computer_value', 'education', 'gender',
       'human_score', 'predictable_value', 'selfishly_value', 'wisely_value',
       'WorkerId'],
      dtype='object')

In [5]:
# Mark every row whose WorkerId already appeared in an earlier row; the first
# occurrence of each worker is kept.
is_repeat_worker = participants_df.duplicated('WorkerId', keep='first')

# Game codes of the repeated submissions (reused later to prune the game data).
index_to_drop = participants_df.loc[is_repeat_worker].index
print(len(index_to_drop))

participants_df = participants_df.loc[~is_repeat_worker]

0


Number of people who answered the survey only once:

In [6]:
# Number of rows left in participants_df after the duplicate-WorkerId filter.
len(participants_df)

5

In [7]:
# participants_df[['computer_score', 'human_score', 'behavior']].to_excel("agents_score.xlsx")

In [8]:
# Last few participants who left a non-empty free-text comment.
# Both conditions are combined into one mask over the full frame: chaining two
# boolean selections applies a full-length mask to an already filtered frame,
# which makes pandas reindex the mask and emit a warning.
has_comment = (
    participants_df['additional_comments'].notna()
    & (participants_df['additional_comments'] != "")
)
participants_df.loc[has_comment, ['additional_comments', 'behavior']].tail()

Unnamed: 0,additional_comments,behavior
-N2gpqCiV6iClR2iJfDD,good,follow_stag
-N2gzRD6MoUAWRj3PaOV,no,follow_stag
-N2h6FFFrGGqFAUIRi6W,good,follow_stag


In [9]:
# Columns that hold scores / survey ratings and must be numeric for the
# aggregations below.
numeric_columns = [
    'human_score',
    'computer_score',
    'collaborative_value',
    'predictable_value',
    'selfishly_value',
    'wisely_value',
    'computer_value',
]

# Values that cannot be parsed become NaN (errors='coerce').
# assign() builds a new frame with freshly typed columns.  The previous
# `.loc[:, col] = ...` form writes into the existing column, which on recent
# pandas can leave the dtype as object, and it wrote into a frame obtained by
# filtering (SettingWithCopy territory).
participants_df = participants_df.assign(
    **{col: pd.to_numeric(participants_df[col], errors='coerce') for col in numeric_columns}
)

In [10]:
# Per-behavior average of the two scores and the five survey ratings.
score_and_rating_columns = [
    'computer_score',
    'human_score',
    'collaborative_value',
    'predictable_value',
    'selfishly_value',
    'wisely_value',
    'computer_value',
]
participants_df.groupby('behavior')[score_and_rating_columns].mean()

Unnamed: 0_level_0,computer_score,human_score,collaborative_value,predictable_value,selfishly_value,wisely_value,computer_value
behavior,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
follow_stag,13.0,19.8,5.4,5.6,3.6,4.8,5.0


In [11]:
# Number of participants per value of the 'behavior' column.
participants_df['behavior'].value_counts()

follow_stag    5
Name: behavior, dtype: int64

In [12]:
# Number of participants per value of the 'gender' column.
participants_df['gender'].value_counts()

male      3
female    2
Name: gender, dtype: int64

In [13]:
# Number of participants per value of the 'education' column.
participants_df['education'].value_counts()

BA                 4
Master's degree    1
Name: education, dtype: int64

# View some data

In [14]:
# remove from the dataset :
raw_df_state_to_action = raw_df_state_to_action.drop(index_to_drop)
raw_df_state_to_action.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, -N2gl9wHXMJoXRg5OyiP to -N2h6FFFrGGqFAUIRi6W
Columns: 60 entries, 1 to 60
dtypes: object(60)
memory usage: 3.0+ KB


In [15]:
def flatten(t):
    """Return one flat list holding the items of every sub-iterable of ``t``.

    Only a single nesting level is removed; the items keep their order.
    """
    flat = []
    for sublist in t:
        flat.extend(sublist)
    return flat

def extractAction(cell):
    """Return the human action stored in one step record as an ``int``.

    Parameters
    ----------
    cell : dict or None
        A step record with a ``'humanAction'`` entry, or a missing value for
        steps that were never played.

    Returns
    -------
    int or float
        ``int(cell['humanAction'])``, or ``np.nan`` when the step is missing.
    """
    # Missing steps may show up as None or, depending on how pandas padded the
    # frame, as a float NaN; both are reported as NaN instead of crashing.
    if cell is None or (isinstance(cell, float) and np.isnan(cell)):
        return np.nan
    return int(cell['humanAction'])

def extractState(cell):
    """Turn one step record into ``[computer, human, stag, shrubs]`` positions.

    Parameters
    ----------
    cell : dict or None
        A step record whose ``'stateCoords'`` entry is a flat sequence of at
        least 12 values, read as six consecutive pairs: computer, human, stag
        and three shrubs.  A missing value marks a step that was never played.

    Returns
    -------
    list or None
        ``[computer_pos, human_pos, stag_pos, shrubs_pos]`` where the first
        three are 2-item lists and ``shrubs_pos`` is one flat 6-item list.
        Every pair is emitted with its two components swapped relative to
        ``stateCoords``.  ``None`` is returned for a missing step.
    """
    # Missing steps may be None or a float NaN; neither can be subscripted.
    if cell is None or (isinstance(cell, float) and np.isnan(cell)):
        return None

    coords = cell['stateCoords']
    computer_pos = [coords[1], coords[0]]
    human_pos = [coords[3], coords[2]]
    stag_pos = [coords[5], coords[4]]

    # Three shrub pairs (offsets 6-11), each swapped, laid out as one flat list.
    shrubs_pos = [coords[j] for i in range(6, 12, 2) for j in (i + 1, i)]

    return [computer_pos, human_pos, stag_pos, shrubs_pos]
    
# Build two frames with the same index and columns as raw_df_state_to_action:
# state_df holds the extracted positions of every step, action_df the human
# action taken at that step.
state_df = pd.DataFrame(
    {col: raw_df_state_to_action[col].apply(extractState) for col in raw_df_state_to_action}
)
action_df = pd.DataFrame(
    {col: raw_df_state_to_action[col].apply(extractAction) for col in raw_df_state_to_action}
)

In [16]:
# def extractRow(cell):
#     if cell == None:
#         return cell
#     computer_pos = [cell['stateCoords'][0], cell['stateCoords'][1]]
#     human_pos = [cell['stateCoords'][2], cell['stateCoords'][3]]
#     stag_pos = [cell['stateCoords'][4], cell['stateCoords'][5]]

#     shrubs_pos = [[cell['stateCoords'][i], cell['stateCoords'][i+1]] for i in range(6, 12, 2)]
    
#     human_action = int(cell['humanAction'])
#     computer_action = int(cell['computerAction'])
#     stag_action = int(cell['stagAction'])
#     return [computer_pos, human_pos, stag_pos, shrubs_pos, human_action, computer_action, stag_action]
    
# row_df = pd.DataFrame(columns=raw_df_state_to_action.columns)
# for col in raw_df_state_to_action:
#     row_df[col] = raw_df_state_to_action[col].apply(extractRow)

In [17]:
# Peek at the extracted states of the first game only (one row, all steps).
state_df.head(1)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,51,52,53,54,55,56,57,58,59,60
-N2gl9wHXMJoXRg5OyiP,"[[4, 0], [0, 0], [2, 2], [1, 4, 3, 0, 1, 2]]","[[4, 1], [0, 1], [3, 2], [1, 4, 3, 0, 1, 2]]","[[4, 2], [1, 1], [3, 1], [1, 4, 3, 0, 1, 2]]","[[4, 1], [1, 2], [4, 1], [1, 4, 3, 0, 2, 2]]","[[4, 0], [2, 2], [4, 1], [1, 4, 3, 0, 3, 3]]","[[4, 1], [3, 2], [4, 0], [1, 4, 3, 0, 3, 3]]","[[4, 0], [3, 1], [4, 1], [1, 4, 3, 0, 3, 3]]","[[4, 1], [4, 1], [2, 3], [1, 4, 3, 0, 3, 3]]","[[4, 2], [3, 1], [2, 3], [1, 4, 3, 0, 3, 3]]","[[4, 3], [2, 1], [2, 3], [1, 4, 3, 0, 3, 3]]",...,"[[4, 0], [3, 1], [3, 0], [2, 0, 2, 1, 4, 4]]","[[3, 0], [3, 0], [3, 1], [2, 0, 2, 1, 4, 4]]","[[3, 1], [3, 1], [3, 0], [2, 0, 2, 1, 4, 4]]","[[3, 0], [2, 1], [3, 1], [2, 0, 1, 4, 4, 4]]","[[3, 1], [3, 1], [2, 1], [2, 0, 1, 4, 4, 4]]","[[2, 1], [2, 1], [3, 1], [2, 0, 1, 4, 4, 4]]","[[3, 1], [2, 0], [3, 1], [3, 2, 1, 4, 4, 4]]","[[4, 1], [3, 0], [3, 1], [3, 2, 1, 4, 4, 4]]","[[3, 1], [3, 1], [1, 0], [3, 2, 1, 4, 4, 4]]","[[3, 0], [2, 1], [1, 1], [3, 2, 1, 4, 4, 4]]"


# view one game

In [18]:
from sys import stdout

# Channel indices of the per-cell occupancy flags used by print_matrix.
A_AGENT = 0  # base
B_AGENT = 1

STAG = 2  # hunt
PLANT = 3

def print_matrix(obs, grid_size):
    """Draw one game state as a framed text grid on ``stdout``.

    Parameters
    ----------
    obs : sequence of int
        Flat sequence of index pairs: ``obs[0:2]`` is agent A, ``obs[2:4]``
        agent B, ``obs[4:6]`` the stag and every following pair one plant.
        The first value of a pair selects the printed row, the second the
        printed column.
    grid_size : sequence of int
        ``(rows, columns)`` of the board.

    Notes
    -----
    Each board cell is printed as four characters - ``A``, ``B``, ``S`` and
    ``P``, or a blank where that entity is absent - followed by ``·``.
    The frame width is derived from the number of columns, so boards that are
    not 5 columns wide are framed correctly as well.
    """
    n_rows, n_cols = grid_size[0], grid_size[1]
    matrix = np.zeros((n_rows, n_cols, 4), dtype=bool)

    matrix[obs[0], obs[1], A_AGENT] = True
    matrix[obs[2], obs[3], B_AGENT] = True
    matrix[obs[4], obs[5], STAG] = True
    for i in range(6, len(obs), 2):
        matrix[obs[i], obs[i + 1], PLANT] = True

    glyphs = {A_AGENT: "A", B_AGENT: "B", STAG: "S", PLANT: "P"}
    channels = sorted(glyphs)  # print the flags in channel order

    # Width between the two frame corners: " ·" + 5 characters per cell + " ".
    border = "═" * (5 * n_cols + 3)

    stdout.write("╔" + border + "╗\n")
    for row in matrix:
        cells = "".join(
            "".join(glyphs[ch] if cell[ch] else " " for ch in channels) + "·"
            for cell in row
        )
        stdout.write("║ ·" + cells + " ║\n")
    stdout.write("╚" + border + "╝\n\r")
    stdout.flush()

In [19]:
def render(row, grid_size=(5, 5)):
    """Print every recorded step of one game: the board, then the human action.

    Parameters
    ----------
    row : hashable
        Index label of the game in ``state_df`` / ``action_df``.
    grid_size : sequence of int, optional
        ``(rows, columns)`` handed to ``print_matrix``.  Defaults to the 5x5
        board that was previously hard-coded.

    The number of steps is the count of non-NaN actions in the game's row;
    steps are looked up by the column labels ``1 .. count``.
    """
    states = state_df.loc[row, :]
    actions = action_df.loc[row, :]
    n_steps = actions.count()
    for step in range(1, n_steps + 1):
        print_matrix(flatten(states.at[step]), grid_size)
        print(actions.at[step])
    

In [20]:
# Replay a single game, selected by its index label.
render('-N2gl9wHXMJoXRg5OyiP')

╔════════════════════════════╗
║ · B  ·    ·    ·    ·    · ║
║ ·    ·    ·   P·    ·   P· ║
║ ·    ·    ·  S ·    ·    · ║
║ ·   P·    ·    ·    ·    · ║
║ ·A   ·    ·    ·    ·    · ║
╚════════════════════════════╝
40.0
╔════════════════════════════╗
║ ·    · B  ·    ·    ·    · ║
║ ·    ·    ·   P·    ·   P· ║
║ ·    ·    ·    ·    ·    · ║
║ ·   P·    ·  S ·    ·    · ║
║ ·    ·A   ·    ·    ·    · ║
╚════════════════════════════╝
39.0
╔════════════════════════════╗
║ ·    ·    ·    ·    ·    · ║
║ ·    · B  ·   P·    ·   P· ║
║ ·    ·    ·    ·    ·    · ║
║ ·   P·  S ·    ·    ·    · ║
║ ·    ·    ·A   ·    ·    · ║
╚════════════════════════════╝
40.0
╔════════════════════════════╗
║ ·    ·    ·    ·    ·    · ║
║ ·    ·    · B  ·    ·   P· ║
║ ·    ·    ·   P·    ·    · ║
║ ·   P·    ·    ·    ·    · ║
║ ·    ·A S ·    ·    ·    · ║
╚════════════════════════════╝
39.0
╔════════════════════════════╗
║ ·    ·    ·    ·    ·    · ║
║ ·    ·    ·    ·    ·   P· ║
║ ·    ·    · B  · 

In [21]:
# Keep only the games that have more than 5 recorded (non-NaN) actions.
action_df[(action_df.count(axis=1) > 5)]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,51,52,53,54,55,56,57,58,59,60
-N2gl9wHXMJoXRg5OyiP,40,39,40,39,39,38,39,37,37,40,...,38.0,40.0,37.0,39.0,37.0,38.0,39.0,40.0,37.0,37.0
-N2gnbwu9gs6PgpScg-B,40,40,40,39,40,39,39,39,38,38,...,40.0,39.0,40.0,39.0,37.0,37.0,37.0,40.0,38.0,39.0
-N2go9oI810XlH5aM5rn,39,40,40,38,39,39,39,40,37,40,...,,,,,,,,,,
-N2gpqCiV6iClR2iJfDD,39,37,40,40,40,40,39,39,39,38,...,40.0,40.0,40.0,39.0,39.0,39.0,37.0,38.0,38.0,38.0
-N2gzRD6MoUAWRj3PaOV,40,40,40,40,39,39,39,39,37,39,...,37.0,39.0,39.0,37.0,37.0,37.0,40.0,40.0,40.0,40.0
-N2h6FFFrGGqFAUIRi6W,39,40,37,39,40,40,37,39,39,39,...,38.0,37.0,39.0,39.0,37.0,39.0,38.0,38.0,40.0,40.0


In [22]:
# Append an all-NaN column one past the last step so that "is there a next
# action?" can be asked for every real step.
# The label is derived from state_df (which never gains this column), so
# re-running the cell rewrites the same column instead of adding another one.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
action_df[len(state_df.columns) + 1] = np.nan
action_df.tail()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,52,53,54,55,56,57,58,59,60,61
-N2gnbwu9gs6PgpScg-B,40,40,40,39,40,39,39,39,38,38,...,39.0,40.0,39.0,37.0,37.0,37.0,40.0,38.0,39.0,
-N2go9oI810XlH5aM5rn,39,40,40,38,39,39,39,40,37,40,...,,,,,,,,,,
-N2gpqCiV6iClR2iJfDD,39,37,40,40,40,40,39,39,39,38,...,40.0,40.0,39.0,39.0,39.0,37.0,38.0,38.0,38.0,
-N2gzRD6MoUAWRj3PaOV,40,40,40,40,39,39,39,39,37,39,...,39.0,39.0,37.0,37.0,37.0,40.0,40.0,40.0,40.0,
-N2h6FFFrGGqFAUIRi6W,39,40,37,39,40,40,37,39,39,39,...,37.0,39.0,39.0,37.0,39.0,38.0,38.0,40.0,40.0,


# Make Dataset

In [23]:
# NOTE: despite its name, dir_path is the path of the output CSV *file*.
dir_path = './data/humanModel/dataset_v0.csv'
# Make sure the target folder exists; np.savetxt does not create it.
os.makedirs(os.path.dirname(dir_path), exist_ok=True)
if os.path.exists(dir_path):
    os.remove(dir_path)

# First entry is the header; every following entry is one (game, step) sample.
rows = [
["code", "step", "computer_x", "computer_y", "human_x", "human_y", "stag_x", "stag_y", "shrub1_x", "shrub1_y", "shrub2_x", "shrub2_y", "shrub3_x", "shrub3_y", "human_action"] #, "computer_action", "stag_action"]
]
counter = 0
for (idxRow, s1), (_, s2) in zip(state_df.iterrows(), action_df.iterrows()):
    # .items() replaces .iteritems(), which was removed in pandas 2.0.
    for (idxCol, state), (_, action) in zip(s1.items(), s2.items()):
        # A step is exported only when the game has a following action.  The
        # last recorded step of a game is therefore skipped (it is treated as
        # the end state), and so is everything after it.
        # .get() falls back to NaN when there is no next column, so this cell
        # does not rely on an extra all-NaN column having been appended.
        next_action = s2.get(idxCol + 1, np.nan)
        if pd.isna(next_action):
            break
        row = flatten([[idxRow, idxCol], flatten(state), [int(action)]])
        if counter % 50 == 0:
            # show every 50th exported row as a progress sample
            print(row)
        counter += 1
        rows.append(row)
print(f'{counter} rows have been saved')
np.savetxt(dir_path, 
           rows,
           delimiter =", ", 
           fmt ='% s')

['-N2gl9wHXMJoXRg5OyiP', 1, 4, 0, 0, 0, 2, 2, 1, 4, 3, 0, 1, 2, 40]
['-N2gl9wHXMJoXRg5OyiP', 51, 4, 0, 3, 1, 3, 0, 2, 0, 2, 1, 4, 4, 38]
['-N2gnbwu9gs6PgpScg-B', 42, 3, 4, 0, 3, 3, 3, 0, 4, 4, 1, 3, 3, 40]
['-N2go9oI810XlH5aM5rn', 33, 2, 3, 4, 2, 2, 3, 4, 0, 4, 1, 0, 3, 38]
['-N2gpqCiV6iClR2iJfDD', 35, 1, 1, 4, 0, 2, 1, 1, 4, 0, 4, 0, 1, 40]
['-N2gzRD6MoUAWRj3PaOV', 26, 2, 1, 1, 4, 2, 0, 1, 3, 4, 0, 0, 4, 38]
['-N2h6FFFrGGqFAUIRi6W', 17, 1, 2, 3, 3, 1, 1, 3, 1, 4, 4, 3, 0, 39]
343 rows have been saved
