In [1]:
import json
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil

from PIL import Image

map_dir = 'map 3'
path = './data/'+map_dir+'/data-of-baselines.json'

In [2]:
#load data from the json file
with open(path) as train_file:
    data = json.load(train_file)

# Survay results:

upload Id workers and search for duplicate workers

In [3]:
raw_df_state_to_action = pd.DataFrame.from_dict(data['humanModel'], orient='index')
raw_df_state_to_action = raw_df_state_to_action.drop(0, axis=1)

participants_df = pd.DataFrame.from_dict(data['all-games'], orient='index')

path = 'data/'+map_dir+'/workers' # use your path
all_files = glob.glob(path + "/*.csv")

li = []
for filename in all_files:
    fromOne = pd.read_csv(filename, index_col=None, header=0)
    fromOne['filename'] = filename
    li.append(fromOne)

workers = pd.concat(li, axis=0, ignore_index=True)
workers = workers.set_index('Answer.surveycode')
workers.index = workers.index.map(lambda code: '-' + code[:-3])
# print(workers.index[workers.index.duplicated()])
workers = workers[~workers.index.duplicated(keep='first')]
participants_df['WorkerId'] = workers['WorkerId']
participants_df = participants_df.dropna(subset = ['WorkerId'])
participants_df

Unnamed: 0,behavior,computer_score,human_score,additional_comments,birth_year,collaborative_value,computer_value,education,gender,predictable_value,selfishly_value,wisely_value,WorkerId
-Mr4RiLyH06mIlLkOkTw,closest,0.850,0.850,,1979,2,4,High School,male,4,2,4,A1VSHM4NLZ705D
-Mr4RjUd6Ei8tNw1x3Vy,selfish,-0.100,-0.940,,1979,1,7,BA,female,7,7,1,A32QWM7BWCSPTS
-Mr4RjX1GClt3WO3Fz_e,TSP,1.100,1.150,,1992,6,4,BA,male,6,2,7,A2GV9WSNSPX53
-Mr4RkayW6Mcf5zf4X9f,farthest,0.380,0.500,,1984,4,5,Master's degree,female,3,6,3,ADQHGQF65JJ08
-Mr4Rl1agkMK0vtM-V7w,farthest,0.300,0.500,,1981,1,7,High School,male,6,7,1,A3L8LSM7V7KX3T
...,...,...,...,...,...,...,...,...,...,...,...,...,...
-Mr5A5eDh_hZ_5aAChtI,random,0.590,0.200,very interesting study..,1995,5,5,BA,male,6,4,6,A3N0DSBAZJ54JE
-Mr5BoE45Gn9AJJXjirU,farthest,0.500,0.650,none,1988,6,6,BA,female,5,2,4,A39VVWV1GHLMFD
-Mr5TMSq5ynTUJE5agP6,farthest,0.500,0.350,yes very nice. thank you.,1974,3,2,BA,female,3,4,3,A1DSLUWW0J639Y
-Mr5gp9WxLYDKJXvYNWj,farthest,0.350,0.550,good interesting game,1991,5,7,Master's degree,male,6,3,7,A1MTBGU8VIHKZ2


In [4]:
participants_df.columns

Index(['behavior', 'computer_score', 'human_score', 'additional_comments',
       'birth_year', 'collaborative_value', 'computer_value', 'education',
       'gender', 'predictable_value', 'selfishly_value', 'wisely_value',
       'WorkerId'],
      dtype='object')

In [5]:
index_to_drop = participants_df[participants_df.duplicated('WorkerId', keep='first')].index
print(len(index_to_drop))

# participants_df = participants_df[~participants_df.duplicated('WorkerId', keep='first')]

37


number of people that answer the survay only once:

In [6]:
len(participants_df)

198

In [7]:
participants_df.loc[: ,'human_score'] = pd.to_numeric(participants_df['human_score'], errors='coerce')
participants_df.loc[: ,'computer_score'] = pd.to_numeric(participants_df['computer_score'], errors='coerce')
participants_df.loc[: ,'collaborative_value'] = pd.to_numeric(participants_df['collaborative_value'], errors='coerce')
participants_df.loc[: ,'predictable_value'] = pd.to_numeric(participants_df['predictable_value'], errors='coerce')
participants_df.loc[: ,'selfishly_value'] = pd.to_numeric(participants_df['selfishly_value'], errors='coerce')
participants_df.loc[: ,'wisely_value'] = pd.to_numeric(participants_df['wisely_value'], errors='coerce')
participants_df.loc[: ,'computer_value'] = pd.to_numeric(participants_df['computer_value'], errors='coerce')

# View some data

In [8]:
# remove from the dataset :
# raw_df_state_to_action = raw_df_state_to_action.drop(index_to_drop)
raw_df_state_to_action.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Index: 208 entries, -Mr4RdOzYugRyEw17_M9 to -Mr6Fw-yi6Ld2xKNzu03
Columns: 280 entries, 1 to 280
dtypes: object(280)
memory usage: 456.6+ KB


In [9]:
def extractAction(cell):
    if cell != None:
        return int(cell['action'])
    return np.nan

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))

def extractState(cell):
    if cell == None:
        return cell
    board = np.array(cell['state'][0]).astype(float)
    human_trace = np.array(cell['state'][1]).astype(float)
    computer_trace = np.array(cell['state'][2]).astype(float)
    human_awards = np.array(cell['state'][3]).astype(float)
    computer_awards = np.array(cell['state'][4]).astype(float)
    all_awards = np.array(cell['state'][5]).astype(float)
    
    r = human_awards/2 + human_trace + all_awards
    g = board/3 + all_awards
    b = computer_awards/2 + computer_trace + all_awards
    rgb = np.dstack((r,g,b))
    return NormalizeData(rgb)
    

state_df = pd.DataFrame(columns=raw_df_state_to_action.columns)
action_df = pd.DataFrame(columns=raw_df_state_to_action.columns)
for col in raw_df_state_to_action:
    state_df[col] = raw_df_state_to_action[col].apply(extractState)
    action_df[col] = raw_df_state_to_action[col].apply(extractAction)

# view one game

In [10]:
def rowToImage(row):
    fig = plt.figure(figsize=(40, 20))
    row_s = state_df.loc[row, :]
    row_a = action_df.loc[row, :]
    cols_i = row_a.count()
    for i in range(1, cols_i+1):
        rows = 1
        columns = cols_i
        state = row_s.at[i]
        fig.add_subplot(rows, columns, i)
        plt.imshow(state)
        plt.axis('off')
        action = row_a.at[i]
        plt.title(str(i) + " action: " + str(action), fontsize=15)

In [11]:
state_df.head(1)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,271,272,273,274,275,276,277,278,279,280
-Mr4RdOzYugRyEw17_M9,"[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...","[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...",...,,,,,,,,,,


In [12]:
state_df.shape

(208, 280)

In [13]:
action_df[len(action_df.columns) + 1] = np.NaN
action_df.tail()

  """Entry point for launching an IPython kernel.


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,272,273,274,275,276,277,278,279,280,281
-Mr5A5eDh_hZ_5aAChtI,39,39,40,40,37,39,37,37,37,37,...,,,,,,,,,,
-Mr5BoE45Gn9AJJXjirU,38,37,37,40,40,40,40,40,39,39,...,,,,,,,,,,
-Mr5TMSq5ynTUJE5agP6,38,38,40,38,38,39,37,37,40,40,...,,,,,,,,,,
-Mr5gp9WxLYDKJXvYNWj,39,37,38,38,37,40,37,40,40,40,...,,,,,,,,,,
-Mr6Fw-yi6Ld2xKNzu03,32,32,32,32,32,32,32,32,32,32,...,,,,,,,,,,


# Make Dataset

In [14]:
dir_path = './data/humanModel_v1_dataset'
if os.path.exists(dir_path):
    shutil.rmtree(dir_path)

os.mkdir(dir_path)
# os.mkdir(dir_path+'/action_target')
# os.mkdir(dir_path+'/rate_target')

for action in ['32','37','38','39','40']:
    os.mkdir(dir_path+'/'+action)

import csv
# open the file in the write mode
f = open(dir_path+'/target.csv', 'w')
# create the csv writer
writer = csv.writer(f)

counter = 0
in_p_df = True
for (idxRow, s1), (_, s2) in zip(state_df.iterrows(), action_df.iterrows()):
    for (idxCol, state), (_, action) in zip(s1.iteritems(), s2.iteritems()):
        # check if it is not the last state
        # the last state not enter our model, since it is an end state that not contains any dirts and not preform an action
        if not np.isnan(action_df.loc[idxRow, idxCol+1]):
            im = Image.fromarray((state * 255).astype(np.uint8))
            try:
                in_p_df = True
                value_vector_rate = participants_df.loc[idxRow, ['collaborative_value', 'computer_value', 'predictable_value', 'selfishly_value', 'wisely_value']]
            except KeyError:
                in_p_df = False
                continue
            path = f'data/humanModel_v1_dataset/{int(action)}/{idxRow}_{counter}.png'
            if counter % 500 == 0:
                # print every 500 saved images
                print(f'{idxRow}_{idxCol}.png saved! at action {action}')
            counter += 1
            im.save(path)
            # write a vector to the csv file
            writer.writerow(value_vector_rate)

        else:
            break
        # print (idxCol, idxRow)
print(f'{counter} images have been saved')
# close the file
f.close()

-Mr4RiLyH06mIlLkOkTw_1.png saved! at action 38.0
-Mr4Rw_J-OsEeVMOX2rc_20.png saved! at action 39.0
-Mr4SdCNCuYdvb9_MpJ6_34.png saved! at action 32.0
-Mr4TQDA7yqwKG2GDHPl_3.png saved! at action 37.0
-Mr4UNdvlMU8SvAcB4jY_7.png saved! at action 39.0
-Mr4VSwtCpefFtvQrTEX_13.png saved! at action 38.0
-Mr4XJkvwu_g40rP9Eo8_63.png saved! at action 37.0
-Mr4ZLOkU0-yjZv7Ue6e_25.png saved! at action 38.0
-Mr4d6kLpnPruwzbNjeh_9.png saved! at action 39.0
-Mr4hkgWqo_Aim6hCwar_18.png saved! at action 32.0
-Mr4pSsH2YJq06B8CIKz_21.png saved! at action 37.0
5301 images have been saved


In [15]:
# rowToImage('-Mr4Yrtg2Rk8u83xa6yl')

In [16]:
# index = "-MqplpytsnX0zCv_KW9i"
# col = 3
# plt.imshow(state_df.loc[index, col])
# title = "id: " + index + ", col: " + str(col) + ", action: " + str(action_df.loc[index, col])
# plt.title(title)
# plt.show()

In [17]:
participants_df['collaborative_value']

-Mr4RiLyH06mIlLkOkTw    2
-Mr4RjUd6Ei8tNw1x3Vy    1
-Mr4RjX1GClt3WO3Fz_e    6
-Mr4RkayW6Mcf5zf4X9f    4
-Mr4Rl1agkMK0vtM-V7w    1
                       ..
-Mr5A5eDh_hZ_5aAChtI    5
-Mr5BoE45Gn9AJJXjirU    6
-Mr5TMSq5ynTUJE5agP6    3
-Mr5gp9WxLYDKJXvYNWj    5
-Mr6Fw-yi6Ld2xKNzu03    4
Name: collaborative_value, Length: 198, dtype: int64

In [18]:
participants_df['selfishly_value']

-Mr4RiLyH06mIlLkOkTw    2
-Mr4RjUd6Ei8tNw1x3Vy    7
-Mr4RjX1GClt3WO3Fz_e    2
-Mr4RkayW6Mcf5zf4X9f    6
-Mr4Rl1agkMK0vtM-V7w    7
                       ..
-Mr5A5eDh_hZ_5aAChtI    4
-Mr5BoE45Gn9AJJXjirU    2
-Mr5TMSq5ynTUJE5agP6    4
-Mr5gp9WxLYDKJXvYNWj    3
-Mr6Fw-yi6Ld2xKNzu03    1
Name: selfishly_value, Length: 198, dtype: int64