In [2]:
import random
import pandas as pd
import json
import numpy as np

In [2]:
from overcooked_ai_py.utils import save_pickle
from human_aware_rl.utils import set_global_seed
from human_aware_rl.human.process_dataframes import *
from human_aware_rl.human.process_human_trials import *
from human_aware_rl.static import *
from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedState, SoupState, ObjectState
set_global_seed(1884)

In [4]:
# All trials
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
all_trials = pd.read_pickle("./data/2019_hh_trials_all.pickle")

# Remove the columns that are not used by the rest of this notebook (single drop call).
unused_columns = [
    "time_elapsed",
    "player_0_is_human",
    "player_1_is_human",
    "reward",
    "score",
    "player_0_id",
    "player_1_id",
    "cur_gameloop",
    "cur_gameloop_total",
    "score_total",
    "button_press",
    "button_press_total",
    "timesteps_since_interact",
    "button_presses_per_timstep",
]
all_trials = all_trials.drop(columns=unused_columns)

# Add a "player_idx" column at position 2, initialised to 0 for every frame.
all_trials.insert(2, "player_idx", [0] * len(all_trials), True)

# Show the first frame as a sanity check of the remaining columns.
print(all_trials.iloc[0])

Unnamed: 0                                                      0
joint_action                                     [[0, 0], [0, 0]]
player_idx                                                      0
layout                       ['XXPXX', 'O  2O', 'X1  X', 'XDXSX']
layout_name                                          cramped_room
state           {"players": [{"position": [1, 2], "orientation...
time_left                                                   180.0
trial_id                                                        0
Name: 0, dtype: object


In [7]:
# Filtered Trials:
# A player's data must differ between consecutive frames (different position/orientation/held object).
# This effectively removes wait actions, attempts to move into an object without turning, and interactions with nothing.
# The last frame of a trajectory is not considered, as no future frame exists for it.
def filter_trials(all_trials):
    """Keep only the (frame, player) pairs whose action changed that player's data.

    For every frame and for each of the two players, the player's entry of
    ``joint_action`` is decoded into a label:

        0 = 'INTERACT', 1 = first component is 1, 2 = first component is -1,
        3 = second component is 1, 4 = second component is -1, 5 = no action.

    A (frame, player) pair is kept when the label is not 5, the frame's
    ``time_left`` is not -1, a following frame exists, and the player's entry in
    the following frame's ``state`` differs from the current one.

    Args:
        all_trials: DataFrame with at least the columns ``joint_action`` (string
            holding a two-element list), ``state`` (JSON string with a
            ``players`` list), ``time_left`` and ``player_idx``.

    Returns:
        DataFrame with the columns of ``all_trials`` containing one row per kept
        pair, with ``player_idx`` set to the acting player. Original index labels
        are preserved, so a frame kept for both players appears twice.
    """
    # Row frames are collected in a list and concatenated once at the end;
    # concatenating inside the loop copies the whole result on every kept row.
    kept_frames = [pd.DataFrame(columns=all_trials.columns)]

    L = len(all_trials)
    for i in range(L):
        row = all_trials.iloc[i]

        # A frame without a successor cannot be compared with its future state.
        # The i + 1 bound check also covers data whose very last row is not
        # marked with time_left == -1 (previously an IndexError).
        if row['time_left'] == -1 or i + 1 >= L:
            continue

        # Actions by both players. The column stores single-quoted text, which
        # is converted to double quotes so that it parses as JSON.
        ja = row['joint_action']
        ja = ja.replace("\'", "\"")
        y_act = json.loads(ja)

        # Both states are parsed lazily and at most once per frame.
        state = None
        future_state = None

        # Consider both player perspectives
        for player_idx in [0, 1]:
            y_row = y_act[player_idx]
            if y_row == 'INTERACT':
                y = 0
            elif y_row[0] == 1:
                y = 1
            elif y_row[0] == -1:
                y = 2
            elif y_row[1] == 1:
                y = 3
            elif y_row[1] == -1:
                y = 4
            else:  # No action
                y = 5

            if y == 5:  # Filters out wait actions
                continue

            if state is None:
                state = json.loads(row['state'])
                future_state = json.loads(all_trials.iloc[i + 1]['state'])

            player = state['players'][player_idx]
            future_player = future_state['players'][player_idx]

            # Keep the frame only if the action had a visible effect on the player.
            if future_player != player:
                temp_row = row.copy()
                temp_row['player_idx'] = player_idx
                kept_frames.append(temp_row.to_frame().T)

    return pd.concat(kept_frames)


In [5]:
# Comment out steps as needed to avoid repeating the (slow) processing.
# Run the filter and cache its result on disk.
clean_trials = filter_trials(all_trials)
clean_trials.to_pickle("./data/2019_hh_trials_action.pickle")

# Alternative: reload the cached result instead of recomputing it.
# clean_trials = pd.read_pickle("./data/2019_hh_trials_action.pickle")

# Note: clean_trials holds one row per (frame, player) perspective while all_trials holds one row per frame.
# Thus roughly 66k of the 206k possible (frame, player) pairs are useful.
print(len(clean_trials), len(all_trials))


65773 103141


In [21]:
#Statistics
# 5 maps
# 5 actions (excluding wait actions)
#Returns stats on the distribution of maps and actions in a group of frames
def balance_stats(trials, print_stats=True):
    """Count how often each action occurs on each map in a group of frames.

    The action of the player given by ``player_idx`` is decoded from
    ``joint_action`` into a row index:

        0 = 'INTERACT', 1 = first component is 1, 2 = first component is -1,
        3 = second component is 1, 4 = second component is -1.

    Column indices are assigned to layout names in order of first appearance,
    so the column order depends on the order of the rows in ``trials``.

    Frames whose action is none of the five above are reported with an "error"
    line and left out of the counts (they used to be added to the INTERACT row).

    Args:
        trials: DataFrame with the columns ``joint_action``, ``player_idx`` and
            ``layout_name``.
        print_stats: when true, print the grid, the layout names, and the
            per-map / per-action percentages relative to ``len(trials)``.

    Returns:
        5x5 float array indexed as ``grid[action, layout]``. More than five
        distinct layout names raise IndexError.
    """
    layout_name_list = []
    grid = np.zeros([5, 5])

    # Iterate over the three needed columns directly; this avoids building a
    # row Series per frame. An empty frame is handled without touching columns.
    if len(trials):
        frames = zip(trials['joint_action'], trials['player_idx'], trials['layout_name'])
    else:
        frames = ()

    for ja, player_idx, layout_name in frames:
        # Action: single-quoted text is converted so that it parses as JSON.
        y_act = json.loads(ja.replace("\'", "\""))

        y = 0
        known_action = True
        y_row = y_act[player_idx]
        if y_row == 'INTERACT':
            y = 0
        elif y_row[0] == 1:
            y = 1
        elif y_row[0] == -1:
            y = 2
        elif y_row[1] == 1:
            y = 3
        elif y_row[1] == -1:
            y = 4
        else:
            print("error", y, y_row)
            known_action = False

        if layout_name not in layout_name_list:
            layout_name_list.append(layout_name)

        # Unknown actions are reported above but must not inflate row 0.
        if known_action:
            name_idx = layout_name_list.index(layout_name)
            grid[y, name_idx] += 1

    if print_stats:
        print(grid)
        print(layout_name_list)
        print("Map: ", np.round(grid.sum(0) * 100 / len(trials), 2))
        print("Act: ", np.round(grid.sum(1) * 100 / len(trials), 2))

    return grid

# Print and return the action-by-map counts of the filtered frames.
balance_stats(clean_trials)

[[2849. 4409. 2426. 2085. 3823.]
 [2130. 1953. 2835. 4537. 1483.]
 [2168. 3163. 3083. 4523. 1983.]
 [1220. 2122. 3111. 2481. 1289.]
 [1984. 2758. 3016. 2750. 1592.]]
['cramped_room', 'asymmetric_advantages', 'coordination_ring', 'random3', 'random0']
Map:  [15.74 21.9  22.   24.9  15.46]
Act:  [23.71 19.67 22.68 15.54 18.4 ]


array([[2849., 4409., 2426., 2085., 3823.],
       [2130., 1953., 2835., 4537., 1483.],
       [2168., 3163., 3083., 4523., 1983.],
       [1220., 2122., 3111., 2481., 1289.],
       [1984., 2758., 3016., 2750., 1592.]])

In [36]:
# Splits the data by trial_id so that no trial is shared between the training/validation/test sets.
# Keeping whole trials together prevents near-identical neighbouring frames from ending up in different sets.
# The balance parameter retries random splits, looking for sets whose map/action distributions have a low standard deviation.

def split_data(dataframes, split_percentages=[0.6,0.2,0.2],balance=True):
    """Split frames into groups so that every trial belongs to exactly one group.

    Trial ids are shuffled with the ``random`` module and dealt out to the
    groups; every frame then follows its trial.

    With ``balance`` enabled, up to 100 random splits are tried. For every split
    the map and action shares of each group are computed with ``balance_stats``.
    The search stops at the first split in which every share of every group is
    at least 0.15 and returns that split; otherwise the split with the lowest
    summed standard deviation of the shares is returned.

    Args:
        dataframes: DataFrame with a ``trial_id`` column (plus the columns
            needed by ``balance_stats`` when ``balance`` is enabled).
        split_percentages: fraction of the trials for each returned group. The
            default list is only read, never modified.
        balance: set to False to return the first random split without any
            balance evaluation.

    Returns:
        List with one DataFrame per entry of ``split_percentages``, in the same
        order. Rows keep their original order and index labels.
    """
    S = len(split_percentages) #Amount of returned groups

    #Get all trial ids, in order of first appearance
    trial_ids = dataframes['trial_id']
    all_ids = trial_ids.drop_duplicates().tolist()
    NO_IDS = len(all_ids)

    #Allocates number of ids for each group
    raw_counts = np.array(split_percentages, dtype=float)*NO_IDS
    rounded_counts = np.round(raw_counts)
    if np.sum(rounded_counts) > NO_IDS:
        split_count_list = np.floor(raw_counts)
    else:
        split_count_list = rounded_counts
    split_count_list = split_count_list.astype(int)

    # Rounding can leave a few trials unassigned, which silently dropped their
    # frames. When the percentages describe a full partition, hand the leftover
    # trials to the groups with the largest fractional remainder. Percentages
    # that sum to less than 1 still leave the remaining trials out on purpose.
    leftover = NO_IDS - int(np.sum(split_count_list))
    if S > 0 and leftover > 0 and np.isclose(np.sum(split_percentages), 1.0):
        by_remainder = np.argsort(-(raw_counts - split_count_list), kind="stable")
        for k in range(leftover):
            split_count_list[by_remainder[k % S]] += 1

    complete = False
    best_group = None
    best_avg_std = 999999
    attempts = 0

    while complete==False and attempts<100:
        attempts += 1

        #Allocates specific ids: map every assigned trial id to its group index
        all_ids_copy = all_ids.copy()
        random.shuffle(all_ids_copy)
        group_of_trial = {}
        for j in range(S):
            for _ in range(int(split_count_list[j])):
                group_of_trial[all_ids_copy.pop()] = j

        #Add frames to respective group with id ownership (-1 = trial not assigned)
        group_labels = np.array([group_of_trial.get(t, -1) for t in trial_ids], dtype=int)
        final_groups = [dataframes[group_labels == j] for j in range(S)]

        if balance==False:
            return final_groups

        complete=True
        std = 0
        for group in final_groups:
            grid = balance_stats(group,False)
            grid0 = grid.sum(0)/np.sum(grid)
            grid1 = grid.sum(1)/np.sum(grid)
            std += np.std(grid0) + np.std(grid1)
            if (grid0<0.15).any() or (grid1<0.15).any():
                complete=False

        # A split that satisfies the threshold ends the search and must be the
        # one returned, even if an earlier (failing) split had a lower std.
        if complete or std < best_avg_std:
            best_avg_std=std
            best_group = final_groups
        print(attempts)
    return best_group


# Split by trial using the default 60/20/20 percentages, with the balance search enabled.
train_df, val_df, test_df = split_data(clean_trials,balance=True)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [3]:
# Save or read the split files as necessary (comment out the lines that do not apply).
split_pickles = {
    "./data/train_data.pickle": train_df,
    "./data/test_data.pickle": test_df,
    "./data/val_data.pickle": val_df,
}
for pickle_path, split_frames in split_pickles.items():
    split_frames.to_pickle(pickle_path)

# train_df = pd.read_pickle("./data/train_data.pickle")
# test_df = pd.read_pickle("./data/test_data.pickle")
# val_df = pd.read_pickle("./data/val_data.pickle")

# Number of frames in the training, validation and test sets.
print(len(train_df), len(val_df), len(test_df))

39403 12712 13658


In [40]:
# Print the action-by-map statistics of the training, validation and test sets, in that order.
for split_frames in (train_df, val_df, test_df):
    balance_stats(split_frames)

[[1369. 1491. 2363. 1307. 2402.]
 [1079. 1738.  919. 3029. 1208.]
 [1099. 1923. 1221. 3029. 1875.]
 [ 621. 1929.  773. 1593. 1240.]
 [ 973. 1878.  964. 1802. 1578.]]
['cramped_room', 'coordination_ring', 'random0', 'random3', 'asymmetric_advantages']
Map:  [13.05 22.74 15.84 27.31 21.07]
Act:  [22.67 20.23 23.21 15.62 18.26]
[[1029.  350.  719.  571.  629.]
 [ 410.  490.  512.  634.  259.]
 [ 694.  490.  518.  674.  312.]
 [ 475.  330.  292.  715.  202.]
 [ 617.  345.  500.  694.  251.]]
['asymmetric_advantages', 'random3', 'cramped_room', 'coordination_ring', 'random0']
Map:  [25.37 15.77 19.99 25.87 13.  ]
Act:  [25.94 18.13 21.15 15.84 18.93]
[[ 978.  364.  831.  761.  428.]
 [ 335.  463.  305.  539. 1018.]
 [ 594.  486.  450.  551. 1004.]
 [ 407.  467.  314.  307.  558.]
 [ 563.  444.  377.  511.  603.]]
['asymmetric_advantages', 'coordination_ring', 'random0', 'cramped_room', 'random3']
Map:  [21.06 16.28 16.67 19.54 26.44]
Act:  [24.62 19.48 22.59 15.03 18.29]


In [5]:
def save_images(dataframes, subfolder_name):
    """Render every frame to a PNG, sorted into one folder per action label.

    Frame ``i`` (positional) is written to
    ``./data/imgs/<subfolder_name>/<label>/<i>.png`` where ``label`` is decoded
    from the action of the player given by ``player_idx``:

        0 = 'INTERACT', 1 = first component is 1, 2 = first component is -1,
        3 = second component is 1, 4 = second component is -1,
        5 = anything else (also prints "unknown action").

    When ``player_idx`` is 1 the two players are swapped in the state before
    rendering, so the labelled player is always the first player of the image.

    Args:
        dataframes: DataFrame with the columns ``joint_action``, ``player_idx``,
            ``state`` (JSON string) and ``layout`` (string holding a list of
            grid rows).
        subfolder_name: name of the folder below ``./data/imgs``.

    Returns:
        A copy of ``dataframes`` whose ``"Unnamed: 0"`` column holds the image
        number ``i`` of each row, so rows can be matched to the saved files. The
        input frame is not modified. (Writing the number into the per-row Series
        only reached the frame when pandas happened to hand out a view.)
    """
    import os

    L = len(dataframes)

    # Record the image number explicitly on a copy instead of on a row Series.
    indexed_frames = dataframes.copy()
    indexed_frames["Unnamed: 0"] = list(range(L))

    for i in range(L):
        row = dataframes.iloc[i]
        ja = row['joint_action']
        ja = ja.replace("\'","\"")
        y_act = json.loads(ja)

        #Add entry for respective player
        player_idx = row['player_idx']

        #Output/GT/Label
        y = 0
        y_row = y_act[player_idx]
        if y_row == 'INTERACT':
            y=0
        elif y_row[0]==1:
            y=1
        elif y_row[0]==-1:
            y=2
        elif y_row[1]==1:
            y=3
        elif y_row[1]==-1:
            y=4
        else: #No action
            y=5
            print("unknown action")

        path = f"./data/imgs/{subfolder_name}/{y}/{i}.png"
        # Make sure the label folder exists before an image is written into it.
        os.makedirs(os.path.dirname(path), exist_ok=True)

        state = json.loads(row['state'])
        if player_idx == 1:
            # Put the labelled player first.
            state['players'][0], state['players'][1] = (state['players'][1], state['players'][0])

        oc_state = OvercookedState.from_dict(state)

        # "1" and "2" are blanked out of the layout text before it is parsed
        # (presumably the player start markers - TODO confirm).
        layout = json.loads(row['layout'].replace("\'","\"").replace("1"," ").replace("2"," "))
        StateVisualizer().display_rendered_state(oc_state,grid=layout,img_path=path)

    return indexed_frames


In [9]:
# Render and save the images of every split.
# save_images writes one PNG per frame below ./data/imgs/<split>/<action label>/.
save_images(train_df, "train")
new_test_dfs = save_images(test_df, "test")
save_images(val_df, "val")

# Re-save the test frames so they can be matched with the images during analysis.
# NOTE(review): new_test_dfs_reset and its .head() are neither displayed nor used below;
# the pickle is written from new_test_dfs (index not reset).
new_test_dfs_reset = new_test_dfs.reset_index()
new_test_dfs_reset.head()
new_test_dfs.to_pickle("./data/test_data.pickle")