In [1]:
%matplotlib inline
%matplotlib notebook
import tqdm, copy
import random, os
import pandas as pd
from tqdm import trange
import matplotlib.pyplot as plt
import json
import numpy as np

In [2]:
from overcooked_ai_py.utils import save_pickle
from human_aware_rl.utils import set_global_seed
from human_aware_rl.human.process_dataframes import *
from human_aware_rl.human.process_human_trials import *
from human_aware_rl.static import *
from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedState, SoupState, ObjectState
from collections import defaultdict
set_global_seed(1884)

In [7]:
# All Trials
all_trials = pd.read_pickle("./data/2019_hh_trials_all.pickle")

# Remove, in a single call, the columns that the cells shown below never read.
unused_columns = [
    "time_elapsed",
    "player_0_is_human",
    "player_1_is_human",
    "reward",
    "score",
    "player_0_id",
    "player_1_id",
    "cur_gameloop",
    "cur_gameloop_total",
    "score_total",
    "button_press",
    "button_press_total",
    "timesteps_since_interact",
    "button_presses_per_timstep",
]
all_trials = all_trials.drop(columns=unused_columns)

# Every raw frame starts out tagged with player 0; filter_trials assigns the
# real perspective on the rows it keeps.
all_trials.insert(2, "player_idx", [0] * len(all_trials), allow_duplicates=True)
print(all_trials.iloc[0])

Unnamed: 0                                                      0
joint_action                                     [[0, 0], [0, 0]]
player_idx                                                      0
layout                       ['XXPXX', 'O  2O', 'X1  X', 'XDXSX']
layout_name                                          cramped_room
state           {"players": [{"position": [1, 2], "orientation...
time_left                                                   180.0
trial_id                                                        0
Name: 0, dtype: object


In [4]:
# Filtered Trials:
# Player data between frames must not be identical (Different position/orientation/holding).
# Last frame of trajectory is not considered.
def filter_trials(all_trials):
    """Keep the (frame, player) pairs where a non-wait action changed the player.

    For every frame and for each of the two players, the frame is kept (once
    per qualifying player, with ``player_idx`` set to that player) when:

    * the player's entry in ``joint_action`` is not a wait action,
    * ``time_left`` is not ``-1``, and
    * the player's entry in the next row's ``state`` differs from the one in
      this row's ``state``.

    Args:
        all_trials: DataFrame with string columns ``joint_action`` and
            ``state`` (JSON; single quotes in ``joint_action`` are converted
            to double quotes before parsing) plus a ``time_left`` column.

    Returns:
        DataFrame with the same columns as ``all_trials``; kept rows retain
        their original index labels. Empty (columns only) if nothing is kept.
    """
    n_frames = len(all_trials)
    kept_rows = []

    for i in range(n_frames):
        row = all_trials.iloc[i]

        # A frame can only be judged against its successor. The very last row
        # has none (the previous code raised IndexError there), and rows with
        # time_left == -1 are excluded by design.
        # NOTE(review): the successor is simply the next row; it is assumed to
        # belong to the same trial whenever time_left != -1 - confirm.
        if row['time_left'] == -1 or i + 1 >= n_frames:
            continue

        # Actions by both players
        y_act = json.loads(row['joint_action'].replace("\'", "\""))

        # States are parsed lazily, and at most once per frame.
        state = None
        future_state = None

        # Consider both player perspectives
        for player_idx in (0, 1):
            y_row = y_act[player_idx]
            acted = (
                y_row == 'INTERACT'
                or y_row[0] in (1, -1)
                or y_row[1] in (1, -1)
            )
            if not acted:  # Filters out wait actions
                continue

            if state is None:
                state = json.loads(row['state'])
                future_state = json.loads(all_trials.iloc[i + 1]['state'])

            if future_state['players'][player_idx] != state['players'][player_idx]:
                temp_row = row.copy()
                temp_row['player_idx'] = player_idx
                kept_rows.append(temp_row.to_frame().T)

    if not kept_rows:
        return pd.DataFrame(columns=all_trials.columns)
    # One concat at the end instead of growing the frame row by row (quadratic).
    return pd.concat(kept_rows)
# Walks every frame of all_trials in Python; the following cell can load a
# previously pickled result from disk instead.
clean_trials = filter_trials(all_trials)

In [8]:
# Uncomment the next line to write the current clean_trials to the cache file.
# clean_trials.to_pickle("./data/2019_hh_trials_action.pickle")
# NOTE(review): this load replaces whatever clean_trials held before this cell
# ran, so the numbers below describe the cached pickle.
clean_trials = pd.read_pickle("./data/2019_hh_trials_action.pickle")
# Row counts: filtered frames vs. raw frames.
print(len(clean_trials), len(all_trials))


65773 103141


In [9]:
#Statistics
# 5 maps
# 5 actions (excluding wait actions)
def balance_stats(trials, print_stats=True):
    """Count frames per (action, layout) combination.

    Action labels (grid rows) are derived from the acting player's entry in
    ``joint_action``: 0 for ``'INTERACT'``, 1 / 2 when the first component is
    +1 / -1, 3 / 4 when the second component is +1 / -1. Layouts (grid
    columns) are numbered in order of first appearance in ``trials``.

    Rows whose action matches none of the five labels are reported with an
    "error" line and left out of the counts (they used to be silently added
    to the INTERACT row).

    Args:
        trials: DataFrame with ``joint_action`` (JSON-like string),
            ``player_idx`` and ``layout_name`` columns.
        print_stats: When true, print the grid, the layout order and the
            per-layout / per-action percentages relative to ``len(trials)``.

    Returns:
        5x5 float numpy array of counts, indexed ``[action, layout]``.

    Raises:
        IndexError: if ``trials`` contains more than five distinct layouts.
    """
    layout_name_list = []
    grid = np.zeros([5, 5])

    if len(trials) > 0:
        # Iterating the three needed columns avoids building a Series per row.
        columns = zip(trials['joint_action'], trials['player_idx'], trials['layout_name'])
        for ja, player_idx, layout_name in columns:
            # Register the layout first so the column order matches first appearance.
            if layout_name not in layout_name_list:
                layout_name_list.append(layout_name)
            name_idx = layout_name_list.index(layout_name)

            # Action
            y_row = json.loads(ja.replace("\'", "\""))[player_idx]
            if y_row == 'INTERACT':
                y = 0
            elif y_row[0] == 1:
                y = 1
            elif y_row[0] == -1:
                y = 2
            elif y_row[1] == 1:
                y = 3
            elif y_row[1] == -1:
                y = 4
            else:
                print("error", y_row)
                continue

            grid[y, name_idx] += 1

    if print_stats:
        # Guard the denominator so an empty frame prints zeros, not NaN warnings.
        total = len(trials) or 1
        print(grid)
        print(layout_name_list)
        print("Map: ", np.round(grid.sum(0) * 100 / total, 2))
        print("Act: ", np.round(grid.sum(1) * 100 / total, 2))

    return grid

# Action (rows) x layout (columns) counts for the filtered frames.
balance_stats(clean_trials)

[[2849. 4409. 2426. 2085. 3823.]
 [2130. 1953. 2835. 4537. 1483.]
 [2168. 3163. 3083. 4523. 1983.]
 [1220. 2122. 3111. 2481. 1289.]
 [1984. 2758. 3016. 2750. 1592.]]
['cramped_room', 'asymmetric_advantages', 'coordination_ring', 'random3', 'random0']
Map:  [15.74 21.9  22.   24.9  15.46]
Act:  [23.71 19.67 22.68 15.54 18.4 ]


array([[2849., 4409., 2426., 2085., 3823.],
       [2130., 1953., 2835., 4537., 1483.],
       [2168., 3163., 3083., 4523., 1983.],
       [1220., 2122., 3111., 2481., 1289.],
       [1984., 2758., 3016., 2750., 1592.]])

In [21]:
def _allocate_split_counts(split_percentages, n_ids):
    """Convert fractional shares into whole trial-id counts (largest-remainder rule)."""
    exact = np.asarray(split_percentages, dtype=float) * n_ids
    # Small epsilon so that e.g. 0.6 * 5 == 3.0000000000000004 or 2.9999999999999996 both floor to 3.
    counts = np.floor(exact + 1e-9).astype(int)
    if counts.sum() > n_ids:
        raise ValueError("split_percentages request more trials than are available")

    # Number of ids that should be handed out in total (all of them when the shares sum to 1).
    target = min(n_ids, int(round(float(exact.sum()))))
    leftover = target - int(counts.sum())
    if leftover > 0:
        remainders = exact - counts
        # Stable sort: ties go to the earlier group.
        for idx in np.argsort(-remainders, kind="stable")[:leftover]:
            counts[idx] += 1
    return counts


def split_data(dataframes, split_percentages=[0.6,0.2,0.2],balance=True,max_attempts=10):
    """Split frames into groups so that no ``trial_id`` is shared between groups.

    Trial ids are shuffled with ``random.shuffle`` and dealt out to the groups
    according to ``split_percentages``. Ids left over by integer rounding are
    distributed to the groups with the largest fractional share, so no trial
    is silently dropped when the shares sum to 1.

    Args:
        dataframes: DataFrame with a ``trial_id`` column.
        split_percentages: Fraction of trial ids for each returned group.
        balance: If false, return the first random split. If true, retry up
            to ``max_attempts`` times: an attempt is accepted as soon as, in
            every group, each layout and each action (as counted by
            ``balance_stats``) makes up at least 15% of the frames. If no
            attempt qualifies, the attempt with the smallest summed standard
            deviation of those fractions is returned.
        max_attempts: Maximum number of random splits to try.

    Returns:
        List of DataFrames, one per entry of ``split_percentages``, with rows
        in their original order and index. ``None`` if ``max_attempts`` is 0.

    Raises:
        ValueError: if the shares ask for more trial ids than exist.
    """
    trial_ids = dataframes['trial_id']
    # Unique ids in order of first appearance.
    all_ids = trial_ids.unique().tolist()
    split_count_list = _allocate_split_counts(split_percentages, len(all_ids))

    best_group = None
    best_avg_std = float("inf")
    attempts = 0

    while attempts < max_attempts:
        attempts += 1

        # Allocates specific ids: group 0 pops first from the shuffled list.
        shuffled_ids = all_ids.copy()
        random.shuffle(shuffled_ids)
        final_groups = []
        for count in split_count_list:
            group_ids = [shuffled_ids.pop() for _ in range(int(count))]
            # Boolean mask keeps row order, index and column dtypes.
            final_groups.append(dataframes[trial_ids.isin(group_ids)].copy())

        if not balance:
            return final_groups

        complete = True
        std = 0
        for group in final_groups:
            grid = balance_stats(group, False)
            total = np.sum(grid)
            if total == 0:
                # An empty group can never be balanced; avoid 0/0 -> NaN, which
                # would slip through the threshold comparison below.
                complete = False
                std = float("inf")
                continue
            grid0 = grid.sum(0) / total
            grid1 = grid.sum(1) / total
            std += np.std(grid0) + np.std(grid1)
            if (grid0 < 0.15).any() or (grid1 < 0.15).any():
                complete = False

        print(attempts)

        if complete:
            # This attempt satisfies the threshold: return it, not an earlier
            # attempt that merely had a lower spread.
            return final_groups

        if best_group is None or std < best_avg_std:
            best_avg_std = std
            best_group = final_groups

    return best_group

In [22]:
# Hold out 20% of the trial ids as the test set (balanced split, up to 10 tries).
trainvals_df, test_df = split_data(clean_trials, split_percentages=[0.8,0.2],balance=True,max_attempts=10)
# Cut the remaining trials into four equal parts, using split_data's default
# balance / max_attempts settings; each part is later used once as validation data.
train_val_dfs = split_data(trainvals_df, split_percentages=[0.25,0.25,0.25,0.25])

1
2
3
4
5
6
7
8
9
10
1
2
3
4
5
6
7
8
9
10


In [37]:
# Switches for this cell:
#   save     - write each fold's train/val frames to ./data/train{k}.pickle / val{k}.pickle
#   load     - read the fold frames from those pickles instead of rebuilding them
#   save_img - render every frame of each fold to an image via save_images
# NOTE(review): save_images is defined in a cell further down in this notebook;
# that cell has to be executed before this one.
save=False
load=True
save_img = True

# The number of folds always comes from train_val_dfs, even when load is True.
for i in range(len(train_val_dfs)):
    train_df=None
    val_df=None
    if load:
        train_df = pd.read_pickle(f"./data/train{i+1}.pickle")
        val_df = pd.read_pickle(f"./data/val{i+1}.pickle")
    else:
        # Part i is the validation set; all other parts together form the training set.
        train_df = pd.concat(train_val_dfs[:i] + train_val_dfs[i+1:])
        val_df = train_val_dfs[i]
    print(len(train_df),len(val_df),len(train_val_dfs))

    if save:
        train_df.to_pickle(f"./data/train{i+1}.pickle")
        val_df.to_pickle(f"./data/val{i+1}.pickle")

    if save_img:
        # The return value of save_images is not used here.
        save_images(train_df, f"train{i+1}")
        save_images(val_df, f"val{i+1}")

38757 13014 4
38988 12783 4
37566 14205 4
40002 11769 4


In [32]:
# Persist the held-out test frames.
test_df.to_pickle("./data/test_data.pickle")

# Uncomment to reuse a previously saved test set instead of the one in memory.
# test_df = pd.read_pickle("./data/test_data.pickle")
print(len(test_df))

13555


In [33]:
# Print action/layout balance for both sides of the train+val / test split.
# The trailing ';' keeps the returned grid from being echoed a second time.
balance_stats(trainvals_df);
balance_stats(test_df);

[[2134. 3753. 1817. 2507. 1962.]
 [1619. 1556. 3878.  949. 2294.]
 [1677. 2602. 3855. 1292. 2499.]
 [ 954. 1705. 2135.  810. 2523.]
 [1510. 2314. 2363. 1030. 2480.]]
['cramped_room', 'asymmetric_advantages', 'random3', 'random0', 'coordination_ring']
Map:  [15.12 22.85 26.9  12.62 22.52]
Act:  [23.31 19.72 22.84 15.56 18.57]
[[ 715.  464.  268. 1316.  656.]
 [ 511.  541.  659.  534.  397.]
 [ 491.  584.  668.  691.  561.]
 [ 266.  588.  346.  479.  417.]
 [ 474.  536.  387.  562.  444.]]
['cramped_room', 'coordination_ring', 'random3', 'random0', 'asymmetric_advantages']
Map:  [18.13 20.01 17.17 26.43 18.26]
Act:  [25.22 19.49 22.1  15.46 17.73]


In [16]:
def save_images(dataframes, subfolder_name):
    """Render every frame to ``./data/imgs/<subfolder_name>/<label>/<i>.png``.

    ``i`` is the frame's position in ``dataframes`` and ``<label>`` is the
    action label of the frame's ``player_idx``: 0 for ``'INTERACT'``, 1 / 2
    when the first action component is +1 / -1, 3 / 4 when the second
    component is +1 / -1, and 5 (with an "unknown action" message) otherwise.
    When ``player_idx`` is 1 the two players are swapped in the state before
    rendering, so the acting player is always passed as player 0.

    Args:
        dataframes: DataFrame with ``joint_action``, ``player_idx``, ``state``
            and ``layout`` columns (JSON-like strings, except ``player_idx``).
        subfolder_name: Directory name under ``./data/imgs``.

    Returns:
        A copy of ``dataframes`` whose ``"Unnamed: 0"`` column holds each
        frame's position ``i``, i.e. the number used in its image file name.
        The input frame itself is not modified.
    """
    # Number the frames on the frame itself. Assigning into the Series returned
    # by iloc (as was done before) is not guaranteed to reach the DataFrame.
    numbered = dataframes.copy()
    numbered["Unnamed: 0"] = list(range(len(numbered)))

    for i in range(len(numbered)):
        row = numbered.iloc[i]
        y_act = json.loads(row['joint_action'].replace("\'", "\""))

        #Add entry for respective player
        player_idx = row['player_idx']

        #Output/GT/Label
        y_row = y_act[player_idx]
        if y_row == 'INTERACT':
            y = 0
        elif y_row[0] == 1:
            y = 1
        elif y_row[0] == -1:
            y = 2
        elif y_row[1] == 1:
            y = 3
        elif y_row[1] == -1:
            y = 4
        else: #No action
            y = 5
            print("unknown action")

        path = f"./data/imgs/{subfolder_name}/{y}/{i}.png"
        # Make sure the label directory exists before the image is written.
        os.makedirs(os.path.dirname(path), exist_ok=True)

        state = json.loads(row['state'])
        if player_idx == 1:
            # Put the acting player in slot 0.
            state['players'][0], state['players'][1] = (state['players'][1], state['players'][0])

        oc_state = OvercookedState.from_dict(state)

        # Blank out the '1' / '2' characters of the layout grid before rendering.
        layout = json.loads(row['layout'].replace("\'","\"").replace("1"," ").replace("2"," "))
        StateVisualizer().display_rendered_state(oc_state,grid=layout,img_path=path)

    return numbered


In [38]:
# Render the test frames and keep the frame that save_images hands back.
new_test_dfs = save_images(test_df, "test")
#Resaving the test frames for matching in analysis
# NOTE(review): the reset-index copy is only built and previewed here; the
# .head() result is not shown because it is not the cell's last expression,
# and the pickle below is written from new_test_dfs, not from the reset copy.
new_test_dfs_reset = new_test_dfs.reset_index()
new_test_dfs_reset.head()
# Overwrites the test pickle written by the earlier test_df.to_pickle cell.
new_test_dfs.to_pickle("./data/test_data.pickle")