In [1]:
import os
import pandas as pd
import numpy as np
from collections import defaultdict
from scipy import stats
import torch
import json
import random
from tqdm import tqdm
from collections import defaultdict
import re
from multiprocessing import  Pool

In [2]:
# Names of the simulated interaction settings.
sim_settings = [
    'both_gaze_and_gesture',
    'only_gaze',
    'only_gesture',
    'only_objects',
    'wrong_gaze_and_gesture',
]
# Camera views; `<view>_image` is the column prefix used in the generated records.
view_names = ['ego_view', 'exo_view', 'top_view']
# Instruction templates as keyed in data.json (perspective-specific ones carry an ego_/exo_ prefix).
instruction_templates = [
    'template_null',
    'template_1_1',
    'template_1_2',
    'ego_template_2_1',
    'ego_template_2_2',
    'ego_template_3_1',
    'ego_template_3_2',
    'exo_template_2_1',
    'exo_template_2_2',
    'exo_template_3_1',
    'exo_template_3_2',
]
# Same templates without the perspective prefix.
instruction_templates_min = [
    'template_null',
    'template_1_1',
    'template_1_2',
    'template_2_1',
    'template_2_2',
    'template_3_1',
    'template_3_2',
]
# Relation keywords searched for in instruction strings, in this order.
rel_names = [
    'corner',
    'back',
    'center',
    'left of',
    'right of',
    'behind',
    'front of',
    'next',
    'left',
    'right',
    'front',
]
split_names = ['test', 'valid', 'train']
# Dataset root; every other path in the notebook is built from it.
base_dir_path = '/project/CollabRoboGroup/datasets/official_data'

In [3]:
# List the dataset root once (sorted, so the order is stable) and display how many entries it holds.
sample_dir_names = sorted(os.listdir(base_dir_path))
sample_dir_len = len(sample_dir_names)
sample_dir_len

56108

In [4]:
def is_complete_data(path):
    """Return True when the directory ``path`` has at least one entry whose name contains ".json"."""
    return any(".json" in entry_name for entry_name in os.listdir(path))

def translate_view_name(view_name):
    """Map a view directory name (e.g. 'ego_view') to its record key ('ego_view_image')."""
    return '{}_image'.format(view_name)

def is_contrastive(settings_dir_name):
    """Return 1 when the setting directory name contains "wrong", otherwise 0."""
    has_wrong_cue = settings_dir_name.find("wrong") >= 0
    return 1 if has_wrong_cue else 0

def is_valid_point(x1,y1,x2,y2):
    """Return True when both corners lie inside x in [0, 719] and y in [0, 479].

    NOTE: a later ``is_valid_point(point_cords, limit, gt)`` in this same cell
    rebinds the name, so this four-argument version is unreachable once the
    cell has run; ``is_valid_point_object`` performs the same check.
    """
    below_origin = x1<0 or y1<0 or x2<0 or y2<0
    beyond_frame = x1>719 or y1>479 or x2>719 or y2>479
    return not (below_origin or beyond_frame)

def is_valid_point_object(x1,y1,x2,y2, max_x=719, max_y=479):
    """Check that a bounding box lies inside the image frame.

    Args:
        x1, y1: coordinates of the first corner.
        x2, y2: coordinates of the second corner.
        max_x: largest accepted x coordinate (default 719).
        max_y: largest accepted y coordinate (default 479).

    Returns:
        True when every coordinate is >= 0 and within ``max_x`` / ``max_y``,
        False otherwise.
    """
    if(x1<0 or y1<0 or x2<0 or y2<0):
        return False
    if(x1>max_x or y1>max_y or x2>max_x or y2>max_y):
        return False
    return True
    
def is_valid_point(point_cords, limit, gt):
    """Check a list of coordinate strings against one bound.

    Empty strings are skipped. With ``gt`` falsy a value below ``limit`` fails
    the check; with ``gt`` truthy a value above ``limit`` fails it.
    """
    for raw_cord in point_cords:
        if len(raw_cord) == 0:
            continue
        value = float(raw_cord)
        out_of_range = value > limit if gt else value < limit
        if out_of_range:
            return False
    return True

def is_valid_point_all_object(row):
    """Return True when every exo/top-view box stored in ``row`` lies inside the frame.

    ``row`` maps ``<view>_image_all_objects_<corner>`` keys to '_'-joined
    coordinate strings. Ego views are skipped.
    """
    for view_name in view_names:
        if 'ego' in view_name:
            continue
        key_prefix = f'{view_name}_image_all_objects_'
        xs_start = row[key_prefix + 'start_point_x'].split('_')
        ys_start = row[key_prefix + 'start_point_y'].split('_')
        xs_end = row[key_prefix + 'end_point_x'].split('_')
        ys_end = row[key_prefix + 'end_point_y'].split('_')

        # (coordinates, bound, is_upper_bound), evaluated in this order.
        bound_checks = (
            (xs_start, 0, False), (ys_start, 0, False),
            (xs_end, 0, False), (ys_end, 0, False),
            (xs_start, 719, True), (ys_start, 479, True),
            (xs_end, 719, True), (ys_end, 479, True),
        )
        for cords, bound, is_upper in bound_checks:
            if not is_valid_point(cords, bound, is_upper):
                return False
    return True

def _flipped_box(source, view_name):
    """Read one view's bounding box from ``source`` with the y axis flipped (480 - y).

    Flipping swaps which corner supplies the start/end y value, so start_y is
    derived from the end point and end_y from the start point.
    """
    start = source[f'{view_name}_object_start_point']
    end = source[f'{view_name}_object_end_point']
    return {
        'start_point_x': float(start['x']),
        'start_point_y': 480 - float(end['y']),
        'end_point_x': float(end['x']),
        'end_point_y': 480 - float(start['y']),
    }


def get_object_location(data_json, settings_name):
    """Collect bounding boxes for the chosen object and for all objects in a scene.

    Args:
        data_json: parsed ``data.json`` of one sample.
        settings_name: setting directory name; any name containing 'both' or
            'wrong' is mapped to the corresponding data.json key.

    Returns:
        (object_loc, all_object_loc, is_valid):
        object_loc maps ``<view>_image_object_<corner>`` to floats for the
        chosen object (the ego-view box is read from the setting entry, the
        other views from the object itself);
        all_object_loc is a ``defaultdict(str)`` mapping
        ``<view>_image_all_objects_<corner>`` to '_'-joined coordinates of every
        object for the non-ego views;
        is_valid is False when any collected box falls outside the frame.
    """
    if('both' in settings_name):
        settings_name = 'both_gaze_and_gesture'
    if('wrong' in settings_name):
        settings_name = 'wrong_gaze_and_gesture'

    object_loc = {}
    coords_by_key = defaultdict(list)
    is_valid = True

    for object_item in data_json['objects']:
        if(object_item['object_type']=='chosen'):
            for view_name in view_names:
                source = data_json[settings_name] if view_name=='ego_view' else object_item
                for corner, value in _flipped_box(source, view_name).items():
                    object_loc[f'{view_name}_image_object_{corner}'] = value

                is_valid = is_valid and is_valid_point_object(x1 = object_loc[f'{view_name}_image_object_start_point_x'],
                                y1 = object_loc[f'{view_name}_image_object_start_point_y'],
                                x2 = object_loc[f'{view_name}_image_object_end_point_x'],
                                y2 = object_loc[f'{view_name}_image_object_end_point_y'])

        for view_name in view_names:
            if(view_name=='ego_view'):
                continue
            for corner, value in _flipped_box(object_item, view_name).items():
                coords_by_key[f'{view_name}_image_all_objects_{corner}'].append(str(value))

    all_object_loc = defaultdict(str)
    for key, values in coords_by_key.items():
        all_object_loc[key] = '_'.join(values)

    # One pass over the final strings covers every coordinate; re-checking after
    # each object would only re-parse the same prefixes again.
    if(is_valid and all_object_loc):
        is_valid = is_valid_point_all_object(all_object_loc)

    return object_loc, all_object_loc, is_valid

def is_perspective_necessary(instruction_str):
    """Return 1 when the instruction mentions a viewpoint-dependent direction word, else 0."""
    for direction_word in ('left', 'right', 'behind', 'front'):
        if direction_word in instruction_str:
            return 1
    return 0

def is_perspective_template(template_name):
    """Return True for the template 2_x / 3_x families, which exist in ego and exo variants."""
    perspective_tags = ('2_1', '2_2', '3_1', '3_2')
    return any(tag in template_name for tag in perspective_tags)

def is_filter():
    """Flip a fair coin; True means the caller should skip (filter out) the item."""
    return random.randint(0, 1) == 0

In [5]:
def _ambiguity_attribute(scene_obj, attribute):
    """Return the value compared for ``attribute`` ('color' -> colour name, otherwise size)."""
    if attribute == "color":
        return scene_obj["object_color"]["name"]
    return scene_obj["object_size"]


def _matches_in_same_zone(chosen_obj, candidate, attribute):
    """True when ``candidate`` shares the chosen object's zone (corner / center / neither) and ``attribute``."""
    chosen_zone = chosen_obj["absolute_location_observer"]
    candidate_zone = candidate["absolute_location_observer"]
    same_value = _ambiguity_attribute(candidate, attribute) == _ambiguity_attribute(chosen_obj, attribute)
    for zone in ("corner", "center"):
        if zone in chosen_zone:
            return zone in candidate_zone and same_value
    return same_value and "center" not in candidate_zone and "corner" not in candidate_zone


def _relation_ambiguity(instruction_entry, chosen_obj, ref_obj, duplicates, ref_duplicates, attribute):
    """Return (ambiguous_1, ambiguous_2) for a template_3_x instruction.

    ambiguous_1: some same-named object exists and either the reference object
    has a same-named twin or that object is not clearly farther from the
    reference than the chosen one.
    ambiguous_2: additionally requires the rival to share ``attribute`` with
    the chosen object and, when reference twins exist, one of them to share
    colour or size with the reference.
    """
    if not duplicates:
        return False, False

    def not_clearly_closer(candidate):
        # "next to" phrasing uses is_closer's default factor; any other relation uses 3.
        if "next to" in instruction_entry["instruction_string_2"]:
            return not is_closer(chosen_obj, ref_obj, candidate)
        return not is_closer(chosen_obj, ref_obj, candidate, 3)

    ambiguous_1 = bool(ref_duplicates) or any(not_clearly_closer(candidate) for candidate in duplicates)

    if ref_duplicates:
        ref_has_lookalike = any(
            ref["object_color"]["name"] == ref_obj["object_color"]["name"]
            or ref["object_size"] == ref_obj["object_size"]
            for ref in ref_duplicates
        )
        if not ref_has_lookalike:
            return ambiguous_1, False

    chosen_value = _ambiguity_attribute(chosen_obj, attribute)
    ambiguous_2 = any(
        _ambiguity_attribute(candidate, attribute) == chosen_value and not_clearly_closer(candidate)
        for candidate in duplicates
    )
    return ambiguous_1, ambiguous_2


def determine_ambiguous(data_json):
    """Decide, per instruction string, whether it could refer to more than one object.

    Args:
        data_json: parsed ``data.json`` with an ``instructions`` mapping and an
            ``objects`` list containing one "chosen" and one "ref" object.

    Returns:
        dict mapping ``template_null`` to True and, for every other template
        key, ``<key>_1`` / ``<key>_2`` to the ambiguity of its two instruction
        strings.

    Raises:
        ValueError: if the scene has objects but no "chosen" or no "ref" object.
    """
    instructions = data_json['instructions']
    scene_objects = data_json["objects"]

    chosen_obj = None
    ref_obj = None
    for scene_obj in scene_objects:
        if scene_obj["object_type"] == "chosen":
            chosen_obj = scene_obj
        if scene_obj["object_type"] == "ref":
            ref_obj = scene_obj
    if scene_objects and (chosen_obj is None or ref_obj is None):
        raise ValueError('data_json["objects"] needs both a "chosen" and a "ref" object')

    # Same-named objects that could be confused with the chosen / reference object.
    duplicates = []
    ref_duplicates = []
    for scene_obj in scene_objects:
        if scene_obj["object_type"] != "chosen" and scene_obj["object_name"] == chosen_obj["object_name"]:
            duplicates.append(scene_obj)
        if scene_obj["object_type"] != "ref" and scene_obj["object_name"] == ref_obj["object_name"]:
            ref_duplicates.append(scene_obj)

    has_duplicates = bool(duplicates)
    ambiguity = {}
    for instruction in instructions:
        if instruction == "template_null":  # names no distinguishing feature, so always ambiguous
            ambiguity[instruction] = True
        elif "1_1" in instruction:
            ambiguity[instruction + "_1"] = has_duplicates
            ambiguity[instruction + "_2"] = has_duplicates
        elif "1_2" in instruction:
            # String 1 mentions the colour: only a same-coloured twin is a rival.
            chosen_color = chosen_obj["object_color"]["name"] if has_duplicates else None
            ambiguity[instruction + "_1"] = any(
                candidate["object_color"]["name"] == chosen_color for candidate in duplicates
            )
            ambiguity[instruction + "_2"] = has_duplicates
        elif "2_1" in instruction:  # absolute location + size
            ambiguity[instruction + "_1"] = has_duplicates
            ambiguity[instruction + "_2"] = any(
                _matches_in_same_zone(chosen_obj, candidate, "size") for candidate in duplicates
            )
        elif "2_2" in instruction:  # absolute location + colour
            ambiguity[instruction + "_1"] = has_duplicates
            ambiguity[instruction + "_2"] = any(
                _matches_in_same_zone(chosen_obj, candidate, "color") for candidate in duplicates
            )
        elif "3_1" in instruction:  # relation to the reference object + size
            first, second = _relation_ambiguity(
                instructions[instruction], chosen_obj, ref_obj, duplicates, ref_duplicates, "size")
            ambiguity[instruction + "_1"] = first
            ambiguity[instruction + "_2"] = second
        elif "3_2" in instruction:  # relation to the reference object + colour
            first, second = _relation_ambiguity(
                instructions[instruction], chosen_obj, ref_obj, duplicates, ref_duplicates, "color")
            ambiguity[instruction + "_1"] = first
            ambiguity[instruction + "_2"] = second
    return ambiguity

def is_closer(chosen, ref, obj, mult = 2):
    """Return True when ``chosen`` is more than ``mult`` times closer to ``ref`` than ``obj`` is.

    Distances are Euclidean distances between the centres of the objects'
    top-view bounding boxes.
    """
    def box_centre(item):
        first_corner = item["top_view_object_start_point"]
        second_corner = item["top_view_object_end_point"]
        return (first_corner["x"] + second_corner["x"]) / 2, (first_corner["y"] + second_corner["y"]) / 2

    chosen_x, chosen_y = box_centre(chosen)
    ref_x, ref_y = box_centre(ref)
    obj_x, obj_y = box_centre(obj)

    chosen_to_ref = ((chosen_x - ref_x) ** 2 + (chosen_y - ref_y) ** 2) ** 0.5
    ref_to_obj = ((ref_x - obj_x) ** 2 + (ref_y - obj_y) ** 2) ** 0.5
    return bool(chosen_to_ref * mult < ref_to_obj)

In [6]:
# Walk every sample directory of each split and emit one record per
# (sample, setting, surviving instruction template).
# NOTE(review): no random seed is set in the visible cells, so the template
# sampling below (and therefore the generated CSVs) differs between runs.
data_dict = {}
data_no = 1  # running counter that keeps record ids unique across all splits
sample_dir_names = sorted(os.listdir(base_dir_path))
sample_dir_len = len(sample_dir_names)
flag = False
# Splits are contiguous index ranges over the sorted directory listing.
split_config = {'test_start': 0, 'test_end': 4999,'valid_start': 5000, 'valid_end': 9999, 'train_start': 10000, 'train_end': sample_dir_len-1}
rel_stat = {}
for split_name in split_names:
    data_dict[split_name] = {}
    rel_stat[split_name] = defaultdict(int)
    print(f'start parsing {split_name}')
    for i in tqdm(range(split_config[f'{split_name}_start'], split_config[f'{split_name}_end']+1)):
        sample_dir_name = sample_dir_names[i]
        sample_dir_path = f'{base_dir_path}/{sample_dir_name}'
        # Skip stray CSV files in the dataset root and samples without a json file.
        if(sample_dir_name.find(".csv")!=-1):
            continue
        if(not is_complete_data(sample_dir_path)):
            continue
        for settings_dir_name in sorted(os.listdir(sample_dir_path)):
            if(settings_dir_name.find(".json")!=-1):
                continue
            settings_dir_path = f'{sample_dir_path}/{settings_dir_name}'

            # Collect one (non-video) data file per view, stored relative to base_dir_path.
            view_dir_path_dict = {}
            for view_dir_name in sorted(os.listdir(settings_dir_path)):
                view_dir_path = f'{settings_dir_path}/{view_dir_name}'
                for data_type_dir_name in sorted(os.listdir(view_dir_path)):
                    if(data_type_dir_name.find("video")!=-1):
                        continue
                    data_path = f'{view_dir_path}/{data_type_dir_name}'
                    data_name = os.listdir(data_path)[0]
                    data_path = f'{sample_dir_name}/{settings_dir_name}/{view_dir_name}/{data_type_dir_name}/{data_name}'
                    view_dir_path_dict[translate_view_name(view_dir_name)] = data_path
            try:
                # `with` closes the handle even when parsing fails.
                with open(f'{sample_dir_path}/data.json') as file:
                    data_json = json.load(file)
            except (OSError, ValueError):
                # ValueError covers json.JSONDecodeError and decoding errors.
                print(f'file corrupt {sample_dir_path}/data.json')
                break  # data.json is shared by all settings of this sample

            object_loc, all_object_loc, is_valid_object_locs = get_object_location(data_json, settings_dir_name)
            if(not is_valid_object_locs):
                continue

            # True in this dict means "skip this template for the current setting".
            instruction_template_filter_dict = {}
            for template in instruction_templates_min:
                if(is_perspective_template(template)):
                    # Keep exactly one perspective (ego or exo) per template, chosen by coin flip.
                    if(is_filter()):
                        instruction_template_filter_dict[f'ego_{template}'] = True
                        instruction_template_filter_dict[f'exo_{template}'] = False
                    else:
                        instruction_template_filter_dict[f'ego_{template}'] = False
                        instruction_template_filter_dict[f'exo_{template}'] = True
                else:
                    instruction_template_filter_dict[template] = False

            # template_null is dropped with probability 2/3; `which` picks the
            # variant of templates 1, 2 and 3 that is dropped (50/50).
            which = random.randint(1, 2)
            if random.randint(0, 2) > 0: # set to True = filter
                instruction_template_filter_dict['template_null'] = True
            instruction_template_filter_dict['template_1_' + str(which)] = True
            instruction_template_filter_dict['ego_template_2_' + str(3 - which)] = True
            instruction_template_filter_dict['exo_template_2_' + str(3 - which)] = True
            instruction_template_filter_dict['ego_template_3_' + str(which)] = True
            instruction_template_filter_dict['exo_template_3_' + str(which)] = True

            ambiguous_stat = determine_ambiguous(data_json)

            for instruction_template in instruction_templates:

                if(instruction_template_filter_dict[instruction_template]):
                    continue

                # Pick the instruction string (and its ambiguity flag) for this template.
                if(instruction_template=='template_null'):
                    if is_contrastive(settings_dir_name)==0:
                        instruction_str = data_json['instructions'][instruction_template]['instruction_string']
                        is_instruction_ambiguous = ambiguous_stat[instruction_template]
                    else:
                        continue # template null for wrong gaze and gesture does not make sense
                elif('template_1_2' in instruction_template):
                    instruction_str = data_json['instructions'][instruction_template]['instruction_string_1']
                    is_instruction_ambiguous = ambiguous_stat[f'{instruction_template}_1']
                elif('template_3_1' in instruction_template):
                    random_choice = random.randint(0, 1)
                    if(random_choice==0):
                        instruction_str = data_json['instructions'][instruction_template]['instruction_string_1']
                        is_instruction_ambiguous = ambiguous_stat[f'{instruction_template}_1']
                    else:
                        instruction_str = data_json['instructions'][instruction_template]['instruction_string_2']
                        is_instruction_ambiguous = ambiguous_stat[f'{instruction_template}_2']
                else:
                    instruction_str = data_json['instructions'][instruction_template]['instruction_string_2']
                    is_instruction_ambiguous = ambiguous_stat[f'{instruction_template}_2']

                # Record id: numeric suffix of the sample directory plus the running counter.
                data_id = sample_dir_name.split("_")[1]
                data_id = f'{data_id}_{data_no}'
                data_no += 1

                data_dict[split_name][data_id] = {}
                for view_name in view_names:
                    data_dict[split_name][data_id][f'{view_name}_image'] = view_dir_path_dict[f'{view_name}_image']

                for view_name in view_names:
                    data_dict[split_name][data_id][f'{view_name}_image_object_start_point_x'] = object_loc[f'{view_name}_image_object_start_point_x']
                    data_dict[split_name][data_id][f'{view_name}_image_object_start_point_y'] = object_loc[f'{view_name}_image_object_start_point_y']
                    data_dict[split_name][data_id][f'{view_name}_image_object_end_point_x'] = object_loc[f'{view_name}_image_object_end_point_x']
                    data_dict[split_name][data_id][f'{view_name}_image_object_end_point_y'] = object_loc[f'{view_name}_image_object_end_point_y']

                for loc_key in all_object_loc.keys():
                    data_dict[split_name][data_id][loc_key] = all_object_loc[loc_key]

                # Strip digits from the instruction text.
                instruction_str = re.sub(r'\d', '', instruction_str)
                instruction_str = str(instruction_str)
                data_dict[split_name][data_id]['verbal_instruction'] = instruction_str
                data_dict[split_name][data_id]['is_instruction_ambiguous'] = is_instruction_ambiguous
                if(('ego' in instruction_template) or ('exo' in instruction_template)):
                    data_dict[split_name][data_id]['perspective'] = data_json['instructions'][instruction_template]['instruction_perspective']
                data_dict[split_name][data_id]['is_perspective_necessary'] = is_perspective_necessary(instruction_str)

                data_dict[split_name][data_id]['is_contrastive'] = is_contrastive(settings_dir_name)
                # NOTE(review): `view_dir_name` is whatever the view-listing loop
                # above left behind (the last directory in sorted order), so this
                # column is the same for every record - confirm that is intended.
                data_dict[split_name][data_id]['view_name'] = view_dir_name
                data_dict[split_name][data_id]['setting_name'] = settings_dir_name

                # Target object category, absolute location, spatial relation and template name.
                for scene_object in data_json["objects"]:
                    if scene_object["object_type"] == "chosen": # is generally first obj in json
                        obj_name = re.sub(r'\d', '', scene_object['object_name'])
                        data_dict[split_name][data_id]['object_category'] = str(obj_name)
                        data_dict[split_name][data_id]['absolute_location_ego'] = scene_object["absolute_location_observer"]
                        data_dict[split_name][data_id]['absolute_location_exo'] = scene_object["absolute_location_participant"]
                        data_dict[split_name][data_id]['spatial_relation_ego'] = data_json['instructions']["ego_template_3_1"]['spatial_relation']
                        data_dict[split_name][data_id]['spatial_relation_exo'] = data_json['instructions']["exo_template_3_1"]['spatial_relation']
                        data_dict[split_name][data_id]['instruction_template'] = instruction_template
                        break

                # Count only the first relation keyword (in rel_names order) found in the instruction.
                for rel_name in rel_names:
                    if(rel_name in instruction_str):
                        rel_stat[split_name][rel_name] += 1
                        break

  0%|          | 2/5000 [00:00<07:29, 11.13it/s]

start parsing test


100%|██████████| 5000/5000 [05:11<00:00, 16.06it/s]
  0%|          | 2/5000 [00:00<04:53, 17.04it/s]

start parsing valid


100%|██████████| 5000/5000 [05:11<00:00, 16.07it/s]
  0%|          | 2/46099 [00:00<46:30, 16.52it/s]

start parsing train


 93%|█████████▎| 42701/46099 [44:54<03:13, 17.56it/s]  

file corrupt /project/CollabRoboGroup/datasets/official_data/5.9.2022.2.53.18.AM_2827/data.json
file corrupt /project/CollabRoboGroup/datasets/official_data/5.9.2022.2.53.34.AM_2829/data.json


100%|██████████| 46099/46099 [48:26<00:00, 15.86it/s]


In [7]:
# Peek at the key names of the last `all_object_loc` left behind by the parsing loop.
all_object_loc.keys()

dict_keys(['exo_view_image_all_objects_start_point_x', 'exo_view_image_all_objects_start_point_y', 'exo_view_image_all_objects_end_point_x', 'exo_view_image_all_objects_end_point_y', 'top_view_image_all_objects_start_point_x', 'top_view_image_all_objects_start_point_y', 'top_view_image_all_objects_end_point_x', 'top_view_image_all_objects_end_point_y'])

In [8]:
# Turn each split's {record id: fields} mapping into a DataFrame with an explicit `id` column.
df = {}
for split_name in split_names:
    split_frame = pd.DataFrame.from_dict(data_dict[split_name], orient='index')
    split_frame.index.name = 'id'
    df[split_name] = split_frame.reset_index()
    print(f'{split_name} len: ',len(df[split_name]))

test len:  81179
valid len:  81045
train len:  748009


In [9]:
# Sanity check: which instruction templates ended up in the train split.
df['train']['instruction_template'].unique()

array(['template_1_2', 'ego_template_2_1', 'exo_template_3_2',
       'template_1_1', 'ego_template_2_2', 'ego_template_3_1',
       'template_null', 'ego_template_3_2', 'exo_template_2_1',
       'exo_template_2_2', 'exo_template_3_1'], dtype=object)

In [10]:
# Keep only records whose chosen-object box is strictly positive in every view,
# then write each split to `<split>_new.csv` under the dataset root.
box_corner_suffixes = ('start_point_x', 'start_point_y', 'end_point_x', 'end_point_y')
for split_name in split_names:
    split_frame = df[split_name]
    print(f'before filter dataset({split_name}) len: {len(split_frame)}')
    split_frame['is_instruction_ambiguous'] = split_frame['is_instruction_ambiguous'].astype(int)
    keep_rows = pd.Series(True, index=split_frame.index)
    for view_name in view_names:
        for corner_suffix in box_corner_suffixes:
            keep_rows = keep_rows & (split_frame[f'{view_name}_image_object_{corner_suffix}']>0)
    df_filter = split_frame[keep_rows].copy()
    print(f'after filter dataset len ({split_name}): {len(df_filter)}')
    df_filter.to_csv(f'{base_dir_path}/{split_name}_new.csv', index=False)

before filter dataset(test) len: 81179
after filter dataset len (test): 81179
before filter dataset(valid) len: 81045
after filter dataset len (valid): 81045
before filter dataset(train) len: 748009
after filter dataset len (train): 748009


In [None]:
# df.describe()

In [11]:
# Per-split counts of the first relation keyword (in `rel_names` order) found in each instruction.
for split_name in split_names:
    print(split_name, dict(rel_stat[split_name]))

test {'right': 2423, 'left of': 10133, 'right of': 10136, 'left': 2386, 'back': 2453, 'front': 2486, 'corner': 9488, 'center': 2395, 'behind': 1642, 'front of': 1736, 'next': 1208}
valid {'right': 2435, 'right of': 10156, 'left': 2399, 'left of': 10012, 'corner': 9428, 'front of': 1785, 'behind': 1759, 'back': 2505, 'front': 2470, 'center': 2395, 'next': 1143}
train {'left of': 93275, 'right of': 93816, 'back': 22782, 'front': 22712, 'corner': 88657, 'behind': 15459, 'front of': 15535, 'center': 22599, 'left': 21099, 'next': 10863, 'right': 21149}


In [34]:
def is_valid_point(point_cords, limit, gt):
    """Check a list of coordinate strings against one bound.

    Empty strings are skipped. With ``gt`` falsy a value below ``limit`` fails
    the check; with ``gt`` truthy a value above ``limit`` fails it.
    Same logic as the three-argument ``is_valid_point`` defined in the earlier
    helper cell; repeated here so this cell works on its own.
    """
    for raw_cord in point_cords:
        if len(raw_cord) == 0:
            continue
        value = float(raw_cord)
        out_of_range = value > limit if gt else value < limit
        if out_of_range:
            return False
    return True

def data_filter(row):
    """Row predicate: True when every exo/top-view box listed in ``row`` lies inside the frame.

    Reads the '_'-joined ``<view>_image_all_objects_<corner>`` columns; ego
    views are skipped.
    """
    for view_name in view_names:
        if 'ego' in view_name:
            continue
        key_prefix = f'{view_name}_image_all_objects_'
        xs_start = row[key_prefix + 'start_point_x'].split('_')
        ys_start = row[key_prefix + 'start_point_y'].split('_')
        xs_end = row[key_prefix + 'end_point_x'].split('_')
        ys_end = row[key_prefix + 'end_point_y'].split('_')

        # (coordinates, bound, is_upper_bound), evaluated in this order.
        bound_checks = (
            (xs_start, 0, False), (ys_start, 0, False),
            (xs_end, 0, False), (ys_end, 0, False),
            (xs_start, 719, True), (ys_start, 479, True),
            (xs_end, 719, True), (ys_end, 479, True),
        )
        for cords, bound, is_upper in bound_checks:
            if not is_valid_point(cords, bound, is_upper):
                return False
    return True

def tiny_object_filter(row, min_size=6):
    """Row predicate that rejects scenes containing a very small object box.

    Args:
        row: mapping with the '_'-joined ``<view>_image_all_objects_<corner>``
            columns for the non-ego views.
        min_size: smallest accepted box width and height (default 6, the
            threshold previously hard-coded here).

    Returns:
        False as soon as any exo/top-view box has ``x2 - x1`` or ``y2 - y1``
        below ``min_size`` (this also rejects inverted boxes); True otherwise.
    """
    for view_name in view_names:
        if('ego' in view_name):
            continue

        start_point_xs = row[f'{view_name}_image_all_objects_start_point_x'].split('_')
        start_point_ys = row[f'{view_name}_image_all_objects_start_point_y'].split('_')
        end_point_xs = row[f'{view_name}_image_all_objects_end_point_x'].split('_')
        end_point_ys = row[f'{view_name}_image_all_objects_end_point_y'].split('_')

        # Index-based on purpose: a row whose lists differ in length raises
        # instead of being silently truncated.
        for i in range(len(start_point_xs)):
            x1 = float(start_point_xs[i])
            y1 = float(start_point_ys[i])
            x2 = float(end_point_xs[i])
            y2 = float(end_point_ys[i])

            if(((x2-x1)<min_size) or ((y2-y1)<min_size)):
                return False

    return True

def parallelize_dataframe(df, func, n_cores=4):
    """Apply ``func`` to row-wise chunks of ``df`` in worker processes and concatenate.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to process; it is cut into ``n_cores`` consecutive row chunks.
    func : callable
        Picklable callable that receives one chunk (a DataFrame, not a single
        row) and returns an object ``pd.concat`` can combine.
    n_cores : int, default 4
        Number of chunks and of worker processes.

    Returns
    -------
    The result of ``pd.concat`` over the per-chunk results, in chunk order.
    """
    # Split by row position rather than calling np.array_split on the frame:
    # the latter relies on DataFrame.swapaxes, deprecated in recent pandas.
    # Chunk boundaries are the same as np.array_split would produce.
    row_positions = np.array_split(np.arange(len(df)), n_cores)
    df_split = [df.iloc[positions] for positions in row_positions]

    # The context manager releases the workers even if func raises.
    with Pool(n_cores) as pool:
        results = pool.map(func, df_split)

    return pd.concat(results)

In [35]:
# For every split: load `{split}_new.csv`, drop the rows rejected by
# tiny_object_filter, and write the survivors to `{split}_except_tiny_object.csv`.
for split_name in split_names:
    df =  pd.read_csv(f'{base_dir_path}/{split_name}_new.csv')
    # Work on a copy so `df` keeps the unfiltered rows of the current split.
    df_filter = df.copy()
    print(f'before filter dataset len: {len(df_filter)}')
    # Row-wise boolean mask: True means the row is kept.
    mask = df_filter.apply(tiny_object_filter, axis=1)
    df_filter = df_filter[mask]
    print(f'after filter dataset len: {len(df_filter)}')
    # index=False: the row index is not written as an extra column.
    df_filter.to_csv(f'{base_dir_path}/{split_name}_except_tiny_object.csv', index=False)

before filter dataset len: 81179
after filter dataset len: 75303
before filter dataset len: 81045
after filter dataset len: 75891
before filter dataset len: 748009
after filter dataset len: 698128


In [7]:
# Load the unfiltered train split into `df` for interactive inspection.
df =  pd.read_csv(f'{base_dir_path}/train_new.csv')

In [25]:
# (rows, columns) of the frame currently bound to `df`.
df.shape

(748009, 37)

In [33]:
# Apply tiny_object_filter to the frame currently bound to `df` and preview the result.
df_filter = df.copy()
# NOTE(review): the disabled call below would hand whole DataFrame chunks to
# tiny_object_filter, which indexes a single row; it needs a chunk-level wrapper.
# df_filter = parallelize_dataframe(df_filter, tiny_object_filter, n_cores=30)
mask = df_filter.apply(tiny_object_filter, axis=1)
df_filter = df_filter[mask]
# NOTE(review): the recorded output of this cell (23591 rows) differs from the
# train count printed by the per-split filtering loop (698128) - presumably a
# stale output from an earlier run; re-run to confirm.
print(df_filter.shape)
df_filter.head()

(23591, 37)


Unnamed: 0,id,ego_view_image,exo_view_image,top_view_image,ego_view_image_object_start_point_x,ego_view_image_object_start_point_y,ego_view_image_object_end_point_x,ego_view_image_object_end_point_y,exo_view_image_object_start_point_x,exo_view_image_object_start_point_y,...,is_contrastive,view_name,setting_name,object_category,absolute_location_ego,absolute_location_exo,spatial_relation_ego,spatial_relation_exo,instruction_template,perspective
840,2096_163065,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/eg...,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/ex...,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/to...,140.156219,250.347626,289.98764,369.921242,499.537354,235.306671,...,0,top_view,both_gaze_gesture,Picnic Basket,back right corner,front left corner,left of,right of,template_1_2,
841,2096_163066,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/eg...,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/ex...,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/to...,140.156219,250.347626,289.98764,369.921242,499.537354,235.306671,...,0,top_view,both_gaze_gesture,Picnic Basket,back right corner,front left corner,left of,right of,ego_template_3_2,ego
842,2096_163067,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/eg...,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/ex...,5.11.2022.1.29.28.AM_2096/both_gaze_gesture/to...,140.156219,250.347626,289.98764,369.921242,499.537354,235.306671,...,0,top_view,both_gaze_gesture,Picnic Basket,back right corner,front left corner,left of,right of,exo_template_2_1,exo
843,2096_163068,5.11.2022.1.29.28.AM_2096/only_gaze/ego_view/c...,5.11.2022.1.29.28.AM_2096/only_gaze/exo_view/c...,5.11.2022.1.29.28.AM_2096/only_gaze/top_view/c...,150.499268,237.156464,291.865295,349.383118,499.537354,235.306671,...,0,top_view,only_gaze,Picnic Basket,back right corner,front left corner,left of,right of,template_null,
844,2096_163069,5.11.2022.1.29.28.AM_2096/only_gaze/ego_view/c...,5.11.2022.1.29.28.AM_2096/only_gaze/exo_view/c...,5.11.2022.1.29.28.AM_2096/only_gaze/top_view/c...,150.499268,237.156464,291.865295,349.383118,499.537354,235.306671,...,0,top_view,only_gaze,Picnic Basket,back right corner,front left corner,left of,right of,template_1_1,


In [19]:
# Reload the filtered CSVs. From here on `df` is a dict mapping split name to
# DataFrame (it was a single DataFrame in the cells above).
base_dir_path = '/project/CollabRoboGroup/datasets/official_data'
df = {}
for split_name in split_names:
    df[split_name] =  pd.read_csv(f'{base_dir_path}/{split_name}_except_tiny_object.csv')
    print(f'{split_name} shape', df[split_name].shape)

test shape (75303, 37)
valid shape (75891, 37)
train shape (698128, 37)


In [20]:
# Distinct values of `setting_name` in the train split.
df['train'].setting_name.unique()

array(['both_gaze_gesture', 'only_gaze', 'only_gesture', 'only_objects',
       'wrong_gaze_gesture'], dtype=object)

In [21]:
# columns

In [22]:
def get_data_id(data_id):
    """Return, as an int, the part of ``data_id`` that precedes its first underscore."""
    prefix, _sep, _rest = data_id.partition('_')
    return int(prefix)

In [24]:
import random 
# Build `data[split][sample_id] -> {column: value}` for every row, and for rows
# whose setting is in `con_setting_names` also add one synthetic contrastive
# sample: the row's own instruction combined with the remaining columns of
# another sample that has the same setting but a different sample id.
columns = df['train'].columns
data = {}
con_setting_names = ['only_gaze', 'only_gesture', 'only_objects']
for split_name in split_names:
    split_df = df[split_name]
    data[split_name] = {}
    data_len = split_df.shape[0]
    # NOTE: positions computed with `% data_len` are used as `.loc` labels, so
    # this assumes the default 0..n-1 index that read_csv produces.
    for index, row in split_df.iterrows():
        # Original sample, copied column by column (the id becomes the key).
        data[split_name][row['id']] = {col: row[col] for col in columns if col != 'id'}

        if row['setting_name'] in con_setting_names:
            pos_data_id = get_data_id(row['id'])
            con_data_index = index
            # Scan forward (wrapping around) for the first row with the same
            # setting and a different sample id. At most every other row is
            # visited once; the former `while True` never ended when no such
            # row existed.
            for attempt in range(data_len - 1):
                con_data_index = (con_data_index + 1) % data_len
                con_data_id = get_data_id(split_df.loc[con_data_index, 'id'])
                if (con_data_id != pos_data_id
                        and row['setting_name'] == split_df.loc[con_data_index, 'setting_name']):
                    break
            else:
                con_data_index = None

            if con_data_index is None:
                print(f'{split_name}: no contrastive partner found for {row["id"]}')
            else:
                key_id = f'{row["id"]}_{split_df.loc[con_data_index, "id"]}_{split_df.loc[con_data_index, "setting_name"]}'
                data[split_name][key_id] = {}
                for col in columns:
                    if col == 'id':
                        continue
                    elif col in ('is_contrastive', 'is_instruction_ambiguous'):
                        # Synthetic samples are always flagged with 1.
                        data[split_name][key_id][col] = 1
                    elif col in ('verbal_instruction', 'instruction_template'):
                        # The instruction stays that of the original row.
                        data[split_name][key_id][col] = row[col]
                    elif col == 'setting_name':
                        data[split_name][key_id][col] = f'wrong_{row["setting_name"]}'
                    else:
                        # Everything else comes from the partner row.
                        data[split_name][key_id][col] = split_df.loc[con_data_index, col]
        if index % 10000 == 0:
            print(f'{split_name}: {index}')
    

train: 250000
train: 260000
train: 270000
train: 280000
train: 290000
train: 300000
train: 310000
train: 320000
train: 330000
train: 340000
train: 350000
train: 360000
train: 370000
train: 380000
train: 390000
train: 400000
train: 410000
train: 420000
train: 430000
train: 440000
train: 450000
train: 460000
train: 470000
train: 480000
train: 490000
train: 500000
train: 510000
train: 520000
train: 530000
train: 540000
train: 550000
train: 560000
train: 570000
train: 580000
train: 590000
train: 600000
train: 610000
train: 620000
train: 630000
train: 640000
train: 650000
train: 660000
train: 670000
train: 680000
train: 690000


In [25]:
# Turn each split's `{sample_id: {column: value}}` dict into a DataFrame whose
# former index (the sample id) becomes a regular `id` column, then print the
# shape of the source frame next to the shape of the new one.
data_df = {}
for split_name in split_names:
    data_df[split_name] = (
        pd.DataFrame.from_dict(data[split_name], orient='index')
        .rename_axis('id')
        .reset_index()
    )
    print(df[split_name].shape, data_df[split_name].shape)

(75303, 37) (121262, 37)
(75891, 37) (122157, 37)
(698128, 37) (1123886, 37)


In [26]:
# Sanity check on the train split: `str.contains('only_gaze')` is a substring
# match, so it selects both 'only_gaze' and 'wrong_only_gaze' rows; the
# non-null counts are then shown per `is_contrastive` value.
tm_train = data_df['train'][data_df['train']['setting_name'].str.contains('only_gaze')]
tm_train.groupby('is_contrastive').count()

Unnamed: 0_level_0,id,ego_view_image,exo_view_image,top_view_image,ego_view_image_object_start_point_x,ego_view_image_object_start_point_y,ego_view_image_object_end_point_x,ego_view_image_object_end_point_y,exo_view_image_object_start_point_x,exo_view_image_object_start_point_y,...,is_perspective_necessary,view_name,setting_name,object_category,absolute_location_ego,absolute_location_exo,spatial_relation_ego,spatial_relation_exo,instruction_template,perspective
is_contrastive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,143251,143251,143251,143251,143251,143251,143251,143251,143251,143251,...,143251,143251,143251,143251,124596,124596,143251,143251,143251,85960
1,143251,143251,143251,143251,143251,143251,143251,143251,143251,143251,...,143251,143251,143251,143251,124583,124583,143251,143251,143251,0


In [27]:
# Persist each split of `data_df` as `{split}_except_tiny_object_other_con.csv`
# (index=False: the row index is not written).
for split_name in split_names:
    data_df[split_name].to_csv(f'{base_dir_path}/{split_name}_except_tiny_object_other_con.csv', index=False)

In [28]:
# Distinct `setting_name` values in the train split of `data_df`.
data_df['train'].setting_name.unique()

array(['both_gaze_gesture', 'only_gaze', 'wrong_only_gaze',
       'only_gesture', 'wrong_only_gesture', 'only_objects',
       'wrong_only_objects', 'wrong_gaze_gesture'], dtype=object)

In [16]:
# Load `{split}.csv` for every split into a fresh `df` dict (this replaces
# whatever `df` was bound to before) and print each shape.
base_dir_path = '/project/CollabRoboGroup/datasets/official_data'
df = {}
for split_name in split_names:
    df[split_name] =  pd.read_csv(f'{base_dir_path}/{split_name}.csv')
    print(f'{split_name} shape', df[split_name].shape)

test shape (75303, 37)
valid shape (75891, 37)
train shape (698128, 37)
