In [119]:
import csv
import argparse
import json
from collections import defaultdict, Counter
import re
from operator import itemgetter
MAX_WORDS=40

In [120]:
def process_repeat_dict(d):
    """Translate the flat ``loop`` answer in ``d`` into a repeat / stop-condition dict.

    ``d["loop"]`` selects the variant:

    * ``"ntimes"``       -> ``{"repeat_key": "FOR", ...}`` with optional
      ``repeat_count`` (taken from ``repeat_for``) and ``repeat_dir``.
    * ``"repeat_all"``   -> ``{"repeat_key": "ALL", ...}`` with optional ``repeat_dir``.
    * ``"forever"``      -> a ``NEVER`` stop condition.
    * ``"repeat_until"`` -> an ``ADJACENT_TO_BLOCK_TYPE`` stop condition, when the
      processed answer contains ``adjacent_to_block_type``.

    Raises:
        NotImplementedError: for any other ``loop`` value, and for ``repeat_until``
            answers that carry no ``adjacent_to_block_type``.
    """
    loop_kind = d["loop"]

    if loop_kind == "forever":
        return {"stop_condition": {"condition_type": "NEVER"}}

    if loop_kind == "ntimes":
        fields = process_dict(with_prefix(d, "loop.ntimes."))
        repeat = {"repeat_key": "FOR"}
        if 'repeat_for' in fields:
            repeat["repeat_count"] = fields["repeat_for"]
        if 'repeat_dir' in fields:
            repeat['repeat_dir'] = fields['repeat_dir']
        return repeat

    if loop_kind == "repeat_all":
        fields = process_dict(with_prefix(d, "loop.repeat_all."))
        repeat = {"repeat_key": "ALL"}
        if 'repeat_dir' in fields:
            repeat['repeat_dir'] = fields['repeat_dir']
        return repeat

    if loop_kind == 'repeat_until':
        fields = process_dict(with_prefix(d, 'loop.repeat_until.'))
        if 'adjacent_to_block_type' in fields:
            stop_condition = {
                "condition_type": 'ADJACENT_TO_BLOCK_TYPE',
                'block_type': fields['adjacent_to_block_type'],
            }
            return {"stop_condition": stop_condition}

    # Unknown loop kinds and unsupported repeat_until answers both end up here.
    raise NotImplementedError("Bad repeat dict option: {}".format(d["loop"]))



In [121]:
def process_get_memory_dict(d):
    """Build the ``filters`` / answer fields of a GET_MEMORY dict from flat answer keys.

    Args:
        d: flat dict that must contain a ``'filters'`` string. Only values of the
           form ``'type.<TYPE>[.<tag>]'`` are interpreted. ``d`` is not modified.

    Returns:
        A dict with a ``'filters'`` sub-dict and, depending on the type, the
        ``'answer_type'`` and ``'tag_name'`` entries:

        * ``ACTION`` / ``AGENT``: ``filters.temporal`` is ``'CURRENT'``, the answer
          type is ``'TAG'`` and the tag name is the segment after the type. For
          ``ACTION`` the keys under ``'filters.<filters value>.'`` are merged in.
        * ``REFERENCE_OBJECT``: the remaining keys are run through ``process_dict``
          and ``answer_type`` / ``tag_name`` are lifted out of its
          ``reference_object`` entry.

        When the ``'filters'`` value does not start with ``'type.'`` the result is
        ``{'filters': {}}``.

    Raises:
        KeyError: if ``'filters'`` is missing, or the REFERENCE_OBJECT answer yields
            no ``'reference_object'`` entry.
        IndexError: if an ACTION/AGENT value has no tag segment.
    """
    type_prefix = 'type.'
    filters_val = d['filters']
    out_dict = {'filters': {}}
    if not filters_val.startswith(type_prefix):
        return out_dict

    parts = filters_val[len(type_prefix):].split('.')
    type_val = parts[0]
    if type_val in ('ACTION', 'AGENT'):
        out_dict['filters']['temporal'] = 'CURRENT'
        out_dict['answer_type'] = 'TAG'
        out_dict['tag_name'] = parts[1]  # the name of the tag is the second segment
        if type_val == 'ACTION':
            out_dict['filters'].update(with_prefix(d, 'filters.' + filters_val + '.'))
    elif type_val == 'REFERENCE_OBJECT':
        # Work on a filtered copy instead of popping 'filters' from the caller's dict.
        remaining = {k: v for k, v in d.items() if k != 'filters'}
        ref_dict = process_dict(remove_key_prefixes(remaining, ['filters.type.']))
        ref_obj = ref_dict['reference_object']
        for field in ('answer_type', 'tag_name'):
            if field in ref_obj:
                out_dict[field] = ref_obj.pop(field)
        out_dict['filters'].update(ref_dict)

    out_dict['filters']['type'] = type_val
    return out_dict

In [122]:
def remove_prefix(text, prefix):
    """Return ``text`` with a leading ``prefix`` removed.

    When ``text`` does not start with ``prefix`` it is returned unchanged
    (previously the function fell off the end and returned ``None``).
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


In [123]:
def handle_get_memory(d):
    """Build a GET_MEMORY dialogue dict from the ``action_type.ANSWER.*`` keys of ``d``.

    The result starts with ``'dialogue_type': 'GET_MEMORY'`` and is then extended
    with whatever ``process_get_memory_dict`` returns for the stripped keys.
    """
    answer_fields = with_prefix(d, "action_type.ANSWER.")
    return {'dialogue_type': 'GET_MEMORY', **process_get_memory_dict(answer_fields)}


In [124]:
def snake_case(s):
    """Convert a camelCase string to snake_case.

    An underscore is inserted between an ASCII lowercase letter and the ASCII
    uppercase letter directly following it; the result is then lowercased.
    """
    with_underscores = re.sub("([a-z])([A-Z])", "\\1_\\2", s)
    return with_underscores.lower()


In [125]:
def with_prefix(d, prefix):
    """Return the entries of ``d`` whose key starts with ``prefix``, with the prefix stripped.

    Entries whose value is ``""``, ``None`` or the string ``"None"`` are left out.
    ``d`` itself is not modified.
    """
    empty_values = ("", None, "None")
    cut = len(prefix)
    return {
        key[cut:]: value
        for key, value in d.items()
        if key.startswith(prefix) and value not in empty_values
    }

In [126]:
''' this function removes certain prefixes from keys and renames the key to be: key with text following 
the prefix in the dict'''
def remove_key_prefixes(d, ps):
    
    for p in ps:
        d = d.copy()
        rm_keys = []
        add_items = []
        # print(p, d)
        for k, v in d.items():
            if k.startswith(p):
                rm_keys.append(k)
                add_items.append((k[len(p) :], v))
        for k in rm_keys:
            del d[k]
        for k, v in add_items:
            d[k] = v
    return d

In [127]:
def fix_spans_due_to_empty_words(action_dict, words):
    """Remove empty-string words and shift the span indices in ``action_dict`` to match.

    Both arguments are modified in place and also returned.

    Span values are recognised in two shapes, at any nesting depth of ``action_dict``:

    * a list of ``[start, end]`` integer pairs (the shape ``process_dict`` produces);
    * a single flat ``[start, end]`` integer pair, which is rewritten as a
      one-element list of pairs.

    For every empty word removed at position ``i``, each span bound ``>= i`` is
    decremented by one. Values of any other shape are left untouched.

    Args:
        action_dict: nested dict possibly containing span values.
        words: list of word strings; may contain ``""`` entries.

    Returns:
        The tuple ``(action_dict, words)``.
    """

    def is_pair(value):
        # A span is exactly two integer bounds.
        return (
            isinstance(value, (list, tuple))
            and len(value) == 2
            and all(isinstance(bound, int) for bound in value)
        )

    def shifted(pair, removed_index):
        return [bound - 1 if bound >= removed_index else bound for bound in pair]

    def reduce_span_vals_gte(d, i):
        for k, v in d.items():
            if isinstance(v, dict):
                reduce_span_vals_gte(v, i)
            elif is_pair(v):
                # Old flat format: normalise to a list holding one span.
                d[k] = [shifted(v, i)]
            elif isinstance(v, list) and v and all(is_pair(span) for span in v):
                d[k] = [shifted(span, i) for span in v]

    # Trailing empty strings do not affect any span index; just drop them.
    # The emptiness guard avoids an IndexError when every word is empty.
    while words and words[-1] == "":
        del words[-1]

    i = 0
    while i < len(words):
        if words[i] == "":
            reduce_span_vals_gte(action_dict, i)
            del words[i]
        else:
            i += 1

    return action_dict, words


In [128]:
def process_dict(d):
    """Convert a flat dict of dotted answer keys into a nested dict.

    Processing happens in three stages:

    1. A fixed list of helper prefixes (``name_check.``, ``rel_yaw.``, ...) is
       stripped from the keys with ``remove_key_prefixes``. That call works on a
       copy, so the dict passed in by the caller is not modified.
    2. If a ``location`` key is present, ``r["location"]`` is built: the location
       type is recorded (with a trailing ``_REL`` dropped for the agent/speaker
       relative types), a relative direction is picked up from the
       ``location.<TYPE>.relative_direction`` keys, legacy
       ``location.REFERENCE_OBJECT.steps*`` keys are renamed to ``location.steps*``,
       and sub-keys are processed recursively depending on the location type.
       A relative direction of ``EXACT`` or ``Other`` is removed again.
    3. Every remaining key is handled in turn: ``<name>.span#<idx>`` keys are
       collected into a list of ``[idx, idx]`` spans sorted by start index, other
       dotted keys are processed recursively and stored under the snake_case form
       of their first segment, and plain keys are copied as they are.

    Args:
        d: flat dict mapping dotted key strings to answer values.

    Returns:
        The nested dict built from ``d``.
    """
    r = {}

    d = remove_key_prefixes(d, ["name_check.",
                                "rel_yaw.",
                                "angle.check.",
                                "rel_pitch.",
                                "check.",
                                "yaw_check.",
                                "pitch_check.",
                                "location_check.",
                                "size_check.",
                                'colour_check.',
                                'block_type_check.',
                                'height_check.',
                                'length_check.',
                                'width_check.',
                                'tag_check.',
                                'thickness_check.',
                                'coordinates_check.yes.',
                                'coref_resolve_check.yes.',
                                'name_check.coref_resolve_check.yes.',
                                'name_check.coref_resolve_check.no.',
                                'coref_resolve_check.no.',
                                'depth_check.',
                                'reward.'])

    if "location" in d:
        # fix location type
        location = r["location"] = {"location_type": d["location"]}

        # fix relative direction
        reference_location_keys = ["location.REFERENCE_OBJECT.relative_direction",
                                   "location.SPEAKER_LOOK_REL.relative_direction",
                                   "location.SPEAKER_POS_REL.relative_direction",
                                   "location.AGENT_POS_REL.relative_direction"
                                   ]
        for k in d:
            if k in reference_location_keys:
                location["relative_direction"] = d.get(k)
                # Blank the key so with_prefix() skips it in later passes.
                d[k] = None

        # fix steps: rename "location.REFERENCE_OBJECT.steps*" to "location.steps*".
        # any() is required here; a bare generator expression is always truthy.
        if any(k.startswith("location.REFERENCE_OBJECT.steps") for k in d):
            new_d = {}
            for k, v in d.items():
                if k.startswith("location.REFERENCE_OBJECT.steps"):
                    parts = k.split(".")
                    new_key = ".".join([parts[0]] + parts[2:])
                    new_d[new_key] = v
                else:
                    new_d[k] = v
            d = new_d

        if location['location_type'] in ['AGENT_POS_REL', 'SPEAKER_POS_REL', 'SPEAKER_LOOK_REL']:
            # drop the trailing "_REL"
            location['location_type'] = location['location_type'][:-4]

        if location['location_type'] == 'CONTAINS_COREFERENCE':
            del location['location_type']
            location['contains_coreference'] = 'yes'
            location.update(process_dict(with_prefix(d, "location.")))
        elif location['location_type'] == 'coordinates_check':
            location['location_type'] = 'COORDINATES'
            location.update(process_dict(with_prefix(d, "location.")))
        elif location['location_type'] == 'coref_resolve_check':
            del location['location_type']
            location.update(process_dict(with_prefix(d, "location.")))
        elif location["location_type"] == "REFERENCE_OBJECT":
            # update steps in old data
            if "relative_direction" in location:
                direction_prefix = "location.REFERENCE_OBJECT.relative_direction.{}.".format(
                    location["relative_direction"])
                location.update(process_dict(with_prefix(d, direction_prefix)))

                # These sub-keys were just merged above; blank them so the generic
                # key loop below does not process them a second time.
                dirn = location["relative_direction"]
                base = 'location.REFERENCE_OBJECT.relative_direction.{}.'.format(dirn)
                consumed_prefixes = tuple(base + suffix for suffix in (
                    'reference_object.has_name.',
                    'reference_object.location.',
                    'reference_object.contains_coreference',
                    'reference_object_1.has_name.',
                    'reference_object_1.contains_coreference',
                    'reference_object_2.has_name.',
                    'reference_object_2.contains_coreference',
                ))
                for k in d:
                    if k.startswith(consumed_prefixes):
                        d[k] = None
            else:
                del location["location_type"]

        # no key for EXACT
        if ("relative_direction" in location) and (location["relative_direction"] in ("EXACT", "Other")):
            del location["relative_direction"]

    for k, v in d.items():

        if (
            k == "location"
            or k in ['COPY']
            or (k == "relative_direction" and v in ("EXACT", "NEAR", "Other"))
        ):
            continue

        # handle span: "<name>.span#<idx>" (the dot is escaped so only a literal
        # "." separates the name from "span#")
        if re.match(r"[^.]+\.span#[0-9]+", k):
            prefix, rest = k.split(".", 1)
            idx = int(rest.split("#")[-1])
            if prefix in r:
                r[prefix].append([idx, idx])
                r[prefix] = sorted(r[prefix], key=itemgetter(0))
            else:
                r[prefix] = [[idx, idx]]
        elif k == 'reference_object' and v == 'contains_coreference.yes':
            r['reference_object'] = {'contains_coreference': 'yes'}

        # handle nested dict
        elif "." in k:
            prefix, rest = k.split(".", 1)
            prefix_snake = snake_case(prefix)
            r[prefix_snake] = r.get(prefix_snake, {})
            r[prefix_snake].update(process_dict(with_prefix(d, prefix + ".")))

        # handle const value
        else:
            r[k] = v

    return r


In [129]:
def handle_put_memory(d):
    """Placeholder: ignores ``d`` and always returns a new empty dict."""
    return dict()
    

def handle_components(d, child_name):
    """Build the output dict for one annotated child of an action.

    Args:
        d: flat dict of answer keys for the action.
        child_name: which child is being processed. ``'tag_val'``, ``'filters'``
            and ``'location'`` get dedicated handling; any other name is processed
            generically from the ``'<child_name>.'`` keys.

    Returns:
        * ``'tag_val'``: ``{'upsert': {...}}``, with a ``memory_data`` entry when
          ``d`` has a ``memory_data`` value of the form ``'<key>.<value>'``.
        * ``'filters'``: ``{'filters': {'reference_object': {...}}}``.
        * ``'location'``: the result of ``process_dict(d)``. A ``location_type`` of
          ``SPEAKER_LOOK`` / ``AGENT_POS`` / ``SPEAKER_POS`` / ``COORDINATES`` is
          moved into ``location.reference_object.special_reference``. A COORDINATES
          location without coordinates is dropped entirely.
        * otherwise: ``{child_name: process_dict(...)}``, with the extra ``angle``
          entry removed from ``relative_pitch`` / ``relative_yaw`` when the
          corresponding span is present.
    """
    output = {}
    if child_name == 'tag_val':
        output['upsert'] = {}
        r = output['upsert']
        if 'memory_data' in d:
            r['memory_data'] = {}
            mem_type = d['memory_data'].split(".")
            r['memory_data'][mem_type[0]] = mem_type[1].upper()
            # .get(): the key written above is whatever the answer's first segment
            # was, so 'memory_type' is not guaranteed to exist.
            if r['memory_data'].get('memory_type') == 'REWARD':
                filtered = with_prefix(d, "memory_data.memory_type.reward.")
            else:
                filtered = with_prefix(d, "memory_data.memory_type.")
            r["memory_data"].update(filtered)

    elif child_name == 'filters':
        output['filters'] = {'reference_object': {}}
        if any(k.startswith('reference_object') and v == 'contains_coreference.yes' for k, v in d.items()):
            output['filters']['reference_object']['contains_coreference'] = 'yes'

        child_d = process_dict(with_prefix(d, "{}.".format('reference_object')))
        output['filters']['reference_object'].update(child_d)

    elif child_name == 'location':
        child_d = process_dict(d)
        # fix location type in location
        if 'location' in child_d and 'location_type' in child_d['location']:
            value = child_d['location'].pop('location_type')
            if value in ['SPEAKER_LOOK', 'AGENT_POS', 'SPEAKER_POS', 'COORDINATES']:
                updated_value = value  # SPEAKER_LOOK is kept unchanged
                if value == 'AGENT_POS':
                    updated_value = 'AGENT'
                elif value == 'SPEAKER_POS':
                    updated_value = 'SPEAKER'
                elif value == 'COORDINATES':
                    if 'coordinates' in child_d['location']:
                        updated_value = {'coordinates_span': child_d['location']['coordinates']}
                    else:
                        updated_value = None

                # add to reference object instead
                if updated_value is None:
                    del child_d['location']
                else:
                    reference_object = child_d['location'].setdefault('reference_object', {})
                    reference_object['special_reference'] = updated_value
                    child_d['location'].pop('coordinates', None)
        output.update(child_d)

    else:
        child_d = process_dict(with_prefix(d, "{}.".format(child_name)))
        # Remove an extra "angle" from yaw and pitch spans. Each is checked on its
        # own, and a missing "angle" key is tolerated.
        for angle_key, span_key in (('relative_pitch', 'pitch_span'), ('relative_yaw', 'yaw_span')):
            angle_d = child_d.get(angle_key, None)
            if angle_d and span_key in angle_d:
                angle_d.pop('angle', None)
        output[child_name] = child_d

    return output


In [130]:

def process_result(full_d):
    """Convert one CSV result row into ``(worker_id, action_dict, words, child_name)``.

    Args:
        full_d: one row of the results CSV as a dict. The row must provide
            ``WorkerId``, ``Input.intent``, ``Input.child`` and the columns
            ``Input.word0`` .. ``Input.word<MAX_WORDS - 1>``. The answers are read
            from the ``Answer.root.<intent>.*`` columns.

    Returns:
        A tuple ``(worker_id, action_dict, words, child_name)`` where ``words`` has
        its empty entries removed, the spans in ``action_dict`` are adjusted
        accordingly and ``child_name`` is the original ``Input.child`` value.
    """
    worker_id = full_d["WorkerId"]
    action_name = full_d['Input.intent']
    original_child_name = full_d['Input.child']
    d = with_prefix(full_d, "Answer.root." + action_name + ".")

    # receiver/source locations are processed as a plain location and renamed back.
    is_location_alias = original_child_name in ('receiver_location', 'source_location')
    child_name = 'location' if is_location_alias else original_child_name

    action_dict = handle_components(d, child_name)

    # handle_components may return no 'location' entry (for example when a
    # COORDINATES location has no coordinates), so only rename it when present.
    if is_location_alias and child_name in action_dict:
        action_dict[original_child_name] = action_dict.pop(child_name)

    # Fix empty words messing up spans
    words = [full_d["Input.word{}".format(x)] for x in range(MAX_WORDS)]
    action_dict, words = fix_spans_due_to_empty_words(action_dict, words)

    return worker_id, action_dict, words, original_child_name


In [141]:
# convert csv to txt
#
# Reads the results CSV and collects, per "<command>$$$<child>" key, up to three
# JSON-encoded action dicts (one per annotator row). The original sentence comes
# from the "Input.command" column and may contain highlight markup.

from pprint import pprint

result_dict = {}
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
folder_name = '/Users/kavyasrinet/Desktop/other_actions/5/toolB/'
f_name = folder_name + 'out.csv'
only_show_disagreements = True
sentence_mapping = {}

# The csv module expects files opened with newline='' so that newlines embedded
# in quoted fields are parsed correctly.
with open(f_name, "r", newline="") as f:
    for d in csv.DictReader(f):
        sentence = d['Input.command']
        worker_id, action_dict, words, child_name = process_result(d)
        print(sentence)
        pprint(action_dict)
        print("*" * 20)

        if action_dict is None:
            continue
        command = " ".join(words)
        command = command + "$$$" + child_name

        sentence_mapping[command] = sentence
        result = json.dumps(action_dict)

        # keep at most three answers per command/child pair
        answers = result_dict.setdefault(command, [])
        if len(answers) < 3:
            answers.append(result)


and add it to the <span style='background-color: #FFFF00'>tree</span>
{'receiver_location': {'reference_object': {'has_name': [[5, 5]]}}}
********************
chase <span style='background-color: #FFFF00'>cow</span>
{'location': {'reference_object': {'has_name': [[1, 1]]}}}
********************
collect the mushrooms <span style='background-color: #FFFF00'>here</span>
{'receiver_location': {'contains_coreference': 'yes'}}
********************
duck <span style='background-color: #FFFF00'>under</span> the <span style='background-color: #FFFF00'>overhang</span> of <span style='background-color: #FFFF00'>that</span> <span style='background-color: #FFFF00'>wall</span> to <span style='background-color: #FFFF00'>your</span> <span style='background-color: #FFFF00'>right</span>
{'location': {'reference_object': {'contains_coreference': 'yes',
                                   'has_name': [[3, 3], [6, 6]]},
              'relative_direction': 'DOWN'}}
********************
get redstone from your 

In [142]:
# all_cmds = []
# with open(folder_name + 'input.txt') as f:
#     for line in f.readlines():
#         cmd, _, _, child = line.strip().split("\t")
#         all_cmds.append(cmd+'$$$'+child)
        
# for l in all_cmds:
#     if l not in result_dict:
#         print(l)
# d1 = json.dumps({"reference_object": {"has_name": [[6, 6]], "has_block_type": [[5, 5]], "has_height" : [[19, 19]]}})
    
# result_dict['spawn a sufficient number of wooden planks to construct a wooden staircase that can allow me to climb approximately 20 feet at a 1.5-to-1 height to length ratio for the staircase$$$reference_object'] = [d1, d1, d1]
# # d2 = json.dumps({"reference_object": {"has_name": [[3, 3]], "has_colour": [[2, 2]]}})
# # result_dict['place the red block on the second landing$$$reference_object'] = [d2, d2, d2]
# number of distinct "<command>$$$<child>" keys collected so far
print(len(result_dict))

14


In [143]:
# write to txt
# One line per entry: command, child and three tab-separated JSON answers.
import json
f_name = folder_name + 'out.txt'

with open(f_name, 'w') as outfile:
    for key, answers in result_dict.items():
        cmd, child = key.split("$$$")
        # When there are not exactly three answers, the first one is written three times.
        columns = answers if len(answers) == 3 else [answers[0]] * 3
        outfile.write("\t".join([cmd, child] + columns) + "\n")
    

In [144]:
def remove_definite_articles(cmd, d):
    """Return a copy of ``d`` without the spans that point at an article in ``cmd``.

    Despite the name, spans on ``'the'``, ``'a'`` and ``'an'`` are all removed. The
    match is case-sensitive and only the start index of each span is looked up.

    Args:
        cmd: the command text; it is split on whitespace to get the words that the
            span indices refer to.
        d: a (possibly nested) dict whose list values are lists of
            ``[start, end]`` spans, or a string holding such a dict. Strings are
            parsed as JSON first, falling back to a Python literal.

    Returns:
        A new dict with the same structure. List values are filtered, nested dicts
        are processed recursively and every other value is copied unchanged.
    """
    if isinstance(d, str):
        # Imported locally so the function does not depend on a later notebook cell.
        import ast
        import json
        try:
            # The strings handled here are produced by json.dumps.
            d = json.loads(d)
        except ValueError:
            d = ast.literal_eval(d)

    words = cmd.split()
    articles = ('the', 'a', 'an')
    new_d = {}
    for k, v in d.items():
        if isinstance(v, list):
            new_d[k] = [span for span in v if words[span[0]] not in articles]
        elif isinstance(v, dict):
            new_d[k] = remove_definite_articles(cmd, v)
        else:
            new_d[k] = v
    return new_d

In [145]:
# construct counter from txt
# For every "<command>$$$<child>" key, count how often each answer occurs once
# the article spans have been removed.
result_counts = defaultdict(Counter)
import ast
f_name = folder_name + 'out.txt'
with open(f_name) as in_data:
    for raw_line in in_data:
        cmd, child, first, second, third = raw_line.strip().split("\t")
        key = cmd + "$$$" + child
        for answer in (first, second, third):
            cleaned = remove_definite_articles(cmd, answer)
            result_counts[key][json.dumps(cleaned)] += 1
print(len(result_counts))

14


In [146]:
# compute agreements and disagreements
# A command counts as agreed when some answer was given at least
# `num_agreements` times; otherwise all of its answers are kept as a disagreement.
no_agreement = 0
num_agreements = 2
agreement = 0
only_show_disagreements = False
disagreement = defaultdict(Counter)
all_agreements_dict = {}
for command, counts in sorted(result_counts.items()):
    agreed_results = [result for result, count in counts.items() if count >= num_agreements]

    if not agreed_results:
        if only_show_disagreements:
            print(command)
        disagreement[command] = counts
        no_agreement += 1
        continue

    if only_show_disagreements:
        continue

    for result in agreed_results:
        all_agreements_dict[command] = result
        agreement += 1

print(agreement)
print(no_agreement)

14
0


In [147]:
# write out agreements to a file
## format is : command child dict
print(folder_name)
ag = str(agreement)
f = folder_name + ag + '_agreements.txt'
with open(f, 'w') as outfile:
    for key, agreed in all_agreements_dict.items():
        cmd, child = key.split('$$$')
        outfile.write("\t".join((cmd, child, agreed)) + "\n")

/Users/kavyasrinet/Desktop/other_actions/5/toolB/


In [148]:
# write disagreements to a file
# Each entry is a "command<TAB>child" line, then one answer per line, then two blank lines.
disag = str(no_agreement)
f = folder_name + disag + '_disagreements.txt'
with open(f, 'w') as outfile:
    for key, answers in disagreement.items():
        cmd, child = key.split('$$$')
        outfile.write(cmd + "\t" + child + "\n")
        outfile.writelines(item + "\n" for item in answers)
        outfile.write("\n" + "\n")
        

In [149]:
import ast
from operator import itemgetter

from pprint import pprint

import ast
from operator import itemgetter

def resolve_spans(words, dicts):
    """Replace span indices with the words they point at, for display.

    Args:
        words: list of the command's words.
        dicts: mapping from a dict literal string (two levels deep:
            ``{child: {key: value}}``) to an arbitrary value such as a count.

    Returns:
        A dict keyed by ``str()`` of each resolved dict, mapping to the original
        value. Within each child:

        * list values become the list of words for their single-word spans
          (``start == end``); multi-word spans are dropped;
        * a ``'repeat'`` value with a ``'stop_condition'`` has its ``block_type``
          indices replaced by words; a ``'repeat'`` value without one is dropped;
        * every other value is copied unchanged.
    """
    result = {}
    for literal, val in dicts.items():
        parsed = ast.literal_eval(literal)
        resolved = {}
        for child, fields in parsed.items():
            inner = {}
            for k, v in fields.items():
                if type(v) is list:
                    inner[k] = [words[span[0]] for span in v if span[0] == span[1]]
                elif k != 'repeat':
                    inner[k] = v
                elif 'stop_condition' in v:
                    stop = v['stop_condition']
                    condition = {'condition_type': stop['condition_type']}
                    block_span = stop['block_type']
                    if block_span[0] == block_span[1]:
                        condition['block_type'] = [words[block_span[0]]]
                    else:
                        condition['block_type'] = [words[index] for index in block_span]
                    inner['repeat'] = {'stop_condition': condition}
            resolved[child] = inner
        result[str(resolved)] = val
    return result

# Print every disagreement: the original sentence, the command with its child
# name, and each distinct answer with span indices replaced by words.
for key, counts in disagreement.items():
    words = key.split()
    # The last whitespace-separated token carries the "$$$<child>" suffix.
    parts = words[-1].split("$$$")
    print(sentence_mapping[key])
    words[-1] = parts[0]
    child_name = parts[1]
    command = " ".join(words)
    resolved = resolve_spans(words, counts)
    print(command, child_name)
    for answer_repr in resolved:
        pprint(ast.literal_eval(answer_repr))
        print("-" * 10)
    print("*" * 30)

In [150]:
# Concatenate the agreements file and the disagreements file, stripping
# surrounding whitespace from each line, into all_agreements.txt.
with open(folder_name+'/all_agreements.txt', 'w') as merged, \
     open(folder_name + ag + '_agreements.txt') as agreements_file, \
     open(folder_name + disag + '_disagreements.txt') as disagreements_file:
    for source in (agreements_file, disagreements_file):
        for line in source:
            merged.write(line.strip() + "\n")