In [1]:
import csv
import argparse
import json
from collections import defaultdict, Counter
import re
from operator import itemgetter
MAX_WORDS=40

In [2]:
def remove_prefix(text, prefix):
    """Return `text` with a leading `prefix` removed.

    If `text` does not start with `prefix` it is returned unchanged
    (previously the function fell through and returned None).
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


In [3]:
def snake_case(s):
    """Convert a camelCase string to snake_case.

    An underscore is inserted between every lowercase letter that is
    immediately followed by an uppercase letter, then the whole string is
    lowercased.
    """
    separated = re.sub(r"([a-z])([A-Z])", r"\1_\2", s)
    return separated.lower()


In [4]:
def with_prefix(d, prefix):
    """Select the entries of `d` whose key starts with `prefix`.

    Entries whose value is "", None or the string "None" are dropped.
    In the returned (new) dict each key is the part of the original key
    that follows `prefix`.
    """
    ignored_values = ("", None, "None")
    cut = len(prefix)
    return {
        key[cut:]: value
        for key, value in d.items()
        if key.startswith(prefix) and value not in ignored_values
    }

In [5]:
def remove_key_prefixes(d, ps):
    """Strip each prefix in `ps` from the start of the keys of `d`.

    The prefixes are applied one after another, each on the result of the
    previous one. For a given prefix, every key that starts with it is
    removed and re-inserted (after the untouched keys) under the text that
    follows the prefix. The input dict is not modified; when `ps` is empty
    the input dict itself is returned.
    """
    for p in ps:
        untouched = {}
        renamed = []
        for key, value in d.items():
            if key.startswith(p):
                renamed.append((key[len(p):], value))
            else:
                untouched[key] = value
        # Renamed entries go last; a clash with an untouched key overwrites
        # that key's value in place.
        untouched.update(renamed)
        d = untouched
    return d

In [6]:
def fix_spans_due_to_empty_words(action_dict, words):
    """Drop empty words and shift span indices so they still line up.

    Both arguments are modified in place and also returned as
    `(action_dict, words)`.

    Every "" entry of `words` is deleted. For each deleted entry at
    position `i`, every span index `>= i` found anywhere in the (possibly
    nested) `action_dict` is decremented by one. Two span layouts are
    recognised:

    * a flat pair of ints `[a, b]`, which is rewritten as `[[a, b]]`;
    * a list of int pairs `[[a, b], ...]`, the layout produced by
      `process_dict`.

    Any other value is left untouched. An empty or all-empty `words` list
    yields `[]` instead of raising IndexError.
    """

    def is_span(value):
        return (
            isinstance(value, (list, tuple))
            and len(value) == 2
            and all(isinstance(idx, int) for idx in value)
        )

    def shifted(span, i):
        return [idx - 1 if idx >= i else idx for idx in span]

    def reduce_span_vals_gte(d, i):
        for k, v in d.items():
            if isinstance(v, dict):
                reduce_span_vals_gte(v, i)
            elif is_span(v):
                # flat [a, b] span: normalise to the nested layout
                d[k] = [shifted(v, i)]
            elif isinstance(v, (list, tuple)) and v and all(is_span(s) for s in v):
                # nested layout; the original try/except silently skipped
                # these, so such spans were never adjusted
                d[k] = [shifted(s, i) for s in v]

    # remove trailing empty strings (no span can point past them)
    while words and words[-1] == "":
        del words[-1]

    # remove the remaining empty strings, fixing spans as we go
    i = 0
    while i < len(words):
        if words[i] == "":
            reduce_span_vals_gte(action_dict, i)
            del words[i]
        else:
            i += 1

    return action_dict, words

In [7]:
''' check for the following use cases:
1. plain ref object with coref
2. ref obj with name, color etc
3. ref obj with name, color + filters
4. ref obj with name, filters and location
5. ref obj with filters only
'''

' check for the following use cases:\n1. plain ref object with coref\n2. ref obj with name, color etc\n3. ref obj with name, color + filters\n4. ref obj with name, filters and location\n5. ref obj with filters only\n'

In [9]:
def process_dict(d):
    """Recursively turn a flat, dot-separated annotation dict into a nested dict.

    Steps, in order:

    1. A fixed list of answer-path prefixes is stripped from the start of
       the keys (see `remove_key_prefixes`); this also gives a private copy
       of `d`, so the caller's dict is never modified.
    2. If a "location" key is present, a `r["location"]` sub-dict is built
       (location type, relative direction, nested reference object).
    3. A "quantity" value that only names one of the stripped prefixes is
       replaced by an empty dict so nested "quantity.*" keys can fill it.
    4. Every remaining key is handled as one of:
       * `<name>.span#<idx>`: appended to `r[<name>]` as `[idx, idx]`,
         kept sorted by start index;
       * a dotted key: the part before the first dot (snake-cased) becomes
         a nested dict built by a recursive call;
       * a plain key: copied as is.

    Returns the nested dict `r`.
    """
    r = {}

    d = remove_key_prefixes(d, ["RELATIVE_DIRECTION.",
                                "distance_to.",
                                "NUM_BLOCKS.",
                                "colour_check.",
                                "ordinal_other.",
                                "arg_check_type.ranked.",
                                "arg_check_type.fixed.",
                                "measure_check.argmin.",
                                "measure_check.argmax.",
                                "measure_check.greater_than.",
                                "measure_check.less_than."])

    if "location" in d:
        # fix location type
        r["location"] = {"location_type": d["location"]}

        # fix relative direction: move it into r and blank the source key so
        # with_prefix() skips it in the recursive calls below
        reference_location_keys = ["location.REFERENCE_OBJECT.relative_direction",
                                   "location.SPEAKER_LOOK_REL.relative_direction",
                                   "location.SPEAKER_POS_REL.relative_direction",
                                   "location.AGENT_POS_REL.relative_direction"
                                   ]
        for k in d:
            if k in reference_location_keys:
                r["location"]["relative_direction"] = d.get(k)
                d[k] = None

        # fix steps: drop the second path component, e.g.
        # "location.REFERENCE_OBJECT.steps..." -> "location.steps..."
        # (the original tested a bare generator here, which is always truthy;
        # any() expresses the intended check and yields the same dict content)
        steps_prefix = "location.REFERENCE_OBJECT.steps"
        if any(k.startswith(steps_prefix) for k in d):
            new_d = {}
            for k, v in d.items():
                if k.startswith(steps_prefix):
                    parts = k.split(".")
                    new_key = ".".join([parts[0]] + parts[2:])
                    new_d[new_key] = v
                else:
                    new_d[k] = v

            d = new_d

        # "AGENT_POS_REL" -> "AGENT_POS" etc. (drop the trailing "_REL")
        if r['location']['location_type'] in ['AGENT_POS_REL', 'SPEAKER_POS_REL', 'SPEAKER_LOOK_REL']:
            r['location']['location_type'] = r['location']['location_type'][:-4]

        if r['location']['location_type'] == 'CONTAINS_COREFERENCE':
            del r['location']['location_type']
            r['location']['contains_coreference'] = 'yes'
            r["location"].update(process_dict(with_prefix(d, "location.")))
        elif r['location']['location_type'] == 'coordinates_check':
            r['location']['location_type'] = 'COORDINATES'
            r["location"].update(process_dict(with_prefix(d, "location.")))
        elif r['location']['location_type'] == 'coref_resolve_check':
            del r['location']['location_type']
            r["location"].update(process_dict(with_prefix(d, "location.")))
        elif r["location"]["location_type"] == "REFERENCE_OBJECT":
            if "relative_direction" in r["location"]:
                # pull in everything nested under the chosen direction
                x = process_dict(with_prefix(d, "location.REFERENCE_OBJECT.relative_direction.{}.".format(r["location"]["relative_direction"])))
                r["location"].update(x)
                # read the direction again: the update above may have replaced it
                dirn = r["location"]["relative_direction"]
                base = "location.REFERENCE_OBJECT.relative_direction.{}.".format(dirn)
                consumed_prefixes = tuple(base + suffix for suffix in (
                    "reference_object.has_name.",
                    "reference_object.location.",
                    "reference_object.contains_coreference",
                    "reference_object_1.has_name.",
                    "reference_object_1.contains_coreference",
                    "reference_object_2.has_name.",
                    "reference_object_2.contains_coreference",
                ))
                # blank the keys matching these prefixes so the generic loop
                # below (via with_prefix) does not pick them up again
                for k in d:
                    if k.startswith(consumed_prefixes):
                        d[k] = None
            else:
                del r["location"]["location_type"]
        # no key for EXACT
        if ("relative_direction" in r["location"]) and (r["location"]["relative_direction"] in ("EXACT", "Other")):
            del r["location"]["relative_direction"]

    if ('quantity' in d) and (d['quantity'] in ['RELATIVE_DIRECTION', 'distance_to', 'NUM_BLOCKS']):
        d['quantity'] = {}

    for k, v in d.items():

        # skip processing these
        if (
            k == "location"
            or k in ['COPY']
            or k in ['block_filters0']
            or (k == "relative_direction" and v in ("EXACT", "NEAR", "Other"))
            or (k == 'ordinal' and v == 'ordinal_other')
        ):
            continue

        # handle span
        if re.match("[^.]+.span#[0-9]+", k):
            prefix, rest = k.split(".", 1)
            idx = int(rest.split("#")[-1])
            if prefix in r:
                r[prefix].append([idx, idx])
                r[prefix] = sorted(r[prefix], key=itemgetter(0))
            else:
                r[prefix] = [[idx, idx]]
        # handle nested dict
        elif "." in k:
            prefix, rest = k.split(".", 1)
            prefix_snake = snake_case(prefix)
            r[prefix_snake] = r.get(prefix_snake, {})
            # NOTE(review): debug trace; kept because the recorded cell
            # outputs include these lines
            print(k, v)
            print(prefix_snake, r[prefix_snake])
            r[prefix_snake].update(process_dict(with_prefix(d, prefix + ".")))

        # handle const value
        else:
            r[k] = v

    return r


In [10]:
# Smoke test: a ranked/argmax annotation whose ordinal is given as a span on word index 3.
process_dict({'arg_check_type': 'ranked', 'arg_check_type.ranked.measure_check': 'argmax', 'arg_check_type.ranked.measure_check.argmax.ordinal': 'ordinal_other', 'arg_check_type.ranked.measure_check.argmax.ordinal.ordinal_other.ordinal_span.span#3': 'on', 'quantity': 'BORN_TIME'})

ordinal.ordinal_other.ordinal_span.span#3 on
ordinal {}


{'arg_check_type': 'ranked',
 'quantity': 'BORN_TIME',
 'measure_check': 'argmax',
 'ordinal': {'ordinal_span': [[3, 3]]}}

In [11]:
   

def handle_components(d):
    """Build the comparison dict for one annotation.

    `d` is the flat answer dict of a single annotation. The top-level
    "arg_check_type" entry is ignored and the rest is nested with
    `process_dict`. If the result has a "measure_check" value, the return
    value is `{<measure_check>: {...}}` where the inner dict carries the
    "quantity", "ordinal" and "number" entries that are present (in that
    order). Without a "measure_check" an empty dict is returned.

    `d` itself is left unmodified.
    """
    # Filter instead of pop() so the caller's dict is not mutated.
    fields = {k: v for k, v in d.items() if k != 'arg_check_type'}
    updated_dict = process_dict(fields)

    output = {}
    if 'measure_check' not in updated_dict:
        return output

    ranking_measure = updated_dict['measure_check']
    comparison = {}
    # "ordinal" accompanies argmax/argmin, "number" accompanies
    # greater_than/less_than. A missing "quantity" is skipped rather than
    # raising KeyError on an incomplete annotation.
    for field in ('quantity', 'ordinal', 'number'):
        if field in updated_dict:
            comparison[field] = updated_dict[field]
    output[ranking_measure] = comparison
    return output

def process_result(full_d):
    """Convert one CSV row into `(worker_id, action_dict, words)`.

    * `worker_id` is the row's "WorkerId" value.
    * `action_dict` is built by `handle_components` from the columns
      prefixed with "Answer.root.".
    * `words` are the non-empty values of "Input.word0" ..
      "Input.word<MAX_WORDS - 1>"; span indices in `action_dict` are
      shifted to account for the removed empty words.

    Missing or None word columns are treated as empty words, so rows with
    fewer than MAX_WORDS word columns are accepted. The row is no longer
    required to have "Input.child" / "Input.ref_child" columns, whose
    values were read but never used.
    """
    worker_id = full_d["WorkerId"]
    d = with_prefix(full_d, "Answer.root.")
    # NOTE(review): debug trace; kept because the recorded cell outputs
    # include it
    print(d)
    action_dict = handle_components(d)

    # Fix empty words messing up spans
    words = [full_d.get("Input.word{}".format(x)) or "" for x in range(MAX_WORDS)]
    action_dict, words = fix_spans_due_to_empty_words(action_dict, words)

    return worker_id, action_dict, words


In [13]:
# convert csv to txt
#
# Reads the annotation CSV row by row, converts every row to an action dict
# and collects up to three JSON-serialised annotations per command.
#   result_dict:      command -> list of JSON strings (at most 3)
#   sentence_mapping: command -> original "Input.command" text
#                     (may contain <span> highlight markup)
from pprint import pprint
import json

result_dict = {}
# NOTE(review): machine-specific absolute path; adjust before re-running
folder_name = '/Users/kavyasrinet/Desktop/other_actions/0/toolD/'
f_name = folder_name + 'out.csv'
only_show_disagreements=True
sentence_mapping = {}
with open(f_name, "r") as f:
    r = csv.DictReader(f)
    for i, d in enumerate(r):
        sentence = d['Input.command']

        worker_id, action_dict, words = process_result(d)
        print(sentence)
        pprint(action_dict)
        print("*"*20)

        if action_dict is None:
            continue

        command = " ".join(words)
        sentence_mapping[command] = sentence
        result = json.dumps(action_dict)

        # keep only the first three annotations seen for a command
        annotations = result_dict.setdefault(command, [])
        if len(annotations) != 3:
            annotations.append(result)


{'arg_check_type': 'ranked', 'arg_check_type.ranked.measure_check': 'argmin', 'arg_check_type.ranked.measure_check.argmin.ordinal': 'FIRST', 'quantity': 'distance_to', 'quantity.distance_to.location': 'SPEAKER_POS'}
quantity.distance_to.location SPEAKER_POS
quantity {}
can you find the <span style='background-color: #FFFF00'>closest</span> water source
{'argmin': {'ordinal': 'FIRST',
            'quantity': {'location': {'location_type': 'SPEAKER_POS'}}}}
********************
{'arg_check_type': 'ranked', 'arg_check_type.ranked.measure_check': 'argmin', 'arg_check_type.ranked.measure_check.argmin.ordinal': 'FIRST', 'quantity': 'distance_to', 'quantity.distance_to.location': 'SPEAKER_POS'}
quantity.distance_to.location SPEAKER_POS
quantity {}
find the friend that is <span style='background-color: #FFFF00'>closest to me</span>
{'argmin': {'ordinal': 'FIRST',
            'quantity': {'location': {'location_type': 'SPEAKER_POS'}}}}
********************


In [14]:
# write to txt
#
# One line per command: command <TAB> child <TAB> annotation x 3.
# A command that does not have exactly three annotations gets its first
# annotation repeated three times.
import json
f_name = folder_name + 'out.txt'
with open(f_name, 'w') as outfile:
    for k, v in result_dict.items():
        cmd = k
        child = "comparison"
        if len(v) == 3:
            items = "\t".join(v)
        else:
            items = "\t".join([v[0]] * 3)
        outfile.write("\t".join([cmd, child, items]) + "\n")

In [15]:
def remove_definite_articles(cmd, d):
    """Drop spans that point at an article ('the', 'a', 'an') of `cmd`.

    `cmd` is the command text; it is split on whitespace to get the words.
    `d` is an action dict, or its string form (JSON as written by
    `json.dumps`, or a Python literal). A new dict is returned in which:

    * a list value is treated as a list of spans; spans whose first index
      points at an article are removed;
    * a `['yes' | 'no', payload]` value gets the same treatment applied to
      `payload` when it is a list or a dict, and is kept as is otherwise;
    * dict values are processed recursively;
    * every other value is copied unchanged.

    Empty lists are passed through (they used to raise IndexError).
    """
    articles = ('the', 'a', 'an')
    words = cmd.split()

    if type(d) == str:
        # The strings handled here are produced by json.dumps, so parse them
        # as JSON first (ast.literal_eval rejects null/true/false). Fall back
        # to a Python literal to stay compatible with repr-style input.
        import ast
        try:
            d = json.loads(d)
        except ValueError:
            d = ast.literal_eval(d)

    def without_article_spans(spans):
        # span[0] and span[1] are the same, so checking span[0] is enough
        return [span for span in spans if words[span[0]] not in articles]

    new_d = {}
    for k, v in d.items():
        if type(v) == list and v and v[0] in ('yes', 'no'):
            # level-1 form: ['yes' | 'no', payload]
            payload = v[1] if len(v) > 1 else None
            if type(payload) == list:
                new_d[k] = [v[0], without_article_spans(payload)]
            elif type(payload) == dict:
                new_d[k] = [v[0], remove_definite_articles(cmd, payload)]
            else:
                new_d[k] = v
        elif type(v) == list:
            new_d[k] = without_article_spans(v)
        elif type(v) == dict:
            new_d[k] = remove_definite_articles(cmd, v)
        else:
            new_d[k] = v

    return new_d

In [16]:
# construct counter from txt
#
# result_counts maps "<command>$$$<child>" to a Counter of the
# (article-stripped, JSON-serialised) annotations found on that line.
import ast
result_counts = defaultdict(Counter)
f_name = folder_name + 'out.txt'
with open(f_name) as in_data:
    for line in in_data:
        line = line.strip()
        # every line carries exactly: command, child, three annotations
        cmd, child, r1, r2, r3 = line.split("\t")
        counter_key = cmd + "$$$" + child
        for r in (r1, r2, r3):
            r_new = remove_definite_articles(cmd, r)
            result_counts[counter_key][json.dumps(r_new)] += 1
print(len(result_counts))

2


In [17]:
# compute agreements and disagreements
#
# A command "agrees" when at least `num_agreements` annotators produced the
# same result; otherwise all of its results are kept in `disagreement`.
no_agreement = 0
num_agreements = 2
agreement = 0
only_show_disagreements = False
disagreement = defaultdict(Counter)
all_agreements_dict = {}
for command, counts in sorted(result_counts.items()):
    agreed_results = [
        result for result, count in counts.items() if count >= num_agreements
    ]

    if not agreed_results:
        if only_show_disagreements:
            print(command)
        disagreement[command] = counts
        no_agreement += 1
        continue

    if only_show_disagreements:
        continue

    for result in agreed_results:
        all_agreements_dict[command] = result
        agreement += 1

print(agreement)
print(no_agreement)

2
0


In [18]:
# write out agreements to a file
## one line per command: command <TAB> child <TAB> agreed dict (JSON)
print(folder_name)
f = folder_name + '14_agreements.txt'
with open(f, 'w') as outfile:
    for k, v in all_agreements_dict.items():
        # keys were built as "<command>$$$<child>"
        cmd, child = k.split('$$$')
        outfile.write("\t".join((cmd, child, v)) + "\n")

/Users/kavyasrinet/Desktop/other_actions/0/toolD/


In [19]:
# write disagreements to a file
#
# Per command: a "command <TAB> child" header line, one line per distinct
# annotation, then two blank lines.
f = folder_name + '0_disagreements.txt'
with open(f, 'w') as outfile:
    for k, v in disagreement.items():
        cmd, child = k.split('$$$')
        record = [cmd + "\t" + child]
        record.extend(v)
        record.extend(["", ""])
        outfile.write("".join(entry + "\n" for entry in record))
        

In [20]:
import ast
from operator import itemgetter

from pprint import pprint

import ast
from operator import itemgetter

def resolve_spans(words, dicts):
    """Replace word-index spans by the words they point at.

    `dicts` maps the string form of a two-level action dict
    (`{outer_key: {field: value}}`) to an arbitrary value (e.g. a count).
    The returned dict maps the string form of the resolved action dict to
    that same value. Inside each inner dict:

    * a list value is read as a list of `[start, end]` spans; every span
      with `start == end` becomes `words[start]`, other spans are dropped;
    * a "repeat" value is kept only when it contains "stop_condition"; it
      is reduced to the stop condition's "condition_type" and its
      "block_type" resolved to words;
    * anything else is copied unchanged.
    """

    def words_for(spans):
        # only single-word spans are resolved
        return [words[span[0]] for span in spans if span[0] == span[1]]

    def resolved_repeat(repeat):
        stop = repeat['stop_condition']
        condition_type = stop['condition_type']
        block_type = stop['block_type']
        if block_type[0] == block_type[1]:
            block_words = [words[block_type[0]]]
        else:
            block_words = [words[idx] for idx in block_type]
        return {
            'stop_condition': {
                'condition_type': condition_type,
                'block_type': block_words,
            }
        }

    result = {}
    for serialized, val in dicts.items():
        resolved = {}
        for outer_key, fields in ast.literal_eval(serialized).items():
            inner = {}
            for k, v in fields.items():
                if type(v) == list:
                    inner[k] = words_for(v)
                elif k == 'repeat':
                    if 'stop_condition' in v:
                        inner['repeat'] = resolved_repeat(v)
                else:
                    inner[k] = v
            resolved[outer_key] = inner
        result[str(resolved)] = val
    return result

# Pretty-print every command the annotators disagreed on, followed by each
# distinct annotation with its spans resolved to words.
for command, counts in disagreement.items():
    # Keys look like "<command>$$$<child>". Split the suffix off before
    # looking up the sentence: sentence_mapping is keyed by the plain
    # command, so the original lookup raised KeyError here.
    command, _, child_name = command.partition("$$$")
    words = command.split()
    command = " ".join(words)
    # fall back to the command text if the sentence was not recorded
    print(sentence_mapping.get(command, command))
    c = resolve_spans(words, counts)
    print(command, child_name)
    for k, v in c.items():
        pprint(ast.literal_eval(k))
        print("-"*10)
    print("*"*30)

In [21]:
# Concatenate the agreements file and the disagreements file (in that
# order) into all_agreements.txt, stripping surrounding whitespace from
# every line.
with open(folder_name+'/all_agreements.txt', 'w') as f, \
     open(folder_name + '14_agreements.txt') as f1, \
     open(folder_name + '0_disagreements.txt') as f2:
    for source in (f1, f2):
        for line in source:
            f.write(line.strip() + "\n")