In [194]:
import csv
import argparse
import json
from collections import defaultdict, Counter
import re

from annotation_tool_1 import MAX_WORDS

In [195]:
def process_repeat_dict(d):
    """Build the repeat / stop-condition dict for an annotated loop.

    Dispatches on ``d["loop"]``:

    * ``"ntimes"``       -> ``{"repeat_key": "FOR"}`` plus ``repeat_count``
      (copied from the processed ``repeat_for`` entry) and ``repeat_dir``
      when those were annotated.
    * ``"repeat_all"``   -> ``{"repeat_key": "ALL"}`` plus ``repeat_dir`` when
      annotated.
    * ``"forever"``      -> a stop condition of type ``NEVER``.
    * ``"repeat_until"`` -> a stop condition of type ``ADJACENT_TO_BLOCK_TYPE``,
      carrying ``block_type`` when one was annotated.

    Raises:
        NotImplementedError: for any other loop value.
    """
    loop_kind = d["loop"]

    if loop_kind == "forever":
        return {"stop_condition": {"condition_type": "NEVER"}}

    if loop_kind == "ntimes":
        annotated = process_dict(with_prefix(d, "loop.ntimes."))
        result = {"repeat_key": "FOR"}
        if 'repeat_for' in annotated:
            result["repeat_count"] = annotated["repeat_for"]
        if 'repeat_dir' in annotated:
            result['repeat_dir'] = annotated['repeat_dir']
        return result

    if loop_kind == "repeat_all":
        annotated = process_dict(with_prefix(d, "loop.repeat_all."))
        result = {"repeat_key": "ALL"}
        if 'repeat_dir' in annotated:
            result['repeat_dir'] = annotated['repeat_dir']
        return result

    if loop_kind == 'repeat_until':
        annotated = process_dict(with_prefix(d, 'loop.repeat_until.'))
        # The condition type is the same whether or not a block type was given.
        stop_condition = {"condition_type": 'ADJACENT_TO_BLOCK_TYPE'}
        if 'adjacent_to_block_type' in annotated:
            stop_condition['block_type'] = annotated['adjacent_to_block_type']
        return {"stop_condition": stop_condition}

    raise NotImplementedError("Bad repeat dict option: {}".format(d["loop"]))



In [196]:
def process_get_memory_dict(d):
    """Convert the ANSWER-level annotation dict into GET_MEMORY output fields.

    ``d['filters']`` selects what is being asked about. Only values of the
    form ``type.<TYPE>[.<tag>]`` are interpreted:

    * ``ACTION`` / ``AGENT``: the filter is marked ``temporal: CURRENT``,
      ``answer_type`` is ``TAG`` and ``tag_name`` is the segment after the
      type. For ``ACTION`` the entries under ``filters.<filters value>.`` are
      merged into the filters as well.
    * ``REFERENCE_OBJECT``: the remaining entries (with the ``filters.type.``
      prefix stripped) are run through ``process_dict``; ``answer_type`` and
      ``tag_name`` are lifted out of its ``reference_object`` entry to the top
      level and the rest is merged into the filters.

    Any other filters value yields ``{'filters': {}}``.

    The input dict is not modified (the previous implementation popped
    ``'filters'`` from the caller's dict).

    Returns:
        dict with a ``filters`` entry and, where applicable, ``answer_type``
        and ``tag_name``.
    """
    filters_val = d['filters']
    out_dict = {'filters': {}}
    if not filters_val.startswith('type.'):
        return out_dict

    parts = remove_prefix(filters_val, 'type.').split('.')
    type_val = parts[0]
    if type_val in ['ACTION', 'AGENT']:
        out_dict['filters']['temporal'] = 'CURRENT'
        out_dict['answer_type'] = 'TAG'
        out_dict['tag_name'] = parts[1]  # the name of the tag follows the type
        if type_val == 'ACTION':
            out_dict['filters'].update(with_prefix(d, 'filters.' + filters_val + '.'))
    elif type_val in ['REFERENCE_OBJECT']:
        # Work on a copy without the 'filters' selector so the caller's dict
        # is left untouched.
        remaining = {k: v for k, v in d.items() if k != 'filters'}
        ref_dict = process_dict(remove_key_prefixes(remaining, ['filters.type.']))
        ref_obj = ref_dict['reference_object']
        for field in ('answer_type', 'tag_name'):
            if field in ref_obj:
                out_dict[field] = ref_obj.pop(field)
        out_dict['filters'].update(ref_dict)

    out_dict['filters']['type'] = type_val
    return out_dict

In [197]:
def remove_prefix(text, prefix):
    """Return ``text`` with a leading ``prefix`` stripped.

    When ``text`` does not start with ``prefix`` it is returned unchanged
    (the previous implementation fell off the end and returned ``None``,
    which made any chained string call on the result fail).
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


In [198]:
def handle_get_memory(d):
    """Build the GET_MEMORY dialogue dict from an annotation dict.

    The entries under ``action_type.ANSWER.`` are converted by
    ``process_get_memory_dict`` and merged on top of
    ``{'dialogue_type': 'GET_MEMORY'}``.
    """
    answer_fields = with_prefix(d, "action_type.ANSWER.")
    result = dict(dialogue_type='GET_MEMORY')
    result.update(process_get_memory_dict(answer_fields))
    return result


In [199]:
def snake_case(s):
    """Convert ``s`` to snake case.

    An underscore is inserted wherever a lowercase ASCII letter is directly
    followed by an uppercase ASCII letter, then the whole string is lowercased.
    """
    lower_then_upper = re.compile("([a-z])([A-Z])")
    underscored = lower_then_upper.sub("\\1_\\2", s)
    return underscored.lower()


In [200]:
def with_prefix(d, prefix):
    """Select the entries of ``d`` whose key starts with ``prefix``.

    The returned dict maps the remainder of each matching key (everything
    after the leading ``prefix``) to its value. Entries whose value is
    ``""``, ``None`` or the string ``"None"`` are dropped.

    Only the leading occurrence of ``prefix`` is removed. The previous
    ``k.split(prefix)[1]`` cut the key at the *second* occurrence as well, so
    ``"location.REFERENCE_OBJECT.location.x"`` with prefix ``"location."``
    became ``"REFERENCE_OBJECT."`` instead of
    ``"REFERENCE_OBJECT.location.x"``.
    """
    empty_values = ("", None, "None")
    offset = len(prefix)
    return {
        k[offset:]: v
        for k, v in d.items()
        if k.startswith(prefix) and v not in empty_values
    }


In [201]:
def remove_key_prefixes(d, ps):
    """Strip each prefix in ``ps`` from the keys of ``d``.

    Prefixes are applied one after another. For each prefix, every key that
    starts with it is removed and re-inserted under the text following the
    prefix. The input dict is not modified; a copy is made per prefix (with
    an empty ``ps`` the input object itself is returned).
    """
    for prefix in ps:
        renamed = d.copy()
        matching = [key for key in renamed if key.startswith(prefix)]
        # Remove all matching keys first, then add them back under the
        # shortened names.
        shortened = [(key[len(prefix):], renamed.pop(key)) for key in matching]
        renamed.update(shortened)
        d = renamed
    return d




In [202]:
def fix_spans_due_to_empty_words(action_dict, words):
    """Remove empty-string words and shift span indices accordingly.

    Trailing empty strings are dropped first. Every remaining empty string at
    position ``i`` is then deleted, and every span index ``>= i`` found in
    ``action_dict`` is decremented by one.

    Spans are located anywhere inside nested dicts and lists, so both the
    ``['yes'/'no', payload]`` wrappers and ``[[start, end], ...]`` span lists
    are handled. A dict value that is a bare ``[start, end]`` pair is
    normalised to ``[[start, end]]``, as before. The previous implementation
    only looked at direct dict values of the form ``[start, end]`` and so
    never adjusted spans stored in the wrapped / list-of-spans layout; it
    also raised ``IndexError`` when ``words`` was empty or all-empty.

    Both arguments are modified in place.

    Returns:
        The ``(action_dict, words)`` pair.
    """

    def is_index_pair(value):
        # A span is a two-element sequence of plain integers.
        return (
            isinstance(value, (list, tuple))
            and len(value) == 2
            and all(isinstance(x, int) and not isinstance(x, bool) for x in value)
        )

    def shifted(pair, i):
        return [x - 1 if x >= i else x for x in pair]

    def reduce_span_vals_gte(node, i):
        if isinstance(node, dict):
            for k, v in node.items():
                if is_index_pair(v):
                    node[k] = [shifted(v, i)]
                else:
                    reduce_span_vals_gte(v, i)
        elif isinstance(node, list):
            for pos, item in enumerate(node):
                if is_index_pair(item):
                    node[pos] = shifted(item, i)
                else:
                    reduce_span_vals_gte(item, i)

    # remove trailing empty strings (guarding against an empty list)
    while words and words[-1] == "":
        del words[-1]

    # fix spans for each interior empty word, then drop the word
    i = 0
    while i < len(words):
        if words[i] == "":
            reduce_span_vals_gte(action_dict, i)
            del words[i]
        else:
            i += 1

    return action_dict, words


In [203]:
def process_dict(d):
    """Turn a flat, dot-separated annotation dict into a nested dict.

    Steps, in order:

    1. A fixed set of wrapper prefixes (``COPY.yes.``, ``FREEBUILD.BUILD.``,
       ``coref_resolve_check.yes.`` ...) is stripped from the keys.
    2. If a ``location`` key is present, a ``location`` sub-dict is built:
       ``location_type`` is the annotated value (omitted for
       ``coref_resolve_check``); for ``REFERENCE_OBJECT`` the
       ``relative_direction`` is copied in unless it is ``EXACT`` / ``Other``;
       the entries under ``location.`` are processed recursively and merged.
    3. Every remaining key is handled by shape:
       * ``<name>.span#<n>`` appends ``[n, n]`` to the list under ``<name>``;
       * any other dotted key ``<prefix>.<rest>`` is processed recursively
         and merged under ``snake_case(<prefix>)``;
       * plain keys are copied through, except ``location``, ``COPY``,
         ``coref_resolve_check`` and a ``relative_direction`` of ``EXACT`` /
         ``NEAR`` / ``Other``, which are skipped.

    The input dict is not modified; work happens on the copy returned by
    ``remove_key_prefixes``.
    """
    r = {}

    d = remove_key_prefixes(d, ["COPY.yes.", "COPY.no.", 'FREEBUILD.BUILD.', 'answer_type.TAG.', 'FREEBUILD.FREEBUILD.', 'coref_resolve_check.yes.', 'coref_resolve_check.no.'])

    # Recursive results per raw key prefix, so that N keys sharing a prefix
    # trigger one recursive call instead of N identical ones.
    nested_results = {}

    if "location" in d:
        r["location"] = {"location_type": d["location"]}
        if r['location']['location_type'] == 'coref_resolve_check':
            del r['location']['location_type']
        elif r["location"]["location_type"] == "REFERENCE_OBJECT":
            r["location"]["relative_direction"] = d.get(
                "location.REFERENCE_OBJECT.relative_direction"
            )
            # no key for EXACT
            if r["location"]["relative_direction"] in ("EXACT", "Other"):
                del r["location"]["relative_direction"]
            # Blank it out so the recursive pass below (with_prefix drops
            # None values) does not pick it up a second time.
            d["location.REFERENCE_OBJECT.relative_direction"] = None
        nested_results["location"] = process_dict(with_prefix(d, "location."))
        r["location"].update(nested_results["location"])

    for k, v in d.items():
        if (
            k == "location"
            or k in ['COPY', 'coref_resolve_check']
            or (k == "relative_direction" and v in ("EXACT", "NEAR", "Other"))
        ):
            continue
        # handle span: "<name>.span#<idx>" (the dot is literal; it was
        # previously unescaped and matched any character)
        if re.match(r"[^.]+\.span#[0-9]+", k):
            prefix, rest = k.split(".", 1)
            idx = int(rest.split("#")[-1])
            if prefix in r:
                r[prefix].append([idx, idx])
            else:
                r[prefix] = [[idx, idx]]

        # handle nested dict
        elif "." in k:
            prefix, rest = k.split(".", 1)
            if prefix not in nested_results:
                nested_results[prefix] = process_dict(with_prefix(d, prefix + "."))
            prefix_snake = snake_case(prefix)
            r[prefix_snake] = r.get(prefix_snake, {})
            r[prefix_snake].update(nested_results[prefix])

        # handle const value
        else:
            r[k] = v

    return r


In [204]:
def handle_put_memory(d):
    """Placeholder for PUT_MEMORY handling: ignores ``d`` and returns an empty dict."""
    return dict()
    

def handle_commands(d):
    """Build the per-command output dict from an annotation dict.

    The entries under ``action_type.<action>.`` are processed with
    ``process_dict``. The action name is overridden to ``FREEBUILD`` when the
    processed dict carries ``FREEBUILD == "FREEBUILD"``, and then to ``COPY``
    when the raw ``COPY`` answer is ``yes``.

    Every value in the result is a two-element list ``[flag, value]``:
    ``action_type`` and ``dialogue_type`` are always flagged ``'yes'``; a
    processed child entry is flagged ``'no'`` when its value is a list (and
    the key is not ``target_action_type``), otherwise ``'yes'``.
    ``dialogue_type`` is ``PUT_MEMORY`` for the ``tag`` action and
    ``HUMAN_GIVE_COMMAND`` otherwise.
    """
    action_name = d["action_type"]
    action_fields = with_prefix(d, "action_type.{}.".format(action_name))
    child_d = process_dict(action_fields)

    # Fix Build/Freebuild mismatch; the marker itself never reaches the output.
    if child_d.pop("FREEBUILD", None) == "FREEBUILD":
        action_name = 'FREEBUILD'

    if action_fields.get('COPY', 'no') == 'yes':
        action_name = 'COPY'

    action_label = action_name.lower()
    if action_label == 'tag':
        dialogue_label = 'PUT_MEMORY'
    else:
        dialogue_label = 'HUMAN_GIVE_COMMAND'

    output = {
        'action_type': ['yes', action_label],
        'dialogue_type': ['yes', dialogue_label],
    }
    for key, value in child_d.items():
        flagged_no = key != 'target_action_type' and type(value) is list
        output[key] = ['no' if flagged_no else 'yes', value]
    return output

def process_result(full_d, index):
    """Convert the annotation for command number ``index`` of one CSV row.

    Args:
        full_d: one row of the results CSV as a dict (reads ``WorkerId``,
            ``Answer.root.<index>.*``, ``Input.command_<index>`` and
            ``Input.word<index><n>``).
        index: which command of the row to process.

    Returns:
        ``(worker_id, action_dict, words)``. When the row has no
        ``action_type`` answer for this command, ``action_dict`` is empty and
        ``words`` is the whitespace-split command text. Otherwise ``words``
        comes from the ``Input.word...`` columns with empty entries removed
        and spans adjusted by ``fix_spans_due_to_empty_words``.
    """
    worker_id = full_d["WorkerId"]
    d = with_prefix(full_d, "Answer.root.{}.".format(index))
    # Covers both "no answers at all" and "answers without an action type".
    if "action_type" not in d:
        return worker_id, {}, full_d['Input.command_{}'.format(index)].split()

    action_dict = handle_commands(d)

    ##############
    # repeat dict
    ##############
    # NOTE: this can probably loop over or hold indices of which specific action ?
    is_command = action_dict.get('dialogue_type', [None, None])[1] == 'HUMAN_GIVE_COMMAND'
    if is_command and d.get("loop") not in [None, "Other"]:
        repeat_dict = process_repeat_dict(d)
        if repeat_dict.get('repeat_dir', None) == 'Other':
            repeat_dict.pop('repeat_dir')

        # Some turkers annotate a repeat dict for a repeat_count of 1.
        # Don't include the repeat dict if that's the case.
        keep_repeat = True
        if repeat_dict.get("repeat_count"):
            # Use every annotated span, in sentence order. Reading only the
            # first span turned a count such as "one hundred" into "one" and
            # wrongly dropped the repeat.
            count_indices = sorted(
                {x for a, b in repeat_dict["repeat_count"] for x in range(a, b + 1)}
            )
            repeat_count_str = " ".join(
                full_d["Input.word{}{}".format(index, x)] for x in count_indices
            )
            keep_repeat = repeat_count_str not in ("a", "an", "one", "1")
        if keep_repeat:
            action_dict['repeat'] = ['yes', repeat_dict]

    ##################
    # post-processing
    ##################

    # Fix empty words messing up spans
    words = [full_d["Input.word{}{}".format(index, x)] for x in range(MAX_WORDS)]
    action_dict, words = fix_spans_due_to_empty_words(action_dict, words)

    return worker_id, action_dict, words


In [205]:
def fix_cnt_in_schematic(words, action_dict):
    """Remove the repeat-count span from schematic / reference-object spans.

    When ``action_dict`` has a ``repeat`` entry, the spans that describe the
    count are collected:

    * the ``repeat_count`` spans, when present;
    * otherwise, for ``repeat_key == 'ALL'``, the position of the first
      ``all`` / ``every`` / ``each`` in ``words`` (nothing if none occurs).

    Those spans are then removed from the span lists stored under
    ``schematic`` and ``reference_object`` (values shaped
    ``[flag, [span, ...]]``). Entries whose payload is not a list are left
    unchanged.

    Fixes over the previous version: ``words.index('all')`` raised
    ``ValueError`` when only ``every`` / ``each`` was present, and popping
    from the span list while enumerating it skipped the element following
    each removal.

    Returns:
        ``action_dict`` (modified in place).
    """
    if 'repeat' not in action_dict:
        return action_dict

    repeat_info = action_dict['repeat'][1]
    if 'repeat_count' in repeat_info:
        count_spans = repeat_info['repeat_count']
    elif 'repeat_key' in repeat_info and repeat_info['repeat_key'] == 'ALL':
        quantifier_idx = next(
            (i for i, word in enumerate(words) if word in ('all', 'every', 'each')),
            None,
        )
        count_spans = [] if quantifier_idx is None else [[quantifier_idx, quantifier_idx]]
    else:
        return action_dict

    for k, v in action_dict.items():
        if k in ['schematic', 'reference_object']:
            if isinstance(v[1], list):
                # Filter in one pass instead of popping during iteration.
                v[1][:] = [span for span in v[1] if span not in count_spans]
            action_dict[k] = [v[0], v[1]]
    return action_dict

In [207]:
from pprint import pprint
# Input CSV of annotation results and the tab-separated text file to write.
INPUT_CSV_PATH = '/Users/kavyasrinet/Downloads/test_q.csv'
OUTPUT_TXT_PATH = '/Users/kavyasrinet/Downloads/test_q.txt'
# Each CSV row carries commands numbered 1..COMMANDS_PER_ROW.
COMMANDS_PER_ROW = 3

unique_keys = []
all_data = {}

# csv recommends opening the file with newline="" so that newlines embedded
# in quoted fields are parsed correctly.
with open(INPUT_CSV_PATH, "r", newline="") as f:
    r = csv.DictReader(f)
    for d in r:
        worker_id = d["WorkerId"]
        # Keep results from earlier rows of the same worker instead of
        # resetting that worker's entry on every row.
        worker_data = all_data.setdefault(worker_id, {})
        for i in range(1, COMMANDS_PER_ROW + 1):
            sentence = d['Input.command_{}'.format(i)]
            _, action_dict, words = process_result(d, i)
            a_dict = fix_cnt_in_schematic(words, action_dict)
            unique_keys.extend(list(a_dict.keys()))
            worker_data[sentence] = a_dict

# Open the output only once processing has succeeded, so a failure above does
# not leave a truncated file behind.
with open(OUTPUT_TXT_PATH, 'w') as f2:
    for k, v in all_data.items():
        f2.write(k+"\t"+str(v)+"\n")
print(len(all_data.keys()))

1


In [166]:
# Distinct top-level keys seen across all processed action dicts.
a = set()
a.update(unique_keys)
print(a)

{'repeat', 'schematic', 'dialogue_type', 'action_type', 'reference_object', 'location'}


In [None]:
# 500 qual test: '/Users/kavyasrinet/Downloads/500_qual_test.csv'
# first round: '/Users/kavyasrinet/Downloads/14_qual_test.csv'
# test from sandbox: 'data/test.csv'