In [304]:
"""
Copyright (c) Facebook, Inc. and its affiliates.
"""

from collections import Counter, defaultdict
import argparse
import ast
import fileinput
import json
import os

# Tallies of right / wrong answers. Neither counter is referenced by the
# cells visible in this notebook.
right_answer_count = Counter()
wrong_answer_count = Counter()

# compile sets of allowed answers
# Maps each command sentence to the set of raw (unparsed) answer strings that
# read_gold_set loads from the gold answers file.
allowed_answers = defaultdict(set)
# NOTE(review): read_gold_set keeps its own local command state, so this
# module-level value is neither read nor updated there.
command = None




In [305]:
def read_gold_set(gold_set, answers=None):
    """Load the gold (allowed) answers file into a command -> answers mapping.

    The file is processed line by line after stripping whitespace:
    blank lines are skipped, a line starting with "{" is stored (as its raw
    string) as an allowed answer for the most recent command line, and any
    other line becomes the current command.

    Args:
        gold_set: path of the gold answers file.
        answers: optional mapping of command -> set of answer strings to
            fill. Defaults to the module-level ``allowed_answers``.

    Raises:
        ValueError: if an answer line appears before any command line.
    """
    if answers is None:
        answers = allowed_answers

    # The current command is tracked locally; an explicit None start value
    # lets us report a misplaced answer instead of hitting UnboundLocalError.
    current_command = None
    with open(gold_set, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("{"):
                if current_command is None:
                    print("Bad allowed answer:", line)
                    raise ValueError(
                        "allowed answer found before any command line: %r" % (line,)
                    )
                # setdefault works for both defaultdict and plain dict targets.
                answers.setdefault(current_command, set()).add(line)
            else:
                current_command = line




In [307]:
# Populate allowed_answers from the gold answers file; the path is relative
# to the notebook's working directory.
read_gold_set('data/qual_gold_answers.txt')

In [308]:
def compare_dicts(action_dict, allowed_dict):
    """Return True when a worker's answer agrees with one allowed answer.

    Args:
        action_dict: the worker's answer, already parsed into a dict.
        allowed_dict: one allowed answer as a string holding a Python dict
            literal; it is parsed here with ``ast.literal_eval``.

    Every key of the allowed answer other than "repeat" must be present in
    ``action_dict`` with an equal value. "repeat" is compared more leniently:
    if the two values differ, their first elements must still match, and every
    entry of the worker's second element (except "repeat_dir") must appear
    with the same value in the allowed second element.
    """
    expected = ast.literal_eval(allowed_dict)

    if "repeat" in expected:
        if "repeat" not in action_dict:
            return False
        gold_repeat = expected["repeat"]
        given_repeat = action_dict["repeat"]
        if gold_repeat != given_repeat:
            if gold_repeat[0] != given_repeat[0]:
                return False
            gold_args = gold_repeat[1]
            given_args = given_repeat[1]
            # Only the worker's entries are checked; "repeat_dir" is ignored.
            mismatch = any(
                name not in gold_args or value != gold_args[name]
                for name, value in given_args.items()
                if name != "repeat_dir"
            )
            if mismatch:
                return False

    # All remaining allowed keys must be reproduced exactly by the worker.
    return not any(
        key not in action_dict or action_dict[key] != wanted
        for key, wanted in expected.items()
        if key != "repeat"
    )




In [309]:
def get_wrong_stats(dict1, dict2_list, sentence):
    """Classify the first disagreement between an answer and the gold answers.

    Args:
        dict1: the worker's answer as a dict.
        dict2_list: iterable of gold answers, each a string holding a Python
            dict literal.
        sentence: the command sentence the answer belongs to.

    Returns:
        A dict describing the first mismatch found (empty if none):
        ``missing_key_<k>`` when a gold key is absent from ``dict1``;
        ``action_type_diff`` plus ``action_type_value_wrong`` when the second
        element of "action_type" differs; ``<k>_span_wrong`` for any other
        value difference.

    Keys seen in the gold answers include: 'repeat', 'schematic',
    'dialogue_type', 'action_type', 'has_block_type', 'reference_object',
    'tag_val', 'filters', 'location', 'target_action_type'.
    """
    tally = {}

    def bump(name):
        # Increment an integer counter, starting from zero.
        tally[name] = tally.get(name, 0) + 1

    for serialized in dict2_list:  # ground truth
        gold = ast.literal_eval(serialized)
        for key, expected in gold.items():
            if key not in dict1:
                bump('missing_key_' + key)
                return tally

            given = dict1[key]
            if expected != given:
                if key == 'action_type' and expected[1] != given[1]:
                    # For this one sentence both 'build' and 'dig' are
                    # tolerated as the action type.
                    if sentence == "dig two small holes behind the pool" and given[1] in ['build', 'dig']:
                        continue
                    tally["action_type_diff"] = tally.get("action_type_diff", "") + "_" + given[1]
                    bump(key + "_value_wrong")
                else:
                    bump(key + "_span_wrong")
                return tally
    return tally

In [310]:
def evaluate_workers(worker_file):
    """Score every worker in a tab-separated answers file against the gold set.

    Each non-blank line of ``worker_file`` is ``<worker_id>\\t<answers>``,
    where ``<answers>`` is a Python dict literal mapping a command sentence to
    the worker's answer dict. Workers with fewer than three answers are
    skipped.

    Args:
        worker_file: path of the worker answers file.

    Returns:
        A ``(worker_stats, wrong_stats)`` tuple: ``worker_stats`` maps worker
        id to the integer percentage of answers matching an allowed answer;
        ``wrong_stats`` maps each sentence to the merged mistake statistics
        reported by ``get_wrong_stats`` (integer counts are summed, strings
        are joined with "_").
    """
    worker_stats = {}
    wrong_stats = {sentence: {} for sentence in allowed_answers}

    with open(worker_file) as f:
        # one worker at a time
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no worker record.
                continue

            right_count = 0
            wrong_count = 0
            worker_id, answers = line.split("\t")
            answer_dicts = ast.literal_eval(answers)  # sentence -> answer dict

            # if worker didn't answer all questions, ignore
            if len(answer_dicts) < 3:
                print("Skipping: %r completed only %r" % (worker_id, len(answer_dicts)))
                continue

            for sentence, answer in answer_dicts.items():
                if sentence not in allowed_answers:
                    print("The sentence: %r is missing." % (sentence,))
                # .get avoids inserting an empty set into the defaultdict for
                # unknown sentences; such answers are counted as wrong.
                gold_answers = allowed_answers.get(sentence, ())

                if any(compare_dicts(answer, gold) for gold in gold_answers):
                    right_count += 1
                    continue

                # The answer matches no allowed answer: analyze the mistake.
                wrong_count += 1
                stats = get_wrong_stats(answer, gold_answers, sentence)
                for name, value in stats.items():
                    sentence_stats = wrong_stats.setdefault(sentence, {})
                    if name not in sentence_stats:
                        sentence_stats[name] = value
                    elif type(value) == int:
                        sentence_stats[name] += value
                    elif type(value) == str:
                        sentence_stats[name] += "_" + value

            worker_stats[worker_id] = int((right_count / (right_count + wrong_count)) * 100)

    return worker_stats, wrong_stats

In [311]:
# worker_stats, wrong_stats = evaluate_workers('data/qual_test_answers/2nd_500_qual_user_answers.txt')
# NOTE(review): the path below is absolute and machine-specific, so this cell
# cannot be re-run elsewhere -- confirm which input file is intended.
worker_stats, wrong_stats = evaluate_workers('/Users/kavyasrinet/Downloads/test_q.txt')
print(worker_stats, wrong_stats)

{'A3M3ZFDVYMBJ1X': 66} {'fill all the holes with water': {'action_type_diff': '_destroy', 'action_type_value_wrong': 1}, 'dig two small holes behind the pool': {}, 'go to the red cube between the trees': {}}


In [302]:
import operator
from pprint import pprint
# Print, per sentence, the mistake categories sorted by frequency.
for k, v in wrong_stats.items():
    print(k)
    total = 0
    if "action_type_diff" in v:
        # The value is a "_"-joined string of action types; splitting leaves
        # empty fragments, which are dropped.
        vs = set(v["action_type_diff"].split("_"))
        vs.discard('')
        print("other_action_type_values", vs)

    # Work on a filtered copy instead of popping from wrong_stats, so the
    # cell gives the same output when it is re-run.
    counts = {a: b for a, b in v.items() if a != "action_type_diff"}
    sorted_d = dict(sorted(counts.items(), key=operator.itemgetter(1), reverse=True))

    for a, b in sorted_d.items():
        print(a, b)
        if isinstance(b, int):
            total += b
    print("Total mistakes: %r" % (total))
    print("*" * 20)


fill all the holes with water
other_action_type_values {'move', 'action', 'noop', 'stop', 'otheraction', 'tag', 'undo', 'resume', 'destroy', 'build', 'freebuild', 'answer', 'dance', 'spawn', 'dig', 'composite', 'copy'}
action_type_value_wrong 127
missing_key_has_block_type 125
missing_key_repeat 59
reference_object_span_wrong 55
has_block_type_span_wrong 33
repeat_span_wrong 28
missing_key_action_type 3
missing_key_reference_object 1
Total mistakes: 431
********************
dig two small holes behind the pool
other_action_type_values {'fill', 'move', 'action', 'resume', 'stop', 'otheraction', 'tag', 'undo', 'destroy', 'freebuild', 'answer', 'dance', 'spawn', 'composite', 'copy'}
schematic_span_wrong 153
action_type_value_wrong 130
location_span_wrong 64
missing_key_location 44
missing_key_repeat 10
repeat_span_wrong 2
missing_key_action_type 1
Total mistakes: 404
********************
go to the red cube between the trees
other_action_type_values {'fill', 'undo', 'action', 'resume', 'sto

In [303]:
# Save one "<worker_id>\t<score>" line per evaluated worker.
with open('data/qual_test_workers/second_500_workers.txt', 'w') as out_file:
    for worker_id, score in worker_stats.items():
        out_file.write("\t".join((worker_id, str(score))) + "\n")

In [None]:
# Scratch notes (not executable code):
#   key: absent
#       key: tp, fp, tn, fn