In [None]:
import glob
import sys
import json
import os
import pandas as pd

In [None]:
# --- Directory layout -------------------------------------------------------
# submission_dir is globbed for one sub-directory per user; gold_dir and
# input_dir each hold one sub-directory per subtask.
submission_dir = "submissions/"
gold_dir = "ref/"
input_dir = "input/"

# --- File names expected inside each subtask directory ------------------------
answers_file = "answers.json"
questions_file = "questions.json"

# --- Evaluation dimensions ----------------------------------------------------
# Subtasks whose questions and gold answers are preloaded.
subtasks = ["s1", "s2", "s3"]
# Values compared against each question's 'event_type' field.
event_types = {"injuring", "killing", "fire_burning", "job_firing"}
# Keys whose presence in a question record selects it for a per-property score.
event_props = {"participant", "location", "time"}

In [None]:
# Preload the questions of every subtask into a dict keyed by subtask name.
questions={}
for subtask in subtasks:
    # os.path.join avoids the doubled separator that '%s/%s' produces when
    # input_dir already ends with '/'.
    subtask_questions_file=os.path.join(input_dir, subtask, questions_file)
    # Read as UTF-8 explicitly: the platform default encoding is not UTF-8
    # everywhere, and JSON with non-ASCII text would then fail to decode.
    with open(subtask_questions_file, 'r', encoding='utf-8') as f:
        questions[subtask]=json.load(f)

In [None]:
# Preload the gold (reference) answers of every subtask, keyed by subtask name.
gold={}
for subtask in subtasks:
    # os.path.join avoids the doubled separator that '%s/%s' produces when
    # gold_dir already ends with '/'.
    subtask_gold_file=os.path.join(gold_dir, subtask, answers_file)
    # Read as UTF-8 explicitly: the platform default encoding is not UTF-8
    # everywhere, and JSON with non-ASCII text would then fail to decode.
    with open(subtask_gold_file, 'r', encoding='utf-8') as f:
        gold[subtask]=json.load(f)

In [None]:
def compute_subset_accuracy(ques, anss, gold, event_type=None, event_prop=None):
    """Score submitted numerical answers on an optionally filtered question subset.

    Only questions that appear in ``anss`` are scored, so the accuracy is
    relative to the answered questions, not to every gold question.

    Args:
        ques: dict mapping question id -> question record (a dict). Consulted
            only when ``event_type`` or ``event_prop`` is given.
        anss: dict mapping question id -> submitted answer record; the
            submitted value is read from its 'numerical_answer' key.
        gold: dict mapping question id -> gold record holding the reference
            value under 'numerical_answer'.
        event_type: if truthy, keep only questions whose 'event_type' field
            equals this value.
        event_prop: if truthy, keep only questions whose record contains this
            key.

    Returns:
        ``(accuracy, total)`` where accuracy is ``correct / total`` rounded to
        4 decimals and total is the number of scored answers; ``(0, 0)`` when
        nothing was scored.

    Side effects:
        Prints the raw ``correct`` and ``total`` counters.

    Malformed submissions do not abort the evaluation:
        * a question id with no gold entry is skipped;
        * when a filter is active, a question id with no question record is
          skipped, since its membership in the subset cannot be decided;
        * an answer record lacking 'numerical_answer' (or that is not a dict)
          is counted as an incorrect answer.
    """
    # Sentinel that never compares equal to a gold value, so a missing answer
    # cannot accidentally match a gold answer of None.
    no_answer=object()
    filtering=bool(event_type or event_prop)

    correct=0
    total=0
    for qid, adata in anss.items():
        gold_entry=gold.get(qid)
        if gold_entry is None:
            continue

        if filtering:
            question=ques.get(qid)
            if question is None:
                continue
            if event_type and event_type!=question['event_type']:
                continue
            if event_prop and event_prop not in question:
                continue

        if isinstance(adata, dict):
            num_answer=adata.get('numerical_answer', no_answer)
        else:
            num_answer=no_answer

        if num_answer is not no_answer and num_answer==gold_entry['numerical_answer']:
            correct+=1
        total+=1

    print('correct=', correct, 'total=', total)
    if total>0:
        return round(correct/total, 4), total
    else:
        return 0, 0

In [None]:
# Score every user's submission on each event-type and event-property subset.
# data_json maps user -> {'<subtask>_et_<type>' | '<subtask>_ep_<prop>': 'acc (n)'}.
data_json={}
# sorted(): glob returns entries in arbitrary order; sorting makes the printed
# log and the resulting table reproducible between runs.
for user_submission_dir in sorted(glob.glob(os.path.join(submission_dir, '*'))):
    # basename works whichever path separator glob returned, whereas
    # split('/') yields the whole path when the separator is a backslash.
    user=os.path.basename(user_submission_dir)
    print('USER', user)
    print('='*20)
    data_json[user]={}
    for subtask_user_submission_dir in sorted(glob.glob(os.path.join(user_submission_dir, '*'))):
        subtask=os.path.basename(subtask_user_submission_dir)
        # s1 submissions are not scored here (the reason is not visible in this notebook).
        if subtask=='s1': continue
        # A directory that is not a preloaded subtask has no questions/gold
        # to score against; skip it rather than fail with a KeyError.
        if subtask not in questions or subtask not in gold:
            continue
        this_answers_file=os.path.join(subtask_user_submission_dir, answers_file)
        if not os.path.exists(this_answers_file):
            continue
        print('SUBTASK', subtask)
        print('='*20)
        # Read as UTF-8 explicitly and close the file before scoring starts.
        with open(this_answers_file, 'r', encoding='utf-8') as f:
            answers = json.load(f)

        # (keyword passed to compute_subset_accuracy, key infix, values to iterate).
        # The sets are sorted because set iteration order is arbitrary.
        subset_specs=(
            ('event_type', 'et', sorted(event_types)),
            ('event_prop', 'ep', sorted(event_props)),
        )
        for filter_kwarg, key_tag, subset_values in subset_specs:
            for subset_value in subset_values:
                print(subset_value)
                acc, total=compute_subset_accuracy(questions[subtask],
                                                   answers,
                                                   gold[subtask],
                                                   **{filter_kwarg: subset_value})
                k='%s_%s_%s' % (subtask, key_tag, subset_value)
                data_json[user][k]='%f (%d)' % (round(acc,4), total)
                print('ACC=', acc)
                print()

                

In [None]:
# Tabulate the scores: the outer keys of data_json (users) become columns and
# the inner keys (subset labels) become the row index.
df = pd.DataFrame.from_dict(data_json, orient='columns')

In [None]:
# Write the accuracy table as a tab-separated file in the working directory.
df.to_csv(path_or_buf='acc.tsv', sep='\t')