## Experiment 1: Using WDIK information as additional context while answering questions

In [76]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
from belief.utils import load_macaw, load_tokenizer
from belief.utils import macaw_input, run_macaw, get_macaw_scores, get_macaw_outs
from belief.evaluation import load_facts
from belief.lmbb import LMBB
import json
import random
import copy
from tqdm import tqdm

In [78]:
# Facts to be answered; only the first batch returned by load_facts is used.
facts = load_facts('data/calibration_facts.json', num_batches=1)[0]

# JSON is UTF-8 by specification; pass the encoding explicitly so decoding
# does not depend on the platform's default locale encoding.

# WDIK cache: indexed by a fact's subject later on; each entry is sampled
# and space-joined to form the question context.
with open('cache/wdik.json', 'r', encoding='utf-8') as f:
    wdik = json.load(f)

# Constraint data; its 'links' entry is handed to the LMBB evaluator.
with open('data/constraints_v2.json', 'r', encoding='utf-8') as f:
    constraint_data = json.load(f)

In [79]:
# Number of WDIK entries sampled per entity to build the context string
# that accompanies each question.
NUM_FACTS = 3

In [80]:
# The evaluator is constructed without a model or tokenizer (both None);
# the only data it receives here is the 'links' entry of the constraint file.
evaluator = LMBB(model=None, tokenizer=None, raw_constraints=constraint_data['links'])

In [81]:
# Model and tokenizer returned by the belief.utils loaders; both objects are
# passed to get_macaw_outs when each fact is answered.
model = load_macaw()
tokenizer = load_tokenizer()



In [82]:
# Dedicated, seeded RNG so the sampled context (and any random fallback label)
# is identical on every run of this cell, without touching the global
# `random` state. Change SEED to draw a different sample.
SEED = 42
rng = random.Random(SEED)

# Maps fact sentence -> copy of the fact whose `boolean` holds the model's answer.
answer_beliefs = {}

# Answer options offered to the model for every question.
yes_no = ['yes', 'no']

for fact in tqdm(facts):
    entity = fact.subject
    # Context = NUM_FACTS WDIK entries for the subject, sampled without
    # replacement and joined into a single space-separated string.
    # NOTE(review): this raises if the entity is missing from `wdik` or has
    # fewer than NUM_FACTS entries.
    context = ' '.join(rng.sample(wdik[entity], NUM_FACTS))
    question = fact.get_question()
    inp_str = macaw_input(question=question, options=yes_no, context=context, targets='A')
    outs = get_macaw_outs(inp_str, model, tokenizer)

    # Work on a copy so the entries of `facts` are left untouched; `facts` is
    # passed to calculate_f1 afterwards.
    ans_fact = copy.deepcopy(fact)

    # NOTE(review): if outs['answer'] is a plain string these are substring
    # tests (e.g. 'no' matches inside 'unknown') — confirm the answer format.
    if 'yes' in outs['answer']:
        ans_fact.boolean = True
    elif 'no' in outs['answer']:
        ans_fact.boolean = False
    else:
        # Neither option was found in the answer: fall back to a (seeded) coin flip.
        print(f"Random assignment: {fact.sentence}")
        ans_fact.boolean = rng.choice([True, False])
    answer_beliefs[ans_fact.sentence] = ans_fact

100%|█████████████████████████████████████████████████████████████████████████████| 1072/1072 [36:27<00:00,  2.04s/it]


In [85]:
# Give the model-derived beliefs to the evaluator, then report each metric
# in turn (F1 against `facts` first, consistency second).
evaluator.set_beliefs(answer_beliefs)

f1 = evaluator.calculate_f1(facts)
print("F1:", f1)

consistency = evaluator.calculate_consistency()
print("Consistency:", consistency)

F1: 0.83307809604741
Consistency: 0.9675830939679935
