In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import json
import pickle
import random

In [3]:
from belief.evaluation import calibrate, build_bb, load_facts
from belief.lmbb import LMBB, FeedbackType, Proposition, get_raw_input, get_scores

In [10]:
lmbb, c, f = build_bb(
    model_name="allenai/unifiedqa-v2-t5-base-1251000",
    facts_file="./data/calibration_facts.json", 
    constraints_file="./data/constraints_v2.json",
    num_batches=10,
    constraint_solving=True,
    with_feedback=False,
    forward_weight=6, 
    backward_weight=0.6
)

Batch 1 : F1 = 0.6590909042536157, consistency = 1.0
Batch 2 : F1 = 0.6496815240277497, consistency = 1.0


KeyboardInterrupt: 

In [65]:
fact_batches = load_facts('./data/calibration_facts.json', num_batches=5)

In [66]:
def query_debug(proposition, lmbb, feedback=True, num_feedback_beliefs=3):
    
    feedback_beliefs = []
    if feedback:
        
        clahing_beliefs = sorted(lmbb.get_clashing_beliefs(proposition), key=lambda prop: -prop.weight)
        print("Clashing beliefs:")
        print(clashing_beliefs)
        print()
        
        if len(clashing_beliefs) < self.num_relevant_beliefs:
            print("Not enough clashing beliefs. Retrieving on-topic beliefs")
            on_topic_beliefs = self.get_beliefs_by_subject(proposition.subject)
            print("On topic beliefs:")
            print(on_topic_beliefs)
            print()
            
            if len(backup_beliefs) < self.num_relevant_beliefs - len(clashing_beliefs):
                backup_beliefs += list(self.beliefs.values())
            clashing_beliefs += random.sample(backup_beliefs, min(self.num_relevant_beliefs - len(clashing_beliefs), len(backup_beliefs)))

        feedback_beliefs = lmbb.feedback(proposition, FeedbackType.RELEVANT)
        
    print("Feedback:")
    print(feedback_beliefs)
    print()
    
    feedback_string = " ".join([belief.get_nl_sentence() for belief in feedback_beliefs])
    question = feedback_string + ' ' + proposition.get_nl_question()
    
    print(f"Question: {question}")
    print()
    
    options = ['yes', 'no']
    raw_input = get_raw_input(question, options)
    scores = get_scores(lmbb.model, lmbb.tokenizer, raw_input, options)
    answer = max(scores, key=lambda x: x[1])
    
    print(f"Scores: {scores}")
    print()
    
    answer = Proposition(
            subject=proposition.subject,
            predicate=proposition.predicate,
            boolean=True if answer[0] == 'yes' else False,
            weight=answer[1]
        )
    
    print(answer)

In [84]:
idx = random.randint(0, len(fact_batches[0]))
proposition = fact_batches[0][idx]
print(proposition)

(daffodil,CapableOf,grow, True, -99999.0)


In [96]:
query_debug(proposition, lmbb)

Feedback:
[(daffodil,IsA,company, False, 0.999), (daffodil,IsA,virus, False, 0.999), (daffodil,IsA,bacterium, False, 0.974)]

Question: daffodil is not a company. daffodil is not a virus. daffodil is not a bacterium. Is a daffodil capable of grow?

Scores: [('yes', array(0.9998461, dtype=float32)), ('no', array(0.01753854, dtype=float32))]

(daffodil,CapableOf,grow, True, 0.999)


In [None]:
lmbb.query()

In [11]:
_ = build_bb(
    model_name="allenai/unifiedqa-v2-t5-base-1251000",
    facts_file="./data/silver_facts.json", 
    constraints_file="./data/constraints_v2.json",
    num_batches=10,
    constraint_solving=True,
    with_feedback=True,
    forward_weight=6, 
    backward_weight=0.6
)

del _

Batch 1 : F1 = 0.6399197547963046, consistency = 1.0
Batch 2 : F1 = 0.6595744634460446, consistency = 1.0
Batch 3 : F1 = 0.6783369755720908, consistency = 1.0
Batch 4 : F1 = 0.7023945219955562, consistency = 1.0
Batch 5 : F1 = 0.7300236358041547, consistency = 1.0
Batch 6 : F1 = 0.746378285249434, consistency = 1.0
Batch 7 : F1 = 0.7722091501193691, consistency = 1.0
Batch 8 : F1 = 0.7957267674519273, consistency = 1.0
Batch 9 : F1 = 0.8166888543126881, consistency = 1.0
Batch 10 : F1 = 0.8391877008198719, consistency = 1.0


In [17]:
f1s = [
    0.6399197547963046, 
    0.6595744634460446, 
    0.6783369755720908, 
    0.7023945219955562,
    0.7300236358041547, 
    0.746378285249434, 
    0.7722091501193691,
    0.7957267674519273,
    0.8166888543126881,
    0.8391877008198719
]

consistencies = [1.0 for _ in range(10)]

d = dict(
    model_name="allenai/unifiedqa-v2-t5-base-1251000",
    facts_file="./data/silver_facts.json",
    constraints_file="./data/constraints_v2.json",
    num_batches=10,
    constraint_solving=True,
    with_feedback=True,
    feedback_type=FeedbackType.RELEVANT,
    forward_weight=6,
    backward_weight=0.6,
    f1s=f1s,
    consistencies=consistencies,
    final_f1=f1s[-1],
    final_consistency=consistencies[-1]
)

In [28]:
with open('./results/results.pkl', 'rb') as f:
    results = pickle.load(f)
results.append(d)
with open('./results/results.pkl', 'wb') as f:
    pickle.dump(results, f)

In [29]:
with open('./results/results.pkl', 'rb') as f:
    results = pickle.load(f)

In [31]:
_ = build_bb(
    model_name="allenai/unifiedqa-v2-t5-base-1251000",
    facts_file="./data/silver_facts.json", 
    constraints_file="./data/constraints_v2.json",
    num_batches=10,
    constraint_solving=False,
    with_feedback=False,
    forward_weight=6, 
    backward_weight=0.6
)

del _

Batch 1 : F1 = 0.596273287573404, consistency = 0.8038461538461539
Batch 2 : F1 = 0.5994694916729912, consistency = 0.8007554296506137
Batch 3 : F1 = 0.6041542983201754, consistency = 0.8075664621676891
Batch 4 : F1 = 0.6090443262191578, consistency = 0.8078233161191077
Batch 5 : F1 = 0.6082142813601136, consistency = 0.8097634972074743
Batch 6 : F1 = 0.6105797924071288, consistency = 0.8101168437755446
Batch 7 : F1 = 0.607506767220549, consistency = 0.8058636524196396
Batch 8 : F1 = 0.6095259448680024, consistency = 0.8050346845870366
Batch 9 : F1 = 0.6086348147538226, consistency = 0.8052200679763152
Batch 10 : F1 = 0.6061863064121384, consistency = 0.805502425029032


In [None]:
_ = build_bb(
    model_name="allenai/unifiedqa-v2-t5-base-1251000",
    facts_file="./data/calibration_facts.json", 
    constraints_file="./data/constraints_v2.json",
    num_batches=10,
    constraint_solving=False,
    with_feedback=False,
    forward_weight=6, 
    backward_weight=0.6
)

del _

In [34]:
model_name = "allenai/unifiedqa-v2-t5-base-1251000"
model = T5ForConditionalGeneration.from_pretrained(model_name, load_in_8bit=True)

TypeError: __init__() got an unexpected keyword argument 'load_in_8bit'