In [31]:
import re
import string
import random 
import copy
import uuid
import pandas as pd
from meta_kg.utils.py_io import *


def normalize_text(text):
    """Removing articles and punctuation, and standardizing whitespace are all typical text processing steps."""
    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the|fail|naf)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()
    
    return white_space_fix(remove_articles(remove_punc(lower(text))))

def compute_f1(prediction, truth):
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    common_tokens = set(pred_tokens) & set(truth_tokens)
    if len(common_tokens) == 0:
        return 0

    prec = len(common_tokens) / len(pred_tokens)
    rec = len(common_tokens) / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

In [19]:
hop = 5

split = "train"
owa_proof_d2 = read_jsonl(f"./data/owa_proof_{hop}_hop_d2/{split}.jsonl")
for data in owa_proof_d2:
   random.shuffle(data["all_facts"])
write_jsonl(owa_proof_d2, f"./data/owa_proof_{hop}_hop_d2/{split}.jsonl")

split = "dev"
owa_proof_d2 = read_jsonl(f"./data/owa_proof_{hop}_hop_d2/{split}.jsonl")
for data in owa_proof_d2:
   random.shuffle(data["all_facts"])
write_jsonl(owa_proof_d2, f"./data/owa_proof_{hop}_hop_d2/{split}.jsonl")

split = "test"
owa_proof_d2 = read_jsonl(f"./data/owa_proof_{hop}_hop_d2/{split}.jsonl")
for data in owa_proof_d2:
   random.shuffle(data["all_facts"])
write_jsonl(owa_proof_d2, f"./data/owa_proof_{hop}_hop_d2/{split}.jsonl")


In [20]:
! ./upload_wandb_data.sh ./data/owa_proof_2_hop_d2 owa_proof_2_hop_d2
! ./upload_wandb_data.sh ./data/owa_proof_3_hop_d2 owa_proof_3_hop_d2
! ./upload_wandb_data.sh ./data/owa_proof_5_hop_d2 owa_proof_5_hop_d2

[34m[1mwandb[0m: Uploading directory ./data/owa_proof_2_hop_d2 to: "epfl_nlp_phd/data-collection/owa_proof_2_hop_d2:latest" (dataset)
[34m[1mwandb[0m: Adding directory to artifact (./data/owa_proof_2_hop_d2)... Done. 0.0s
[34m[1mwandb[0m: Currently logged in as: [33mchenze_epfl[0m ([33mepfl_nlp_phd[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.13.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.12.21
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/Users/zemingchen/Desktop/meta-knowledge/wandb/run-20230307_150821-1qtlr9dx[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mnoble-spaceship-114[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/epfl_nlp_phd/data-collection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/epfl_nlp_ph

In [32]:
folio_train = read_jsonl('data/folio/train.jsonl')
folio_dev = read_jsonl('data/folio/dev.jsonl')
folio_test = read_jsonl('data/folio/test.jsonl')

# for data in folio_train:
#     facts = data['facts']
#     clean_facts = [k.replace('-', ' ').replace('.', '').strip() for k in facts]
#     data['facts'] = [x for x in clean_facts if x != '']
#     data['question'] = data['question'].replace('-', ' ').replace('.', '').strip()

# for data in folio_dev:
#     facts = data['facts']
#     clean_facts = [k.replace('-', ' ').replace('.', '').strip() for k in facts]
#     data['facts'] = [x for x in clean_facts if x != '']
#     data['question'] = data['question'].replace('-', ' ').replace('.', '').strip()

# for data in folio_test:
#     facts = data['facts']
#     clean_facts = [k.replace('-', ' ').replace('.', '').strip() for k in facts]
#     data['facts'] = [x for x in clean_facts if x != '']
#     data['question'] = data['question'].replace('-', ' ').replace('.', '').strip()

train_aug = []
for data in folio_train:
    for i in range(5):
        train_aug.append(copy.deepcopy(data))

assert len(train_aug) == len(folio_train) * 5

for data in train_aug:
    random.shuffle(data['facts'])

random.shuffle(train_aug)

In [35]:
write_jsonl(train_aug, 'data/folio/train.jsonl')

In [59]:
from collections import Counter

labels = [data['answer'] for data in folio_test]
label_counts = Counter(labels)
print(label_counts)

Counter({'true': 72, 'uncertain': 69, 'false': 63})


In [66]:
proof_2_hop_d4 = read_jsonl('./data/owa_proof_5_hop_d4/test.jsonl')
num_facts = sum([len(p['all_facts']) for p in proof_2_hop_d4]) / len(proof_2_hop_d4)
num_kg = sum([len(p['facts']) for p in proof_2_hop_d4]) / len(proof_2_hop_d4)
num_distractors = sum([len(p['all_facts']) - len(p['facts'])
                      for p in proof_2_hop_d4]) / len(proof_2_hop_d4)
percent = sum([1 - len(p['facts']) / len(p['all_facts'])
              for p in proof_2_hop_d4]) / len(proof_2_hop_d4)
print(f'Average number of facts: {num_facts}')
print(f'Average number of KG facts: {num_kg}')
print(f'Average number of distractors: {num_distractors}')
print(f'Percent of distractors: {percent}')

Average number of facts: 8.061642512077295
Average number of KG facts: 4.061642512077294
Average number of distractors: 4.0
Percent of distractors: 0.5231053015690641


In [122]:
none_d4 = []
for data in proof_2_hop_d4:
    if data["answer"] == "unknown":
        none_d4.append(data)

none_d6 = []
for data in proof_2_hop_d6:
    if data["answer"] == "unknown":
        none_d6.append(data)

In [124]:
set(none_d6[0]["all_facts"]) - set(none_d4[0]["all_facts"])

{'If something needs the bear and it needs the dog then the bear likes the dog.',
 'The cat visits the dog.',
 'The dog is nice.',
 'The dog is round.'}

In [125]:
set(none_d6[1]["all_facts"]) - set(none_d4[1]["all_facts"])

{'If something likes the bear then the bear needs the cat.',
 'The cat is nice.',
 'The dog likes the bear.',
 'The dog visits the cat.'}

In [126]:
none_d6[1]["facts"]

['If something likes the bear then it is rough.',
 'If something is rough then it likes the dog.']

In [37]:
test_out = read_json("./2_hop_d5.json")
answers = [data['answer'] for data in test_out]
gen_outs = [data['gen_out'].split("?")[1] for data in test_out]

acc = 0
acc_rel = 0
acc_kg = 0
f1_score = 0
errors = []
gen_rels = []
relations = []
for pred, truth in zip(gen_outs, answers):
    acc += int(normalize_text(pred) == normalize_text(truth))
    f1_score += compute_f1(pred, truth)
    if normalize_text(pred) != normalize_text(truth):
        errors.append((pred, truth))
    relation = truth.split("because")[0]
    relations.append(relation)
    facts = truth.split("because")[1]
    gen_rel = pred.split("because")[0].strip()
    gen_rels.append(gen_rel)
    gen_facts = pred.split("because")[1] if len(pred.split("because")) > 1 else ""
    acc_rel += int(normalize_text(gen_rel) == normalize_text(relation))
    acc_kg += int(normalize_text(gen_facts) == normalize_text(facts))

print("Accuracy: ", acc/len(gen_outs))
print("Accuracy (Relation): ", acc_rel/len(gen_outs))
print("Accuracy (KG): ", acc_kg/len(gen_outs))
print("F1 Score: ", f1_score/len(gen_outs))

Accuracy:  0.0
Accuracy (Relation):  0.8246398410332837
Accuracy (KG):  0.0
F1 Score:  0.5880346121943318


In [56]:
test_out = read_json("./5_hop_d5.json")
answers = [data['answer'] for data in test_out]
gen_outs = [data['gen_out'].split("?")[1] for data in test_out]

def clean_gen_label(gen):
    if "because" in gen:
        gen = gen.split("because")[0].strip()
    else:
        gen = gen.split()[0].strip()
    
    if "," in gen:
        gen = gen.split(",")[0].strip()
    return gen

labels = [t.split('because')[0].strip() for t in answers]
gen_labels = [clean_gen_label(pred) for pred in gen_outs]
em_label = [int(normalize_text(label) == normalize_text(gen))
            for label, gen in zip(labels, gen_labels)]

sorted = {}
for label, gen in zip(labels, gen_labels):
    if label not in sorted:
        sorted[label] = [gen]
    sorted[label].append(gen)
for label, gens in sorted.items():
    em = [int(normalize_text(label) == normalize_text(gen)) for gen in gens]
    print(f"Label Accuracy {label}: ", sum(em)/len(em))

print("Label Accuracy: ", sum(em_label)/len(em_label))

Label Accuracy yes:  0.7022016222479722
Label Accuracy no:  0.7653534183082271
Label Accuracy none:  0.6813441483198146
Label Accuracy:  0.7167149758454107


In [6]:
test_out[1]

{'guid': 'e9a0a917-a83e-4d38-9a3d-7a15309e9768',
 'prefix': 'owa_proof_2_hop_d5a',
 'question': 'Based on fact_0 fact_1 fact_2 fact_3 fact_4 fact_5, Can we conclude Dave is red?',
 'gen_out': 'Based on fact_0 fact_1 fact_2 fact_3 fact_4 fact_5, Can we conclude Dave is red?yes because Dave is quiet,All quiet things are kind,All kind things are red,If Dave is furry then Dave is quiet,Dave is quiet ',
 'answer': 'yes because All kind things are red ,Dave is quiet ,Dave is furry ,All quiet things are kind ,If Dave is furry then Dave is quiet '}

In [57]:
kg_gen = test_out[1]["gen_out"].split("?")[1].split("because")[1].split(",")
kg_truth = test_out[1]["answer"].split("because")[1].split(",")

kg_gen = [k.strip() for k in kg_gen]
kg_truth = [k.strip() for k in kg_truth]

print(kg_gen)
print(kg_truth)

print(set(kg_gen) - set(kg_truth))
print(set(kg_truth) - set(kg_gen))

sum([int(normalize_text(pred) in normalize_text(test_out[1]["answer"].split("because")[1]))
    for pred in kg_gen])

['Harry is nice', 'Nice', 'round people are furry']
['Harry is nice', 'If someone is nice then they are round', 'Nice', 'furry people are green', 'Nice', 'round people are furry']
set()
{'If someone is nice then they are round', 'furry people are green'}


3

In [69]:
def compute_exact_match(prediction, truth):
    return int(normalize_text(truth) == normalize_text(prediction))

def recall_score(prediction, truth):
    return sum([int(normalize_text(pred) in normalize_text(truth)) for pred in prediction])

def recall_acc(prediction, truth, num_facts):
    return sum([int(normalize_text(pred) in normalize_text(truth)) for pred in prediction]) / num_facts

def post_process(generated, targets):
    if "because" in generated:
        gen_kg = generated.split("because")[1].split(",")
    else:
        gen_kg = generated.split(",")
    knowledge = targets.split("because")[1]
    num_kg = len(knowledge.split(","))
    return gen_kg, knowledge, num_kg

targets = [data['answer'] for data in test_out]
preds = [data['gen_out'].split("?")[1] for data in test_out]

if "because" in targets[0]:
    labels = [t.split('because')[0].strip() for t in targets]
    gen_labels = [p.split('because')[0].strip() for p in preds]
    em_label = [compute_exact_match(
        gen, label) for label, gen in zip(labels, gen_labels)]

    eval_paris = [post_process(gen, target) for gen, target in zip(preds, targets)]
    em_kg = [recall_score(gen, kg) for gen, kg, _ in eval_paris]
    acc_kg = [recall_acc(gen, kg, num) for gen, kg, num in eval_paris]
    num_gen_kgs = sum([len(gen) for gen, _, _ in eval_paris])
 
    print("Exact Match (Label): ", sum(em_label)/len(em_label))
    print("Exact Match (KG): ", sum(em_kg)/num_gen_kgs)
    print("Average Accuracy (KG): ", sum(acc_kg)/len(acc_kg))

Exact Match (Label):  0.7157487922705315
Exact Match (KG):  0.7101023456246022
Average Accuracy (KG):  0.582657106178845


In [None]:
print(errors[0][0], errors[0][1])
compute_f1(errors[0][0], errors[0][1])

In [None]:
proof_3_hop= read_jsonl("./data/proof_3_hop/test.jsonl")
proof_3_hop[0]

In [None]:
folio_train = read_jsonl("./data/folio/train.jsonl")

folio_train[0]

In [None]:
lengths = [len(data['facts']) for data in folio_train]
set(lengths)

In [None]:
folio_data = []
for data in folio_train:
    folio_data.append({
        "guid": str(uuid.uuid4()),
        "question": data['conclusion'],
        "answer": data['label'].lower(),
        "facts": data['premises'],
    })

In [None]:
write_jsonl(folio_data, "./data/folio/dev.jsonl")

In [None]:
from sklearn.model_selection import train_test_split

strategy = read_json("./data/strategyqa/strategyqa_train.json")

def parse_strategy(data):
    question = data["question"]
    answer = "yes" if data["answer"] else "no"
    facts = [normalize_text(fact) for fact in data["facts"]]
    decomposition = data["decomposition"]
    example = {
        "guid": str(uuid.uuid4()),
        "question": question,
        "answer": answer,
        "facts": facts,
        "decomposition": decomposition
    }
    return example

strategy_data = [parse_strategy(data) for data in strategy]
true_data = [data for data in strategy_data if data["answer"] == "yes"]
false_data = [data for data in strategy_data if data["answer"] == "no"]

train_true_data, dev_true_data = train_test_split(true_data, test_size=0.2, random_state=3042)
train_false_data, dev_false_data = train_test_split(false_data, test_size=0.2, random_state=3042)

train_data = train_true_data + train_false_data
dev_data = dev_true_data + dev_false_data

write_jsonl(train_data, "./data/strategyqa/train.jsonl")
write_jsonl(dev_data, "./data/strategyqa/dev.jsonl")

In [None]:
taxonomy = read_jsonl("./data/taxonomy/hypernyms_training_mix_short_train.jsonl")

taxonomy_data = []
for data in taxonomy:
    data["guid"] = data['id']
    data["question"] = normalize_text(data["phrase"])
    data["answer"] = ["no", "yes"][data["answer"]]
    data["facts"] = [normalize_text(fact) for fact in data["metadata"]["rules"]]
    example = {
        "guid": data["guid"],
        "question": data["question"],
        "answer": data["answer"],
        "facts": data["facts"],
    }
    taxonomy_data.append(example)
taxonomy_data[2]

In [None]:
write_jsonl(taxonomy_data, "./data/taxonomy/train.jsonl")

In [None]:
counting = read_jsonl("./data/counting/counting_training_mix_train.jsonl")
counting_data = []
for data in counting:
    data["guid"] = data['id']
    data["question"] = data["phrase"]
    data["answer"] = ["no", "yes"][data["answer"]]
    data["facts"] = data["metadata"]["rules"]
    example = {
        "guid": data["guid"],
        "question": data["question"],
        "answer": data["answer"],
        "facts": data["facts"],
    }
    counting_data.append(example)
counting_data[0]

In [None]:
write_jsonl(counting_data, "./data/counting/train.jsonl")

In [None]:
clutrr2 = read_jsonl("./data/clutrr_2_hop/train.jsonl")
clutrr4 = read_jsonl("./data/clutrr_4_hop/train.jsonl")
clutrr6 = read_jsonl("./data/clutrr_6_hop/train.jsonl")
print(len(clutrr2))
print(len(clutrr4))
print(len(clutrr6))

In [None]:
clutrr_all = random.sample(clutrr2, 50000) + random.sample(clutrr4, 50000) + random.sample(clutrr6, 50000)
len(clutrr_all)


In [None]:
write_jsonl(clutrr_all, "./data/clutrr_mix/train.jsonl")

In [None]:
musique = read_jsonl("./data/musique/musique_full_v1.0_dev.jsonl")

In [None]:
lsat = read_json("./data/arlsat/train.json")

In [None]:
def parse_entailment_tree(instance, add_distractors=False):
    hop = instance['depth_of_proof']
    hypothesis = instance['hypothesis']
    triples = instance["meta"]["triples"]
    distractor_ids = instance["meta"]["distractors"]
    fact_id = list(set(triples.keys()) - set(distractor_ids))
    distractors = [triples[idx] for idx in distractor_ids]
    facts = [triples[idx] for idx in fact_id]

    num_distractors = len(facts) // 2
    to_add = random.choices(distractors, k=num_distractors)
    if add_distractors:
        facts.extend(to_add)
    random.shuffle(facts)

    for i, fact in enumerate(facts):
        if random.randint(0, 1):
            facts[i] = random.choice(distractors)
    
    valid_example = {
        "guid": str(uuid.uuid4()),
        "hypothesis": hypothesis,
        "facts": facts,
        "answer": "yes",
    }

    invalid_example = {
        "guid": str(uuid.uuid4()),
        "hypothesis": hypothesis,
        "facts": facts,
        "answer": "no",
    }
    return valid_example, invalid_example

entail_tree = read_jsonl("./data/entailment_tree/task_2/test.jsonl")

entail_data = []
for instance in entail_tree:
    valid, invalid = parse_entailment_tree(instance, add_distractors=False)
    entail_data.append(valid)
    entail_data.append(invalid)
len(entail_data)

In [None]:
write_jsonl(entail_data, "./data/entailment_tree/test.jsonl")

In [None]:
entail_data = read_jsonl("./data/entailment_tree/train.jsonl")
depths = [len(instance['facts']) for instance in entail_data]
max(depths)

In [None]:
from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging

predictor = Predictor.from_path(
    "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")


In [73]:
def parse_proofwrite_cwa(instance):
    triples = {}
    for k,v in instance["triples"].items():
        triples[k] = v["text"]
    rules = {}
    for k,v in instance["rules"].items():
        rules[k] = v["text"]
    questions = []
    for q in instance['questions'].values():
        question = q['question']
        answer = q['answer']
        proofs = q['proofs']
        if '@' not in proofs:
            proofs = set(normalize_text(proofs).split())
        else:
            proofs = proofs.split('=')[1]
            proofs = set(normalize_text(proofs).split())
        if len(proofs) > 1:
            questions.append((question, str(answer).lower(), proofs))
    return triples, rules, questions

def _add_prefix(text):
    pool = ["It's wrong to say", "It's false to say", "It's incorrect to say", "It's not true that", "It's not correct that", "It's not the case that"]
    return random.choice(pool) + " " + text.lower()

def _add_suffix(text):
    pool = ["is not true", "is not correct", "is not the case", "is wrong", "is false", "is incorrect"]
    return text + " " + random.choice(pool)

def adversarial(text, label):
    if label == "true":
        try:
            verb = predictor.predict(sentence=text)['verbs'][0]['verb']
        except:
            verb = "12345"
        if verb == "is" or verb == "are":
            convert = text.replace(verb, verb + " not")
        else:
            convert = text.replace(verb, "does not " + verb)
        
        if random.uniform(0, 1) > 0.5:
            return _add_prefix(convert)
        else:
            return _add_suffix(convert)
    else:
        convert = text.replace("not ", "")
        if random.uniform(0, 1) > 0.5:
            return _add_prefix(convert)
        else:
            return _add_suffix(convert)

def build_example(triples, rules, question):
    example = {}
    triples.update(rules)
    example['guid'] = str(uuid.uuid4())
    example['answer'] = question[1].lower()
    example['question'] = question[0]
    example['proofs'] = list(question[2])
    example['facts'] = [triples[k] for k in question[2]]
    example['facts'] = list(set(example['facts']))
    return example

In [29]:
hop = 5
split = "train"
owa = read_jsonl(f"./data/proofwriter/OWA/depth-{hop}/meta-{split}.jsonl")

In [None]:
triples, rules, questions = parse_proofwrite_cwa(owa[0])

data = owa[0]["questions"]
for question in data.values():
    proofs = normalize_text(question['proofs']).split('or')
    proofs = [normalize_text(p).split() for p in proofs]
    proofs = set(max(proofs, key=len))
    triples.update(rules)
    facts = [triples[k] for k in proofs]
    print(facts)

In [36]:
hop = 5
split = "test"

owa = read_jsonl(f"./data/proofwriter/OWA/depth-{hop}/meta-{split}.jsonl")
owa_2_hop_d4 = read_jsonl(f"./data/owa_proof_{hop}_hop_d4/{split}.jsonl")

# owa_dict = {}
# for instance in owa:
#     triples, rules, questions = parse_proofwrite_cwa(instance)
#     for q in questions:
#         all_facts = copy.copy(list(triples.values()) + list(rules.values()))
#         owa_dict[q[0]] = all_facts

# for data in owa_2_hop_d4:
#     if data['question'] in owa_dict:
#         # all_facts = owa_dict[data['question']]
#         all_facts = data["all_facts"]
#         pool = set(all_facts) - set(data['facts'])
#         if len(pool) > 0:
#             data['all_facts'] = data['facts'] + random.choices(list(pool), k=2)
#             # data['all_facts'] = list(set(data['all_facts']))

print(owa_2_hop_d4[0]['all_facts'])
print(owa_2_hop_d4[0]['facts'])
set(owa_2_hop_d4[0]['all_facts']) - set(owa_2_hop_d4[0]['facts'])

['Harry is nice.', 'If someone is nice then they are round.', 'All red people are white.', 'Fiona is round.', 'Dave is furry.', 'Fiona is white.']
['Harry is nice.', 'If someone is nice then they are round.']


{'All red people are white.',
 'Dave is furry.',
 'Fiona is round.',
 'Fiona is white.'}

In [111]:
import os 

os.makedirs(f"./data/owa_proof_{hop}_hop_d6r", exist_ok=True)
write_jsonl(owa_2_hop_d4, f"./data/owa_proof_{hop}_hop_d6r/{split}.jsonl")

In [101]:
import os
import copy
from tqdm import tqdm

for hop in [2,3,5]:
    for split in ["train", "dev", "test"]:
        owa = read_jsonl(f"./data/proofwriter/OWA/depth-{hop}/meta-{split}.jsonl")
        owa_2_hop = []
        for i, data in tqdm(enumerate(owa)):
            triples, rules, questions = parse_proofwrite_cwa(data)
            all_facts = copy.copy(list(triples.values()) + list(rules.values()))

            kgs = list(data["triples"].values()) + list(data["rules"].values())
            assert len(kgs) == len(all_facts)

            examples = [build_example(triples, rules, q) for q in questions]
            # all_facts = [fact for e in examples for fact in e['facts']]
            # all_facts = list(set(all_facts))
            # for example in examples:
            #     example["all_facts"] = all_facts
            for example in examples:
                # example["all_facts"] = all_facts
                pool = set(all_facts) - set(example['facts'])
                if len(pool) > 1:
                    example['all_facts'] = example['facts'] + (
                        random.choices(list(pool), k=6))
                    example['all_facts'] = list(set(example['all_facts']))
                else:
                    example['all_facts'] = example['facts']
            true_data = [e for e in examples if e['answer'] == "true"]
            false_data = [e for e in examples if e['answer'] == "false"]
            unknown_data = [e for e in examples if e['answer'] == "unknown"]
            owa_2_hop.extend(random.choices(true_data, k=len(unknown_data)))
            owa_2_hop.extend(random.choices(false_data, k=len(unknown_data)))
            owa_2_hop.extend(unknown_data)
        
        print(len(owa_2_hop))
        os.makedirs(f"./data/owa_proof_{hop}_hop_d5", exist_ok=True)
        write_jsonl(owa_2_hop, f"./data/owa_proof_{hop}_hop_d5/{split}.jsonl")

6330it [00:00, 6811.86it/s]


6996


909it [00:00, 6735.03it/s]


1098


1794it [00:00, 6676.67it/s]


2013


4816it [00:01, 4116.51it/s]


10854


719it [00:00, 4847.24it/s]


1641


1405it [00:00, 4632.16it/s]


3057


3322it [00:01, 2320.45it/s]


18525


482it [00:00, 2523.37it/s]


2553


948it [00:00, 1908.45it/s]


5175


In [87]:
! cp -r ./data/owa_proof_2_hop_d5a ./data/owa_proof_2_hop_d6

In [112]:
# ! ./upload_wandb_data.sh ./data/owa_proof_2_hop_d2 owa_proof_2_hop_d2
# ! ./upload_wandb_data.sh ./data/owa_proof_3_hop_d2 owa_proof_3_hop_d2
# ! ./upload_wandb_data.sh ./data/owa_proof_5_hop_d2 owa_proof_5_hop_d2

# ! ./upload_wandb_data.sh ./data/owa_proof_2_hop_d4 owa_proof_2_hop_d4
# ! ./upload_wandb_data.sh ./data/owa_proof_3_hop_d4 owa_proof_3_hop_d4
# ! ./upload_wandb_data.sh ./data/owa_proof_5_hop_d4 owa_proof_5_hop_d4

# ! ./upload_wandb_data.sh ./data/owa_proof_2_hop_d5 owa_proof_2_hop_d5
# ! ./upload_wandb_data.sh ./data/owa_proof_3_hop_d5 owa_proof_3_hop_d5
# ! ./upload_wandb_data.sh ./data/owa_proof_5_hop_d5 owa_proof_5_hop_d5

! ./upload_wandb_data.sh ./data/owa_proof_2_hop_d6r owa_proof_2_hop_d6r
! ./upload_wandb_data.sh ./data/owa_proof_3_hop_d6r owa_proof_3_hop_d6r
! ./upload_wandb_data.sh ./data/owa_proof_5_hop_d6r owa_proof_5_hop_d6r

# ! ./upload_wandb_data.sh ./data/owa_proof_2_hop_d10 owa_proof_2_hop_dall
# ! ./upload_wandb_data.sh ./data/owa_proof_3_hop_d10 owa_proof_3_hop_dall
# ! ./upload_wandb_data.sh ./data/owa_proof_5_hop_d10 owa_proof_5_hop_dall

[34m[1mwandb[0m: Uploading directory ./data/owa_proof_2_hop_d6r to: "epfl_nlp_phd/data-collection/owa_proof_2_hop_d6r:latest" (dataset)
[34m[1mwandb[0m: Adding directory to artifact (./data/owa_proof_2_hop_d6r)... Done. 0.0s
[34m[1mwandb[0m: Currently logged in as: [33mchenze_epfl[0m ([33mepfl_nlp_phd[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.13.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.12.21
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/Users/zemingchen/Desktop/meta-knowledge/wandb/run-20230305_021427-1swl04re[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33msage-elevator-111[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/epfl_nlp_phd/data-collection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/epfl_nlp_p

In [None]:
owa = read_jsonl("./data/proofwriter/OWA/NatLang/test.jsonl")
owa_add = read_jsonl("./data/proofwriter/OWA/birds-electricity/birds-electricity.jsonl")
owa_test = owa + owa_add

answer2label = {
    "true": "entailment",
    "false": "contradiction",
    "unknown": "neutral"
}

proofwriter_nli = []
for data in owa_test:
    triples, rules, questions = parse_proofwrite_cwa(data)
    premise = list(triples.values()) + list(rules.values())
    premise = " ".join(premise)
    for q in questions:
        hypothesis = q[0]
        label = q[1]
        proofwriter_nli.append({
            "guid": str(uuid.uuid4()),
            "premise": premise,
            "hypothesis": hypothesis,
            "label": answer2label[label]
        })

proofwriter_nli[0]

In [None]:
write_jsonl(proofwriter_nli, "./data/proofwriter_nli/test.jsonl")

In [None]:
hop = 10

pth = f"./data/clutrr-system/forward/test/{hop}/long_proof_1.{hop}_test_facts_ANON.txt"

def parse_clutrr_line(line):
    data = line.replace('<STORY>', "")
    data = data.replace('<QUERY>', "<>")
    data = data.replace('<ANSWER>', "<>")
    data = data.replace('<PROOF>', "<>")
    data = data.replace('ent_', "person ")
    data = data.split("<>")
    data = [d.strip() for d in data]
    facts = data[0]
    question = data[1]
    answer = data[-1]
    return {
        "guid": str(uuid.uuid4()),
        "question": question,
        "facts": facts,
        "answer": answer,
    }

with open(pth, "r") as reader:
    lines = reader.readlines()
    clutrr = [parse_clutrr_line(line) for line in lines]
write_jsonl(clutrr, f"./data/clutrr-system/test_{hop}_hop.jsonl")
clutrr[0]

In [None]:
clutrr = read_jsonl("data/clutrr/dev.jsonl")

clutrr_4 = [x for x in clutrr if len(x["facts"]) == 4]
clutrr_6 = [x for x in clutrr if len(x["facts"]) == 6]

In [None]:
rels = [
    "son", "daughter",
    "brother", "sister",
    "father", "mother",
    "husband", "wife",
    "grandfather", "grandmother",
    "grandson", "granddaughter",
    "uncle", "aunt",
    "son-in-law", "daughter-in-law",
    "father-in-law", "mother-in-law",
    "brother-in-law", "sister-in-law",
    "nephew", "niece"
]

persons = [
    'A', 'B', 'C', 'D', 
    'H', 'J', 'K', 'L', 
    'M', 'N', 'O', 'P', 
    'Q', 'R', 'S', 'T',
    'V', 'X', 'Y', 'Z',]

entity_map = {}
for i, p in enumerate(persons):
    entity_map[p] = str(i+1)

def get_knowledge(tokens):
    entity = []
    relation = None
    for tok in tokens:
        if tok.isdigit():
            entity.append(persons[int(tok)-1])
        if tok in rels:
            relation = tok
    assert len(entity) == 2
    if relation is None:
        print(tokens)
    return entity, relation


In [None]:
def simplify(dataset):
    simple_dataset = copy.deepcopy(dataset)
    for data in simple_dataset:
        facts = []
        facts_raw = data['facts'].split(". ")
        for fact in facts_raw:
            tokens = fact.split()
            entity, relation = get_knowledge(tokens)
            facts.append([' '.join(entity), relation])
        data['facts'] = facts
        question = data['question']
        answer = data['answer']
        tokens = answer.split()
        entity, relation = get_knowledge(tokens)
        qa_pair = []
        
        qa_pair.append(question)
        # qa_pair[0] = qa_pair[0].replace(entity_map[entity[0]], entity[0])
        # qa_pair[0] = qa_pair[0].replace(entity_map[entity[1]], entity[1])
        # qa_pair[0] = qa_pair[0].replace("person ", "")
        qa_pair[0] = f"How are {entity[0]} and {entity[1]} related to each other ?"
        qa_pair.append(' '.join(entity))
        qa_pair.append(relation)
        data['questions'] = [qa_pair]
    return simple_dataset

In [None]:
import os

hop = 10
clutrr = read_jsonl(f"data/clutrr-system/test_{hop}_hop.jsonl")
simple_clutrr =  simplify(clutrr)

os.makedirs(f"data/clutrr_{hop}_hop", exist_ok=True)
write_jsonl(simple_clutrr, f"data/clutrr_{hop}_hop/test.jsonl")

simple_clutrr[0]

In [None]:
simple_clutrr_4 = simplify(clutrr_4)

In [None]:
write_jsonl(simple_clutrr_4, "data/clutrr_4_hop/dev.jsonl")

In [None]:
simple_clutrr_6 = simplify(clutrr_6)

In [None]:
write_jsonl(simple_clutrr_6, "data/clutrr_6_hop/dev.jsonl")

In [None]:
eval_out_4_hop = read_json("./output/20221212-033351/dev_out-epoch=0_step=5061.json")
eval_out_4_hop[0]

In [None]:
acc = 0 
for data in eval_out_4_hop:
    gen_out= data['gen_out'].split("?")
    gen_answer = gen_out[1].strip()
    if gen_answer == data['answer']:
        acc += 1
print(acc/len(eval_out_4_hop))

In [None]:
proof_5_hop = read_jsonl("./data/proof_5_hop_hard/train.jsonl")

sort_by_proof = {}
for data in proof_5_hop:
    key = ",".join(data['facts'])
    if key not in sort_by_proof:
        sort_by_proof[key] = [data]
    else:
        sort_by_proof[key].append(data)

In [None]:
num_k = [len(data['facts']) for data in proof_5_hop]
max(num_k)

In [None]:
len(sort_by_proof), len(proof_5_hop)


In [None]:
multi_question = list(sort_by_proof.items())
multi_question[3]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

meta_2_hop = [0.988, 0.5113, 0.414, 0.3526, 0.316]
meta_4_hop = [0.99, 0.9631, 0.819, 0.6452, 0.4608]
meta_6_hop = [0.9994, 0.9592, 0.9731, 0.9208, 0.799]
meta_clutrr = [meta_2_hop, meta_4_hop, meta_6_hop]

cmap = sns.cm.rocket_r

sns.set(font_scale=1.5)

ax = sns.heatmap(
    meta_clutrr, 
    annot=True, 
    square=True, 
    linewidth=3.0, 
    xticklabels=[2, 4, 6, 8, 10],
    yticklabels=[2, 4, 6],
    cbar=False,
    cmap=cmap)
plt.show()


In [None]:
baseline_2_hop = [0.981, 0.4432, 0.3717, 0.3258, 0.217]
baseline_4_hop = [0.9117, 0.8924, 0.8044, 0.7734, 0.6063]
baseline_6_hop = [1.0, 0.9706, 0.9546, 0.9054, 0.7622]
baseline_clutrr = [baseline_2_hop, baseline_4_hop, baseline_6_hop]

cmap = sns.cm.rocket_r
ax = sns.heatmap(
    baseline_clutrr, 
    annot=True, 
    square=True, 
    linewidth=3.0, 
    xticklabels=[2, 4, 6, 8, 10],
    yticklabels=[2, 4, 6], 
    cbar=False,
    cmap=cmap)
plt.show()

In [None]:
import numpy as np

proof_2_hop = [0.998, 0.961, 0.863]
proof_3_hop = [0, 0.956, 0.866]
proof_5_hop = [0, 0 , 0.977]

proof = [proof_2_hop, proof_3_hop, proof_5_hop]

mask = 1 - np.triu(np.ones_like(proof, dtype=np.bool))
mask = [[0,0,0],
        [1,0,0],
        [1,1,0]]
mask = np.array(mask)
heatmap = sns.heatmap(proof, mask=mask, xticklabels=[2,3,5], yticklabels=[2,3,5], vmin=0, vmax=1, annot=True, cmap='Blues', cbar=False, annot_kws={"fontsize":18})

In [None]:
import matplotlib.pyplot as plt
import scienceplots

plt.style.use(["nature", "grid", "ieee"])

# sns.set(font_scale=1.5)

x = [2, 3, 4, 5, 6, 7, 8, 9, 10]
plt.figure(figsize=(5, 4))
meta_6_hop = [100,95.5,96,94.65,94.85,95.7,90,84.5,80]
baseline_6_hop = [100,95.4,90,89.1,90.34,87.6,81.6,75,67.9]

plt.plot(
    x, meta_6_hop, 
    'o-', color='#fdb462', 
    alpha=1.0, label='Meta-kg-6', 
    linewidth='2', ms=5)
plt.plot(
    x, baseline_6_hop, 
    's-', color='#7fb1d3', 
    alpha=1.0, label='Baseline-6', 
    linewidth='2', ms=5)

plt.ylim(20, 100)

#plt.grid(axis='x', color='0.95')
plt.legend()
plt.title('6-hop Clutrr Generalization')
plt.show()


In [None]:
x = [2, 4, 6, 8, 10]
plt.figure(figsize=(6, 4))

plt.plot(x, meta_4_hop, 'o-', color='orange', alpha=0.9, label='Meta-kg')
plt.plot(x, baseline_4_hop, 's-', color='blue', alpha=0.9, label='Baseline')
plt.ylim(0, 1.1)

plt.grid(axis='x', color='0.95')
plt.legend()
plt.title('4-hop Clutrr Generalization')
plt.show()

In [None]:
meta_2_hop = [0.988, 0.5113, 0.414, 0.3526, 0.316]
baseline_2_hop = [0.981, 0.4432, 0.3717, 0.3258, 0.217]
x = [2, 4, 6, 8, 10]
plt.figure(figsize=(6, 4))

plt.plot(x, meta_2_hop, 'o-', color='orange', alpha=0.9, label='Meta-kg')
plt.plot(x, baseline_2_hop, 's-', color='blue', alpha=0.9, label='Baseline')
plt.ylim(0, 1.1)

plt.grid(axis='x', color='0.95')
plt.legend()
plt.title('2-hop Clutrr Generalization')
plt.show()
