In [79]:
import pandas as pd
import os
import codecs
import re
import string
import sys
import numpy as np

PUNCTUATION = set(string.punctuation)

import re

def remove_latin(text):
    return re.sub(r'[^\x00-\x7f]',r'', text)

def eval_zero_re(test_file, answer_file):
    precision = read_results(test_file, answer_file)
    return precision

def read_results(test_set, answer_file):
    with codecs.open(test_set, 'r', 'utf-8') as fin:
        data = [line.strip().split('\t') for line in fin]
    metadata = [x[:4] for x in data]
    gold = [x[4:] for x in data]

    new_gold = []
    new_metadata = []
    for i in range(len(gold)):
        if len(gold[i]) > 0:
            new_gold.append(simplify(remove_latin(' and '.join(gold[i]))))
            new_metadata.append(metadata[i])

    answer_df = pd.read_csv(answer_file, sep='\t')

    questions = []
    answers = []
    for i, row in answer_df.iterrows():
        try:
            questions.append(row['question_predictions'])
        except:
            questions.append('no question')
        answers.append(remove_latin(str(row['predictions_str'])))

    new_answers = [simplify(answer) for answer in answers]

    correct = 0.0
    for i in range(len(new_gold)):
        ref = ' '.join(new_gold[i])
        candidate = ' '.join(new_answers[i])
        if (ref == candidate):
            correct += 1
        #else:
        #    print(new_metadata[i])
        #    print(ref)
        #    print(questions[i])
        #    print(candidate)
        #    print("#################")
    return correct / len(new_gold)


def unk_zero_re_eval(test_file, answer_file):
    q_aprf = unk_read_results(test_file, answer_file)
    return pretify(q_aprf)


def unk_read_results(test_set, answer_file):
    with codecs.open(test_set, "r", "utf-8") as fin:
        data = [line.strip().split("\t") for line in fin]
    metadata = [x[:4] for x in data]
    gold = [set(x[4:]) for x in data]

    new_gold = []
    new_gold = gold

    with codecs.open(answer_file, "r", "utf-8") as fin:
        answers = [line.strip() for line in fin]

 

    new_answers = []
    # ignore the header in answers
    for answer in answers[1:]:
        if answer != "no_answer":
            new_answers.append(answer)
        else:
            new_answers.append("")

    telemetry = []
    for m, g, a in zip(metadata, new_gold, new_answers):
        stats = score(g, a)
        telemetry.append([m[0], m[1], str(len(g) > 0), stats])
    return aprf(telemetry)


def aprf(g):
    tp, tn, sys_pos, real_pos = sum(map(lambda x: x[-1], g))
    total = len(g)
    # a = float(tp + tn) / total
    # nr = tn / float(total - real_pos)
    # npr = tn / float(total - sys_pos)
    if tp == 0:
        p = r = f = 0.0
    else:
        p = tp / float(sys_pos)
        r = tp / float(real_pos)
        f = 2 * p * r / (p + r)
    # return np.array((a, p, r, f, npr, nr))
    return np.array((p, r, f))


def score(gold, answer):
    if len(gold) > 0:
        gold = set.union(*[simplify(g) for g in gold])
    answer = simplify(answer)
    result = np.zeros(4)
    if answer == gold:
        if len(gold) > 0:
            result[0] += 1
        else:
            result[1] += 1
    if len(answer) > 0:
        result[2] += 1
    if len(gold) > 0:
        result[3] += 1
    return result


def simplify(answer):
    return ["".join(c for c in t) for t in answer.strip().lower().split()]


def pretify(results):
    return " \t ".join(
        [
            ": ".join((k, v))
            for k, v in zip(
                ["Precision", "Recall", "F1"],
                map(lambda r: "{0:.2f}%".format(r * 100), results),
            )
        ]
    )


def re_qa_error_analysis(test_set, answer_file_1, answer_file_2):
    with codecs.open(test_set, 'r', 'utf-8') as fin:
        data = [line.strip().split('\t') for line in fin]
    metadata = [x for x in data]
    gold = [x[4:] for x in data]

    new_gold = []
    new_metadata = []
    for i in range(len(gold)):
        if len(gold[i]) > 0:
            new_gold.append(simplify(remove_latin(' and '.join(gold[i]))))
            new_metadata.append(metadata[i])

    answer_df_1 = pd.read_csv(answer_file_1, sep=',')
    answers_1 = answer_df_1["predictions_str"].tolist()
    new_answers_1 = [simplify(remove_latin(str(answer_1).strip(" </s>"))) for answer_1 in answers_1]
    
    answer_df_2 = pd.read_csv(answer_file_2, sep=',')
    answers_2 = answer_df_2["predictions_str"].tolist()
    new_answers_2 = [simplify(remove_latin(str(answer_2).strip(" </s>"))) for answer_2 in answers_2]
    
    
    file_1_only_wrong = []
    file_2_only_wrong = []
    both_wrong = []
    for i in range(len(new_gold)):
        ref = ' '.join(new_gold[i])
        candidate_1 = ' '.join(new_answers_1[i])
        candidate_2 = ' '.join(new_answers_2[i])
        if ((ref == candidate_1) or (ref in candidate_1) or (candidate_1 in ref)) and (not ((ref == candidate_2) or (ref in candidate_2) or (candidate_2 in ref))):
            file_2_only_wrong.append(i)
        if (not ((ref == candidate_1) or (ref in candidate_1) or (candidate_1 in ref))) and ((ref == candidate_2) or (ref in candidate_2) or (candidate_2 in ref)):
            file_1_only_wrong.append(i)
        if (not ((ref == candidate_1) or (ref in candidate_1) or (candidate_1 in ref))) and (not ((ref == candidate_2) or (ref in candidate_2) or (candidate_2 in ref))):
            both_wrong.append(i)

    file_1_only_rows = []
    for index, row in answer_df_1.iterrows():
        if index in file_1_only_wrong:
            file_1_only_rows.append(row)
     
    file_1_only_gold_rows = []
    for index in range(len(new_metadata)):
        if index in file_1_only_wrong:
            file_1_only_gold_rows.append(new_metadata[index])

    pd.DataFrame(file_1_only_gold_rows).to_csv('./file_1_only_wrongs.gold.csv', sep=',', header=True, index=False)

    pd.DataFrame(file_1_only_rows).to_csv('./file_1_only_wrongs.csv', sep=',', header=True, index=False)
    
    file_2_only_rows = []
    for index, row in answer_df_2.iterrows():
        if index in file_2_only_wrong:
            file_2_only_rows.append(row)
    
    file_2_only_gold_rows = []
    for index in range(len(new_metadata)):
        if index in file_2_only_wrong:
            file_2_only_gold_rows.append(new_metadata[index])

    pd.DataFrame(file_2_only_gold_rows).to_csv('./file_2_only_wrongs.gold.csv', sep=',', header=True, index=False)
    
    pd.DataFrame(file_2_only_rows).to_csv('./file_2_only_wrongs.csv', sep=',', header=True, index=False)

In [80]:
import pandas as pd
import os
import codecs
import re
import string
import sys
import numpy as np

PUNCTUATION = set(string.punctuation)

import re

def remove_latin(text):
    return re.sub(r'[^\x00-\x7f]',r'', text)

def fewrl_eval_zero_re(test_file, answer_file):
    precision = fewrl_read_results(test_file, answer_file)
    return precision

def fewrl_read_results(test_set, answer_file):
    ref_df = pd.read_csv(test_set, sep=',')
    refs = ref_df["answers"].tolist()
    new_refs = [simplify(remove_latin(ref.strip(" </s>"))) for ref in refs]
    
    answer_df = pd.read_csv(answer_file, sep=',')
    answers = answer_df["predictions_str"].tolist()
    new_answers = [simplify(remove_latin(str(answer).strip(" </s>"))) for answer in answers]

    correct = 0.0
    for i in range(len(new_refs)):
        ref = ' '.join(new_refs[i])
        candidate = ' '.join(new_answers[i])
        if (ref == candidate): #or (ref in candidate) or (candidate in ref):
            correct += 1
        #else:
        #    print(new_metadata[i])
        #    print(ref)
        #    print(questions[i])
        #    print(candidate)
        #    print("#################")
    return correct / len(new_refs)

def simplify(answer):
    return ["".join(c for c in t) for t in answer.strip().lower().split()]

def error_analysis(test_set, answer_file_1, answer_file_2):
    ref_df = pd.read_csv(test_set, sep='\t')
    refs = ref_df["answers"].tolist()
    new_refs = [simplify(remove_latin(ref.strip(" </s>"))) for ref in refs]
    
    answer_df_1 = pd.read_csv(answer_file_1, sep=',')
    answers_1 = answer_df_1["predictions_str"].tolist()
    new_answers_1 = [simplify(remove_latin(str(answer_1).strip(" </s>"))) for answer_1 in answers_1]
    
    answer_df_2 = pd.read_csv(answer_file_2, sep=',')
    answers_2 = answer_df_2["predictions_str"].tolist()
    new_answers_2 = [simplify(remove_latin(str(answer_2).strip(" </s>"))) for answer_2 in answers_2]
    
    
    file_1_only_wrong = []
    file_2_only_wrong = []
    both_wrong = []
    for i in range(len(new_refs)):
        ref = ' '.join(new_refs[i])
        candidate_1 = ' '.join(new_answers_1[i])
        candidate_2 = ' '.join(new_answers_2[i])
        if (ref == candidate_1) and (ref != candidate_2):
            file_2_only_wrong.append(i)
        if (ref != candidate_1) and (ref == candidate_2):
            file_1_only_wrong.append(i)
        if (ref != candidate_1) and (ref != candidate_2):
            both_wrong.append(i)

    file_1_only_rows = []
    for index, row in answer_df_1.iterrows():
        if index in file_1_only_wrong:
            file_1_only_rows.append(row)
     
    file_1_only_gold_rows = []
    for index, row in ref_df.iterrows():
        if index in file_1_only_wrong:
            file_1_only_gold_rows.append(row)

    pd.DataFrame(file_1_only_gold_rows).to_csv('./file_1_only_wrongs.gold.csv', sep=',', header=True, index=False)

    pd.DataFrame(file_1_only_rows).to_csv('./file_1_only_wrongs.csv', sep=',', header=True, index=False)
    
    file_2_only_rows = []
    for index, row in answer_df_2.iterrows():
        if index in file_2_only_wrong:
            file_2_only_rows.append(row)
    
    file_2_only_gold_rows = []
    for index, row in ref_df.iterrows():
        if index in file_2_only_wrong:
            file_2_only_gold_rows.append(row)

    pd.DataFrame(file_2_only_gold_rows).to_csv('./file_2_only_wrongs.gold.csv', sep=',', header=True, index=False)
    
    pd.DataFrame(file_2_only_rows).to_csv('./file_2_only_wrongs.csv', sep=',', header=True, index=False)

In [428]:
fold_files = ["mml_pgg_off_sim.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/dev_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.1.dev.predictions.step.100.csv
0.3722857142857143

#
mml_pgg_off_sim.run.1.dev.predictions.step.200.csv
0.4037142857142857

#
mml_pgg_off_sim.run.1.dev.predictions.step.300.csv
0.416

#
mml_pgg_off_sim.run.1.dev.predictions.step.400.csv
0.4042857142857143

#
mml_pgg_off_sim.run.1.dev.predictions.step.500.csv
0.4377142857142857

#
mml_pgg_off_sim.run.1.dev.predictions.step.600.csv
0.4422857142857143

#
mml_pgg_off_sim.run.1.dev.predictions.step.700.csv
0.43028571428571427

#
mml_pgg_off_sim.run.1.dev.predictions.step.800.csv
0.436

#
mml_pgg_off_sim.run.1.dev.predictions.step.900.csv
0.4034285714285714

#
mml_pgg_off_sim.run.1.dev.predictions.step.1000.csv
0.438

#
mml_pgg_off_sim.run.1.dev.predictions.step.1100.csv
0.42828571428571427

#
mml_pgg_off_sim.run.1.dev.predictions.step.1200.csv
0.42457142857142854

#
mml_pgg_off_sim.run.1.dev.predictions.step.1300.csv
0.43657142857142855

#
mml_pgg_off_sim.run.1.dev.predictions.step.1400.csv
0.448

#
mml_pgg_off_sim.run

In [171]:
fold_files = ["mml_pgg_off_sim.run.1.test.predictions.step.2400.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.1.test.predictions.step.2400.csv
0.4756190476190476



In [429]:
fold_files = ["mml_pgg_off_sim.run.1.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.1.test.predictions.step.2600.csv
0.5228571428571429



In [221]:
fold_files = ["mml_pgg_off_sim.run.1.train.predictions.step.2400.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.1.train.predictions.step.2400.csv
0.747



In [222]:
fold_files = ["concat.run.1.train.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/train_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.1.train.predictions.step.2600.csv
0.7894523809523809



In [238]:
concat_file = "~/t5-small-exps/naacl-2022/fewrl/concat_run_1/concat.run.1.train.predictions.step.2600.csv"
mml_file = "~/t5-small-exps/naacl-2022/fewrl/run_1/mml_pgg_off_sim.run.1.train.predictions.step.2400.csv"
error_analysis(fold_0_gold_file, mml_file, concat_file)

ParserError: Error tokenizing data. C error: Expected 5 fields in line 150, saw 6


In [78]:
fold_1_path = "~/codes/dreamscape-qa/gold_fold_1.test.predictions.0.step.600.csv"
fold_2_path = "~/codes/dreamscape-qa/mml_pgg_off_sim.fold.1.test.predictions.step.1300.csv"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

re_qa_error_analysis(fold_0_gold_file, fold_1_path, fold_2_path)

In [423]:
fold_1_path = "~/t5-small-exps/naacl-2022/fold_4_results/mml_pgg_off_sim.test.predictions.step.10800.csv"
fold_2_path = "~/t5-small-exps/naacl-2022/fold_4_results/concat_fold.4.test.predictions.step.12500.csv"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

re_qa_error_analysis(fold_0_gold_file, fold_1_path, fold_2_path)

In [431]:
fold_files = ["concat.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/dev_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.1.dev.predictions.step.100.csv
0.402

#
concat.run.1.dev.predictions.step.200.csv
0.44457142857142856

#
concat.run.1.dev.predictions.step.300.csv
0.45457142857142857

#
concat.run.1.dev.predictions.step.400.csv
0.452

#
concat.run.1.dev.predictions.step.500.csv
0.45771428571428574

#
concat.run.1.dev.predictions.step.600.csv
0.4765714285714286

#
concat.run.1.dev.predictions.step.700.csv
0.4865714285714286

#
concat.run.1.dev.predictions.step.800.csv
0.4737142857142857

#
concat.run.1.dev.predictions.step.900.csv
0.4662857142857143

#
concat.run.1.dev.predictions.step.1000.csv
0.4645714285714286

#
concat.run.1.dev.predictions.step.1100.csv
0.45971428571428574

#
concat.run.1.dev.predictions.step.1200.csv
0.4762857142857143

#
concat.run.1.dev.predictions.step.1300.csv
0.48714285714285716

#
concat.run.1.dev.predictions.step.1400.csv
0.5145714285714286

#
concat.run.1.dev.predictions.step.1500.csv
0.468

#
concat.run.1.dev.predictions.step.1600.csv
0.4868571428571429

#
c

In [432]:
fold_files = ["concat.run.1.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.1.test.predictions.step.2600.csv
0.5164761904761904



In [179]:
fold_files = ["base-base.run.1.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
base-base.run.1.test.predictions.step.2600.csv
0.3498095238095238



In [174]:
fold_files = ["mml_pgg_off_sim.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/dev_ref_943.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.2.dev.predictions.step.100.csv
0.3497142857142857

#
mml_pgg_off_sim.run.2.dev.predictions.step.200.csv
0.38742857142857146

#
mml_pgg_off_sim.run.2.dev.predictions.step.300.csv
0.3831428571428571

#
mml_pgg_off_sim.run.2.dev.predictions.step.400.csv
0.4145714285714286

#
mml_pgg_off_sim.run.2.dev.predictions.step.500.csv
0.4114285714285714

#
mml_pgg_off_sim.run.2.dev.predictions.step.600.csv
0.36428571428571427

#
mml_pgg_off_sim.run.2.dev.predictions.step.700.csv
0.4237142857142857

#
mml_pgg_off_sim.run.2.dev.predictions.step.800.csv
0.45685714285714285

#
mml_pgg_off_sim.run.2.dev.predictions.step.900.csv
0.4228571428571429

#
mml_pgg_off_sim.run.2.dev.predictions.step.1000.csv
0.43

#
mml_pgg_off_sim.run.2.dev.predictions.step.1100.csv
0.41942857142857143

#
mml_pgg_off_sim.run.2.dev.predictions.step.1200.csv
0.43514285714285716

#
mml_pgg_off_sim.run.2.dev.predictions.step.1300.csv
0.398

#
mml_pgg_off_sim.run.2.dev.predictions.step.1400.csv
0.4205714285714

In [175]:
fold_files = ["mml_pgg_off_sim.run.2.test.predictions.step.800.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_943.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.2.test.predictions.step.800.csv
0.3982857142857143



In [180]:
fold_files = ["base-base.run.2.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_943.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
base-base.run.2.test.predictions.step.2600.csv
0.33676190476190476



In [176]:
fold_files = ["mml_pgg_off_sim.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/dev_ref_111.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.3.dev.predictions.step.100.csv
0.42914285714285716

#
mml_pgg_off_sim.run.3.dev.predictions.step.200.csv
0.444

#
mml_pgg_off_sim.run.3.dev.predictions.step.300.csv
0.4722857142857143

#
mml_pgg_off_sim.run.3.dev.predictions.step.400.csv
0.454

#
mml_pgg_off_sim.run.3.dev.predictions.step.500.csv
0.5017142857142857

#
mml_pgg_off_sim.run.3.dev.predictions.step.600.csv
0.5062857142857143

#
mml_pgg_off_sim.run.3.dev.predictions.step.700.csv
0.52

#
mml_pgg_off_sim.run.3.dev.predictions.step.800.csv
0.5265714285714286

#
mml_pgg_off_sim.run.3.dev.predictions.step.900.csv
0.526

#
mml_pgg_off_sim.run.3.dev.predictions.step.1000.csv
0.536

#
mml_pgg_off_sim.run.3.dev.predictions.step.1100.csv
0.5468571428571428

#
mml_pgg_off_sim.run.3.dev.predictions.step.1200.csv
0.5088571428571429

#
mml_pgg_off_sim.run.3.dev.predictions.step.1300.csv
0.5165714285714286

#
mml_pgg_off_sim.run.3.dev.predictions.step.1400.csv
0.5337142857142857

#
mml_pgg_off_sim.run.3.dev.prediction

In [178]:
fold_files = ["mml_pgg_off_sim.run.3.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_111.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.run.3.test.predictions.step.2600.csv
0.47523809523809524



In [181]:
fold_files = ["base-base.run.3.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_ref_111.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
base-base.run.3.test.predictions.step.2600.csv
0.35714285714285715



In [190]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
lm_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
lm_model = model.to("cuda:0")
lm_model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )


In [191]:
def preprocess_the_prediction_files(main_path, list_of_files):
    for file in list_of_files:
        df = pd.read_csv(os.path.join(main_path, file), sep=',')
        df.to_csv(os.path.join("/tmp/", file), sep='\t', header=True, index=False)

def eval_the_prediction_files(list_of_files, gold_file):
    scores = {}
    scores_list = []
    for file in list_of_files:
        score = eval_zero_re(gold_file, os.path.join("/tmp/", file))
        scores[score] = file
        scores_list.append(score)
    
    f1s = np.array(scores_list)
    max_f1 = max(scores.keys())
    return scores[max_f1],  max_f1 * 100, f1s * 100, scores

def unk_eval_the_prediction_files(list_of_files, gold_file):
    scores = {}
    scores_list = []
    for file in list_of_files:
        score = unk_zero_re_eval(gold_file, os.path.join("/tmp/", file))
        f1_score = float(score.split()[-1][0:-1])
        scores[f1_score] = file
        scores_list.append(f1_score)

    f1s = np.array(scores_list)
    max_f1 = max(scores.keys())
    return scores[max_f1],  max_f1, f1s, scores


def lm_eval_the_prediction_files(main_path, list_of_files, gold_file):
    lm_scores = {}
    lm_scores_list = []
    for file in list_of_files:
        df = pd.read_csv(os.path.join(main_path, file), sep=',')
        df["question_predictions"].to_csv(os.path.join("/tmp/", file), sep='\t', header=True, index=False)
        with codecs.open(os.path.join("/tmp/", file), 'r', 'utf-8') as fin:
            questions = [line.strip() for line in fin]
            new_questions = questions[1:]
            lm_score = 0.0
            for quest in new_questions:
                quest = " ".join(quest)
                inputs = lm_tokenizer(quest, return_tensors="pt")
                inputs = {k: v.to("cuda:0") for k, v in inputs.items()}
                outputs = lm_model(**inputs, labels=inputs["input_ids"])
                quest_p = torch.exp(-outputs.loss)
                lm_score += quest_p.item()
            
            avg_p = lm_score / len(new_questions)
            lm_scores[file] = avg_p
            lm_scores_list.append(avg_p)

    return lm_scores, lm_scores_list

In [192]:
def process_the_lm_re_predictions(fold_0_path, fold_files, fold_0_gold_file):
    preprocess_the_prediction_files(fold_0_path, fold_files)
    max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
    print(max_file)
    print(max_f1)
    print(f1s)

    max_f1s_idx = (-f1s).argsort()[:20]
    lm_fold_files = [fold_files[i] for i in max_f1s_idx]
    lm_scores, lm_scores_list = lm_eval_the_prediction_files(fold_0_path, lm_fold_files, fold_0_gold_file)
    top_re_scores = f1s[max_f1s_idx]
    top_lm_scores = np.array(lm_scores_list)
    normalized_lm_scores = (top_lm_scores - np.min(top_lm_scores)) / (np.max(top_lm_scores)) * 100
    normalized_re_scores = (top_re_scores - np.min(top_re_scores)) / (np.max(top_re_scores)) * 100
    print(normalized_lm_scores)
    print(normalized_re_scores)

    final_scores = 0.2 * normalized_lm_scores + 0.8 * normalized_re_scores
    print(final_scores)
    max_final_idx = (-final_scores).argsort()[:1]
    print(lm_fold_files[max_final_idx[0]])

In [345]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold_1.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]
fold_files.append("gold_fold_1.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

600

gold_fold_1.dev.predictions.0.step.600.csv
71.0
[65.66666667 67.66666667 67.33333333 70.         68.66666667 71.
 70.33333333 67.         62.66666667 65.66666667 68.33333333 69.33333333
 66.66666667 70.         67.33333333 68.         70.         67.66666667
 64.         67.33333333 68.33333333 65.33333333 67.         66.
 65.         64.66666667 66.         65.         64.33333333 62.33333333
 63.66666667 67.33333333 64.         65.66666667 65.33333333 65.
 64.66666667 67.33333333 69.         67.66666667 64.66666667 67.66666667
 65.66666667 64.         66.         66.33333333 66.66666667 63.33333333
 65.         63.66666667 63.66666667 66.         66.66666667 66.
 64.33333333 64.66666667 64.66666667 65.         66.         67.33333333
 69.66666667 66.66666667 69.66666667 67.33333333 69.33333333 68.
 60.         67.         66.66666667 65.         65.         64.66666667
 64.66666667 63.66666667 64.33333333 61.66666667 64.66666667 68.
 66.         67.         66.66666667 68.33333333 64

In [347]:
# Concat Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold_1.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]
fold_files.append("concat_fold_1.dev.predictions.0.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

100

concat_fold_1.dev.predictions.0.step.100.csv
67.0
[67.         60.66666667 63.33333333 66.         65.66666667 62.
 64.33333333 65.33333333 64.66666667 64.         65.33333333 65.33333333
 65.66666667 64.66666667 61.         63.33333333 64.         63.33333333
 60.66666667 62.66666667 61.66666667 62.         63.         62.66666667
 62.66666667 59.66666667 61.66666667 61.66666667 61.66666667 61.66666667
 61.33333333 61.         63.         63.33333333 63.         62.66666667
 63.33333333 63.         64.33333333 63.         63.66666667 62.66666667
 63.66666667 61.         61.66666667 63.         63.         62.66666667
 64.         62.         62.         62.         62.33333333 62.66666667
 58.33333333 62.         60.         58.33333333 60.66666667 62.
 61.         62.         62.33333333 62.         60.         62.33333333
 61.66666667 61.66666667 59.         57.66666667 59.66666667 56.33333333
 58.33333333 56.33333333 57.33333333 59.         58.33333333 60.33333333
 61.         57. 

100

In [348]:
# Concat Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold_1.test.predictions.0.step.100.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold_1.test.predictions.0.step.100.csv
65.4
[65.4]


In [349]:
# Concat Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold_1.test.predictions.0.step.600.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold_1.test.predictions.0.step.600.csv
74.33333333333333
[74.33333333]


In [350]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold_2.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

1800

gold_fold_2.dev.predictions.0.step.6700.csv
74.33333333333333
[66.         70.         70.33333333 69.66666667 71.33333333 69.33333333
 70.66666667 70.         69.66666667 69.33333333 68.         71.66666667
 70.33333333 70.         71.         65.         71.33333333 72.33333333
 70.         70.         70.66666667 71.33333333 69.33333333 70.33333333
 69.33333333 69.66666667 69.66666667 70.         71.66666667 68.33333333
 70.33333333 70.         69.66666667 70.66666667 69.66666667 70.66666667
 69.33333333 69.33333333 71.66666667 67.         70.         72.
 71.         70.         70.         72.         71.         71.66666667
 70.         72.         73.         73.33333333 73.         73.
 71.66666667 72.66666667 72.         72.         73.33333333 69.66666667
 71.66666667 71.33333333 69.33333333 72.33333333 72.33333333 73.66666667
 74.33333333 72.33333333 73.66666667 73.         73.33333333 73.66666667
 72.33333333 70.         73.33333333 70.33333333 73.33333333 73.33333333
 71. 

In [351]:
# Concat Predictions on the dev 2 fold 2. Only on positive samples.
fold_files = ["concat_fold_2.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]
fold_files.append("concat_fold_2.dev.predictions.0.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


1700

concat_fold_2.dev.predictions.0.step.17000.csv
73.66666666666667
[70.         69.33333333 66.66666667 69.         68.33333333 71.
 70.         71.         71.33333333 67.66666667 67.66666667 69.
 71.33333333 71.66666667 72.66666667 71.66666667 73.         70.
 71.66666667 71.66666667 68.33333333 72.         72.33333333 67.
 70.         71.         68.66666667 70.66666667 67.66666667 72.
 69.         70.66666667 72.33333333 72.         71.66666667 73.
 72.33333333 69.33333333 69.         72.33333333 70.         72.
 71.         73.33333333 70.66666667 72.         71.66666667 69.66666667
 67.66666667 70.33333333 70.         70.33333333 71.33333333 69.33333333
 68.33333333 69.66666667 71.         71.66666667 71.33333333 69.
 70.66666667 72.         69.66666667 68.66666667 69.         70.33333333
 69.33333333 70.66666667 70.66666667 69.66666667 68.66666667 69.33333333
 68.33333333 70.33333333 64.33333333 69.66666667 71.66666667 67.33333333
 69.         70.         67.         69.         6

In [390]:
# Gold Predictions on the dev 2 fold 2. Only on positive samples.
fold_files = ["gold_fold_2.test.predictions.0.step.6700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold_2.test.predictions.0.step.6700.csv
58.61666666666666
[58.61666667]


In [357]:
# Gold Predictions on the dev 2 fold 2. Only on positive samples.
fold_files = ["gold_fold_2.test.predictions.0.step.1800.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold_2.test.predictions.0.step.1800.csv
62.5
[62.5]


In [391]:
# Concat Predictions on the dev 2 fold 2. Only on positive samples.
fold_files = ["concat_fold_2.test.predictions.0.step.17000.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold_2.test.predictions.0.step.17000.csv
54.58333333333333
[54.58333333]


In [392]:
# Concat Predictions on the dev 2 fold 2. Only on positive samples.
fold_files = ["concat_fold_2.test.predictions.0.step.17000.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold_2.test.predictions.0.step.17000.csv
54.58333333333333
[54.58333333]


In [352]:
# Gold Predictions on the dev 1 fold 3. Only on positive samples.
fold_files = ["gold_fold_3.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

2400

gold_fold_3.dev.predictions.0.step.2400.csv
75.66666666666667
[69.         70.33333333 68.66666667 71.         68.66666667 70.66666667
 72.33333333 70.66666667 70.66666667 71.         72.66666667 75.33333333
 73.         74.         70.66666667 73.33333333 74.66666667 74.
 74.         71.66666667 71.66666667 71.66666667 71.         75.66666667
 71.33333333 70.         69.66666667 71.66666667 73.         71.66666667
 69.66666667 67.33333333 68.66666667 65.33333333 67.33333333 69.33333333
 70.33333333 71.33333333 74.         71.33333333 73.33333333 71.66666667
 68.         71.         72.33333333 70.         71.33333333 67.33333333
 70.33333333 67.         70.         65.33333333 68.33333333 66.33333333
 66.66666667 69.         71.         70.         69.         71.66666667
 68.66666667 68.66666667 68.         66.33333333 70.66666667 68.33333333
 70.         69.         72.33333333 68.33333333 70.         72.33333333
 69.66666667 67.         70.         66.66666667 68.66666667 69.666666

In [355]:
# Gold Predictions on the dev 1 fold 3. Only on positive samples.
fold_files = ["concat_fold_3.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

4400

concat_fold_3.dev.predictions.0.step.10500.csv
69.66666666666667
[63.66666667 63.66666667 60.         60.33333333 60.         58.66666667
 62.66666667 63.         61.         60.         63.33333333 63.66666667
 63.         61.33333333 63.66666667 59.66666667 66.         63.33333333
 64.33333333 62.         62.66666667 62.         64.66666667 62.66666667
 63.66666667 61.66666667 60.         60.66666667 63.         65.
 63.66666667 61.66666667 58.33333333 62.33333333 62.         64.33333333
 62.66666667 63.33333333 65.         64.66666667 64.66666667 61.33333333
 62.66666667 65.33333333 64.         59.         61.33333333 62.
 63.66666667 60.66666667 61.         62.33333333 59.66666667 57.
 58.33333333 59.66666667 59.66666667 62.66666667 60.33333333 61.66666667
 65.66666667 64.66666667 58.66666667 60.         63.66666667 64.33333333
 66.         66.         63.         64.66666667 62.66666667 67.33333333
 64.33333333 64.66666667 61.         62.         63.66666667 64.
 64.33333333 61.33

In [393]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold_3.test.predictions.0.step.10500.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold_3.test.predictions.0.step.10500.csv
66.85
[66.85]


In [360]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold_3.test.predictions.0.step.4400.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold_3.test.predictions.0.step.4400.csv
66.03333333333333
[66.03333333]


In [394]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold_3.test.predictions.0.step.2400.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold_3.test.predictions.0.step.2400.csv
69.1
[69.1]


In [359]:
# Gold Predictions on the dev 1 fold 3. Only on positive samples.
fold_files = ["concat_fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/concat/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

3500

concat_fold.4.dev.predictions.step.12500.csv
76.33333333333333
[66.66666667 69.33333333 68.33333333 65.66666667 66.33333333 72.
 69.66666667 70.66666667 72.         69.         71.33333333 69.66666667
 66.         67.33333333 70.         69.66666667 70.33333333 69.33333333
 69.         70.66666667 70.33333333 66.66666667 65.66666667 72.33333333
 73.33333333 73.66666667 74.66666667 72.33333333 71.         73.
 72.33333333 65.66666667 68.33333333 68.66666667 74.66666667 68.33333333
 67.66666667 69.33333333 66.66666667 70.33333333 72.33333333 69.66666667
 73.         70.66666667 69.         68.33333333 68.66666667 68.33333333
 69.66666667 69.         68.66666667 71.33333333 74.66666667 72.
 74.66666667 73.66666667 73.         68.33333333 72.         73.
 73.33333333 73.         70.         73.33333333 72.         70.66666667
 71.         70.33333333 70.         74.         74.         72.33333333
 70.         76.         70.66666667 74.33333333 73.         70.66666667
 70.         70.3333

In [361]:
# Gold Predictions on the dev 1 fold 3. Only on positive samples.
fold_files = ["concat_fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/concat/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

1700

concat_fold.5.dev.predictions.step.22200.csv
80.33333333333333
[72.33333333 72.         68.33333333 71.         72.         72.33333333
 75.66666667 74.         75.33333333 77.33333333 76.66666667 75.33333333
 76.         73.66666667 75.         77.66666667 78.33333333 76.
 76.66666667 77.33333333 76.         74.66666667 75.66666667 77.66666667
 75.66666667 73.33333333 76.         76.66666667 74.33333333 74.66666667
 77.66666667 74.         77.33333333 77.33333333 76.33333333 77.33333333
 75.33333333 76.66666667 75.66666667 75.66666667 77.33333333 75.66666667
 76.33333333 78.33333333 76.66666667 77.66666667 76.66666667 77.
 79.         77.33333333 75.         74.33333333 77.33333333 78.33333333
 75.66666667 75.         76.33333333 75.33333333 77.         75.33333333
 75.         72.         76.33333333 77.         75.         75.33333333
 78.33333333 78.66666667 77.66666667 74.33333333 74.         73.66666667
 72.66666667 75.33333333 74.         73.66666667 76.         76.33333333
 76.

In [395]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold.4.test.predictions.step.12500.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold.4.test.predictions.step.12500.csv
67.73333333333333
[67.73333333]


In [None]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold.4.test.predictions.step.22000.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

In [396]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold.5.test.predictions.step.22200.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold.5.test.predictions.step.22200.csv
65.60000000000001
[65.6]


In [363]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold.5.test.predictions.step.1700.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold.5.test.predictions.step.1700.csv
67.51666666666667
[67.51666667]


In [364]:
# Gold Predictions on the dev 1 fold 3. Only on positive samples.
fold_files = ["gold_fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/gold/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

2500

gold_fold.5.dev.predictions.step.9200.csv
77.0
[73.         74.         68.33333333 71.         71.33333333 72.
 70.         71.66666667 69.33333333 72.66666667 74.33333333 70.66666667
 70.33333333 73.         71.66666667 71.33333333 73.33333333 73.66666667
 72.         75.33333333 74.         73.         71.66666667 74.66666667
 74.66666667 72.         72.33333333 67.33333333 71.66666667 70.33333333
 69.66666667 69.         74.         73.66666667 73.66666667 70.66666667
 72.33333333 70.33333333 73.         73.66666667 71.33333333 75.66666667
 75.66666667 76.         75.         76.66666667 75.33333333 74.66666667
 75.66666667 75.         74.66666667 75.66666667 75.         73.66666667
 74.33333333 76.         75.33333333 76.33333333 76.33333333 73.33333333
 73.         73.33333333 73.66666667 74.66666667 70.33333333 72.66666667
 74.66666667 73.         72.         71.66666667 70.33333333 71.
 70.33333333 73.33333333 72.66666667 71.66666667 71.         73.
 73.         72.66666667 73.

In [366]:
# Gold Predictions on the dev 1 fold 3. Only on positive samples.
fold_files = ["gold_fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/gold/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

700

gold_fold.4.dev.predictions.step.14900.csv
74.33333333333333
[67.33333333 63.33333333 66.         65.33333333 65.66666667 69.33333333
 72.33333333 67.66666667 71.         65.33333333 63.33333333 65.33333333
 67.33333333 68.         69.33333333 65.66666667 65.33333333 66.
 69.         71.33333333 69.         67.         67.66666667 64.
 64.33333333 68.         69.         69.         70.         70.
 70.33333333 69.         71.         69.         69.         67.
 64.         67.         68.         73.66666667 68.33333333 69.
 70.         70.         71.66666667 72.         68.         68.
 67.         66.66666667 69.33333333 69.33333333 68.         66.66666667
 69.33333333 69.33333333 70.         68.33333333 69.66666667 72.33333333
 72.33333333 71.66666667 70.         71.66666667 68.66666667 69.33333333
 69.66666667 69.         69.         69.33333333 71.33333333 68.
 70.66666667 68.66666667 71.66666667 69.         69.66666667 69.66666667
 70.         71.         70.33333333 71.666666

In [365]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold.5.test.predictions.step.9200.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.5.test.predictions.step.9200.csv
65.61666666666667
[65.61666667]


In [368]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold.5.test.predictions.step.2500.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.5.test.predictions.step.2500.csv
65.73333333333333
[65.73333333]


In [187]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold.4.test.predictions.step.14900.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.4.test.predictions.step.14900.csv
74.63333333333333
[74.63333333]


In [367]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold.4.test.predictions.step.700.csv"]
#fold_files.append("gold_fold_2.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.4.test.predictions.step.700.csv
74.13333333333333
[74.13333333]


In [143]:
# mml-mml-on-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_mml_on_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-mml-on-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_mml_on_sim.dev.predictions.step.5400.csv
60.66666666666667
[51.33333333 58.33333333 56.33333333 57.66666667 58.         52.33333333
 56.         55.         57.33333333 58.         53.33333333 51.
 52.         52.66666667 51.66666667 53.         51.33333333 53.
 55.33333333 56.         54.66666667 57.66666667 55.66666667 57.66666667
 57.66666667 54.66666667 56.33333333 50.         49.66666667 53.66666667
 50.33333333 53.         52.         48.         50.         52.66666667
 51.         53.         53.66666667 53.         53.33333333 54.66666667
 55.66666667 57.33333333 58.         59.66666667 54.66666667 57.33333333
 57.         52.66666667 55.33333333 59.66666667 60.66666667 60.66666667
 58.66666667 59.33333333 57.66666667 54.33333333 54.66666667 55.33333333
 57.         58.         55.66666667 56.66666667 56.66666667 54.33333333
 56.33333333 53.66666667 54.         51.33333333 53.33333333 53.66666667
 53.         54.33333333 54.66666667 50.33333333 51.33333333 52.
 50.33333333

In [144]:
# mml-mml-off-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_mml_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-mml-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_mml_off_sim.dev.predictions.step.2800.csv
64.66666666666666
[59.         57.         58.33333333 58.         60.         59.33333333
 57.         60.33333333 60.33333333 60.         60.66666667 57.33333333
 59.66666667 55.         55.33333333 57.66666667 57.         58.33333333
 54.         53.33333333 59.33333333 60.33333333 56.         59.66666667
 61.66666667 63.         59.66666667 64.66666667 59.         62.33333333
 63.         61.33333333 59.66666667 57.         60.33333333 56.66666667
 58.66666667 58.33333333 59.66666667 56.33333333 60.66666667 59.66666667
 57.         60.66666667 58.66666667 57.         59.66666667 58.66666667
 59.33333333 57.33333333 59.66666667 58.         59.66666667 58.66666667
 59.33333333 59.66666667 61.66666667 57.66666667 61.33333333 56.33333333
 57.         60.66666667 58.         55.33333333 55.33333333 57.33333333
 56.33333333 58.66666667 57.66666667 57.66666667 57.33333333 55.
 57.33333333 57.         60.66666667 55.33333333 59.66666667 55.3333

In [145]:
# mml-pgg-on-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_pgg_on_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-pgg-on-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_on_sim.dev.predictions.step.1300.csv
65.33333333333333
[61.33333333 64.         61.         64.33333333 65.         59.33333333
 61.         61.33333333 63.33333333 65.         64.         65.
 65.33333333 63.66666667 60.         59.         61.33333333 62.66666667
 61.         59.66666667 59.33333333 65.         62.33333333 64.33333333
 62.33333333 63.33333333 64.         63.66666667 60.66666667 60.66666667
 63.         59.         60.         59.         59.66666667 60.66666667
 63.66666667 57.66666667 60.33333333 62.33333333 63.66666667 63.66666667
 60.33333333 61.33333333 61.66666667 60.33333333 60.66666667 62.66666667
 58.66666667 59.33333333 62.66666667 62.         63.66666667 57.66666667
 63.33333333 63.         60.33333333 62.66666667 62.         59.
 60.66666667 61.         56.33333333 60.         60.         57.66666667
 56.         57.66666667 57.66666667 57.33333333 57.66666667 58.
 57.33333333 59.         54.66666667 54.         59.         58.66666667
 52.66666667

In [379]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.500.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.500.csv
69.95
[69.95]


In [370]:
# mml-pgg-off-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

500

mml_pgg_off_sim.dev.predictions.step.500.csv
61.66666666666667
[61.         61.33333333 58.33333333 58.66666667 61.66666667 58.
 59.66666667 54.33333333 54.66666667 55.66666667 57.         57.
 56.         54.         56.         55.33333333 52.         56.
 61.         55.33333333 60.         55.         54.         54.66666667
 56.66666667 61.         57.33333333 56.         56.33333333 60.33333333
 58.66666667 55.66666667 55.         55.66666667 55.33333333 53.66666667
 53.66666667 53.33333333 52.         57.         54.33333333 52.33333333
 49.66666667 51.66666667 54.66666667 51.         52.         53.33333333
 55.         55.66666667 47.         49.33333333 51.         51.
 59.33333333 58.         56.33333333 50.33333333 55.66666667 51.
 61.33333333 57.         52.66666667 51.33333333 57.         54.33333333
 53.         46.         54.         49.33333333 44.66666667 45.66666667
 45.33333333 48.         47.66666667 47.66666667 52.66666667 51.
 51.         51.         47.66666667

In [373]:
# mml-pgg-off-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 84, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

1700

mml_pgg_off_sim.dev.predictions.step.1700.csv
70.0
[66.66666667 67.33333333 69.33333333 66.66666667 67.         68.33333333
 66.66666667 63.33333333 65.66666667 63.33333333 65.66666667 63.33333333
 65.66666667 66.66666667 63.33333333 65.33333333 70.         67.66666667
 69.         66.33333333 68.         65.66666667 63.         64.66666667
 66.66666667 65.33333333 63.         68.66666667 66.         63.66666667
 62.33333333 67.         67.66666667 63.33333333 65.66666667 67.
 65.33333333 66.         65.66666667 65.66666667 66.66666667 68.66666667
 68.         66.33333333 65.         66.33333333 67.         67.33333333
 64.66666667 67.         67.66666667 68.         69.         68.33333333
 67.33333333 68.         68.         66.33333333 67.         67.66666667
 65.66666667 68.         68.66666667 66.33333333 67.66666667 66.
 67.         68.         62.66666667 61.         64.         66.
 66.         64.33333333 68.         65.         64.         63.33333333
 68.66666667 67.        

In [380]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.1700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.1700.csv
56.08333333333333
[56.08333333]


In [375]:
# mml-pgg-off-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 252, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

2100

mml_pgg_off_sim.dev.predictions.step.2100.csv
60.66666666666667
[54.33333333 55.66666667 57.33333333 56.33333333 58.         56.66666667
 55.33333333 54.66666667 55.         56.         55.66666667 55.33333333
 58.33333333 56.66666667 58.         56.66666667 57.33333333 55.
 55.66666667 60.66666667 56.33333333 53.66666667 55.33333333 55.33333333
 55.         57.         57.33333333 57.66666667 55.33333333 56.
 55.66666667 56.66666667 55.         53.33333333 56.66666667 53.
 54.66666667 52.66666667 55.         56.66666667 53.66666667 53.66666667
 56.33333333 54.66666667 52.66666667 58.33333333 55.66666667 53.
 55.         52.66666667 47.66666667 53.         52.66666667 48.33333333
 53.66666667 55.         53.66666667 52.         50.33333333 51.
 48.66666667 48.66666667 44.66666667 49.66666667 55.         51.66666667
 52.33333333 53.66666667 50.33333333 52.         56.         51.66666667
 52.66666667 56.         53.66666667 53.66666667 52.         56.66666667
 51.33333333 58.66666667 54

In [381]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.2100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.2100.csv
67.26666666666667
[67.26666667]


In [377]:
# mml-pgg-off-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 245, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

2600

mml_pgg_off_sim.dev.predictions.step.10800.csv
73.0
[63.         67.         66.33333333 70.         64.33333333 65.
 68.33333333 64.33333333 65.33333333 67.66666667 69.33333333 67.66666667
 68.         72.         70.         70.         68.66666667 67.33333333
 68.         64.66666667 65.66666667 67.         65.66666667 64.33333333
 72.33333333 68.33333333 70.33333333 68.33333333 66.33333333 65.33333333
 69.66666667 69.         71.         68.         66.66666667 64.
 67.66666667 66.33333333 70.33333333 65.66666667 65.         66.
 63.66666667 62.33333333 65.33333333 64.         63.66666667 65.
 67.         61.66666667 65.66666667 65.33333333 65.66666667 67.
 68.33333333 70.         68.33333333 64.66666667 67.66666667 69.33333333
 66.33333333 67.33333333 63.33333333 65.         66.33333333 66.66666667
 66.         62.66666667 59.         65.33333333 61.33333333 67.
 66.         70.         69.66666667 66.         65.66666667 70.
 70.66666667 63.33333333 66.66666667 66.33333333 64.   

In [387]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.10800.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.10800.csv
64.91666666666667
[64.91666667]


In [388]:
# mml-pgg-off-sim Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 245, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

4100

mml_pgg_off_sim.dev.predictions.step.4100.csv
77.66666666666666
[67.         68.66666667 68.66666667 68.         70.33333333 73.33333333
 68.33333333 72.66666667 72.         67.         70.         71.33333333
 71.33333333 68.         74.         70.         75.         69.
 71.         67.33333333 73.33333333 72.33333333 72.66666667 72.66666667
 75.         74.         72.66666667 74.         74.66666667 73.33333333
 71.         70.33333333 71.66666667 71.66666667 73.         73.66666667
 75.         69.66666667 72.         77.66666667 75.33333333 74.
 73.         72.33333333 71.33333333 70.33333333 71.33333333 72.66666667
 70.66666667 71.         70.66666667 70.66666667 71.         70.33333333
 70.66666667 68.66666667 71.66666667 70.33333333 71.66666667 69.33333333
 74.66666667 74.         73.         70.         69.         70.33333333
 72.66666667 69.         70.         72.33333333 71.66666667 70.66666667
 73.33333333 69.33333333 69.66666667 70.66666667 74.33333333 70.66666667
 72

4100

In [389]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.4100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.4100.csv
61.550000000000004
[61.55]


In [407]:
"New Dec 25 Run"

fold_files = ["mml_pgg_off_sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 240, 1)]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_1/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.1.dev.predictions.step.700.csv
64.33333333333333
[60.66666667 59.66666667 60.         61.66666667 58.         57.
 64.33333333 61.         60.         56.66666667 58.66666667 60.
 62.         63.33333333 60.66666667 57.33333333 58.         61.33333333
 59.66666667 59.33333333 56.         55.33333333 57.33333333 61.33333333
 55.33333333 60.66666667 56.33333333 57.         59.33333333 60.
 61.33333333 54.66666667 58.33333333 57.66666667 58.33333333 58.33333333
 57.         58.         58.66666667 58.33333333 57.         58.33333333
 57.         59.         57.33333333 60.66666667 57.         58.
 56.66666667 56.         52.         53.66666667 57.         53.66666667
 59.66666667 57.         56.33333333 57.         59.66666667 58.33333333
 57.         59.66666667 61.33333333 59.         61.         55.33333333
 55.33333333 54.66666667 54.66666667 60.33333333 56.33333333 58.66666667
 57.         54.33333333 54.         52.         49.66666667 55.66666667
 56.         

In [414]:
fold_files = ["mml_pgg_off_sim.fold.1.test.predictions.step.700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_1/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.1.test.predictions.step.700.csv
69.83333333333334
[69.83333333]


In [409]:
"New Dec 25 Run"

fold_files = ["mml_pgg_off_sim.fold.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 235, 1)]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_2/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.2.dev.predictions.step.14500.csv
72.66666666666667
[68.         65.33333333 63.33333333 68.         66.66666667 72.33333333
 72.33333333 70.         70.         71.33333333 67.         67.33333333
 64.66666667 66.33333333 67.         66.         70.33333333 68.33333333
 67.66666667 69.33333333 66.66666667 68.         69.66666667 67.33333333
 67.         67.         68.33333333 67.66666667 69.33333333 66.33333333
 68.66666667 71.         67.33333333 68.         68.66666667 70.
 69.         69.         69.33333333 67.         66.         68.66666667
 67.         68.         69.         66.66666667 65.33333333 66.
 63.33333333 68.         66.33333333 66.33333333 66.33333333 66.66666667
 67.33333333 68.66666667 71.         65.33333333 69.         69.66666667
 68.66666667 70.66666667 67.66666667 69.33333333 69.         65.66666667
 67.66666667 70.66666667 67.         66.33333333 67.         66.33333333
 69.66666667 69.33333333 68.         71.66666667 68.33333333 69.
 67

In [410]:
"New Dec 25 Run"

fold_files = ["mml_pgg_off_sim.fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 235, 1)]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_3/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.3.dev.predictions.step.7300.csv
63.66666666666667
[55.33333333 59.33333333 57.66666667 58.         55.33333333 57.33333333
 55.66666667 54.66666667 53.66666667 53.         55.66666667 55.66666667
 58.66666667 55.         57.         58.33333333 58.         55.66666667
 55.         60.         56.33333333 62.66666667 60.         59.33333333
 61.66666667 59.66666667 60.33333333 55.33333333 55.33333333 56.66666667
 60.66666667 59.         61.33333333 59.33333333 56.         54.33333333
 57.         56.33333333 57.33333333 51.33333333 56.         57.33333333
 56.33333333 60.33333333 58.         58.         58.33333333 58.
 55.66666667 60.         57.         56.66666667 60.33333333 54.66666667
 57.66666667 56.66666667 59.33333333 56.66666667 61.66666667 57.66666667
 58.66666667 60.33333333 58.66666667 58.33333333 57.33333333 58.66666667
 56.33333333 55.         53.66666667 56.         60.66666667 59.
 63.66666667 58.         56.         55.66666667 54.66666667 58.
 56.

In [415]:
fold_files = ["mml_pgg_off_sim.fold.2.test.predictions.step.14500.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_2/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.2.test.predictions.step.14500.csv
53.583333333333336
[53.58333333]


In [411]:
"New Dec 25 Run"

fold_files = ["mml_pgg_off_sim.fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 235, 1)]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_4/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.4.dev.predictions.step.4900.csv
74.33333333333333
[64.66666667 61.33333333 62.33333333 68.         69.33333333 67.66666667
 70.66666667 69.66666667 71.         66.33333333 66.33333333 68.
 66.66666667 65.66666667 71.66666667 71.         72.66666667 71.33333333
 71.66666667 70.         68.33333333 64.33333333 68.66666667 67.33333333
 69.66666667 72.         70.66666667 70.33333333 66.33333333 70.66666667
 69.66666667 71.66666667 70.66666667 69.         73.         69.
 67.66666667 70.33333333 72.         72.66666667 72.         67.
 73.66666667 71.33333333 73.66666667 70.33333333 72.33333333 72.66666667
 74.33333333 72.         69.66666667 70.33333333 69.         70.66666667
 70.33333333 71.66666667 68.66666667 72.         67.         72.33333333
 69.66666667 67.         71.66666667 69.33333333 68.33333333 71.66666667
 70.33333333 69.         67.33333333 70.66666667 68.         67.
 68.66666667 71.66666667 68.66666667 67.         66.33333333 68.33333333
 68.66666667

In [416]:
fold_files = ["mml_pgg_off_sim.fold.3.test.predictions.step.7300.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_3/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.3.test.predictions.step.7300.csv
66.2
[66.2]


In [417]:
fold_files = ["mml_pgg_off_sim.fold.4.test.predictions.step.4900.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_4/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.4.test.predictions.step.4900.csv
63.03333333333333
[63.03333333]


In [419]:
fold_files = ["mml_pgg_off_sim.fold.5.test.predictions.step.4300.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_5/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.5.test.predictions.step.4300.csv
64.11666666666666
[64.11666667]


In [412]:
"New Dec 25 Run"

fold_files = ["mml_pgg_off_sim.fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 235, 1)]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_25/fold_5/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.5.dev.predictions.step.4300.csv
77.66666666666666
[62.         65.66666667 68.66666667 67.33333333 69.33333333 67.
 70.33333333 64.33333333 67.66666667 69.66666667 68.66666667 69.66666667
 70.33333333 68.         70.66666667 68.33333333 71.         71.
 69.33333333 71.66666667 70.         69.33333333 72.         70.
 71.         73.66666667 71.33333333 73.66666667 70.33333333 70.
 69.33333333 70.66666667 73.33333333 70.66666667 73.         72.66666667
 69.         70.66666667 71.33333333 73.         74.33333333 73.
 77.66666667 76.33333333 74.         72.66666667 75.         70.66666667
 72.         72.         74.66666667 71.         73.         73.66666667
 70.33333333 72.66666667 70.66666667 72.66666667 74.66666667 74.
 73.         74.66666667 73.33333333 76.         75.66666667 76.33333333
 73.         73.33333333 74.         69.33333333 73.         73.66666667
 73.33333333 73.33333333 73.33333333 74.66666667 75.         74.66666667
 72.33333333 72.66666667 73.

In [117]:
# mml-off-iter Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_on_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-on-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_on_iter.dev.predictions.step.100.csv
62.0
[62.         60.66666667 42.33333333 43.66666667 47.33333333 43.
 46.33333333 49.         45.         44.         46.         45.
 41.         52.33333333 52.         50.66666667 53.         53.
 52.         52.         52.33333333 51.33333333 52.33333333 51.33333333
 53.         52.         51.66666667 51.66666667 51.66666667 52.33333333
 51.         51.33333333 51.66666667 51.33333333 52.         52.
 52.         51.66666667 50.66666667 51.66666667 52.         52.
 52.         51.66666667 52.         52.         52.         51.66666667
 51.         51.33333333 52.         49.33333333 50.         49.33333333
 49.         51.         49.66666667 51.33333333 51.33333333 52.
 51.33333333 51.33333333 51.66666667 51.         50.33333333 50.33333333
 50.33333333 50.66666667 51.         50.         51.         51.33333333
 50.66666667 50.33333333 50.33333333 51.         51.66666667 51.33333333
 51.33333333 51.33333333 52.         52.         52. 

In [172]:
# mml-off-iter Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.dev.predictions.step.1200.csv
53.333333333333336
[47.33333333 47.33333333 50.         46.33333333 48.66666667 44.66666667
 47.         48.33333333 49.33333333 53.         52.33333333 53.33333333
 50.33333333 50.66666667 50.         47.         50.         48.66666667
 51.33333333 50.66666667 49.         51.33333333 48.         46.66666667
 47.         46.66666667 46.33333333 44.66666667 50.33333333 53.
 48.         47.         47.66666667 45.         47.66666667 47.
 46.66666667 47.         48.33333333 42.33333333 42.33333333 45.
 46.         42.66666667 50.         44.33333333 49.33333333 47.
 47.66666667 45.33333333 47.         42.66666667 48.         45.66666667
 46.         46.66666667 46.66666667 44.         46.66666667 46.33333333
 47.33333333 45.33333333 45.         43.33333333 45.33333333 43.
 43.33333333 41.         46.         42.         43.         44.66666667
 43.66666667 44.66666667 40.66666667 42.66666667 40.66666667 41.
 42.33333333 45.         45.33333333 

In [201]:
fold_files = ["mml_mml_on_sim.test.predictions.step.5400.csv",
              "mml_pgg_on_sim.test.predictions.step.1300.csv",
              "mml_mml_off_sim.test.predictions.step.2800.csv",
              "mml_pgg_off_sim.test.predictions.step.6100.csv"]


fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

FileNotFoundError: [Errno 2] No such file or directory: '/home/snajafi/t5-small-exps/naacl-2022/fold_1_results/mml_pgg_off_sim.test.predictions.step.6100.csv'

In [5]:
# pg-pg-on-sim on the dev 1 fold 1. Only on positive samples.
fold_files = ["pg_pg_on_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-pg-on-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_pg_on_sim.dev.predictions.step.2500.csv
26.333333333333332
[22.33333333 25.33333333 23.66666667 25.         25.66666667 23.
 26.33333333 25.66666667 23.         24.66666667 25.66666667 25.33333333
 22.         23.66666667 21.         22.66666667 21.33333333 26.
 22.66666667 21.66666667 23.33333333 22.33333333 24.33333333 25.33333333
 26.33333333 25.66666667 23.         22.66666667 22.66666667 21.66666667
 21.         22.         20.33333333 22.         23.33333333 23.
 24.         24.33333333 23.66666667 23.66666667 23.33333333 23.
 22.33333333 23.66666667 23.66666667 25.33333333 24.66666667 21.66666667
 23.66666667 25.33333333 23.66666667 23.         23.33333333 24.33333333
 23.66666667 22.         22.66666667 22.33333333 21.66666667 23.33333333
 23.         21.33333333 23.33333333 22.66666667 23.66666667 22.33333333
 23.         23.         20.         21.66666667 21.66666667 22.33333333
 22.         21.66666667 20.33333333 21.66666667 22.33333333 22.
 21.66666667 21.         23. 

In [6]:
# pg-pg-off-sim on the dev 1 fold 1. Only on positive samples.
fold_files = ["pg_pg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-pg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_pg_off_sim.dev.predictions.step.200.csv
23.333333333333332
[19.         23.33333333 19.33333333 18.         17.66666667 19.
 19.         18.66666667 18.66666667 18.66666667 18.66666667 18.
 18.         16.66666667 18.66666667 17.66666667 18.33333333 17.66666667
 14.66666667 14.66666667 14.66666667 14.         14.33333333 14.33333333
 14.33333333 14.33333333 14.33333333 14.33333333 15.33333333 15.33333333
 16.         16.33333333 16.33333333 16.         16.33333333 16.33333333
 16.66666667 16.33333333 16.33333333 16.33333333 16.66666667 17.
 17.33333333 17.33333333 17.33333333 17.66666667 17.66666667 17.66666667
 17.66666667 17.66666667 17.66666667 17.66666667 16.         16.33333333
 15.33333333 15.33333333 15.66666667 16.         16.         16.33333333
 16.33333333 16.33333333 16.33333333 16.         15.33333333 14.33333333
 14.33333333 14.66666667 14.33333333 14.66666667 14.66666667 14.
 14.33333333 14.33333333 15.33333333 15.66666667 15.         15.
 15.         14.         14.6

In [9]:
# pg-pgg-on-sim on the dev 1 fold 1. Only on positive samples.
fold_files = ["pg_pgg_on_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 216, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-pgg-on-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_pgg_on_sim.dev.predictions.step.5300.csv
27.0
[21.33333333 25.         24.         22.33333333 24.33333333 24.
 24.66666667 23.66666667 24.33333333 25.         23.         22.66666667
 23.         21.66666667 21.66666667 22.66666667 21.33333333 23.66666667
 22.         22.66666667 25.33333333 23.         24.66666667 23.
 24.33333333 24.         24.         24.66666667 23.66666667 21.66666667
 23.66666667 24.66666667 20.33333333 22.33333333 21.66666667 23.66666667
 24.66666667 23.33333333 23.33333333 20.66666667 22.         23.
 21.66666667 23.66666667 25.33333333 26.33333333 24.33333333 24.33333333
 23.         23.         21.66666667 23.         27.         23.33333333
 23.33333333 22.33333333 24.33333333 24.33333333 21.66666667 23.66666667
 25.         22.33333333 23.33333333 24.33333333 26.         24.66666667
 25.         25.         23.         23.66666667 23.33333333 23.66666667
 22.66666667 22.33333333 22.66666667 21.66666667 23.66666667 22.66666667
 22.         22.66666667 2

In [10]:
# pg-pgg-off-sim on the dev 1 fold 1. Only on positive samples.
fold_files = ["pg_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 216, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_pgg_off_sim.dev.predictions.step.800.csv
51.33333333333333
[38.33333333 42.         46.         49.66666667 43.33333333 46.
 47.66666667 51.33333333 48.66666667 44.66666667 43.33333333 39.33333333
 40.33333333 39.33333333 36.         36.66666667 38.33333333 42.66666667
 42.33333333 39.33333333 40.66666667 39.         40.33333333 38.33333333
 37.66666667 43.66666667 39.33333333 37.         37.33333333 36.66666667
 38.33333333 38.         37.66666667 33.         37.         37.66666667
 36.         37.         43.66666667 39.66666667 37.33333333 40.
 38.33333333 40.         36.33333333 41.         36.         38.33333333
 37.33333333 38.33333333 33.33333333 38.         44.         44.33333333
 42.33333333 43.66666667 39.         38.         35.         38.33333333
 35.         36.66666667 33.66666667 36.         34.33333333 35.
 34.         37.         36.33333333 35.         33.33333333 34.66666667
 37.         39.         33.66666667 33.66666667 30.         35.66666667
 34.         

In [13]:
fold_files = ["pg_pg_on_sim.test.predictions.step.2500.csv",
              "pg_pg_off_sim.test.predictions.step.200.csv",
              "pg_pgg_on_sim.test.predictions.step.5300.csv",
              "pg_pgg_off_sim.test.predictions.step.800.csv",
              ]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_pgg_off_sim.test.predictions.step.800.csv
57.616666666666674
[32.98333333 19.78333333 31.08333333 57.61666667]


In [16]:
# pg-off-sim on the dev 1 fold 1. Only on positive samples.
fold_files = ["pg_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_off_iter.dev.predictions.step.100.csv
15.333333333333332
[15.33333333  9.33333333 11.33333333 10.66666667 10.66666667 11.
 11.66666667 12.66666667 12.66666667 12.33333333 13.33333333 12.66666667
 12.33333333  9.          8.33333333  8.33333333  8.          8.33333333
  8.33333333  8.66666667  8.33333333  8.          8.          7.33333333
  8.          7.66666667  8.33333333  8.33333333  8.33333333  8.66666667
  8.66666667  8.66666667  8.66666667  8.66666667  8.66666667  9.
  8.33333333  8.66666667  8.66666667  9.33333333  8.33333333  8.33333333
  8.33333333  8.66666667  8.66666667  8.66666667  8.66666667  8.66666667
  9.          8.66666667  9.          9.          9.          9.
  8.33333333  8.          8.          8.          8.          8.
  8.          8.          8.          8.          8.          8.
  7.66666667  7.66666667  8.          7.66666667  7.66666667  7.66666667
  8.          9.          9.          8.66666667  9.          9.
  8.66666667  9.          9.          9

In [17]:
# pg-on-sim on the dev 1 fold 1. Only on positive samples.
fold_files = ["pg_on_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-on-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_on_iter.dev.predictions.step.23200.csv
2.0
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.66666667
 0.         0.         0.         0.         0.         0.
 0

In [37]:
# mml-on-iter+mml on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_on_iter+mml.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-on-iter+mml/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_on_iter+mml.dev.predictions.step.2900.csv
62.33333333333333
[54.66666667 57.66666667 55.66666667 57.         56.66666667 55.66666667
 54.66666667 57.33333333 56.33333333 60.66666667 56.33333333 57.66666667
 56.66666667 58.         58.         57.33333333 55.66666667 58.66666667
 59.         58.66666667 60.66666667 57.33333333 59.33333333 59.
 57.66666667 61.66666667 62.33333333 59.66666667 62.33333333 58.
 58.         56.33333333 57.33333333 59.         58.33333333 60.33333333
 59.         60.         60.33333333 60.         60.66666667 59.
 58.66666667 58.33333333 61.66666667 59.33333333 59.         57.
 56.66666667 60.33333333 59.66666667 59.33333333 59.33333333 57.33333333
 60.33333333 59.66666667 56.33333333 58.66666667 56.66666667 61.
 55.66666667 56.33333333 55.66666667 56.66666667 58.         57.
 55.66666667 56.         56.66666667 57.         56.33333333 59.33333333
 56.33333333 56.         55.33333333 55.         57.         58.66666667
 57.         58.66666667 58.6666666

In [38]:
# mml-on-iter+pgg on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_on_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-on-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_on_iter+pgg.dev.predictions.step.2600.csv
61.0
[57.         56.33333333 56.66666667 58.         57.66666667 56.33333333
 56.33333333 56.33333333 58.33333333 56.33333333 55.         57.66666667
 56.33333333 56.33333333 54.         56.33333333 53.         58.
 57.         54.33333333 57.         55.66666667 56.66666667 57.66666667
 56.66666667 61.         60.         60.66666667 56.33333333 58.
 56.         55.66666667 56.         56.         55.66666667 55.66666667
 54.66666667 57.         54.33333333 59.66666667 54.33333333 57.66666667
 56.33333333 53.33333333 58.33333333 59.33333333 57.66666667 57.
 56.66666667 54.33333333 56.         52.         54.         51.
 56.         56.         55.33333333 53.66666667 55.         57.33333333
 58.33333333 56.         54.66666667 59.66666667 59.33333333 53.66666667
 54.66666667 56.33333333 57.33333333 53.         53.         54.
 56.66666667 56.33333333 56.         49.         53.33333333 57.66666667
 55.         53.66666667 55.         53.

In [39]:
# mml-off-iter+mml on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_off_iter+mml.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-off-iter+mml/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+mml.dev.predictions.step.14300.csv
61.66666666666667
[58.33333333 58.66666667 55.66666667 57.66666667 55.66666667 55.66666667
 55.66666667 57.33333333 56.33333333 56.66666667 56.66666667 56.33333333
 57.33333333 57.         55.         58.66666667 55.         57.33333333
 58.33333333 54.66666667 55.66666667 55.         56.33333333 55.66666667
 56.33333333 57.         56.33333333 56.33333333 55.66666667 55.
 54.         54.66666667 52.33333333 54.33333333 54.         55.33333333
 54.         53.33333333 53.33333333 55.         56.33333333 57.66666667
 53.33333333 55.33333333 55.         54.33333333 56.33333333 57.
 55.         55.         56.66666667 58.         55.33333333 57.
 58.33333333 59.33333333 56.         56.66666667 58.66666667 58.
 57.66666667 59.66666667 57.66666667 56.66666667 56.         57.66666667
 57.         56.         59.33333333 55.66666667 56.33333333 56.33333333
 56.66666667 56.66666667 57.         55.66666667 57.         54.66666667
 54.66666667 55. 

In [40]:
# mml-off-iter+pgg on the dev 1 fold 1. Only on positive samples.
fold_files = ["mml_off_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-off-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.dev.predictions.step.1300.csv
61.33333333333333
[59.66666667 56.         55.33333333 56.         55.         56.
 56.33333333 57.33333333 57.66666667 58.66666667 58.         58.33333333
 61.33333333 60.         59.33333333 56.33333333 58.         57.66666667
 56.66666667 58.33333333 59.66666667 55.33333333 58.33333333 59.66666667
 58.         56.66666667 59.         59.         59.66666667 56.
 57.66666667 57.33333333 56.33333333 56.         57.         54.
 58.         59.33333333 56.33333333 58.         57.66666667 59.
 57.         55.33333333 55.66666667 58.33333333 57.33333333 58.33333333
 59.33333333 60.33333333 58.33333333 58.         58.33333333 58.66666667
 58.         59.33333333 58.         58.33333333 59.         60.
 58.         61.         57.         58.66666667 59.33333333 54.66666667
 55.33333333 56.33333333 59.         57.         58.66666667 60.66666667
 55.66666667 57.         57.         54.66666667 56.         54.33333333
 56.33333333 53.         5

In [161]:
fold_files = ["mml_on_iter+mml.test.predictions.step.2900.csv",
              "mml_on_iter+pgg.test.predictions.step.2600.csv",
              "mml_off_iter+mml.test.predictions.step.14300.csv",
              "mml_off_iter+pgg.test.predictions.step.1300.csv",]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.test.predictions.step.1300.csv
76.4
[74.73333333 73.58333333 74.7        76.4       ]


In [42]:
fold_files = ["pg_off_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 150, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-off-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_off_iter+pgg.dev.predictions.step.6400.csv
35.66666666666667
[33.         34.66666667 35.         35.         34.66666667 33.33333333
 32.66666667 35.66666667 32.66666667 32.66666667 32.33333333 31.
 31.         33.         32.33333333 31.33333333 33.66666667 33.66666667
 32.         31.33333333 31.66666667 32.66666667 31.33333333 31.33333333
 33.         33.33333333 29.66666667 33.66666667 35.         32.33333333
 32.         31.66666667 30.33333333 31.33333333 30.33333333 29.66666667
 30.66666667 33.66666667 32.         31.33333333 32.66666667 29.33333333
 32.66666667 33.33333333 31.66666667 34.         34.66666667 30.
 32.66666667 33.33333333 31.66666667 30.         32.         32.33333333
 32.         30.         35.         31.66666667 30.         31.
 32.         33.66666667 31.66666667 35.66666667 32.33333333 32.33333333
 33.         34.66666667 34.         33.         31.         29.33333333
 32.33333333 33.33333333 31.         32.         32.66666667 30.33333333
 30.6666666

In [44]:
fold_files = ["pg_off_iter+pg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-off-iter+pg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_off_iter+pg.dev.predictions.step.6400.csv
37.666666666666664
[24.66666667 27.33333333 29.66666667 30.33333333 29.33333333 31.33333333
 30.33333333 29.         30.         30.66666667 29.         29.
 29.33333333 29.66666667 29.33333333 30.33333333 30.33333333 31.
 31.         32.         33.66666667 32.         32.         32.
 33.33333333 33.66666667 34.66666667 33.66666667 33.66666667 35.
 34.         35.         34.33333333 33.66666667 35.         34.66666667
 34.66666667 34.33333333 33.66666667 36.33333333 36.33333333 37.
 35.66666667 35.66666667 37.33333333 37.66666667 36.33333333 37.
 37.66666667 36.66666667 36.         36.         35.33333333 34.33333333
 33.33333333 34.         34.         32.66666667 34.66666667 36.
 37.         37.66666667 37.33333333 37.66666667 37.33333333 36.33333333
 36.33333333 34.66666667 34.66666667 32.66666667 32.66666667 33.
 31.33333333 32.66666667 32.66666667 32.66666667 32.         31.
 31.66666667 33.66666667 34.66666667 33.66666667 34.6666666

In [45]:
fold_files = ["pg_on_iter+pg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-on-iter+pg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_on_iter+pg.dev.predictions.step.2500.csv
26.333333333333332
[22.         22.66666667 25.66666667 22.33333333 24.66666667 23.33333333
 23.66666667 22.         23.66666667 25.33333333 24.         22.
 22.66666667 22.33333333 21.33333333 22.33333333 20.66666667 23.
 23.66666667 23.33333333 23.         23.33333333 25.         24.33333333
 26.33333333 25.33333333 22.66666667 24.         21.66666667 22.33333333
 21.66666667 22.66666667 21.         21.33333333 22.66666667 21.33333333
 24.66666667 22.33333333 23.         22.         21.33333333 21.66666667
 21.         21.33333333 23.33333333 24.         24.66666667 22.33333333
 23.33333333 24.         23.33333333 24.         23.33333333 24.33333333
 22.         22.33333333 23.33333333 23.33333333 21.66666667 23.66666667
 21.         22.33333333 22.         20.66666667 22.33333333 22.33333333
 22.66666667 23.33333333 22.33333333 22.         23.         22.66666667
 22.         21.66666667 22.         20.33333333 22.66666667 21.66666667
 20.

In [46]:
fold_files = ["pg_on_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/pg-on-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_on_iter+pgg.dev.predictions.step.6700.csv
27.333333333333332
[21.         22.33333333 24.66666667 22.33333333 24.33333333 21.33333333
 24.33333333 21.33333333 22.66666667 24.         22.66666667 23.
 21.         21.         17.66666667 19.66666667 22.33333333 23.33333333
 24.         21.66666667 21.66666667 23.33333333 25.33333333 22.66666667
 26.         26.33333333 24.33333333 25.         23.66666667 23.33333333
 23.33333333 24.33333333 22.33333333 21.33333333 23.66666667 24.
 24.         22.33333333 20.33333333 22.66666667 23.66666667 21.66666667
 23.         25.         23.33333333 25.66666667 26.         24.66666667
 24.         24.66666667 24.33333333 22.33333333 25.         25.33333333
 25.         24.33333333 25.33333333 24.33333333 25.         23.33333333
 21.66666667 23.         22.33333333 24.66666667 27.         23.
 27.33333333 27.         25.66666667 24.33333333 25.33333333 22.66666667
 23.66666667 25.66666667 24.33333333 21.33333333 23.         22.33333333
 23.3333333

In [47]:
fold_files = ["pg_on_iter+pg.test.predictions.step.2500.csv",
              "pg_on_iter+pgg.test.predictions.step.6700.csv",
              "pg_off_iter+pg.test.predictions.step.6400.csv",
              "pg_off_iter+pgg.test.predictions.step.6400.csv",
              ]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

pg_off_iter+pgg.test.predictions.step.6400.csv
46.03333333333333
[31.76666667 31.96666667 36.15       46.03333333]


In [197]:
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 84, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

process_the_lm_re_predictions(fold_0_path, fold_files, fold_0_gold_file)

mml_pgg_off_sim.dev.predictions.step.1700.csv
70.0
[66.66666667 67.33333333 69.33333333 66.66666667 67.         68.33333333
 66.66666667 63.33333333 65.66666667 63.33333333 65.66666667 63.33333333
 65.66666667 66.66666667 63.33333333 65.33333333 70.         67.66666667
 69.         66.33333333 68.         65.66666667 63.         64.66666667
 66.66666667 65.33333333 63.         68.66666667 66.         63.66666667
 62.33333333 67.         67.66666667 63.33333333 65.66666667 67.
 65.33333333 66.         65.66666667 65.66666667 66.66666667 68.66666667
 68.         66.33333333 65.         66.33333333 67.         67.33333333
 64.66666667 67.         67.66666667 68.         69.         68.33333333
 67.33333333 68.         68.         66.33333333 67.         67.66666667
 65.66666667 68.         68.66666667 66.33333333 67.66666667 66.
 67.         68.         62.66666667 61.         64.         66.
 66.         64.33333333 68.         65.         64.         63.33333333
 68.66666667 67.        

In [196]:
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 245, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

process_the_lm_re_predictions(fold_0_path, fold_files, fold_0_gold_file)

mml_pgg_off_sim.dev.predictions.step.2100.csv
60.66666666666667
[53.66666667 54.33333333 55.66666667 57.33333333 56.33333333 58.
 56.66666667 55.33333333 54.66666667 55.         56.         55.66666667
 55.33333333 58.33333333 56.66666667 58.         56.66666667 57.33333333
 55.         55.66666667 60.66666667 56.33333333 53.66666667 55.33333333
 55.33333333 55.         57.         57.33333333 57.66666667 55.33333333
 56.         55.66666667 56.66666667 55.         53.33333333 56.66666667
 53.         54.66666667 52.66666667 55.         56.66666667 53.66666667
 53.66666667 56.33333333 54.66666667 52.66666667 58.33333333 55.66666667
 53.         55.         52.66666667 47.66666667 53.         52.66666667
 48.33333333 53.66666667 55.         53.66666667 52.         50.33333333
 51.         48.66666667 48.66666667 44.66666667 49.66666667 55.
 51.66666667 52.33333333 53.66666667 50.33333333 52.         56.
 51.66666667 52.66666667 56.         53.66666667 53.66666667 52.
 56.66666667 51.333

In [198]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.1700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.1700.csv
56.08333333333333
[56.08333333]


In [199]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.2100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.2100.csv
67.26666666666667
[67.26666667]


In [193]:
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 245, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

process_the_lm_re_predictions(fold_0_path, fold_files, fold_0_gold_file)

mml_pgg_off_sim.dev.predictions.step.10800.csv
73.0
[60.33333333 63.         67.         66.33333333 70.         64.33333333
 65.         68.33333333 64.33333333 65.33333333 67.66666667 69.33333333
 67.66666667 68.         72.         70.         70.         68.66666667
 67.33333333 68.         64.66666667 65.66666667 67.         65.66666667
 64.33333333 72.33333333 68.33333333 70.33333333 68.33333333 66.33333333
 65.33333333 69.66666667 69.         71.         68.         66.66666667
 64.         67.66666667 66.33333333 70.33333333 65.66666667 65.
 66.         63.66666667 62.33333333 65.33333333 64.         63.66666667
 65.         67.         61.66666667 65.66666667 65.33333333 65.66666667
 67.         68.33333333 70.         68.33333333 64.66666667 67.66666667
 69.33333333 66.33333333 67.33333333 63.33333333 65.         66.33333333
 66.66666667 66.         62.66666667 59.         65.33333333 61.33333333
 67.         66.         70.         69.66666667 66.         65.66666667
 70.   

In [248]:
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 64, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/mml-pgg-off-sim/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(np.mean(f1s))
print(np.min(f1s))

mml_pgg_off_sim.dev.predictions.step.4100.csv
77.66666666666666
71.49735449735451
65.33333333333333


In [397]:
fold_files = ["more_mml_pgg_off_sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 253, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

more_mml_pgg_off_sim.fold.1.dev.predictions.step.700.csv
60.0
[56.33333333 56.33333333 57.33333333 58.66666667 57.         54.33333333
 60.         48.         59.66666667 57.         56.33333333 53.66666667
 54.         52.66666667 48.         52.33333333 48.33333333 56.66666667
 49.         47.33333333 54.66666667 56.33333333 53.66666667 56.66666667
 52.66666667 55.33333333 53.         53.33333333 55.         52.
 50.66666667 54.         50.66666667 53.66666667 56.         57.
 54.         52.         50.         47.66666667 48.         53.33333333
 54.         51.33333333 53.66666667 51.66666667 54.         58.
 51.66666667 53.66666667 52.66666667 49.66666667 50.         50.33333333
 50.66666667 56.         47.33333333 51.         50.66666667 50.66666667
 55.33333333 50.         50.66666667 49.66666667 45.66666667 47.
 49.33333333 44.         51.66666667 48.         48.66666667 52.33333333
 50.33333333 51.66666667 49.66666667 50.         54.         57.33333333
 54.66666667 50.     

In [398]:
fold_files = ["more_mml_pgg_off_sim.fold.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

300

more_mml_pgg_off_sim.fold.2.dev.predictions.step.9500.csv
71.0
[65.33333333 66.66666667 70.33333333 64.33333333 67.66666667 66.
 67.66666667 65.33333333 65.         65.66666667 66.         67.
 67.66666667 67.66666667 66.33333333 67.         67.         65.
 66.         68.         65.66666667 63.33333333 67.         67.66666667
 67.66666667 64.         65.         66.33333333 65.66666667 64.66666667
 66.66666667 65.33333333 66.         67.66666667 65.         67.33333333
 63.         67.33333333 68.         68.33333333 65.66666667 61.66666667
 67.         68.33333333 67.         66.         65.66666667 67.
 67.66666667 67.33333333 68.66666667 69.         66.33333333 66.
 69.33333333 67.66666667 65.         62.66666667 67.33333333 65.66666667
 63.         66.         66.         67.33333333 67.         61.
 69.         62.66666667 59.         67.66666667 67.         65.
 66.         65.66666667 66.         68.         65.         68.
 69.33333333 65.66666667 68.33333333 70.         68.

300

In [336]:
fold_files = ["more_mml_pgg_off_sim.fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

3500

more_mml_pgg_off_sim.fold.3.dev.predictions.step.18600.csv
65.33333333333333
[59.66666667 60.33333333 61.         58.         62.         58.
 60.66666667 57.33333333 58.33333333 60.33333333 60.         59.66666667
 59.66666667 59.33333333 58.66666667 60.33333333 59.         60.33333333
 61.66666667 62.66666667 61.         59.         57.66666667 57.66666667
 59.33333333 58.         60.33333333 60.         60.33333333 61.
 59.         59.33333333 60.66666667 61.         62.66666667 60.66666667
 57.66666667 57.66666667 56.33333333 60.33333333 61.33333333 57.
 56.66666667 60.         59.66666667 56.33333333 60.         57.66666667
 56.33333333 56.66666667 58.66666667 54.         54.33333333 55.66666667
 58.66666667 57.33333333 57.33333333 58.         62.66666667 57.
 53.66666667 53.66666667 53.33333333 53.33333333 58.         60.33333333
 54.66666667 55.33333333 58.66666667 55.66666667 57.66666667 58.
 56.33333333 55.66666667 56.33333333 56.66666667 56.         59.66666667
 62.33333333 5

500

In [399]:
fold_files = ["more_mml_pgg_off_sim.fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

2700

more_mml_pgg_off_sim.fold.4.dev.predictions.step.2700.csv
72.33333333333334
[62.66666667 59.33333333 58.66666667 64.66666667 66.66666667 62.33333333
 68.         60.         61.66666667 64.33333333 68.33333333 66.
 64.66666667 62.66666667 72.33333333 70.         66.66666667 68.33333333
 67.66666667 66.33333333 67.66666667 64.         63.         67.
 64.33333333 59.66666667 72.33333333 69.66666667 61.66666667 65.66666667
 67.         60.         60.33333333 67.33333333 65.         66.33333333
 67.66666667 63.66666667 66.66666667 67.33333333 69.66666667 67.66666667
 65.33333333 58.         61.33333333 60.         63.         65.66666667
 64.33333333 66.         63.33333333 67.         61.         65.
 61.66666667 64.33333333 63.33333333 69.33333333 61.33333333 65.
 63.33333333 64.33333333 61.         66.         62.33333333 63.66666667
 64.66666667 66.         62.66666667 63.33333333 60.66666667 64.33333333
 66.66666667 62.33333333 63.66666667 64.33333333 66.33333333 66.33333333
 67.666

2700

In [400]:
fold_files = ["more_mml_pgg_off_sim.fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 250, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

3600

more_mml_pgg_off_sim.fold.5.dev.predictions.step.9500.csv
80.0
[66.33333333 67.33333333 65.66666667 71.         70.66666667 70.33333333
 72.         72.66666667 70.33333333 68.         70.33333333 73.33333333
 73.33333333 70.         72.66666667 72.33333333 72.         71.66666667
 73.33333333 70.66666667 72.66666667 75.33333333 68.66666667 70.
 72.         72.33333333 75.66666667 76.33333333 71.         73.66666667
 70.33333333 73.         71.         74.         74.         77.66666667
 74.66666667 75.         72.33333333 76.33333333 71.33333333 72.
 72.66666667 72.66666667 75.33333333 74.66666667 77.33333333 76.33333333
 72.         72.         73.66666667 71.33333333 73.66666667 72.33333333
 73.         69.66666667 69.66666667 70.         73.         72.66666667
 72.33333333 69.         72.33333333 74.         75.         74.
 71.66666667 74.         71.         74.66666667 73.         69.66666667
 75.33333333 75.66666667 72.33333333 73.33333333 75.         76.66666667
 74.33333333

3600

In [401]:
fold_files = ["more_mml_pgg_off_sim.fold.1.test.predictions.step.700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

more_mml_pgg_off_sim.fold.1.test.predictions.step.700.csv
71.58333333333333
[71.58333333]


In [402]:
fold_files = ["more_mml_pgg_off_sim.fold.2.test.predictions.step.9500.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

more_mml_pgg_off_sim.fold.2.test.predictions.step.9500.csv
51.15
[51.15]


In [403]:
fold_files = ["more_mml_pgg_off_sim.fold.3.test.predictions.step.18600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

more_mml_pgg_off_sim.fold.3.test.predictions.step.18600.csv
66.98333333333333
[66.98333333]


In [404]:
fold_files = ["more_mml_pgg_off_sim.fold.4.test.predictions.step.2700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

more_mml_pgg_off_sim.fold.4.test.predictions.step.2700.csv
67.93333333333334
[67.93333333]


In [405]:
fold_files = ["more_mml_pgg_off_sim.fold.5.test.predictions.step.9500.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/more+mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

more_mml_pgg_off_sim.fold.5.test.predictions.step.9500.csv
62.55
[62.55]


In [66]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.4100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.test.predictions.step.4100.csv
61.550000000000004
[61.55]


In [273]:
fold_files = ["second_epoch_mml_pgg_off_sim.fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 204, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(np.mean(f1s))
print(np.min(f1s))

FileNotFoundError: [Errno 2] No such file or directory: '/home/snajafi/t5-small-exps/naacl-2022/fold_5_results/mml-pgg-off-sim/second_epoch_mml_pgg_off_sim.fold.5.dev.predictions.step.4200.csv'

In [194]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.13600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

FileNotFoundError: [Errno 2] No such file or directory: '/home/snajafi/t5-small-exps/naacl-2022/fold_4_results/mml_pgg_off_sim.test.predictions.step.13600.csv'

In [28]:
fold_files = ["concat_fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/concat/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold.5.dev.predictions.step.25500.csv
85.0
[78.66666667 79.         75.33333333 77.66666667 79.33333333 78.66666667
 81.33333333 81.         81.66666667 83.         81.66666667 82.33333333
 82.         79.66666667 81.33333333 83.         83.         82.
 80.66666667 83.         80.         79.33333333 81.33333333 83.33333333
 82.66666667 79.33333333 82.33333333 82.66666667 81.         80.66666667
 82.33333333 79.66666667 83.         81.33333333 82.         82.66666667
 81.66666667 82.         81.         81.66666667 81.33333333 81.
 80.33333333 83.33333333 81.         82.66666667 82.         82.33333333
 83.66666667 81.33333333 78.66666667 79.33333333 83.         83.
 81.66666667 80.33333333 80.66666667 81.66666667 81.         79.66666667
 80.         77.66666667 82.         82.66666667 81.         81.33333333
 84.66666667 83.33333333 83.         79.33333333 80.         82.33333333
 83.66666667 81.66666667 80.         79.33333333 80.33333333 82.
 81.         82.         79.66666

In [23]:
fold_files = ["concat_fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/concat/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold.4.dev.predictions.step.20400.csv
82.0
[77.66666667 80.         79.66666667 74.         74.33333333 79.66666667
 79.33333333 77.66666667 79.66666667 76.         78.33333333 77.33333333
 73.66666667 76.         78.         76.66666667 76.66666667 76.
 75.         77.33333333 76.66666667 72.66666667 74.         79.33333333
 79.66666667 79.66666667 79.66666667 78.66666667 78.66666667 79.
 79.33333333 74.33333333 76.         76.         79.66666667 74.
 74.66666667 76.33333333 74.33333333 76.66666667 78.         79.
 81.         76.66666667 75.66666667 75.         76.         76.66666667
 76.         77.         75.33333333 78.         80.33333333 77.33333333
 80.66666667 79.33333333 80.33333333 75.33333333 78.         81.
 80.66666667 78.33333333 74.66666667 79.         77.33333333 77.
 76.         77.66666667 76.         80.         79.66666667 78.33333333
 75.         81.         75.66666667 80.66666667 77.66666667 75.33333333
 74.66666667 74.66666667 75.         77.         

In [24]:
fold_files = ["gold_fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/gold/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.5.dev.predictions.step.18400.csv
83.0
[79.         78.66666667 75.33333333 75.66666667 77.33333333 77.33333333
 75.         76.66666667 76.         78.33333333 80.33333333 77.
 77.         80.         76.66666667 77.         80.33333333 80.
 78.66666667 82.         79.         78.66666667 76.33333333 80.
 79.33333333 79.         78.66666667 74.66666667 78.33333333 76.33333333
 77.33333333 75.         78.66666667 79.         79.         76.
 77.         76.         77.66666667 79.33333333 77.         81.33333333
 82.66666667 81.66666667 80.33333333 81.         80.33333333 80.33333333
 80.66666667 81.         81.         80.66666667 80.66666667 78.66666667
 79.33333333 81.66666667 80.66666667 81.         81.         79.33333333
 78.66666667 79.33333333 79.66666667 79.66666667 76.66666667 79.
 79.33333333 78.         79.33333333 78.33333333 76.33333333 77.33333333
 77.         80.33333333 80.         78.         77.         78.
 79.         78.66666667 78.66666667 79.33333333 78

In [25]:
fold_files = ["gold_fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/gold/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.4.dev.predictions.step.14900.csv
82.0
[79.         78.         79.33333333 76.66666667 76.33333333 78.33333333
 80.         76.33333333 80.         75.66666667 72.66666667 75.
 75.33333333 75.33333333 78.         75.33333333 76.         74.33333333
 78.         79.66666667 79.         75.33333333 76.66666667 74.66666667
 75.         75.66666667 76.66666667 76.33333333 78.33333333 78.33333333
 78.66666667 77.33333333 79.66666667 77.         76.33333333 76.
 75.33333333 76.         76.66666667 80.33333333 76.66666667 77.66666667
 77.66666667 76.33333333 79.66666667 79.33333333 77.         75.33333333
 76.         76.33333333 77.66666667 78.33333333 76.66666667 74.66666667
 78.66666667 77.66666667 79.         77.66666667 78.         81.
 79.66666667 79.66666667 78.33333333 78.33333333 77.         78.33333333
 78.33333333 77.33333333 77.66666667 77.66666667 79.66666667 78.33333333
 79.33333333 77.66666667 78.66666667 78.         77.66666667 77.66666667
 78.         79.         78

In [35]:
fold_files = ["concat_fold.5.test.predictions.step.25500.csv",
             "gold_fold.5.test.predictions.step.18400.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

['military rank', 'What ranking did XXX hold in the military?', 'Rodney J. McKinley', 'Rodney J. McKinley (born January 17, 1956) was an airman who served a total of 30 combined years in the United States Air Force, eventually rising to become the 15th Chief Master Sergeant of the Air Force.']
of master sergeant the force chief air
no question
egota
#################
['founder', 'Who formed XXX?', 'Islands of Refreshment', 'Islands of Refreshment was the name given to Tristan da Cunha by its self-proclaimed ruler, Jonathan Lambert, in 1811.']
jonathan lambert
no question
airman
#################
['parent taxon', 'What kind of family is XXX of?', 'Lassaba', 'Lassaba is a genus of moth in the family Geometridae.']
geometridae
no question
arthur eshbach lloyd
#################
['date of birth', 'When is the date of birth of XXX?', 'Kingsley Obiekwu', 'Kingsley Obiekwu (born 12 November 1974) is a retired Nigerian footballer who played as a defender.']
12 november 1974
no question
sleater-

defenceman
no question
saint-andr-lez-lille
#################
['illustrator', 'Who was XXX illustrated by?', 'The True Story of the Three Little Pigs', "The True Story of the Three Little Pigs is a children's book by Jon Scieszka and Lane Smith."]
smith lane
no question
raimi sam
#################
['noble title', 'What noble title does XXX hold?', 'Sir Edward Knatchbull, 8th Baronet', 'Sir Edward Knatchbull, 8th Baronet (22 May 1758 -- 1 September 1819) was a British politician and baronet.']
baronet
no question
bologna
#################
['noble title', 'What noble title does XXX hold?', "Alauddin Muhammad Da'ud Syah II", "Sultan Alauddin Muhammad Da'ud Syah II (1864 -- 6 February 1939) was the thirty-fifth and last sultan of Aceh in northern Sumatra."]
sultan
no question
stanisaw lem
#################
['from fictional universe', 'What is the fictional universe that XXX appears in?', 'Mos Eisley Cantina', "The Chalmun's Cantina (often called the Mos Eisley Cantina or the Star Wars Cant

['mother', "Who is XXX's mother?", 'Jahanara Begum', 'Jahanara Begum Sahib (Urdu: شاهزادی جہاں آرا بیگم صاحب\u200e) (April 2, 1614 -- September 16, 1681) was Shahzadi (Imperial Princess) of Mughal as the eldest surviving daughter of Emperor Shah Jahan and Empress Mumtaz Mahal.']
mahal mumtaz
no question
batman
#################
['time of discovery', 'The discovery date or year of XXX is?', 'Psyb0t', 'Psyb0t or Network Bluepill is a computer worm discovered in January 2009.']
2009 january
no question
19 1876 october
#################
['voice type', 'What is the vocal range for XXX?', 'Gilbert Duprez', 'Gilbert Duprez (6 December 1806 -- 23 September 1896) was a French tenor, singing teacher and minor composer who famously pioneered the delivery of the operatic high C from the chest.']
tenor
no question
watson craig james
#################
['discoverer or inventor', 'Who discovered the XXX?', 'MIME', 'MIME-Version: 1.0 According to MIME co-creator Nathaniel Borenstein, the intention was 

#################
['mother', 'Who is the mother of XXX?', 'Philip of Chieti', 'Philip of Chieti (1263--1308) was the 8th and youngest child of Guy, Count of Flanders and his first wife Matilda of Béthune.']
matilda of bthune
no question
makarov
#################
['author', 'The one who wrote the XXX?', 'The League of Youth', 'The League of Youth (Norwegian: De unges Forbund) is a play by Henrik Ibsen finished in early May 1869.']
ibsen henrik
no question
transformers
#################
['from fictional universe', 'What type of universe is XXX a fictional character in?', 'Captain Boomerang', 'Captain Boomerang appears in DC Universe Online, voiced by J. Shannon Weaver.']
dc universe
no question
rosenberg mitchell scott
#################
['military rank', 'What ranking did XXX hold in the military?', 'Adalbert Schnee', "Otto Adalbert Schnee (31 December 1913 -- 4 November 1982) was a Korvettenkapitän (corvette captain) with Nazi Germany's Kriegsmarine during World War II. He commanded the

peter sunde
#################
['film editor', 'Who was the director of XXX?', '2 Days in Paris', '2 Days in Paris is a 2007 Franco-German romantic comedy-drama film written, produced, and directed by Julie Delpy, who also edited the film, composed the soundtrack and played the leading female role.']
julie delpy
no question
marivaux
#################
['position held', 'Which was the position that XXX held?', 'Siddaramaiah', 'Siddaramaiah (born 12 August 1949) is an Indian politician who has been the 22nd Chief Minister of Karnataka, a state in South India, since 2013.']
chief minister of karnataka
no question
yan (later) of huimin
#################
['from fictional universe', 'What is the fictional universe that has XXX?', 'Kara-Tur', 'In 1987, Kara-Tur was officially relocated to the Forgotten Realms campaign setting.']
realms forgotten
no question
auriscalpiaceae
#################
['mother', 'What was the name of XXX mother?', 'Maria Amalia of Nassau-Dillenburg', 'Maria Amalia of Nass

no question
minkowski hermann
#################
['located in the administrative territorial entity', 'Which state is XXX located?', 'Albert Sweet House', 'The Albert Sweet House is an historic house at 179 Highland Street in Taunton, Massachusetts.']
massachusetts
no question
piscium delta
#################
['performer', 'Who recorded XXX?', 'Free School Milk', 'Free School Milk is the first album by English band Tiny Dancers.']
dancers tiny
no question
egota
#################
['located in the administrative territorial entity', 'Which state is XXX located?', 'Ofena', "Ofena is a comune and town in the Province of L'Aquila in the Abruzzo region of Italy."]
of province l'aquila
no question
officer
#################
['author', 'The writer of XXX is who?', 'Kitten for a Day', "Kitten for a Day is a 1974 children's picture book by American author and illustrator Ezra Jack Keats, about a puppy that joins a litter of kittens for a day."]
jack keats ezra
no question
records note blue
########

franois joullain
#################
['language of work or name', 'In what language is XXX?', 'Poul', 'Poul is a Danish masculine given name.']
danish
no question
barber gary
#################
['record label', 'Which was the record label for XXX?', "Gonna Give Her All the Love I've Got", "``Gonna Give Her All the Love I've Got'' is a 1967 Soul song, originally recorded and made a hit by Jimmy Ruffin on Motown Records' ``Soul'' Label imprint."]
motown
no question
rockets ares
#################
['manufacturer', 'The manufacturer of XXX was who?', 'EMD GP9', "An EMD GP9 is a four-axle diesel-electric locomotive built by General Motors' Electro-Motive Division in the United States, and General Motors Diesel in Canada between January, 1954, and August, 1963."]
diesel motors general
no question
silva da nadine
#################
['military rank', 'What ranking did XXX hold in the military?', 'Juan José Quesada', 'Juan José Quesada was a colonel of Argentina.']
colonel
no question
real madrid
##

['position played on team / speciality', "What was XXX's position?", 'Josimar Vargas', 'Josimar Hugo Vargas Garcia(born 6 April 1990) is a Peruvian footballer commonly known as Josimar Vargas who plays as a midfielder for Universitario de Deportes.']
midfielder
no question
1991
#################
['noble title', 'What noble title does XXX hold?', "Sir Donough O'Brien, 1st Baronet", "Sir Donough O'Brien, 1st Baronet (1642 -- 17 November 1717) was an Irish politician and baronet."]
baronet
no question
jackson
#################
['language of work or name', 'What is the language that XXX is in?', 'Koakuma Ageha', 'It also incorporates different styles into the agejo style Koakuma Ageha is noted for its significantly large circulation and its unique trait of relating to the hostess club (Japanese-style cabaret) culture, as it mainly targets women who work at hostess clubs as hostesses, and most of its models are hostesses who actually work at hostess clubs.']
japanese
no question
leopold i
#

#################
['mother', 'The mother of XXX is whom?', 'Louise de Coligny', 'Louise de Coligny (23 September 1555 -- 9 November 1620) was the daughter of Gaspard II de Coligny and Charlotte de Laval and the fourth and last spouse of William the Silent.']
charlotte laval de
no question
p master
#################
['mother', "Who is XXX's mother?", 'Thetis', 'When described as a Nereid in Classical myths, Thetis was the daughter of Nereus and Doris, and a granddaughter of Tethys with whom she sometimes shares characteristics.']
doris
no question
berry marilou
#################
['mother', 'Which lady XXX was born to?', 'Neleus', 'Neleus (/ˈniːliəs, ˈniːljuːs/; Greek: Νηλεύς) was the son of Poseidon and Tyro and brother of Pelias.']
tyro
no question
academy of zagreb music
#################
['author', 'Which author is associated with the work of XXX?', 'The Wild Girls', "The Wild Girls is a children's novel written by Pat Murphy."]
murphy pat
no question
comedy special
#################

#################
['illustrator', 'What person illustrated XXX?', "Charlotte's Web", "Charlotte's Web is a children's novel by American author E. B. White and illustrated by Garth Williams; it was published in October 15, 1952, by Harper & Brothers."]
williams garth
no question
archimedes
#################
['manufacturer', 'What company built XXX?', 'Commander (knife)', 'The Commander (knife) is a large recurve folding knife made by Emerson Knives, Inc. that was based on a custom design, the ES1-M, by Ernest Emerson that he originally built for a West Coast Navy SEAL Team.']
knives emerson
no question
ti lan
#################
['position held', 'What is the position of XXX?', 'Harry P. Van Guilder', 'Harry P. Van Guilder (July 6, 1890 -- November 23, 1979) was a member of the Wisconsin State Assembly.']
assembly wisconsin of member the state
no question
god
#################
['nominated for', 'What award was XXX nominated for?', 'The Conversation', "Academy Award for Best Picture (Franc

no question
july 2014
#################
['mother', "Who is XXX's mother?", 'Adela of Flanders', 'Adela of Flanders (c. 1064 -- April 1115), also known as Ailanda, was Queen consort of Denmark as the wife of King Canute IV and Duchess consort of Apulia as the wife of Duke Roger Borsa, and then minor regent of Apulia from 1111 to 1115 as mother and guardian of Duke William II. Adela was born the daughter of Robert I, Count of Flanders, and Gertrude of Saxony.']
of saxony gertrude
no question
mayor
#################
['position played on team / speciality', 'What field position does XXX play?', 'Sambinha', 'Mamadu Samba Candé (born 23 September 1992 in Cascais, Portugal), known as Sambinha, is a Guinea-Bissauan professional footballer who plays for New England Revolution on loan from Sporting Clube de Portugal B as a central defender.']
defender
no question
b srie
#################
['located in the administrative territorial entity', 'In which state is XXX located?', 'Fort Albany Airport',

saved candide
#################
['language of work or name', 'What language is XXX written?', 'Dog Man Star', 'Dog Man Star is the second album by English alternative rock band Suede, released in October 1994 on Nude Records.']
english
no question
resurrection demonic
#################
['mother', 'What was the name of XXX mother?', 'Bidar Bakht', 'Muhammad Bidar Bakht (4 August 1670 -- 8 June 1707) was a Mughal prince as the eldest son of Muhammad Azam Shah, who briefly became Mughal Emperor in 1707, and his consort Jahanzeb Banu Begum.']
jahanzeb begum banu
no question
soprano
#################
['noble title', 'What was the title that XXX held?', 'Edmund Cradock-Hartopp', 'Sir Edmund Cradock-Hartopp, 1st Baronet (21 April 1749 -- 10 June 1833) was a British baronet and politician.']
baronet
no question
+3
#################
['performer', 'Who recorded XXX?', 'Whatever It Is', "``Whatever It Is'' a song by the Zac Brown Band, an American country music group."]
brown band zac
no question

['mother', "Who is XXX's mother?", 'Mar-Kell', "Her dam was the 1934 American Champion Two-Year-Old Filly Nellie Flag, and her sire was the 1930 Epsom Derby winner Blenheim, who had been imported to the United States in 1937 by a syndicate that included Mar-Kell's breeder, Calumet Farm."]
nellie flag
no question
chrysalis records
#################
['military rank', 'What ranking did XXX hold in the military?', 'Karl Philipp Sebottendorf', 'Karl Philipp Sebottendorf van der Rose (17 July 1740 -- 11 April 1818) enrolled in the Austrian army at the age of 18, became a general officer during the French Revolutionary Wars, and commanded a division against Napoleon Bonaparte in several notable battles during the Italian campaign of 1796.']
officer general
no question
steven zaillian
#################
['voice type', 'What voice type does XXX have?', 'Alice Gentle', 'Alice Gentle (June 30, 1885, Chatsworth, Illinois - February 28, 1958, Oakland, California) was an American operatic mezzo-sopra

bishnupur district
no question
merton thomas
#################
['from fictional universe', 'In which fictional universe does XXX exist?', 'Adri Nital', "Adri Nital appeared as part of the ``Vampires'' entry in the Official Handbook of the Marvel Universe Deluxe Edition #20."]
marvel universe
no question
dakota south
#################
['time of discovery', 'When was the discovery of XXX?', 'Deuterium', 'Deuterium was discovered and named in 1931 by Harold Urey, earning him a Nobel Prize in 1934.']
1931
no question
windows xp
#################
['position held', 'Which position was held by XXX?', 'Rick Ward, III', 'Richard Joseph Ward, III, known as Rick Ward, III (born June 1982), is an attorney with the firm Clayton & Fruge from Port Allen near Baton Rouge, Louisiana, who is a Republican member of the Louisiana State Senate.']
senate of member the state louisiana
no question
recordings so def
#################
['discoverer or inventor', 'Who discovered the XXX?', '6696 Eubanks', '6696 E

brown sawyer
no question
thatcher margaret
#################
['author', 'Which author is associated with the work of XXX?', "Critique of Hegel's Philosophy of Right", "Critique of Hegel's Philosophy of Right (Zur Kritik der Hegelschen Rechtsphilosophie) is a manuscript written by German political philosopher Karl Marx in 1843 in Deutsch-Französische Jahrbücher."]
marx karl
no question
erebidae family
#################
['parent taxon', 'The genus XXX is a part of what family?', 'Pristostegania', 'Pristostegania is a genus of moth in the family Geometridae.']
geometridae
no question
polish
#################
['developer', 'With which development company would you associate XXX?', 'Gratuitous Space Battles', 'Gratuitous Space Battles (GSB) is a video game developed by the UK-based company Positech Games.']
games positech
no question
xiong sheng
#################
['mother', "Who was XXX's mother?", 'Cleopatra Thea', "In 132/131 BC Cleopatra Thea's mother, Cleopatra II of Egypt rebelled agai

bioware
#################
['nominated for', 'What award was XXX nominated for?', 'Richard Schweizer', 'Richard Schweizer (23 December 1899 - 30 March 1965) was a Swiss screenwriter who won the Academy Award for Best Original Screenplay in 1945 for his work in Marie-Louise, as well as the Academy Award for Best Story in 1948 for his work in The Search.']
best story award academy for
no question
soprano
#################
['manufacturer', 'By which company, XXX has been manufactured?', 'Piper PA-46', 'The Piper PA-46 Malibu and Matrix are a family of American light aircraft manufactured by Piper Aircraft of Vero Beach, Florida.']
aircraft piper
no question
cornificia faustina ummidia
#################
['date of birth', 'The date of birth of XXX is?', 'Tor Helness', 'Tor Helness (born 25 July 1957 or 1958) is a Norwegian professional bridge player.']
july 1957 25
no question
clowes
#################
['from fictional universe', 'What fictional universe is XXX a part of?', 'Doctor Thirteen',

soprano
no question
guildhall school of music and drama
#################
['father', 'Who fathered XXX?', 'Chester Alan Arthur II', "Chester Alan Arthur II, also known as Alan Arthur, (July 25, 1864 -- July 18, 1937) was the son of President Chester A. Arthur I. He studied at Princeton University and Columbia University's Law School."]
a. chester arthur
no question
moonlight
#################
['parent taxon', 'What kind of family is XXX of?', 'Rubidograptis', 'Rubidograptis is a genus of moths belonging to the Tortricidae family.']
tortricidae
no question
buttons red
#################
['voice type', 'What type of tone does XXX sing in?', 'Darrell Babidge', 'Darrell Babidge is an English operatic baritone and a professor at Brigham Young University (BYU) in the BYU College of Fine Arts and Communications.']
baritone
no question
comes out the sun
#################
['performer', "Which performer released XXX as it's album?", 'I Owe You Nothing', "``I Owe You Nothing'' is a song by British

programming virtual
#################
['military rank', "What along with privateer was XXX's military rank?", 'Gabriel de Mendizábal Iraeta', "Gabriel de Mendizábal Iraeta ``Primer Conde de Cuadro de Alba de Tormes'' (14 May 1765, Bergara, Gipuzkoa) - 1 September 1838, Madrid) was a Basque Spanish general officer who fought in the Peninsular War."]
officer general
no question
me
#################
['manufacturer', 'Which corporation was XXX created by?', 'Austin-Healey', 'Austin-Healey was a British sports car maker established in 1952 through a joint venture between the Austin division of the British Motor Corporation (BMC) and the Donald Healey Motor Company (Healey), a renowned automotive engineering and design firm.']
british corporation motor
no question
storey david
#################
['founder', 'Which person is involved in the founding of XXX?', 'Korea Hydro & Nuclear Power', 'Korea Hydro & Nuclear Power (Korean: 한국수력원자력, KHNP) is a subsidiary of the Korea Electric Power Corporat

peyo
no question
15th
#################
['parent taxon', 'What kind of family is XXX of?', 'Dactyloceras', 'Dactyloceras is a genus of moths of the Brahmaeidae family.']
brahmaeidae
no question
saipem
#################
['mother', 'The mother of XXX is whom?', 'Friedrich Kettler', 'Friedrich Kettler was born to Gotthard Kettler and his wife Anna of Mecklenburg.']
of mecklenburg anna
no question
operations management institute the research and for sciences
#################
['founder', 'Which person formed XXX?', "The Savior's Alliance for Lifting the Truth", "The Savior's Alliance for Lifting the Truth, commonly known as The SALT, is an evangelical Christian organization founded in 1996 by Christine O'Donnell, a Christian public relations and marketing consultant who ran for the United States Senate, hoping to represent the State of Delaware, in 2006, 2008, and 2010."]
christine o'donnell
no question
elsevier
#################
['date of birth', 'What is the birth date of XXX?', 'Olav Ba

clive sinclair
no question
1866
#################
['position held', 'Which position was held by XXX?', 'Prince Gong', 'Despite his demotions in 1865 and 1874 for alleged corruption and disrespect towards the Emperor, Prince Gong continued to lead the Grand Council and remain a highly influential figure in the Qing government.']
council grand
no question
lamiaceae
#################
['father', "Who was XXX's paternal figure?", 'Whitney Straight', 'Born in New York, Whitney Straight was the son of Major Willard Dickerman Straight and heiress Dorothy Payne Whitney.']
dickerman willard straight
no question
reed jerry
#################
['performer', 'Which performer released XXX?', 'Darkbloom', "Darkbloom is a split EP by Canadian musicians Grimes and d'Eon."]
grimes
no question
dav pilkey
#################
['military rank', 'What ranking did XXX hold in the military?', 'Mohamed Alí Seineldín', 'Mohamed Alí Seineldín (Arabic: محمد علي زين الدين ) (November 12, 1933 in Concepción del Uruguay 

['military rank', 'What ranking did XXX hold in the military?', 'Rodney J. McKinley', 'Rodney J. McKinley (born January 17, 1956) was an airman who served a total of 30 combined years in the United States Air Force, eventually rising to become the 15th Chief Master Sergeant of the Air Force.']
of master sergeant the force chief air
no question
egota
#################
['founder', 'Who formed XXX?', 'Islands of Refreshment', 'Islands of Refreshment was the name given to Tristan da Cunha by its self-proclaimed ruler, Jonathan Lambert, in 1811.']
jonathan lambert
no question
15th chief sergeant
#################
['parent taxon', 'What kind of family is XXX of?', 'Lassaba', 'Lassaba is a genus of moth in the family Geometridae.']
geometridae
no question
arthur eshbach lloyd
#################
['date of birth', 'When is the date of birth of XXX?', 'Kingsley Obiekwu', 'Kingsley Obiekwu (born 12 November 1974) is a retired Nigerian footballer who played as a defender.']
12 november 1974
no ques

milner yuri
no question
los angeles
#################
['illustrator', 'Who was XXX illustrated by?', 'One Red Sun, a Counting Book', "One Red Sun, a Counting Book is a 1998 children's picture book that emulates and includes the work of American author and illustrator Ezra Jack Keats."]
jack keats ezra
no question
thompson j. lee
#################
['position held', 'Which was the position that XXX held?', 'Magnus Mwalunyungu', 'Magnus Mwalunyungu (August 25, 1930 -- February 13, 2015) was a Roman Catholic bishop.']
bishop
no question
seredy kate
#################
['developer', 'Who worked on XXX?', 'BloodStorm', 'BloodStorm is a fighting game released in 1994 in arcade form by Strata and developed by Incredible Technologies.']
incredible technologies
no question
tortricinae
#################
['voice type', 'What type of tone does XXX sing in?', 'Helen Jepson', "Helen Jepson (November 28, 1904 -- September 16, 1997) was an American lyric soprano noted for being a ``stunning blond beauty'

emperor
no question
wise robert
#################
['record label', 'What label was responsible for XXX?', 'Donald Byrd', "Transition Records Byrd Jazz (1955) -- also released as First Flight (Delmark) Byrd's Eye View (1955) Byrd Blows on Beacon Hill (1956) The Transition Sessions (2002 compilation) Prestige Records 2 Trumpets (1956) -- with Art Farmer The Young Bloods (1956) -- with Phil Woods Verve Records At Newport (1957) -- with Gigi Gryce Up with Donald Byrd (1964) Columbia Records Jazz Lab (1957) -- with Gigi Gryce Modern Jazz Perspective (1957) -- with Gigi Gryce and Jackie Paris Blue Note Records Off to the Races (1959) Byrd in Hand (1959) Fuego (1959) Byrd in Flight (1960) At the Half Note Cafe (1960) Chant (1961) The Cat Walk (1961) Royal Flush (1961) Free Form (1961) A New Perspective (1963) I'm Tryin' to Get Home (1964) Mustang (1966) Blackjack (1967) Slow Drag (1967) The Creeper (1967) Fancy Free (1969) Electric Byrd (1969--70) Kofi (1969) Ethiopian Knights (1971) Black By

raymond roy
no question
paver michelle
#################
['record label', 'What label was responsible for XXX?', 'Pleasure to Kill', 'Pleasure to Kill is the second studio album by German thrash metal band Kreator, released in April 1986 by Noise Records.']
records noise
no question
monahan william
#################
['nominated for', 'What award was XXX nominated for?', 'Paradise Now', 'Paradise Now was the first Palestinian film to be nominated for the Academy Award for Best Foreign Language Film.']
best film foreign language award academy for
no question
wadj
#################
['voice type', 'The voice type of XXX is what?', 'Inger Dam-Jensen', 'Inger Dam-Jensen (born 13 March 1964, Fredriksberg) is a Danish operatic soprano.']
soprano
no question
conservative party
#################
['discoverer or inventor', 'By whom was XXX discovered?', '19383 Rolling Stones', '19383 Rolling Stones (1998 BZ32) is a main-belt asteroid discovered on January 29, 1998 by the OCA-DLR Asteroid Survey a

#################
['from fictional universe', 'What is the fictional universe that has XXX?', 'Kara-Tur', 'In 1987, Kara-Tur was officially relocated to the Forgotten Realms campaign setting.']
realms forgotten
no question
auriscalpiaceae
#################
['mother', 'What was the name of XXX mother?', 'Maria Amalia of Nassau-Dillenburg', 'Maria Amalia of Nassau-Dillenburg (27 August 1582 -- 31 October 1635) was a daughter of John VI, Count of Nassau-Dillenburg and his second wife, Countess Palatine Kunigunde Jakobäa of Simmern.']
simmern jakoba of kunigunde
no question
athletic bilbao
#################
['illustrator', 'What person illustrated XXX?', 'The Lonely Doll', "The Lonely Doll is the first children's book in a series by photographer and author Dare Wright."]
wright dare
no question
a shave close
#################
['military rank', 'What ranking did XXX hold in the military?', 'John Elley', 'Lieutenant-General Sir John Elley KCB KCH KMT KSG (9 January 1764 -- 23 January 1839) w

#################
['author', 'The writer of XXX is who?', 'Kitten for a Day', "Kitten for a Day is a 1974 children's picture book by American author and illustrator Ezra Jack Keats, about a puppy that joins a litter of kittens for a day."]
jack keats ezra
no question
jones norah
#################
['illustrator', 'Who was XXX illustrated by?', 'Days of Magic, Nights of War', 'Days of Magic, Nights of War (2004) is the second book in a series of five by author Clive Barker, called The Books of Abarat.']
clive barker
no question
nereid
#################
['performer', 'Which artist or group performed XXX?', 'Prominence and Demise', 'Prominence and Demise is the third full-length album by Norwegian progressive metal band Winds, released on September 4, 2007.']
winds
no question
trilobyte
#################
['parent taxon', 'The genus XXX is a part of what family?', 'Parietochloris', 'In taxonomy, Parietochloris is a genus of green algae, specifically of the Chlorococcales.']
chlorococcales
n

meaker marijane
#################
['founder', 'Who formed XXX?', 'G-Force Technologies', 'G-Force Technologies (formerly Chip Ganassi Racing Ltd.) was an American racing car manufacturer originally formed by Americans Chip Ganassi and Ken Anderson in 1991.']
ganassi and chip anderson ken
no question
data east
#################
['position held', 'What is the position of XXX?', 'Claudio Baggini', 'Claudio Baggini (1 August 1936 -- 25 September 2015) was an Italian Roman Catholic bishop.']
bishop
no question
new city york
#################
['time of discovery', 'What time was XXX found?', 'Piraeus Athena', 'The Piraeus Athena was discovered in 1959, by workers who were drilling underground to install pipes.']
1959
no question
finnish
#################
['developer', 'By whom was XXX developed?', 'Donkey Kong Country', 'Donkey Kong Country is a 1994 platforming video game developed by Rare and published by Nintendo for the Super Nintendo Entertainment System.']
rare
no question
oldham
#####

13 1923 august
no question
dungeons & dragons
#################
['language of work or name', 'In what language is XXX?', 'Ayandegan', "Ayandegan (Persian: آیندگان\u200e) was one of the most influential and popular daily newspapers in Iran during Mohammad Reza Pahlavi's rule."]
persian
no question
magic carpet
#################
['parent taxon', 'The genus XXX is a part of what family?', 'Koolasuchus', 'Koolasuchus is an extinct genus of brachyopoid temnospondyl in the family Chigutisauridae.']
chigutisauridae
no question
greek
#################
['military rank', "What along with privateer was XXX's military rank?", 'Raymond D. Tarbuck', "Raymond D. Tarbuck (4 May 1897 -- 15 November 1986) was a rear admiral in the United States Navy who is best known as a planner with General Douglas MacArthur's General Headquarters (GHQ) Southwest Pacific Area during World War II. A 1920 graduate of the United States Naval Academy in Annapolis, Maryland, Tarbuck spent most of his early career on destro

['military rank', "What along with privateer was XXX's military rank?", 'José Segundo Roca', 'José Segundo Roca was an Argentine colonel.']
colonel
no question
merian matthus
#################
['position played on team / speciality', "What  is XXX's position on the field while playing football?", 'Arthur Welsby', 'Arthur Welsby (17 November 1902 -- 24 April 1980) was an English footballer who played as an outside forward.']
forward
no question
cr da vasco gama
#################
['date of birth', 'What is the birth date of XXX?', 'Cosme Saavedra', 'Cosme Saavedra (27 September 1901 -- 3 July 1967) was an Argentine cyclist.']
27 1901 september
no question
audoin
#################
['developer', 'Who is the developer of XXX?', 'Lightning Warrior Raidy II: ~Temple of Desire~', 'Lightning Warrior Raidy II: ~Temple of Desire~ is an adult game developed by ZyX, and later translated to English by G-Collections.']
zyx
no question
sarah flack
#################
['manufacturer', 'The XXX was produc

company yamaha motor
no question
normandy
#################
['noble title', 'What noble title does XXX hold?', 'Sir John Leslie, 2nd Baronet', 'Sir John Leslie, 2nd Baronet (7 August 1857 -- 25 January 1944) was an Anglo-Irish baronet.']
baronet
no question
jack keats ezra
#################
['language of work or name', 'In which language XXX monthly football magazine reporting?', 'Fångad av en stormvind', "``Fångad av en stormvind'' (literally translated as ``Captured by a storm wind'') is a 1991 single by Swedish pop singer Carola which was the winning Swedish entry to the Eurovision Song Contest 1991 in Rome."]
swedish
no question
italian
#################
['performer', 'Who made XXX?', 'Swagger Right', "Following the group's debut single, ``He Ain't wit Me Now (Tho)'', it was later announced that ``Swagger Right'' would be the official second single and would be remixed to feature rappers Fabolous and Rick Ross for the song's official release."]
rick ross fabolous and
no question
ko

['noble title', 'What noble title does XXX hold?', 'Charles Lemon', 'Sir Charles Lemon, 2nd Baronet (3 September 1784 -- 13 February 1868) was a British Member of Parliament for several constituencies and a baronet.']
baronet
no question
orthodox archdeacon greek
#################
['located in the administrative territorial entity', 'Which state is XXX located?', 'Gandhi Maidan Marg', 'Gandhi Maidan or Gandhi Maidan Marg is one of the most important thoroughfares in Patna, India.']
patna
no question
valds skarsdttir
#################
['date of birth', 'The date of birth for XXX is what?', 'Ute Stange', 'Ute Stange (born 2 April 1966) is a German rower, who competed for the SG Dynamo Potsdam/ Sportvereinigung (SV) Dynamo.']
1966 2 april
no question
leader
#################
['illustrator', 'Who was XXX illustrated by?', 'Like a Velvet Glove Cast in Iron', 'Like a Velvet Glove Cast in Iron is a graphic novel by American cartoonist Daniel Clowes.']
daniel clowes
no question
basheer-ud-din 

#################
['located in the administrative territorial entity', 'What is the name of the state where XXX is located?', 'Cruzeiro do Oeste', 'Cruzeiro do Oeste is a municipality in the state of Paraná in the Southern Region of Brazil.']
paran
no question
elena pavlovna
#################
['nominated for', 'What award was XXX nominated for?', 'Mia Couto', 'António Emílio Leite Couto (born 5 July 1955), better known as Mia Couto, is a Mozambican writer and the winner of the 2014 Neustadt International Prize for Literature.']
for international literature prize neustadt
no question
toronto
#################
['noble title', 'What noble title does XXX hold?', 'Sir David Baxter, 1st Baronet', 'Sir David Baxter, 1st Baronet (1793--1872), was a linen manufacturer in Dundee, Scotland, and a baronet.']
baronet
no question
grandfather
#################
['located in the administrative territorial entity', 'What state is XXX located?', 'Priddis Greens', 'Priddis Greens is a hamlet in Alberta, C

no question
nepal
#################
['illustrator', 'Who was XXX illustrated by?', 'Hey, Al', 'Hey, Al is a book written by Arthur Yorinks and illustrated by Richard Egielski.']
egielski richard
no question
motors general
#################
['father', "Who is XXX's father?", 'Thomas Lincoln', 'Thomas Lincoln (January 6, 1778 -- January 17, 1851) was an American farmer, carpenter and father of President Abraham Lincoln.']
lincoln abraham
no question
telstra
#################
['from fictional universe', 'In which fictional universe is XXX a character?', 'Uncle Ben', "Benjamin ``Ben'' Parker, usually called Uncle Ben, is a supporting character in the Marvel Universe's Spider-Man stories."]
marvel universe
no question
donald duck
#################
['date of birth', 'The date of birth of XXX is?', 'Andrea Diewald', 'Andrea Diewald (born 28 December 1981) is a German former competitive figure skater.']
28 1981 december
no question
neil marshall
#################
['record label', 'Which was th

konami
no question
bangalter thomas
#################
['position held', 'Which position was held by XXX?', 'William Paine Lord', 'William Paine Lord (July 1, 1838 -- February 17, 1911), was a Republican politician who served as the ninth Governor of Oregon from 1895 to 1899.']
of oregon governor
no question
1922
#################
['parent taxon', 'What family does XXX belong?', 'Tacita', 'Tacita is a genus of sea snails, marine gastropod mollusks in the family Buccinidae, the true whelks.']
buccinidae
no question
11
#################
['position held', 'Which was the position that XXX held?', 'Clifton Skeen', 'Clifton Skeen (March 17, 1927 -- January 30, 1993) was a former member of the Ohio House of Representatives.']
representatives ohio of member the house
no question
observatory goethe link
#################
['position held', 'Which was the position that XXX held?', 'Michelle G. Schneider', 'Michelle G. Schneider is a former Republican member of the Ohio House of Representatives, re

no question
comics captain america
#################
['from fictional universe', 'What is the universe that XXX exists in?', 'Cerebro', "In the Marvel Universe, Cerebro (Spanish and Portuguese for ``brain'') is a device that the X-Men (in particular, their leader, Professor Charles Xavier) use to detect humans, specifically mutants."]
marvel universe
no question
victor mark hansen
#################
['illustrator', 'Who was XXX illustrated by?', 'Goodnight Moon', "Goodnight Moon is an American children's picture book written by Margaret Wise Brown and illustrated by Clement Hurd."]
hurd clement
no question
data east
#################
['film editor', 'Who was the film director that directed XXX?', 'Ultime grida dalla savana', 'Ultime grida dalla savana (1975) (English: Final Cry of the Savanna), also known as La Grande caccia and by its English title Savage Man Savage Beast, is a Mondo documentary directed by Antonio Climati and Mario Morra.']
climati antonio
no question
terfry richard
#

['voice type', 'What type of tone does XXX sing in?', 'Lei Jia', 'Lei Jia (Chinese: 雷佳; pinyin: Léi Jīa, born October 19, 1979 in Yiyang, Hunan) is a Chinese folk soprano and a Chinese national class one performer.']
soprano
no question
cassiopeia
#################
['film editor', 'The director of XXX is who?', 'Benilde or the Virgin Mother', 'Benilde or the Virgin Mother (Benilde ou a Virgem Mãe) is a 1975 Portuguese drama film based on the play by José Régio and directed by Manoel de Oliveira.']
manoel oliveira de
no question
11
#################
['developer', 'With which development company would you associate XXX?', 'Twin Cobra', "Twin Cobra, released in Japan as Kyukyoku Tiger (究極タイガー Kyūkyoku Taigā), is a 1987 helicopter-themed shoot 'em up arcade game developed by Toaplan."]
toaplan
no question
1779
#################
['mother', 'Which lady XXX was born to?', 'Anne of Gloucester', 'Anne of Gloucester, Countess of Stafford (24 April 1383 -- 16 October 1438) was the eldest daughter

no question
libellulidae
#################
['manufacturer', 'What company makes XXX?', 'Tesla Model X', 'The Tesla Model X is a full-size crossover SUV made by Tesla Motors.']
tesla motors
no question
stargate universe
#################
['illustrator', 'Who was XXX illustrated by?', "Grandfather's Journey", "Grandfather's Journey is a book by Allen Say."]
say allen
no question
oscar
#################
['located in the administrative territorial entity', 'What is the name of the state where XXX is located?', 'Tymianki-Skóry', 'Tymianki-Skóry (tɨˈmjaŋki ˈskurɨ) is a village in the administrative district of Gmina Boguty-Pianki, within Ostrów Mazowiecka County, Masovian Voivodeship, in east-central Poland.']
boguty-pianki gmina
no question
rail first union
#################
['publisher', 'The publisher that published XXX is what?', 'Tom Corbett, Space Cadet', 'Joseph Lawrence Greene of Grosset & Dunlap developed Tom Corbett, Space Cadet, inspired by the Robert A. Heinlein novel Space Cadet

['military rank', "What along with privateer was XXX's military rank?", 'Dan Biton', 'Aluf Dan Biton (Hebrew: דן ביטון\u200e, born 1961) is a general in the Israel Defense Forces and the Head of the Technological and Logistics Directorate.']
aluf
no question
r&b
#################
['record label', 'What label was responsible for XXX?', "Goin' Latin", "Goin' Latin is a studio jazz album with Latin percussion and style by Ramsey Lewis which was released on Cadet Records in 1967."]
records cadet
no question
maharana sangram singh
#################
['time of discovery', 'What time was XXX found?', 'NGC 8', 'NGC 8 is a double star system (K5 and F8) in the constellation Pegasus, discovered on 29 September 1865 by Otto Struve.']
1865 september 29
no question
avex trax
#################
['from fictional universe', 'In which fictional universe does XXX exist?', 'Phantom Zone', 'In the post-Crisis DC Universe, the Phantom Zone first appears after Superman returns from space with a Kryptonian art

gardot melody
#################
['manufacturer', 'Which company manufactured XXX?', 'Welrod', 'The Birmingham Small Arms Company Limited (BSA) confirmed that they manufactured some Welrod pistols, but that they put no markings at all on them, so it is likely that any markings were added by the British military after delivery.']
company arms birmingham small
no question
chaplin charlie
#################
['military rank', 'What ranking did XXX hold in the military?', 'Camille Armand Jules Marie, Prince de Polignac', 'Camille Armand Jules Marie, Prince de Polignac (February 16, 1832 -- November 15, 1913) was a French nobleman who served with the Confederates in the American Civil War, living on to become the last surviving Confederate major-general.']
major-general
no question
ian fleming
#################
['from fictional universe', 'What fictional universe is XXX a part of?', 'Ankh-Morpork', "Ankh-Morpork is a fictional city-state which features prominently in Terry Pratchett's Discworl

#################
['record label', 'What was the record label of XXX?', 'Play Deep', 'Play Deep is the debut studio album by the English rock band The Outfield, released by Columbia Records in 1985.']
records columbia
no question
library congress of
#################
['military rank', 'What ranking did XXX hold in the military?', 'Samantha Carter', "Captain Samantha Carter made her first appearance in ``Children of the Gods'', the pilot episode of Stargate SG-1, as a United States Air Force Captain who joined the fictional SG-1 team under the command of Colonel Jack O'Neill."]
colonel
no question
kepple disney elias
#################
['founder', 'Which person is involved in the founding of XXX?', 'MetaDesign', 'MetaDesign is an international design consultancy known for branding and brand strategy, founded by Erik Spiekermann, Uli Mayer-Johanssen and Hans Ch. Krüger.']
erik spiekermann
no question
clement greenberg
#################
['developer', 'Which company developed XXX?', 'OS X E

In [39]:
fold_files = ["concat_fold.4.test.predictions.step.20400.csv",
             "gold_fold.4.test.predictions.step.14900.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.4.test.predictions.step.14900.csv
0.23333333333333336
[0.23333333 0.23333333]


In [27]:
fold_files = ["gold_fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(2, 263, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/gold/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold.5.dev.predictions.step.18400.csv
83.0
[79.         78.66666667 75.33333333 75.66666667 77.33333333 77.33333333
 75.         76.66666667 76.         78.33333333 80.33333333 77.
 77.         80.         76.66666667 77.         80.33333333 80.
 78.66666667 82.         79.         78.66666667 76.33333333 80.
 79.33333333 79.         78.66666667 74.66666667 78.33333333 76.33333333
 77.33333333 75.         78.66666667 79.         79.         76.
 77.         76.         77.66666667 79.33333333 77.         81.33333333
 82.66666667 81.66666667 80.33333333 81.         80.33333333 80.33333333
 80.66666667 81.         81.         80.66666667 80.66666667 78.66666667
 79.33333333 81.66666667 80.66666667 81.         81.         79.33333333
 78.66666667 79.33333333 79.66666667 79.66666667 76.66666667 79.
 79.33333333 78.         79.33333333 78.33333333 76.33333333 77.33333333
 77.         80.33333333 80.         78.         77.         78.
 79.         78.66666667 78.66666667 79.33333333 78

In [17]:
fold_files = ["mml_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/mml-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.dev.predictions.step.6000.csv
58.666666666666664
[52.66666667 50.66666667 49.33333333 51.66666667 51.33333333 45.66666667
 50.33333333 49.33333333 49.33333333 48.         51.         49.66666667
 51.66666667 50.66666667 51.66666667 54.         52.         52.66666667
 49.33333333 51.         51.33333333 52.33333333 52.         50.66666667
 55.         53.33333333 52.33333333 53.33333333 53.33333333 52.33333333
 52.66666667 52.         51.33333333 51.33333333 54.         51.33333333
 53.         54.33333333 54.33333333 51.33333333 55.         54.66666667
 52.33333333 55.         53.66666667 51.         53.         52.33333333
 52.33333333 51.33333333 53.66666667 54.         53.33333333 51.66666667
 52.33333333 51.         54.33333333 53.         54.33333333 58.66666667
 51.         55.         51.         52.33333333 54.         54.66666667
 52.66666667 56.33333333 53.33333333 56.         51.66666667 55.33333333
 54.66666667 55.66666667 54.33333333 52.33333333 53.         5

In [16]:
fold_files = ["mml_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/mml-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.dev.predictions.step.19500.csv
58.666666666666664
[53.66666667 50.33333333 49.66666667 49.         52.         51.66666667
 55.         56.33333333 57.         53.66666667 52.         53.66666667
 57.         57.         54.         56.         58.         58.66666667
 55.         53.66666667 52.         51.66666667 51.33333333 51.66666667
 52.33333333 56.         53.66666667 54.         54.33333333 55.
 56.66666667 56.66666667 57.33333333 55.66666667 54.66666667 55.33333333
 57.         56.33333333 52.66666667 55.         53.66666667 55.33333333
 55.66666667 52.         55.66666667 57.33333333 56.         53.33333333
 57.33333333 56.33333333 55.66666667 56.         54.33333333 58.
 52.66666667 55.         55.33333333 53.         50.66666667 55.
 56.66666667 53.         55.66666667 54.66666667 52.33333333 56.33333333
 54.         55.33333333 58.33333333 55.         56.         52.66666667
 58.33333333 57.33333333 53.         51.         50.         54.33333333
 55.        

In [15]:
fold_files = ["mml_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/mml-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.dev.predictions.step.17300.csv
45.666666666666664
[38.         40.         39.33333333 38.66666667 36.         38.66666667
 41.33333333 39.         41.         37.66666667 39.         36.66666667
 37.         36.33333333 39.         39.         42.         40.66666667
 43.66666667 37.33333333 39.66666667 40.66666667 40.         38.33333333
 38.33333333 45.         42.66666667 41.         41.         39.66666667
 40.33333333 42.33333333 41.         38.         38.         38.33333333
 37.33333333 36.33333333 37.66666667 37.33333333 40.         37.66666667
 36.33333333 39.33333333 37.         39.66666667 41.         39.33333333
 39.         39.66666667 41.         43.33333333 42.         40.
 40.         38.66666667 39.66666667 41.         41.         38.66666667
 38.         39.         40.66666667 37.66666667 38.33333333 39.33333333
 39.66666667 36.66666667 38.66666667 38.66666667 38.33333333 36.66666667
 38.33333333 35.66666667 39.66666667 38.66666667 39.66666667 39.33333

In [14]:
fold_files = ["mml_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/mml-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.dev.predictions.step.17200.csv
55.666666666666664
[52.33333333 49.         48.66666667 46.         51.         47.66666667
 49.33333333 46.33333333 49.33333333 52.         51.33333333 49.66666667
 47.33333333 49.66666667 49.66666667 49.         48.         50.66666667
 48.         51.         50.33333333 48.         48.         49.33333333
 49.66666667 49.66666667 46.66666667 47.66666667 51.         53.
 50.66666667 49.33333333 51.66666667 45.66666667 49.         47.66666667
 49.66666667 53.         53.66666667 48.         51.         52.
 47.         50.66666667 48.         47.         50.         49.66666667
 51.33333333 50.33333333 49.66666667 51.66666667 49.         52.
 52.         50.         52.         50.66666667 53.         48.33333333
 49.33333333 52.         54.33333333 51.66666667 47.         49.66666667
 48.66666667 47.         51.33333333 50.66666667 52.         52.
 54.66666667 52.66666667 52.         52.33333333 51.         52.
 47.33333333 50.66666667 50.

In [27]:
fold_files = ["mml_off_iter.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/mml-off-iter/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.dev.predictions.step.1200.csv
52.33333333333333
[46.         46.33333333 49.         45.33333333 47.66666667 43.33333333
 45.66666667 47.33333333 48.33333333 52.         51.         52.33333333
 49.         49.66666667 48.66666667 45.66666667 48.66666667 47.33333333
 50.         49.33333333 47.66666667 50.         46.33333333 45.33333333
 45.66666667 45.33333333 45.         43.66666667 49.         51.66666667
 47.         46.         46.66666667 43.66666667 46.66666667 46.33333333
 46.         46.         47.         41.66666667 41.         44.
 45.         41.66666667 49.         43.66666667 48.33333333 45.66666667
 46.33333333 44.         45.66666667 41.33333333 46.66666667 44.33333333
 44.66666667 45.66666667 45.66666667 42.66666667 45.33333333 45.33333333
 46.         44.         43.66666667 42.         44.         41.66666667
 42.33333333 40.         44.66666667 40.66666667 41.66666667 43.33333333
 42.33333333 43.33333333 39.33333333 41.33333333 39.33333333 39.6666666

In [28]:
fold_files = ["mml_off_iter.test.predictions.step.6000.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.test.predictions.step.6000.csv
51.300000000000004
[51.3]


In [29]:
fold_files = ["mml_off_iter.test.predictions.step.19500.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.test.predictions.step.19500.csv
49.95
[49.95]


In [30]:
fold_files = ["mml_off_iter.test.predictions.step.17300.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.test.predictions.step.17300.csv
49.88333333333333
[49.88333333]


In [31]:
fold_files = ["mml_off_iter.test.predictions.step.17200.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.test.predictions.step.17200.csv
40.416666666666664
[40.41666667]


In [32]:
fold_files = ["mml_off_iter.test.predictions.step.1200.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter.test.predictions.step.1200.csv
54.18333333333333
[54.18333333]


In [35]:
fold_files = ["mml_off_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 115, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/mml-off-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.dev.predictions.step.900.csv
70.0
[64.33333333 66.33333333 69.33333333 68.66666667 66.66666667 67.33333333
 68.         65.33333333 70.         67.33333333 67.         66.
 68.         68.66666667 67.33333333 68.66666667 69.         68.66666667
 68.66666667 65.66666667 67.         61.33333333 66.66666667 64.
 62.66666667 65.33333333 66.         66.33333333 64.33333333 64.66666667
 65.         65.33333333 66.         65.66666667 68.         66.
 66.         65.33333333 64.66666667 62.33333333 63.66666667 64.66666667
 64.33333333 66.66666667 66.         65.66666667 64.         65.66666667
 66.         68.33333333 64.66666667 67.         66.33333333 65.
 65.66666667 65.         62.33333333 66.66666667 63.66666667 66.
 62.33333333 64.66666667 65.66666667 64.66666667 66.66666667 63.
 63.66666667 63.33333333 54.66666667 60.33333333 61.         62.33333333
 63.66666667 63.         64.         64.66666667 62.         65.33333333
 65.         64.66666667 65.66666667 61.33333333

In [36]:
fold_files = ["mml_off_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 115, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/mml-off-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.dev.predictions.step.4100.csv
63.66666666666667
[58.         57.         60.33333333 60.         58.         58.66666667
 59.66666667 58.33333333 57.66666667 56.66666667 60.         60.33333333
 61.66666667 59.33333333 57.33333333 59.33333333 59.66666667 59.66666667
 59.33333333 60.         59.66666667 57.         59.33333333 57.66666667
 57.66666667 57.66666667 60.         60.         58.33333333 60.33333333
 60.33333333 60.66666667 61.         61.33333333 62.         60.33333333
 57.66666667 63.         61.         59.33333333 63.66666667 58.66666667
 60.66666667 59.66666667 59.66666667 59.66666667 58.66666667 58.
 58.66666667 59.         59.         63.         60.66666667 60.66666667
 60.33333333 59.         61.         60.33333333 60.66666667 57.66666667
 57.33333333 60.33333333 59.33333333 57.         59.         61.
 57.66666667 59.         59.33333333 56.33333333 59.         58.66666667
 57.         57.33333333 60.66666667 59.66666667 55.66666667 59.
 58.333333

In [37]:
fold_files = ["mml_off_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 115, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/mml-off-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.dev.predictions.step.6300.csv
75.33333333333333
[69.         66.         64.         66.66666667 71.         71.33333333
 71.         69.66666667 68.33333333 71.66666667 69.33333333 69.
 69.         67.33333333 67.33333333 68.66666667 72.         68.
 66.66666667 70.33333333 69.66666667 69.         69.66666667 66.66666667
 72.33333333 70.66666667 73.         70.66666667 71.         69.33333333
 68.33333333 67.66666667 70.66666667 69.66666667 70.66666667 73.66666667
 70.33333333 69.66666667 71.33333333 71.66666667 71.66666667 66.
 66.66666667 68.33333333 67.33333333 70.         70.66666667 71.66666667
 68.66666667 70.         69.         71.33333333 70.66666667 69.33333333
 70.33333333 70.66666667 70.66666667 73.         72.         72.
 71.         66.66666667 75.33333333 72.33333333 69.66666667 71.
 72.         73.         73.         68.66666667 67.66666667 70.66666667
 70.         70.33333333 68.66666667 70.         71.         71.33333333
 68.33333333 66.         6

In [38]:
fold_files = ["mml_off_iter+pgg.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 115, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/mml-off-iter+pgg/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.dev.predictions.step.9700.csv
78.0
[65.66666667 66.33333333 67.33333333 69.66666667 69.66666667 72.33333333
 71.33333333 69.66666667 76.         69.         71.         75.
 72.         74.66666667 74.         74.         74.66666667 75.
 73.33333333 72.66666667 73.33333333 75.66666667 76.         74.33333333
 75.33333333 75.66666667 76.         74.66666667 75.66666667 72.33333333
 70.66666667 74.33333333 74.66666667 74.66666667 73.66666667 74.33333333
 75.33333333 74.         73.         74.         76.66666667 75.
 75.         73.33333333 74.66666667 76.         75.         74.33333333
 74.         75.33333333 75.66666667 73.         73.33333333 74.
 75.         74.33333333 73.         74.         71.33333333 74.
 74.66666667 76.         74.66666667 74.         72.33333333 72.66666667
 73.33333333 73.66666667 72.         73.         75.         75.66666667
 76.         74.         74.         75.33333333 73.66666667 74.66666667
 76.66666667 76.66666667 76.         75

In [40]:
fold_files = ["mml_off_iter+pgg.test.predictions.step.900.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.test.predictions.step.900.csv
53.7
[53.7]


In [41]:
fold_files = ["mml_off_iter+pgg.test.predictions.step.4100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.test.predictions.step.4100.csv
65.5
[65.5]


In [42]:
fold_files = ["mml_off_iter+pgg.test.predictions.step.6300.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_4_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.test.predictions.step.6300.csv
65.60000000000001
[65.6]


In [43]:
fold_files = ["mml_off_iter+pgg.test.predictions.step.9700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_5_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_off_iter+pgg.test.predictions.step.9700.csv
60.88333333333333
[60.88333333]


In [163]:
fold_files = ["concat_fold.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 500, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_2+with_unk/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('concat_fold.2.dev.predictions.step.4600.csv', 64.46, array([54.46, 56.22, 53.83, 59.7 , 56.78, 51.13, 52.67, 51.13, 52.5 ,
       54.46, 54.21, 56.91, 58.06, 57.51, 57.78, 55.33, 55.17, 57.88,
       53.52, 55.59, 57.38, 57.65, 59.57, 61.69, 59.33, 57.09, 59.08,
       55.76, 56.62, 53.41, 60.33, 61.54, 58.88, 60.25, 59.35, 60.68,
       60.5 , 60.41, 60.99, 60.85, 62.84, 62.73, 62.14, 60.37, 62.84,
       64.46, 61.43, 60.88, 58.78, 59.6 , 56.05, 59.42, 57.8 , 57.39,
       56.69, 55.6 , 56.16, 61.51, 58.13, 62.5 , 56.96, 57.87, 53.83,
       55.46, 60.2 , 57.58, 58.78, 58.32, 60.38, 57.09, 56.43, 56.79,
       60.  , 58.18, 58.49, 52.99, 53.29, 59.61, 60.78, 56.83, 61.14,
       60.42, 57.55, 57.4 , 55.08, 61.15, 56.69, 62.21, 56.18, 58.82,
       61.36, 61.75, 59.64, 58.74, 56.34, 58.38, 56.46, 54.34, 55.18,
       60.14, 59.23, 59.55, 63.64, 57.62, 59.96, 58.69, 56.85, 57.34,
       54.98, 57.5 , 57.83, 59.43, 56.4 , 56.52, 57.75, 57.29, 57.97,
       58.95, 56.99, 57.73, 56.16, 

In [158]:
fold_files = ["concat_fold.2.test.predictions.step.4600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('concat_fold.2.test.predictions.step.4600.csv', 42.57, array([42.57]), {42.57: 'concat_fold.2.test.predictions.step.4600.csv'})


In [164]:
fold_files = ["gold_fold.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 500, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_2+with_unk/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('gold_fold.2.dev.predictions.step.2000.csv', 65.08, array([54.06, 54.76, 54.88, 57.57, 55.23, 57.55, 57.38, 58.09, 55.29,
       58.8 , 59.35, 57.05, 58.33, 58.95, 59.73, 57.86, 59.88, 58.43,
       59.83, 65.08, 63.83, 60.96, 62.67, 61.01, 58.08, 59.22, 59.  ,
       59.58, 59.79, 59.63, 62.01, 62.7 , 60.3 , 61.94, 62.79, 61.44,
       61.24, 64.37, 64.26, 62.91, 61.43, 61.27, 60.87, 62.38, 61.99,
       58.92, 62.27, 59.87, 58.58, 62.3 , 62.16, 62.87, 63.82, 63.01,
       60.28, 60.82, 62.35, 59.84, 61.51, 60.91, 61.18, 61.06, 61.43,
       61.25, 61.  , 61.25, 61.99, 61.46, 61.95, 61.16, 58.97, 59.79,
       60.84, 60.11, 59.93, 58.58, 59.32, 61.27, 61.59, 61.7 , 59.57,
       60.1 , 59.97, 61.01, 60.25, 59.86, 61.2 , 59.66, 61.05, 59.79,
       60.07, 60.03, 61.99, 62.5 , 60.1 , 57.73, 59.39, 59.1 , 59.28,
       59.87, 60.51, 59.11, 61.09, 61.39, 59.9 , 60.75, 59.73, 58.28,
       61.12, 60.14, 58.05, 60.03, 61.26, 61.2 , 60.31, 60.03, 60.88,
       62.69, 58.84, 61.01, 62.13, 62

In [157]:
fold_files = ["gold_fold.2.test.predictions.step.2000.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.2.test.predictions.step.2000.csv',
 48.23,
 array([48.23]),
 {48.23: 'gold_fold.2.test.predictions.step.2000.csv'})

In [109]:
fold_files = ["mml_pgg_off_sim.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 77, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/mml-pgg-off-sim+with_unks/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('mml_pgg_off_sim.dev.predictions.step.3200.csv', 58.61, array([51.07, 48.44, 52.5 , 54.86, 53.2 , 53.07, 55.9 , 50.61, 57.09,
       53.04, 51.26, 56.72, 57.43, 53.31, 53.02, 56.84, 51.65, 52.53,
       58.52, 54.33, 52.99, 50.55, 53.47, 53.24, 52.9 , 47.8 , 55.95,
       52.14, 56.72, 54.16, 54.85, 58.61, 55.57, 53.38, 55.48, 45.87,
       52.2 , 51.62, 54.98, 56.66, 51.95, 55.07, 52.38, 47.87, 55.09,
       55.2 , 53.46, 53.2 , 52.77, 54.1 , 50.55, 52.37, 54.61, 54.97,
       53.66, 55.67, 52.61, 55.33, 54.39, 43.54, 51.72, 53.83, 56.01,
       54.04, 51.13, 50.92, 53.89, 49.91, 51.88, 57.04, 53.91, 49.09,
       53.93, 53.33, 53.96, 49.06]), {51.07: 'mml_pgg_off_sim.dev.predictions.step.100.csv', 48.44: 'mml_pgg_off_sim.dev.predictions.step.200.csv', 52.5: 'mml_pgg_off_sim.dev.predictions.step.300.csv', 54.86: 'mml_pgg_off_sim.dev.predictions.step.400.csv', 53.2: 'mml_pgg_off_sim.dev.predictions.step.4800.csv', 53.07: 'mml_pgg_off_sim.dev.predictions.step.600.csv', 55.9: 'mml_pgg_o

In [118]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.3200.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/mml-pgg-off-sim+with_unks/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('mml_pgg_off_sim.test.predictions.step.3200.csv',
 43.67,
 array([43.67]),
 {43.67: 'mml_pgg_off_sim.test.predictions.step.3200.csv'})

In [119]:
fold_files = ["gold_fold.1.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.1.test.predictions.step.100.csv',
 43.3,
 array([43.3]),
 {43.3: 'gold_fold.1.test.predictions.step.100.csv'})

In [120]:
fold_files = ["gold_fold.2.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.2.test.predictions.step.100.csv',
 37.06,
 array([37.06]),
 {37.06: 'gold_fold.2.test.predictions.step.100.csv'})

In [121]:
fold_files = ["gold_fold.3.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.3.test.predictions.step.100.csv',
 41.44,
 array([41.44]),
 {41.44: 'gold_fold.3.test.predictions.step.100.csv'})

In [122]:
fold_files = ["concat_fold.1.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('concat_fold.1.test.predictions.step.100.csv',
 2.7,
 array([2.7]),
 {2.7: 'concat_fold.1.test.predictions.step.100.csv'})

In [123]:
fold_files = ["concat_fold.2.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('concat_fold.2.test.predictions.step.100.csv',
 2.38,
 array([2.38]),
 {2.38: 'concat_fold.2.test.predictions.step.100.csv'})

In [124]:
fold_files = ["concat_fold.3.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('concat_fold.3.test.predictions.step.100.csv',
 1.55,
 array([1.55]),
 {1.55: 'concat_fold.3.test.predictions.step.100.csv'})

In [125]:
fold_files = ["mml_pgg_off_sim.fold.1.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('mml_pgg_off_sim.fold.1.test.predictions.step.100.csv',
 35.12,
 array([35.12]),
 {35.12: 'mml_pgg_off_sim.fold.1.test.predictions.step.100.csv'})

In [126]:
fold_files = ["mml_pgg_off_sim.fold.2.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('mml_pgg_off_sim.fold.2.test.predictions.step.100.csv',
 29.42,
 array([29.42]),
 {29.42: 'mml_pgg_off_sim.fold.2.test.predictions.step.100.csv'})

In [127]:
fold_files = ["mml_pgg_off_sim.fold.3.test.predictions.step.100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('mml_pgg_off_sim.fold.3.test.predictions.step.100.csv',
 33.57,
 array([33.57]),
 {33.57: 'mml_pgg_off_sim.fold.3.test.predictions.step.100.csv'})

In [152]:
fold_files = ["gold_fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 525, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_1+with_unk/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('gold_fold.1.dev.predictions.step.2400.csv', 56.53, array([54.7 , 49.7 , 48.51, 48.94, 50.1 , 53.83, 53.49, 56.46, 53.38,
       51.55, 50.88, 51.84, 52.53, 56.12, 53.46, 54.28, 52.65, 55.08,
       56.02, 52.24, 49.57, 53.98, 46.77, 56.53, 45.15, 49.89, 53.21,
       51.28, 51.7 , 51.75, 50.72, 52.37, 54.34, 53.75, 51.64, 53.66,
       52.09, 47.47, 51.2 , 52.61, 49.59, 52.34, 50.67, 50.97, 49.52,
       50.71, 50.98, 50.71, 53.26, 50.1 , 53.26, 44.94, 45.32, 46.05,
       51.09, 48.51, 49.42, 47.48, 47.92, 47.22, 44.93, 49.9 , 50.09,
       50.1 , 47.5 , 48.67, 51.23, 47.46, 46.57, 49.2 , 50.47, 46.25,
       48.77, 50.71, 46.09, 46.21, 48.61, 43.13, 46.38, 43.85, 46.69,
       44.35, 47.82, 43.33, 43.29, 48.5 , 44.95, 45.05, 47.2 , 48.9 ,
       46.75, 46.18, 44.58, 45.34, 46.99, 47.83, 45.53, 48.71, 47.46,
       51.15, 48.18, 50.51, 53.55, 49.15, 52.19, 54.23, 52.31, 45.2 ,
       47.43, 49.  , 49.05, 51.58, 49.71, 47.6 , 47.4 , 48.83, 50.54,
       50.51, 46.52, 50.2 , 52.81, 46

In [151]:
fold_files = ["concat_fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 525, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_1+with_unk/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('concat_fold.1.dev.predictions.step.1700.csv', 53.96, array([52.75, 47.39, 46.06, 43.55, 45.85, 49.01, 48.91, 47.06, 48.8 ,
       51.54, 39.19, 45.51, 48.07, 49.3 , 44.21, 46.61, 53.96, 50.92,
       48.46, 47.6 , 47.53, 49.  , 40.76, 43.57, 40.17, 44.71, 48.65,
       48.34, 44.76, 47.13, 45.4 , 40.59, 42.86, 42.8 , 41.35, 45.47,
       46.83, 41.75, 43.31, 45.51, 41.74, 41.79, 44.65, 46.1 , 40.94,
       41.08, 39.59, 36.81, 43.29, 40.58, 40.49, 33.77, 34.71, 35.81,
       39.38, 42.23, 41.38, 44.81, 41.35, 39.52, 39.49, 37.02, 43.31,
       38.68, 40.94, 39.09, 45.67, 39.75, 42.21, 40.41, 36.52, 39.92,
       41.9 , 42.6 , 44.96, 41.12, 41.37, 39.52, 31.75, 34.84, 41.47,
       32.29, 34.44, 34.87, 28.57, 40.32, 35.77, 37.5 , 38.06, 39.02,
       40.  , 42.25, 34.62, 35.06, 41.98, 36.61, 32.74, 37.96, 39.84,
       38.  , 33.19, 38.75, 42.97, 39.52, 40.32, 42.15, 42.23, 34.22,
       37.53, 36.55, 40.7 , 34.21, 38.97, 39.48, 36.09, 42.47, 38.38,
       36.55, 43.03, 41.65, 40.66, 

In [150]:
fold_files = ["gold_fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 525, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_3+with_unk/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('gold_fold.3.dev.predictions.step.4200.csv', 63.59, array([55.75, 60.1 , 57.43, 60.7 , 60.95, 58.37, 58.33, 55.86, 61.22,
       62.07, 60.19, 59.89, 61.72, 60.26, 60.52, 60.84, 58.96, 58.58,
       61.38, 62.92, 61.69, 57.09, 62.22, 61.46, 59.25, 60.7 , 60.4 ,
       61.26, 61.82, 59.49, 62.12, 61.46, 60.2 , 59.76, 58.32, 59.27,
       58.  , 61.06, 60.  , 61.38, 63.16, 63.59, 61.35, 61.51, 62.65,
       61.25, 59.64, 61.01, 61.25, 61.15, 61.02, 59.08, 63.1 , 60.25,
       60.56, 60.17, 60.41, 61.13, 57.52, 60.66, 61.27, 60.63, 57.69,
       61.85, 62.54, 61.77, 61.56, 62.88, 61.68, 60.52, 61.77, 59.76,
       58.33, 58.57, 57.25, 58.55, 54.31, 58.93, 59.41, 58.66, 60.17,
       57.47, 59.53, 54.37, 58.47, 60.21, 59.21, 61.62, 59.23, 59.38,
       59.73, 60.18, 58.98, 59.45, 60.54, 59.82, 61.51, 60.45, 60.1 ,
       60.06, 60.75, 58.91, 57.3 , 58.92, 55.82, 56.41, 54.68, 57.95,
       58.18, 57.69, 57.4 , 57.14, 57.75, 59.87, 56.93, 55.41, 56.4 ,
       58.35, 60.07, 58.26, 57.95, 61

In [149]:
fold_files = ["concat_fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 525, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3+with_unk/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('concat_fold.3.dev.predictions.step.18600.csv', 61.4, array([50.63, 51.38, 53.04, 54.55, 55.13, 53.03, 55.21, 54.  , 55.59,
       50.51, 54.39, 57.86, 56.85, 57.88, 56.51, 55.18, 56.2 , 54.39,
       53.92, 55.83, 56.8 , 55.58, 57.56, 58.1 , 55.58, 56.15, 58.08,
       58.46, 55.05, 53.62, 53.62, 53.6 , 54.87, 55.08, 55.67, 55.59,
       56.47, 55.21, 57.58, 58.72, 58.7 , 56.1 , 55.78, 55.42, 55.26,
       56.25, 58.43, 59.62, 57.44, 58.23, 54.74, 54.84, 57.94, 55.54,
       55.27, 57.64, 58.09, 57.67, 56.65, 58.  , 57.91, 60.08, 59.03,
       58.93, 59.83, 59.73, 59.9 , 58.22, 60.14, 58.38, 58.35, 57.19,
       56.9 , 57.04, 56.73, 54.67, 55.33, 58.51, 56.11, 55.67, 53.75,
       54.94, 59.15, 59.18, 58.63, 57.61, 59.26, 55.33, 56.39, 54.38,
       54.28, 53.99, 57.24, 58.23, 57.24, 54.78, 55.81, 59.31, 54.87,
       56.45, 56.4 , 50.79, 54.16, 56.57, 55.12, 53.12, 55.25, 52.45,
       51.75, 50.46, 50.  , 53.14, 55.15, 53.59, 50.96, 48.26, 53.41,
       54.99, 54.64, 55.06, 53.43, 

In [135]:
fold_files = ["concat_fold.3.test.predictions.step.18600.csv", "gold_fold.3.test.predictions.step.4200.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.3.test.predictions.step.4200.csv',
 60.68,
 array([57.36, 60.68]),
 {57.36: 'concat_fold.3.test.predictions.step.18600.csv',
  60.68: 'gold_fold.3.test.predictions.step.4200.csv'})

In [136]:
fold_files = ["concat_fold.1.test.predictions.step.1700.csv", "gold_fold.1.test.predictions.step.2400.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.1.test.predictions.step.2400.csv',
 65.35,
 array([57.92, 65.35]),
 {57.92: 'concat_fold.1.test.predictions.step.1700.csv',
  65.35: 'gold_fold.1.test.predictions.step.2400.csv'})

In [137]:
fold_files = ["concat_fold.2.test.predictions.step.4600.csv", "gold_fold.2.test.predictions.step.2000.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
unk_eval_the_prediction_files(fold_files, fold_0_gold_file)

('gold_fold.2.test.predictions.step.2000.csv',
 48.23,
 array([42.57, 48.23]),
 {42.57: 'concat_fold.2.test.predictions.step.4600.csv',
  48.23: 'gold_fold.2.test.predictions.step.2000.csv'})

In [144]:
fold_files = ["mml_pgg_off_sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 239, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_1_mml-pgg-off-sim+with_unks/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('mml_pgg_off_sim.fold.1.dev.predictions.step.400.csv', 49.47, array([46.74, 45.84, 49.3 , 49.47, 44.99, 45.33, 33.01, 47.81, 37.72,
       41.98, 41.05, 41.61, 37.6 , 26.11, 37.26, 33.33, 35.7 , 39.84,
       37.42, 39.52, 34.38, 37.32, 38.37, 35.37, 32.84, 36.29, 40.5 ,
       38.71, 40.56, 40.  , 36.83, 38.48, 35.19, 36.51, 39.18, 35.01,
       36.96, 32.05, 35.83, 37.34, 33.33, 33.97, 35.98, 31.73, 38.21,
       40.46, 36.2 , 42.42, 41.68, 42.95, 42.34, 38.34, 34.9 , 42.6 ,
       40.5 , 43.16, 35.95, 32.23, 39.45, 38.4 , 36.8 , 32.24, 34.61,
       32.51, 26.02, 35.92, 34.61, 33.33, 32.39, 29.84, 30.84, 30.57,
       31.32, 28.76, 32.02, 30.4 , 27.76, 29.78, 33.12, 34.89, 31.03,
       31.51, 30.19, 31.4 , 32.1 , 37.11, 33.69, 33.7 , 34.3 , 32.54,
       35.03, 34.16, 34.06, 35.95, 31.48, 29.76, 34.59, 32.48, 30.59,
       37.4 , 35.27, 39.6 , 33.05, 33.47, 35.51, 31.06, 31.74, 29.4 ,
       34.91, 34.1 , 32.91, 33.41, 34.67, 34.57, 36.86, 31.67, 29.3 ,
       33.69, 34.42, 36.79,

In [145]:
fold_files = ["mml_pgg_off_sim.fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 239, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_3_mml-pgg-off-sim+with_unks/dev_predictions/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('mml_pgg_off_sim.fold.3.dev.predictions.step.7100.csv', 52.16, array([42.29, 42.92, 37.21, 45.41, 44.9 , 50.61, 48.33, 47.15, 46.79,
       48.58, 48.42, 44.41, 47.81, 46.05, 47.77, 46.94, 48.7 , 47.69,
       49.57, 48.12, 45.72, 45.61, 48.65, 49.04, 47.3 , 46.31, 46.38,
       44.72, 48.7 , 44.67, 44.25, 47.46, 45.87, 49.02, 48.83, 46.97,
       48.77, 48.68, 49.28, 47.32, 46.45, 49.08, 48.65, 48.25, 48.86,
       52.11, 49.47, 47.79, 51.83, 49.82, 47.87, 48.42, 44.36, 41.26,
       45.78, 43.13, 42.54, 45.  , 44.13, 45.59, 45.91, 43.51, 46.48,
       46.64, 47.08, 47.42, 47.92, 47.96, 47.08, 48.34, 52.16, 48.38,
       44.24, 43.49, 42.6 , 45.5 , 46.38, 46.44, 43.3 , 46.73, 48.19,
       45.44, 47.04, 50.  , 45.84, 46.97, 44.53, 45.2 , 47.87, 48.97,
       44.85, 47.54, 47.28, 46.13, 44.74, 46.07, 46.37, 45.7 , 46.33,
       45.52, 42.66, 46.37, 42.73, 43.42, 44.92, 44.89, 44.4 , 44.24,
       46.3 , 45.07, 43.28, 45.21, 45.26, 45.83, 47.91, 45.98, 44.27,
       44.32, 42.65, 46.76

In [146]:
fold_files = ["mml_pgg_off_sim.fold.3.test.predictions.step.7100.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('mml_pgg_off_sim.fold.3.test.predictions.step.7100.csv', 56.71, array([56.71]), {56.71: 'mml_pgg_off_sim.fold.3.test.predictions.step.7100.csv'})


In [147]:
fold_files = ["mml_pgg_off_sim.fold.1.test.predictions.step.400.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
print(unk_eval_the_prediction_files(fold_files, fold_0_gold_file))

('mml_pgg_off_sim.fold.1.test.predictions.step.400.csv', 56.51, array([56.51]), {56.51: 'mml_pgg_off_sim.fold.1.test.predictions.step.400.csv'})


In [199]:
fold_files = ["mml_pgg_off_sim.test.predictions.step.1700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fold_2_results/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

['IUCN conservation status', 'What status on the endangered list is XXX?', 'Cottus girardi', 'Cottus girardi is considered to be of least concern by the International Union for Conservation of Nature because it has a large natural range, a large population and a number of subpopulations, and a relatively stable population trend over time.']
least concern
What is Cottus girardi considered to be of least concern by the IUCN?
for union nature of conservation international
#################
['instrument', 'What instrument did XXX played?', 'Aaron Sachs', 'Aaron Sachs (July 4, 1923 - June 5, 2014) was an American jazz saxophone and clarinet player.']
clarinet
What instrument did Aaron Sachs play?
jazz
#################
['continent', 'On which continent is XXX located?', 'Refuge Astronomer Cruls', 'Refuge Astronomer Cruls (Portuguese: Refúgio Astrônomo Cruls) is a Brazilian Antarctic summer facility named after astronomer Luis Cruls who set up an expedition in 1882 to Punta Arenas in order t

#################
['home venue', 'What is the common name of the place where XXX plays their home games?', 'Clube Atlético Taquaritinga', 'Clube Atlético Taquaritinga play their home games at Estádio Municipal Adail Nunes da Silva, nicknamed Taquarão.']
taquaro
Where do Clube Atlético Taquaritinga play their home games?
estdio silva adail municipal da nunes
#################
['vessel class', 'What vessel type is XXX?', 'ROKS Chungmugong Yi Sun-sin (DDH-975)', 'ROKS Chungmugong Yi Sun-sin (DDH-975) is a Chungmugong Yi Sun-sin-class destroyer in the Republic of Korea Navy.']
sun-sin-class destroyer chungmugong yi
What vessel is a Chungmugong Yi Sun-sin class destroyer in?
navy korea
#################
['replaced by', 'What was replaced XXX?', 'Kymi Province', 'In 1997 the Kymi Province was merged with the Uusimaa Province and the southern parts of the Häme Province into the new Southern Finland Province.']
southern province finland
What province was replaced by the Kymi Province?
uusimaa 

#################
['taxon rank', 'Is the taxon rank of XXX species or genus?', 'Anematichthys armatus', 'Anematichthys armatus is a species of freshwater cyprinid native to Southeast Asia.']
species
What is Anematichthys armatus' taxon rank?
cyprinid
#################
['IUCN conservation status', 'What is the status of XXX?', 'Hyla heinzsteinitzi', "As of 2013 Hyla heinzsteinitzi is listed by the International Union for Conservation of Nature as ``critically endangered''."]
critically endangered
Who listed Hyla heinzsteinitzi as a critical endangered species?
for union nature of conservation international
#################
['occupant', 'What team uses XXX?', 'Plumeri Park', "Plumeri Park is The College of William & Mary Tribe baseball team's home stadium located in Williamsburg, Virginia."]
mary tribe william &
Who is the occupant of Plumeri Park baseball team?
of mary & tribe william college
#################
['child', 'Who is the offspring of XXX?', 'Meritites I', 'Both Queen Hetephe

biju dal janata
#################
['characters', 'The XXX has what character?', 'Orlando Furioso', 'Orlando Furioso (Italian: (orˈlando fuˈrjoːso); The Frenzy of Orlando, more literally Raging Roland; in Italian furioso is seldom capitalized) is an Italian epic poem by Ludovico Ariosto which has exerted a wide influence on later culture.']
roland
Who is the author of the Italian furioso poem?
ariosto ludovico
#################
['product', 'What is the product made by XXX?', 'Kittilä mine', 'Kittilä mine, also known as Suurikuusikko mine, is a gold mine in Kittilä, in the Lapland Province of Finland.']
gold
What is the name of the gold mine in Kittilä mine?
suurikuusikko
#################
['instrument', 'Which instrument does XXX play?', 'Craig Bloxom', 'Craig Bloxom was the bass player/singer of Australian ska/rock band vSpy vSpy which formed in Sydney in 1981 with Mike Weiley on guitar and Cliff Grigg on drums.']
guitar
What instrument did Craig Bloxom play on?
bass
#################


['vessel class', 'What type of aquatic unit is XXX?', 'USS Bainbridge (DD-1)', 'The second USS Bainbridge (DD-1) was the first destroyer in the United States Navy and the lead ship of the Bainbridge-class destroyer.']
destroyer bainbridge-class
Which vessel was the lead ship of the USS Bainbridge (DD-1)?
navy states united
#################
['cast member', 'Who was the star of XXX?', 'Folly of Love', 'Folly of Love (German: Unfug der Liebe) is a 1928 German silent comedy film directed by Robert Wiene and starring Maria Jacobini, Jack Trevor and Betty Astor.']
astor trevor and jacobini betty maria jack
Who is the cast member of Folly of Love?
wiene robert
#################
['product', 'What is the product made by XXX?', 'VELUX', 'The first VELUX roof window was installed in a Danish school over 60 years ago by the founder of the company, Villum Kann Rasmussen.']
roof window
What company founded the first VELUX roof window?
villum rasmussen kann
#################
['material used', 'What 

['replaced by', 'What was replaced XXX?', 'Raytheon', 'In the first quarter of 2007 Raytheon sold its aircraft operations, which subsequently operated as Hawker Beechcraft, and since 2014 have been units of Textron Aviation.']
beechcraft hawker
When was Raytheon replaced by its aircraft operations?
2007
#################
['country', 'What is the country that XXX is located in?', 'Kamareh-ye Bala', 'Kamareh-ye Bala (Persian: كمره بالا\u200e, also Romanized as Kamareh-ye Bālā; also known as Kamareh-ye ``Olyā, Kamereh, and Kemereh) is a village in Oshtorinan Rural District, Oshtorinan District, Borujerd County, Lorestan Province, Iran. At the 2006 census, its population was 260, in 66 families.']
iran
What country is Kamareh-ye Bala located in?
oshtorinan district rural
#################
['cast member', 'Who featured in the film XXX?', 'Spare the Rod', "Spare the Rod is a 1961 British social drama, directed by Leslie Norman and starring Max Bygraves, Geoffrey Keen, Donald Pleasence and Ri

ii ub
#################
['continent', 'What continent is XXX located on?', 'Black Glacier', 'Black Glacier (71°40′S 164°42′E) is a broad tributary to the Lillie Glacier flowing northeast, marking the southeast extent of the Bowers Mountains, a major mountain range situated in the geographical location of Victoria Land, Antarctica.']
antarctica
What continent is Black Glacier located in?
lillie glacier
#################
['IUCN conservation status', 'What is the conservation status of XXX?', 'Voanioala', 'Voanioala gerardii is critically endangered because of its scarcity.']
critically endangered
Voanioala gerardii is critically endangered due to its conservation status?
scarcity
#################
['characters', 'The XXX has what character?', 'And Be a Villain', 'And Be a Villain (British title More Deaths Than One) is a Nero Wolfe detective novel by Rex Stout, first published by the Viking Press in 1948.']
wolfe nero
Who is the author of And Be a Villain detective novel?
stout rex
#####

In [272]:
# Base-Base Predictions
fold_files = ["base_base.test.predictions.epoch1.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/posterier_question_model/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


fold_files = ["base_base.test.predictions.epoch2.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/posterier_question_model/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

fold_files = ["base_base.test.predictions.epoch3.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/posterier_question_model/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

fold_files = ["base_base.test.predictions.fold.4.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/posterier_question_model/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

fold_files = ["base_base.test.predictions.fold.5.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/posterier_question_model/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

base_base.test.predictions.epoch1.csv
44.4
[44.4]
base_base.test.predictions.epoch2.csv
37.75
[37.75]
base_base.test.predictions.epoch3.csv
42.18333333333334
[42.18333333]
base_base.test.predictions.fold.4.csv
41.38333333333333
[41.38333333]
base_base.test.predictions.fold.5.csv
48.083333333333336
[48.08333333]


In [271]:
# Concat Base Predictions
fold_files = ["concat_fold_1.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

# Gold Base Predictions
fold_files = ["concat_fold_2.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


# Gold Base Predictions
fold_files = ["concat_fold_3.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


# Gold Base Predictions
fold_files = ["concat_fold_4.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


# Gold Base Predictions
fold_files = ["concat_fold_5.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/concat_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

concat_fold_1.test.predictions.base.csv
4.0
[4.]
concat_fold_2.test.predictions.base.csv
3.5666666666666664
[3.56666667]
concat_fold_3.test.predictions.base.csv
2.2666666666666666
[2.26666667]
concat_fold_4.test.predictions.base.csv
2.6166666666666667
[2.61666667]
concat_fold_5.test.predictions.base.csv
2.4
[2.4]


In [270]:
# Gold Base Predictions
fold_files = ["gold_fold_1.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

# Gold Base Predictions
fold_files = ["gold_fold_2.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_2/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


# Gold Base Predictions
fold_files = ["gold_fold_3.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

# Gold Base Predictions
fold_files = ["gold_fold_4.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


# Gold Base Predictions
fold_files = ["gold_fold_5.test.predictions.base.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/gold_fold_3/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

gold_fold_1.test.predictions.base.csv
56.89999999999999
[56.9]
gold_fold_2.test.predictions.base.csv
49.016666666666666
[49.01666667]
gold_fold_3.test.predictions.base.csv
53.75
[53.75]
gold_fold_4.test.predictions.base.csv
58.45
[58.45]
gold_fold_5.test.predictions.base.csv
54.6
[54.6]


In [482]:
"New Dec 29 Run"

fold_files = ["mml_pgg_off_sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]
fold_files += ["mml-pgg-off-sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(107, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.1.dev.predictions.step.1300.csv
64.66666666666666
[57.66666667 60.         62.66666667 60.         62.33333333 58.66666667
 62.         59.33333333 58.         63.33333333 57.66666667 64.33333333
 64.66666667 60.66666667 56.         57.33333333 61.33333333 59.33333333
 58.         61.         57.         59.66666667 61.66666667 56.66666667
 61.33333333 61.66666667 58.33333333 59.66666667 63.33333333 60.66666667
 58.66666667 59.66666667 61.         60.66666667 62.66666667 60.
 61.66666667 62.         61.66666667 61.         61.66666667 60.
 59.33333333 57.66666667 60.33333333 61.         61.33333333 59.
 58.66666667 60.66666667 59.         57.33333333 58.33333333 55.66666667
 61.         60.66666667 57.33333333 60.         60.         60.
 61.66666667 61.66666667 59.         59.66666667 61.33333333 55.33333333
 60.         55.33333333 57.33333333 53.33333333 54.66666667 58.33333333
 57.66666667 60.33333333 54.         55.         54.66666667 55.66666667
 53.        

In [492]:
"New Dec 29 Run"

fold_files = ["mml-pgg-on-sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 250, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-pgg-on-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-on-sim.fold.1.dev.predictions.step.6200.csv
65.66666666666666
[51.33333333 55.66666667 53.66666667 60.         59.33333333 60.33333333
 59.33333333 62.         61.         62.33333333 58.33333333 60.33333333
 60.66666667 60.         60.         62.33333333 62.         62.33333333
 63.         57.33333333 61.66666667 63.33333333 64.66666667 61.66666667
 58.66666667 62.33333333 61.33333333 61.66666667 60.66666667 60.66666667
 59.         59.         58.33333333 57.66666667 61.         59.66666667
 55.33333333 55.         54.33333333 58.         57.66666667 58.66666667
 59.66666667 60.33333333 61.         60.66666667 61.66666667 63.33333333
 62.33333333 60.33333333 62.66666667 62.         59.66666667 60.66666667
 64.         65.         65.         65.         63.         63.
 61.66666667 65.66666667 59.33333333 61.33333333 63.66666667 63.33333333
 57.66666667 63.         62.66666667 61.         62.         61.66666667
 61.66666667 63.         57.         58.         57.         5

In [496]:
"New Dec 29 Run"

fold_files = ["mml-pgg-on-sim.fold.1.test.predictions.step.6200.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-pgg-on-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-on-sim.fold.1.test.predictions.step.6200.csv
71.26666666666667
[71.26666667]


In [493]:
"New Dec 29 Run"

fold_files = ["mml-mml-on-sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 250, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-mml-on-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-mml-on-sim.fold.1.dev.predictions.step.2300.csv
69.66666666666667
[58.33333333 57.66666667 61.66666667 63.33333333 64.33333333 62.33333333
 63.66666667 63.33333333 61.66666667 61.66666667 60.66666667 62.66666667
 60.         63.66666667 60.33333333 62.66666667 66.         69.33333333
 68.66666667 64.33333333 65.         67.66666667 69.66666667 66.33333333
 66.         67.         67.33333333 67.         68.         66.66666667
 65.66666667 62.         60.66666667 62.66666667 63.         64.33333333
 64.         62.66666667 64.66666667 62.33333333 64.66666667 64.33333333
 63.66666667 63.33333333 66.33333333 65.33333333 66.33333333 66.
 66.66666667 61.         57.66666667 60.         59.33333333 58.66666667
 64.         62.33333333 65.66666667 62.66666667 60.33333333 62.33333333
 62.33333333 66.         66.66666667 62.66666667 62.33333333 64.66666667
 61.         58.33333333 64.33333333 60.66666667 62.33333333 59.33333333
 59.         63.66666667 61.66666667 62.         62.         6

In [497]:
"New Dec 29 Run"

fold_files = ["mml-mml-on-sim.fold.1.test.predictions.step.2300.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-mml-on-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-mml-on-sim.fold.1.test.predictions.step.2300.csv
72.25
[72.25]


In [494]:
"New Dec 29 Run"

fold_files = ["mml-mml-off-sim.fold.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 250, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-mml-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-mml-off-sim.fold.1.dev.predictions.step.1600.csv
64.66666666666666
[51.33333333 54.66666667 49.66666667 59.33333333 57.33333333 59.33333333
 55.         57.66666667 60.66666667 62.33333333 58.33333333 61.
 58.66666667 61.33333333 63.         64.66666667 63.33333333 62.33333333
 61.33333333 63.66666667 60.         60.66666667 62.66666667 63.
 58.         59.66666667 61.66666667 62.66666667 62.33333333 59.33333333
 58.33333333 61.         57.33333333 59.         61.33333333 61.33333333
 60.33333333 57.66666667 59.33333333 61.33333333 56.66666667 61.33333333
 58.         58.         61.         58.66666667 60.33333333 62.33333333
 56.         58.33333333 54.66666667 58.66666667 57.66666667 58.
 57.33333333 59.         59.         60.         59.66666667 60.66666667
 57.33333333 61.33333333 58.33333333 60.         57.         59.
 60.33333333 57.66666667 57.66666667 57.33333333 57.66666667 60.
 57.33333333 58.33333333 56.         51.         54.         55.
 54.66666667 53.66666667 58.

In [498]:
"New Dec 29 Run"

fold_files = ["mml-mml-off-sim.fold.1.test.predictions.step.1600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-mml-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-mml-off-sim.fold.1.test.predictions.step.1600.csv
74.95
[74.95]


In [481]:
"New Dec 29 Run"
fold_files = ["mml_pgg_off_sim.fold.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]
fold_files += ["mml-pgg-off-sim.fold.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(107, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_2/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-off-sim.fold.2.dev.predictions.step.11000.csv
73.66666666666667
[64.33333333 64.66666667 70.33333333 68.33333333 65.66666667 70.66666667
 70.         70.66666667 71.66666667 70.66666667 71.33333333 69.66666667
 65.66666667 69.         67.         69.66666667 68.66666667 69.
 70.33333333 70.         65.33333333 66.66666667 67.66666667 68.
 71.         67.33333333 66.66666667 69.66666667 68.66666667 70.33333333
 68.33333333 70.33333333 70.         71.         68.33333333 69.
 70.66666667 68.66666667 66.66666667 69.66666667 68.66666667 69.33333333
 69.         69.66666667 69.         69.66666667 69.33333333 69.
 69.66666667 69.33333333 70.         69.         68.33333333 69.33333333
 69.66666667 68.33333333 69.         71.         68.66666667 70.66666667
 70.33333333 72.33333333 69.         73.66666667 71.         69.66666667
 71.         68.66666667 67.33333333 67.         66.         69.33333333
 68.         72.33333333 69.66666667 69.         70.         72.33333333
 70.3333333

In [483]:
"New Dec 29 Run"
fold_files = ["mml_pgg_off_sim.fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]
fold_files += ["mml-pgg-off-sim.fold.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(107, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_3/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-off-sim.fold.3.dev.predictions.step.22600.csv
67.66666666666666
[58.66666667 60.66666667 61.33333333 58.66666667 59.         59.33333333
 57.66666667 55.66666667 57.33333333 59.         60.         60.
 61.66666667 61.66666667 63.66666667 57.66666667 59.33333333 59.66666667
 62.66666667 59.66666667 62.         56.         56.33333333 60.
 59.33333333 58.66666667 58.66666667 61.66666667 60.33333333 59.33333333
 58.66666667 61.         59.         60.         61.33333333 59.33333333
 58.33333333 59.         59.66666667 57.33333333 59.33333333 58.
 60.         57.66666667 60.66666667 59.         58.66666667 56.66666667
 57.         59.33333333 57.66666667 58.         56.66666667 56.33333333
 57.66666667 58.         57.33333333 58.33333333 57.         56.66666667
 58.         59.66666667 58.33333333 59.         56.         56.
 53.66666667 57.66666667 57.         54.33333333 58.         58.33333333
 55.66666667 56.         54.         56.33333333 56.         57.33333333
 60.       

In [484]:
"New Dec 29 Run"
fold_files = ["mml_pgg_off_sim.fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]
fold_files += ["mml-pgg-off-sim.fold.4.dev.predictions.step.{}.csv".format(100 * i) for i in range(107, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_4/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-off-sim.fold.4.dev.predictions.step.12300.csv
76.33333333333333
[65.33333333 68.         67.         69.         66.33333333 66.66666667
 67.         65.33333333 70.         64.66666667 66.66666667 67.66666667
 64.66666667 65.66666667 68.         70.66666667 69.66666667 64.
 70.         68.66666667 69.         63.         66.66666667 68.
 67.66666667 68.66666667 69.         72.         70.33333333 71.
 69.         62.         69.33333333 69.         68.33333333 72.33333333
 69.66666667 72.         73.66666667 72.33333333 75.         69.66666667
 73.66666667 76.         70.         68.         68.33333333 71.66666667
 67.         69.         68.33333333 68.33333333 68.         69.
 68.66666667 69.66666667 68.66666667 69.         67.66666667 67.33333333
 67.33333333 67.66666667 70.         64.33333333 66.33333333 66.66666667
 68.33333333 69.66666667 70.         66.33333333 69.33333333 69.66666667
 71.66666667 73.         71.66666667 70.66666667 71.66666667 67.33333333
 70.       

In [478]:
"New Dec 29 Run"
fold_files = ["mml_pgg_off_sim.fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 107, 1)]
fold_files += ["mml-pgg-off-sim.fold.5.dev.predictions.step.{}.csv".format(100 * i) for i in range(107, 262, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_5/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.5.dev.predictions.step.4100.csv
79.66666666666666
[67.         69.         70.33333333 67.66666667 73.66666667 74.
 73.33333333 72.33333333 73.66666667 76.66666667 74.         76.33333333
 75.33333333 75.         73.33333333 73.66666667 74.33333333 74.
 72.         69.66666667 73.33333333 73.66666667 75.66666667 71.66666667
 72.33333333 74.66666667 75.         74.66666667 73.66666667 72.33333333
 71.         75.66666667 73.66666667 76.         75.33333333 75.
 76.         75.         74.66666667 74.33333333 79.66666667 77.66666667
 79.         76.66666667 77.33333333 73.66666667 75.33333333 74.66666667
 74.66666667 73.         77.         75.         76.66666667 75.
 73.33333333 70.         74.66666667 73.66666667 71.66666667 74.
 71.         77.33333333 74.66666667 76.66666667 76.33333333 75.
 76.33333333 75.         72.         73.         76.         75.33333333
 75.33333333 76.33333333 74.         70.66666667 75.         76.
 76.66666667 77.         74.        

In [447]:
"New Dec 29 Run"

fold_files = ["mml_pgg_off_sim.fold.5.test.predictions.step.4100.csv", "base-base.fold.5.test.predictions.step.4100.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_5/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.4"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.5.test.predictions.step.4100.csv
67.68333333333332
[67.68333333 35.75      ]


In [486]:
"New Dec 29 Run"

fold_files = ["mml-pgg-off-sim.fold.4.test.predictions.step.12300.csv", "base-base.fold.4.test.predictions.step.4100.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_4/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.3"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-off-sim.fold.4.test.predictions.step.12300.csv
71.2
[71.2        32.41666667]


In [488]:
"New Dec 29 Run"

fold_files = ["mml-pgg-off-sim.fold.3.test.predictions.step.22600.csv", "base-base.fold.3.test.predictions.step.4100.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_3/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.2"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-off-sim.fold.3.test.predictions.step.22600.csv
67.2
[67.2        34.58333333]


In [490]:
"New Dec 29 Run"

fold_files = ["mml-pgg-off-sim.fold.2.test.predictions.step.11000.csv", "base-base.fold.2.test.predictions.step.4100.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_2/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.1"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml-pgg-off-sim.fold.2.test.predictions.step.11000.csv
59.03333333333334
[59.03333333 26.03333333]


In [444]:
"New Dec 29 Run"

fold_files = ["mml_pgg_off_sim.fold.1.test.predictions.step.1300.csv", "base-base.fold.1.test.predictions.step.4100.csv"]
fold_0_path = "~/t5-small-exps/naacl-2022/dec_29/fold_1/mml-pgg-off-sim/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

mml_pgg_off_sim.fold.1.test.predictions.step.1300.csv
75.9
[75.9]


In [None]:
FewRL Data.

In [506]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

"mml_pgg_off_sim.epoch.2.run.1.dev.predictions.step.200.csv"

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.100.csv
0.42514285714285716

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.200.csv
0.45971428571428574

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.300.csv
0.4742857142857143

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.400.csv
0.4665714285714286

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.500.csv
0.5025714285714286

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.600.csv
0.48857142857142855

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.700.csv
0.5028571428571429

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.800.csv
0.49685714285714283

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.900.csv
0.48428571428571426

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.1000.csv
0.5137142857142857

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.1100.csv
0.5134285714285715

#
mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.1200.csv
0.5297142857142857

#
mml_pgg_off_sim.ep

0.6208571428571429

#
mml_pgg_off_sim.epoch.3.run.1.dev.predictions.step.2500.csv
0.6151428571428571

#
mml_pgg_off_sim.epoch.3.run.1.dev.predictions.step.2600.csv
0.6254285714285714



In [76]:
fold_files = ["mml_pgg_off_sim.epoch.2.run.1.test.predictions.step.200.csv", "base-base.run.1.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.epoch.2.run.1.test.predictions.step.200.csv
0.5387619047619048

#
base-base.run.1.test.predictions.step.2600.csv
0.3498095238095238



In [507]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_943.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")
    
"mml_pgg_off_sim.epoch.1.run.2.dev.predictions.step.900.csv"

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.100.csv
0.3802857142857143

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.200.csv
0.43657142857142855

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.300.csv
0.44085714285714284

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.400.csv
0.47485714285714287

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.500.csv
0.47485714285714287

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.600.csv
0.4602857142857143

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.700.csv
0.44771428571428573

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.800.csv
0.48942857142857144

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.900.csv
0.4665714285714286

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.1000.csv
0.5002857142857143

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.1100.csv
0.49228571428571427

#
mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.1200.csv
0.48942857142857144

#
mml_pgg_off_sim

In [515]:
fold_files = ["mml_pgg_off_sim.epoch.1.run.2.test.predictions.step.900.csv", "base-base.run.2.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_943.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.epoch.1.run.2.test.predictions.step.900.csv
0.5092380952380953

#
base-base.run.2.test.predictions.step.2600.csv
0.33676190476190476



In [516]:
fold_files = ["mml_pgg_off_sim.epoch.3.run.3.test.predictions.step.2100.csv", "base-base.run.3.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_111.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
mml_pgg_off_sim.epoch.3.run.3.test.predictions.step.2100.csv
0.5785714285714286

#
base-base.run.3.test.predictions.step.2600.csv
0.35714285714285715



In [508]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_111.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

"mml_pgg_off_sim.epoch.3.run.3.dev.predictions.step.2100.csv"

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.100.csv
0.45971428571428574

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.200.csv
0.44685714285714284

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.300.csv
0.48342857142857143

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.400.csv
0.45

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.500.csv
0.5034285714285714

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.600.csv
0.492

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.700.csv
0.5114285714285715

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.800.csv
0.5194285714285715

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.900.csv
0.5217142857142857

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.1000.csv
0.5288571428571428

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.1100.csv
0.5008571428571429

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.1200.csv
0.5134285714285715

#
mml_pgg_off_sim.epoch.0.run.3.dev.predictions.s

In [38]:
fold_files = ["mml-pgg-off-sim.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_4/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml-pgg-off-sim.run.3.dev.predictions.step.2600.csv
0.18342857142857144


In [50]:
fold_files = ["mml-pgg-off-sim.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_4/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml-pgg-off-sim.run.3.dev.predictions.step.2600.csv
0.18342857142857144


In [51]:
fold_files = ["mml-pgg-off-sim.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_5/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_1300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml-pgg-off-sim.run.3.dev.predictions.step.2600.csv
0.18542857142857141


In [49]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_111.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml_pgg_off_sim.epoch.3.run.3.dev.predictions.step.2100.csv
0.6245714285714286


In [56]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_12321.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml_pgg_off_sim.epoch.2.run.1.dev.predictions.step.200.csv
0.6311428571428571


In [59]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_943.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml_pgg_off_sim.epoch.1.run.2.dev.predictions.step.900.csv
0.5108571428571429


In [60]:
fold_files = ["mml_pgg_off_sim.epoch.0.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.1.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.2.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml_pgg_off_sim.epoch.3.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_111.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml_pgg_off_sim.epoch.3.run.3.dev.predictions.step.2100.csv
0.6245714285714286


In [64]:
fold_files = ["mml-pgg-off-sim.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_4/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml-pgg-off-sim.run.3.dev.predictions.step.2300.csv
0.7231428571428572


In [65]:
fold_files = ["mml-pgg-off-sim.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["mml-pgg-off-sim.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_5/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_1300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

mml-pgg-off-sim.run.3.dev.predictions.step.2400.csv
0.64


In [71]:
fold_files = ["mml-pgg-off-sim.run.3.test.predictions.step.2300.csv", "base-base.run.0.test.predictions.step.0.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_4/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print(num)
    print(file)
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

0.5196190476190476
mml-pgg-off-sim.run.3.test.predictions.step.2300.csv
0.18676190476190477
base-base.run.0.test.predictions.step.0.csv
mml-pgg-off-sim.run.3.test.predictions.step.2300.csv
0.5196190476190476


In [75]:
fold_files = ["mml-pgg-off-sim.run.3.test.predictions.step.2400.csv", "base-base.run.0.test.predictions.step.0.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_5/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_1300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print(num)
    print(file)
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

0.5898095238095238
mml-pgg-off-sim.run.3.test.predictions.step.2400.csv
0.20438095238095239
base-base.run.0.test.predictions.step.0.csv
mml-pgg-off-sim.run.3.test.predictions.step.2400.csv
0.5898095238095238


In [81]:
fold_files = ["base-base.run.0.test.predictions.step.0.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_12321.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print(num)
    print(file)
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

0.19028571428571428
base-base.run.0.test.predictions.step.0.csv
base-base.run.0.test.predictions.step.0.csv
0.19028571428571428


In [82]:
fold_files = ["base-base.run.0.test.predictions.step.0.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_943.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print(num)
    print(file)
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

0.1636190476190476
base-base.run.0.test.predictions.step.0.csv
base-base.run.0.test.predictions.step.0.csv
0.1636190476190476


In [83]:
fold_files = ["base-base.run.0.test.predictions.step.0.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_111.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print(num)
    print(file)
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

0.17095238095238094
base-base.run.0.test.predictions.step.0.csv
base-base.run.0.test.predictions.step.0.csv
0.17095238095238094


In [54]:
# concat
fold_files = ["concat.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_5/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_1300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

concat.run.3.dev.predictions.step.1700.csv
0.6397142857142857


In [24]:
# concat
fold_files = ["concat.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_4/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_300.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

concat.run.2.dev.predictions.step.1800.csv
0.7205714285714285


In [61]:
# concat
fold_files = ["concat.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_111.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

concat.run.1.dev.predictions.step.2600.csv
0.6211428571428571


In [58]:
# concat
fold_files = ["concat.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_943.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

concat.run.2.dev.predictions.step.1700.csv
0.5048571428571429


In [57]:
# concat
fold_files = ["concat.run.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.1.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.2.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]
fold_files += ["concat.run.3.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 27, 1)]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/val_data_12321.csv"

max_file = ""
max_num = 0.0
for file in fold_files:
    num = float(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    if num >= max_num:
        max_num = num
        max_file = file

print(max_file)
print(max_num)

concat.run.3.dev.predictions.step.2300.csv
0.5762857142857143


In [30]:
fold_files = ["concat.run.3.test.predictions.step.1700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_5/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_1300.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.3.test.predictions.step.1700.csv
0.5647619047619048



In [31]:
fold_files = ["concat.run.2.test.predictions.step.1800.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_4/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_300.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.2.test.predictions.step.1800.csv
0.49685714285714283



In [32]:
fold_files = ["concat.run.1.test.predictions.step.2600.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_3/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_111.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.1.test.predictions.step.2600.csv
0.5564761904761905



In [33]:
fold_files = ["concat.run.2.test.predictions.step.1700.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_2/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_943.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.2.test.predictions.step.1700.csv
0.5381904761904762



In [34]:
fold_files = ["concat.run.3.test.predictions.step.2300.csv"]

fold_0_path = "~/t5-small-exps/naacl-2022/fewrl/concat_run_1/"
fold_0_gold_file = "~/codes/dreamscape-qa/fewrl_data/test_data_12321.csv"

for file in fold_files:
    print("#")
    print(file)
    print(fewrl_eval_zero_re(fold_0_gold_file, fold_0_path + file))
    print("")

#
concat.run.3.test.predictions.step.2300.csv
0.5598095238095238

