In [3]:
def compute_macro_PRF(predicted_idx, gold_idx, i=-1, empty_label=None):
    '''
    This evaluation function follows work from Sorokin and Gurevych(https://www.aclweb.org/anthology/D17-1188.pdf)
    code borrowed from the following link:
    https://github.com/UKPLab/emnlp2017-relation-extraction/blob/master/relation_extraction/evaluation/metrics.py
    '''
    if i == -1:
        i = len(predicted_idx)

    complete_rel_set = set(gold_idx) - {empty_label}
    avg_prec = 0.0
    avg_rec = 0.0

    for r in complete_rel_set:
        r_indices = (predicted_idx[:i] == r)
        tp = len((predicted_idx[:i][r_indices] == gold_idx[:i][r_indices]).nonzero()[0])
        tp_fp = len(r_indices.nonzero()[0])
        tp_fn = len((gold_idx == r).nonzero()[0])
        prec = (tp / tp_fp) if tp_fp > 0 else 0
        rec = tp / tp_fn
        #print(id_to_labels[r], prec, rec, 2.0 * prec * rec / (prec + rec))
        avg_prec += prec
        avg_rec += rec
    f1 = 0
    avg_prec = avg_prec / len(set(predicted_idx[:i]))
    avg_rec = avg_rec / len(complete_rel_set)
    if (avg_rec+avg_prec) > 0:
        f1 = 2.0 * avg_prec * avg_rec / (avg_prec + avg_rec)

    return avg_prec, avg_rec, f1

In [3]:
#RelationPrompt Results.

results_no_added_data = {
    "12321": {
        "precision": 0.563572016705731,
        "recall": 0.3225714285714286,
        "score": 0.41029978047129695,

    },
    "111": {
        "precision": 0.3782919141331947,
        "recall": 0.2439047619047619,
        "score": 0.2965853171530759,
    },
    "943": {
        "precision": 0.5670379080898744,
        "recall": 0.424,
        "score": 0.48519652188380946,
    },
    "300": {
        "precision": 0.4160565573101449,
        "recall": 0.3947619047619048,
        "score": 0.4051295986347155,
    },
    "1300": {
        "precision": 0.46327796415447897,
        "recall": 0.4160952380952381,
        "score": 0.43842080769796876,
    }
}

results_with_added_data = {
    "12321": {
        "precision": 0.6335737530573299,
        "recall": 0.6580952380952382,
        "score": 0.6456017334551125,
    },
    "943": {
        "precision": 0.6568560306481417,
        "recall": 0.6818095238095239,
        "score": 0.6691002035217998,
    },
    "111": {
        "precision": 0.5521802938349817,
        "recall": 0.534952380952381,
        "score": 0.5434298310641532,
    },
    "300": {
        "precision": 0.42557058135412607,
        "recall": 0.3803809523809524,
        "score": 0.4017088776805001,
    },
    "1300": {
        "precision": 0.5140686017779429,
        "recall": 0.5191428571428571,
        "score": 0.5165932692484626,
    }
}

no_added_avg_f1 = 0.0
no_added_avg_p = 0.0
no_added_avg_r = 0.0
for seed, scores in results_no_added_data.items():
    no_added_avg_r += scores["recall"]
    no_added_avg_p += scores["precision"]
    no_added_avg_f1 += scores["score"]

no_added_avg_f1 = no_added_avg_f1 / len(results_no_added_data.keys())
no_added_avg_p = no_added_avg_p / len(results_no_added_data.keys())
no_added_avg_r = no_added_avg_r / len(results_no_added_data.keys())
print("RelationPrompt p: {}, r: {}, f1: {}".format(no_added_avg_p, no_added_avg_r, no_added_avg_f1))

with_added_avg_f1 = 0.0
with_added_avg_p = 0.0
with_added_avg_r = 0.0
for seed, scores in results_with_added_data.items():
    with_added_avg_r += scores["recall"]
    with_added_avg_p += scores["precision"]
    with_added_avg_f1 += scores["score"]

with_added_avg_f1 = with_added_avg_f1 / len(results_with_added_data.keys())
with_added_avg_p = with_added_avg_p / len(results_with_added_data.keys())
with_added_avg_r = with_added_avg_r / len(results_with_added_data.keys())
print("RelationPrompt with added data p: {}, r: {}, f1: {}".format(with_added_avg_p, with_added_avg_r, with_added_avg_f1))


RelationPrompt p: 0.4776472720786848, r: 0.3602666666666667, f1: 0.4071264051681733
RelationPrompt with added data p: 0.5564498521345045, r: 0.5548761904761905, f1: 0.5552867829940056


In [None]:
# RelationPrompt Results on the test data without sentences having multiple triplets.

results_no_added_data = {
    "12321": {
       "precision": 0.5288569302012683,
        "recall": 0.4371477910978992,
        "score": 0.4786490867940637,
    },
    "111": {
       "precision": 0.5514520480423039,
        "recall": 0.45100775119994635,
        "score": 0.4961977493165534,
    },
    "943": {
       "precision": 0.583743934277585,
        "recall": 0.48417201001017734,
        "score": 0.5293159550659702,
    },
    "300": {
        "precision": 0.5138102385350568,
        "recall": 0.4780500550525526,
        "score": 0.4952855042312534,
    },
    "1300": {
        "precision": 0.5446269710553917,
        "recall": 0.4770284827169461,
        "score": 0.5085913782185381,
    }
}

results_with_added_data = {
    "12321": {
        "precision": 0.6134127769859357,
        "recall": 0.5869961902111368,
        "score": 0.5999138176355603,   
    },
    "943": {
       "precision": 0.6887865911137259,
        "recall": 0.6922624338057392,
        "score": 0.6905201384360007,
    },
    "111": {
        "precision": 0.6750761729608312,
        "recall": 0.7013992255134261,
        "score": 0.6879860045477596,
    },
    "300": {
        "precision": 0.5140957915191029,
        "recall": 0.5013268888307134,
        "score": 0.5076310559351425,
    },
    "1300": {
        "precision": 0.5696408693992706,
        "recall": 0.6290561164980496,
        "score": 0.5978759892094577,
    }
}

no_added_avg_f1 = 0.0
no_added_avg_p = 0.0
no_added_avg_r = 0.0
for seed, scores in results_no_added_data.items():
    no_added_avg_r += scores["recall"]
    no_added_avg_p += scores["precision"]
    no_added_avg_f1 += scores["score"]

no_added_avg_f1 = no_added_avg_f1 / len(results_no_added_data.keys())
no_added_avg_p = no_added_avg_p / len(results_no_added_data.keys())
no_added_avg_r = no_added_avg_r / len(results_no_added_data.keys())
print("RelationPrompt p: {}, r: {}, f1: {}".format(no_added_avg_p, no_added_avg_r, no_added_avg_f1))

with_added_avg_f1 = 0.0
with_added_avg_p = 0.0
with_added_avg_r = 0.0
for seed, scores in results_with_added_data.items():
    with_added_avg_r += scores["recall"]
    with_added_avg_p += scores["precision"]
    with_added_avg_f1 += scores["score"]

with_added_avg_f1 = with_added_avg_f1 / len(results_with_added_data.keys())
with_added_avg_p = with_added_avg_p / len(results_with_added_data.keys())
with_added_avg_r = with_added_avg_r / len(results_with_added_data.keys())
print("RelationPrompt with added data p: {}, r: {}, f1: {}".format(with_added_avg_p, with_added_avg_r, with_added_avg_f1))


In [2]:
# RelationPrompt Results on the test data with negs data.

results_no_added_data = {
    "12321": {
       "precision": 0.459251382294596,
        "recall": 0.11276190476190477,
        "score": 0.18106593606788426,
    },
    "111": {
       "precision": 0.3305721717039897,
        "recall": 0.17371428571428568,
        "score": 0.22774797078061565,
    },
    "943": {
       "precision": 0.48672764408023866,
        "recall": 0.25685714285714284,
        "score": 0.3362615110052447,
    },
    "300": {
       "precision": 0.5159171136238571,                                                                                               
        "recall": 0.27723809523809523,                                                                                                 
        "score": 0.3606655451132733,         
    },
    "1300": {
        "precision": 0.5479954101203502,
        "recall": 0.22552380952380954,
        "score": 0.3195421893944839,
    }
}

results_with_added_data = {
    "12321": {
        "precision": 0.5501224678769095,
        "recall": 0.5651428571428572,
        "score": 0.5575315152363876, 
    },
    "943": {
       "precision": 0.543395939080254,
        "recall": 0.4959047619047619,
        "score": 0.518565288244702,
    },
    "111": {
       "precision": 0.4962629947119975,
        "recall": 0.523142857142857,
        "score": 0.5093485395939629,
    },
    "300": {
        "precision": 0.40499570471570406,
         "recall": 0.3205714285714285,
        "score": 0.35787192023932907,
    },
    "1300": {
        "precision": 0.49916096086203887,
        "recall": 0.5193333333333333,
        "score": 0.5090473793555202,
    }
}

no_added_avg_f1 = 0.0
no_added_avg_p = 0.0
no_added_avg_r = 0.0
for seed, scores in results_no_added_data.items():
    no_added_avg_r += scores["recall"]
    no_added_avg_p += scores["precision"]
    no_added_avg_f1 += scores["score"]

no_added_avg_f1 = no_added_avg_f1 / len(results_no_added_data.keys())
no_added_avg_p = no_added_avg_p / len(results_no_added_data.keys())
no_added_avg_r = no_added_avg_r / len(results_no_added_data.keys())
print("RelationPrompt p: {}, r: {}, f1: {}".format(no_added_avg_p, no_added_avg_r, no_added_avg_f1))

with_added_avg_f1 = 0.0
with_added_avg_p = 0.0
with_added_avg_r = 0.0
for seed, scores in results_with_added_data.items():
    with_added_avg_r += scores["recall"]
    with_added_avg_p += scores["precision"]
    with_added_avg_f1 += scores["score"]

with_added_avg_f1 = with_added_avg_f1 / len(results_with_added_data.keys())
with_added_avg_p = with_added_avg_p / len(results_with_added_data.keys())
with_added_avg_r = with_added_avg_r / len(results_with_added_data.keys())
print("RelationPrompt with added data p: {}, r: {}, f1: {}".format(with_added_avg_p, with_added_avg_r, with_added_avg_f1))


RelationPrompt p: 0.46809274436460635, r: 0.20921904761904758, f1: 0.2850566304723004
RelationPrompt with added data p: 0.49878761344938083, r: 0.48481904761904754, f1: 0.4904729285339803


In [3]:
# Dev prediction for the model on the fewrel dataset using the concat model without the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/val_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/val_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/val_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/val_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/val_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/val_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/val_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/val_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/val_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/val_ids_1300.csv",
}

seeds = [12321, 943, 111, 300, 1300]

for seed in seeds:
    predictions = ["~/sep-1/fewrel/concat_run_{}/relation.concat.run.{}.epoch.0.dev.predictions.step.{}.csv".format(seed, seed, step * 100) for step in range(1, 106, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

12321 ~/sep-1/fewrel/concat_run_12321/relation.concat.run.12321.epoch.0.dev.predictions.step.9900.csv 0.6225757946913466
943 ~/sep-1/fewrel/concat_run_943/relation.concat.run.943.epoch.0.dev.predictions.step.1300.csv 0.5099570516870796
111 ~/sep-1/fewrel/concat_run_111/relation.concat.run.111.epoch.0.dev.predictions.step.5000.csv 0.5992184520328142
300 ~/sep-1/fewrel/concat_run_300/relation.concat.run.300.epoch.0.dev.predictions.step.5500.csv 0.7290633687525613
1300 ~/sep-1/fewrel/concat_run_1300/relation.concat.run.1300.epoch.0.dev.predictions.step.1300.csv 0.6430064368535299


In [2]:
# Dev prediction for the model on the wikizsl dataset using the concat model without the negative examples.
from re import I
import pandas as pd
import numpy as np
import json

gold_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data/val_data_12321.csv.sampled.csv",
    943: "~/codes/QA-ZRE/wikizsl_data/val_data_943.csv.sampled.csv",
    111: "~/codes/QA-ZRE/wikizsl_data/val_data_111.csv.sampled.csv",
    300: "~/codes/QA-ZRE/wikizsl_data/val_data_300.csv.sampled.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data/val_data_1300.csv.sampled.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data/val_ids_12321.csv",
    943: "~/codes/QA-ZRE/wikizsl_data/val_ids_943.csv",
    111: "~/codes/QA-ZRE/wikizsl_data/val_ids_111.csv",
    300: "~/codes/QA-ZRE/wikizsl_data/val_ids_300.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data/val_ids_1300.csv",
}

seeds = [12321, 943, 111, 300, 1300]

for seed in seeds:
    predictions = ["~/sep-28/wikizsl/concat_run_{}/relation.concat.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 48, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    gold_entity_relations = df["entity_relations"].tolist()
    gold_relations = [row.split("<SEP>")[1].strip() for row in gold_entity_relations][:5]

    label_to_id = {}
    with open("./relation_descriptions.json", "r") as fd:
        re_desc_data = json.load(fd)
        for row in re_desc_data:
            re_label = row["relation_label"]
            re_id = row["relation_id"]
            label_to_id[re_label] = re_id

    ids = {label_to_id[rel_label]: i for i, rel_label in enumerate(gold_relations)}

    # ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}

    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

12321 ~/sep-28/wikizsl/concat_run_12321/relation.concat.run.epoch.0.dev.predictions.step.1700.csv 0.4742412814446165
943 ~/sep-28/wikizsl/concat_run_943/relation.concat.run.epoch.0.dev.predictions.step.4000.csv 0.6433634224082675
111 ~/sep-28/wikizsl/concat_run_111/relation.concat.run.epoch.0.dev.predictions.step.2000.csv 0.462240973169985
300 ~/sep-28/wikizsl/concat_run_300/relation.concat.run.epoch.0.dev.predictions.step.4700.csv 0.6777896183831733
1300 ~/sep-28/wikizsl/concat_run_1300/relation.concat.run.epoch.0.dev.predictions.step.4400.csv 0.48692508873389795


In [3]:
# Dev prediction for the model on the wikizsl dataset using the concat model with the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_12321.csv.sampled.csv",
    943: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_943.csv.sampled.csv",
    111: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_111.csv.sampled.csv",
    300: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_300.csv.sampled.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_1300.csv.sampled.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data_unks/val_ids_12321.csv",
    943: "~/codes/QA-ZRE/wikizsl_data_unks/val_ids_943.csv",
    111: "~/codes/QA-ZRE/wikizsl_data_unks/val_ids_111.csv",
    300: "~/codes/QA-ZRE/wikizsl_data_unks/val_ids_300.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data_unks/val_ids_1300.csv",
}

seeds = [12321, 943, 111, 300, 1300]

for seed in seeds:
    predictions = ["~/sep-28/wikizsl/concat_run_{}_with_unks/relation.concat.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 94, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')

    gold_entity_relations = df["entity_relations"].tolist()
    gold_relations = [row.split("<SEP>")[1].strip() for row in gold_entity_relations][:5]

    label_to_id = {}
    with open("./relation_descriptions.json", "r") as fd:
        re_desc_data = json.load(fd)
        for row in re_desc_data:
            re_label = row["relation_label"]
            re_id = row["relation_id"]
            label_to_id[re_label] = re_id

    ids = {label_to_id[rel_label]: i for i, rel_label in enumerate(gold_relations)}

    #ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}

    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

12321 ~/sep-28/wikizsl/concat_run_12321_with_unks/relation.concat.run.epoch.0.dev.predictions.step.8100.csv 0.7789526506917566
943 ~/sep-28/wikizsl/concat_run_943_with_unks/relation.concat.run.epoch.0.dev.predictions.step.3900.csv 0.7846069028486728
111 ~/sep-28/wikizsl/concat_run_111_with_unks/relation.concat.run.epoch.0.dev.predictions.step.7700.csv 0.5954701697186493
300 ~/sep-28/wikizsl/concat_run_300_with_unks/relation.concat.run.epoch.0.dev.predictions.step.8200.csv 0.8075851218740074
1300 ~/sep-28/wikizsl/concat_run_1300_with_unks/relation.concat.run.epoch.0.dev.predictions.step.4200.csv 0.710696564194337


In [6]:
# Dev prediction for the model on the fewrel dataset using the concat model with the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/val_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/val_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/val_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/val_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/val_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_1300.csv",
}

seeds = [12321, 943, 111, 300, 1300]

avg_f1_dev = 0.0
for seed in seeds:
    predictions = ["~/sep-1/fewrel/concat_run_{}_with_unks/relation.concat.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 210, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    avg_f1_dev += max_f1
    print(seed, max_file, max_f1)

print(avg_f1_dev/5.0)

12321 ~/sep-1/fewrel/concat_run_12321_with_unks/relation.concat.run.epoch.0.dev.predictions.step.8600.csv 0.8560795626010362
943 ~/sep-1/fewrel/concat_run_943_with_unks/relation.concat.run.epoch.0.dev.predictions.step.3400.csv 0.7936263560920236
111 ~/sep-1/fewrel/concat_run_111_with_unks/relation.concat.run.epoch.0.dev.predictions.step.4600.csv 0.8865212316090652
300 ~/sep-1/fewrel/concat_run_300_with_unks/relation.concat.run.epoch.0.dev.predictions.step.6100.csv 0.8899964022223363
1300 ~/sep-1/fewrel/concat_run_1300_with_unks/relation.concat.run.epoch.0.dev.predictions.step.13700.csv 0.8507511193598049
0.8553949343768531


In [7]:
# Test prediction for the model on the fewrel dataset using the concat model without the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/test_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/test_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/test_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/test_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/test_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/test_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/test_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/test_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/test_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/test_ids_1300.csv",
}

prediction_files = {
    12321: "~/sep-1/fewrel/concat_run_12321/relation.concat.run.12321.epoch.0.test.predictions.step.9900.csv",
    943: "~/sep-1/fewrel/concat_run_943/relation.concat.run.943.epoch.0.test.predictions.step.1300.csv",
    111: "~/sep-1/fewrel/concat_run_111/relation.concat.run.111.epoch.0.test.predictions.step.5000.csv",
    300: "~/sep-1/fewrel/concat_run_300/relation.concat.run.300.epoch.0.test.predictions.step.5500.csv",
    1300: "~/sep-1/fewrel/concat_run_1300/relation.concat.run.1300.epoch.0.test.predictions.step.1300.csv",
}
seeds = [12321, 943, 111, 300, 1300]

avg_f1 = 0.0
avg_p = 0.0
avg_r = 0.0
for seed in seeds:
    predictions = [prediction_files[seed]]
    max_f1 = 0.0
    max_file = None
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in predictions:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 15)), axis=1)
        prec, rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        avg_f1 += f1
        avg_p += prec
        avg_r += rec
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

print(avg_f1/5.0)
print(avg_p/5.0)
print(avg_r/5.0)

12321 ~/sep-1/fewrel/concat_run_12321/relation.concat.run.12321.epoch.0.test.predictions.step.9900.csv 0.34701733359825704
943 ~/sep-1/fewrel/concat_run_943/relation.concat.run.943.epoch.0.test.predictions.step.1300.csv 0.3671554049998069
111 ~/sep-1/fewrel/concat_run_111/relation.concat.run.111.epoch.0.test.predictions.step.5000.csv 0.256033773197929
300 ~/sep-1/fewrel/concat_run_300/relation.concat.run.300.epoch.0.test.predictions.step.5500.csv 0.3014579520878535
1300 ~/sep-1/fewrel/concat_run_1300/relation.concat.run.1300.epoch.0.test.predictions.step.1300.csv 0.3067879888834918
0.31569049055346765
0.3082683125936674
0.32371428571428573


In [2]:
# Test prediction for the model on the fewrel dataset using the concat model with the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/test_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/test_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/test_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/test_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/test_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_1300.csv",
}

prediction_files = {
    12321: "~/sep-1/fewrel/concat_run_12321_with_unks/relation.concat.run.12321.epoch.0.test.predictions.step.8600.csv",
    943: "~/sep-1/fewrel/concat_run_943_with_unks/relation.concat.run.943.epoch.0.test.predictions.step.3400.csv",
    111: "~/sep-1/fewrel/concat_run_111_with_unks/relation.concat.run.111.epoch.0.test.predictions.step.4600.csv",
    300: "~/sep-1/fewrel/concat_run_300_with_unks/relation.concat.run.300.epoch.0.test.predictions.step.6100.csv",
    1300: "~/sep-1/fewrel/concat_run_1300_with_unks/relation.concat.run.1300.epoch.0.test.predictions.step.13700.csv",
}
seeds = [12321, 943, 111, 300, 1300]

avg_f1 = 0.0
avg_p = 0.0
avg_r = 0.0
for seed in seeds:
    predictions = [prediction_files[seed]]
    max_f1 = 0.0
    max_file = None
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in predictions:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 15)), axis=1)
        prec, rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        avg_f1 += f1
        avg_p += prec
        avg_r += rec
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

print("f1: ", avg_f1/5.0)
print("p: ", avg_p/5.0)
print("r: ", avg_r/5.0)

12321 ~/sep-1/fewrel/concat_run_12321_with_unks/relation.concat.run.12321.epoch.0.test.predictions.step.8600.csv 0.6744073305090348
943 ~/sep-1/fewrel/concat_run_943_with_unks/relation.concat.run.943.epoch.0.test.predictions.step.3400.csv 0.649950240610028
111 ~/sep-1/fewrel/concat_run_111_with_unks/relation.concat.run.111.epoch.0.test.predictions.step.4600.csv 0.5539922978879085
300 ~/sep-1/fewrel/concat_run_300_with_unks/relation.concat.run.300.epoch.0.test.predictions.step.6100.csv 0.4930803340711556
1300 ~/sep-1/fewrel/concat_run_1300_with_unks/relation.concat.run.1300.epoch.0.test.predictions.step.13700.csv 0.6530094576640766
f1:  0.6048879321484406
p:  0.6280573304427337
r:  0.5836190476190477


In [5]:
# Test prediction for the model on the wikizsl dataset using the concat model without the negative examples.
from re import I
import pandas as pd
import numpy as np
import json

gold_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data/test_data_12321.csv",
    943: "~/codes/QA-ZRE/wikizsl_data/test_data_943.csv",
    111: "~/codes/QA-ZRE/wikizsl_data/test_data_111.csv",
    300: "~/codes/QA-ZRE/wikizsl_data/test_data_300.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data/test_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data/test_ids_12321.csv",
    943: "~/codes/QA-ZRE/wikizsl_data/test_ids_943.csv",
    111: "~/codes/QA-ZRE/wikizsl_data/test_ids_111.csv",
    300: "~/codes/QA-ZRE/wikizsl_data/test_ids_300.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data/test_ids_1300.csv",
}

prediction_files = {
    12321: "~/sep-28/wikizsl/concat_run_12321/relation.concat.run.12321.epoch.0.test.predictions.step.1700.csv",
    943: "~/sep-28/wikizsl/concat_run_943/relation.concat.run.943.epoch.0.test.predictions.step.4000.csv",
    111: "~/sep-28/wikizsl/concat_run_111/relation.concat.run.111.epoch.0.test.predictions.step.2000.csv",
    300: "~/sep-28/wikizsl/concat_run_300/relation.concat.run.300.epoch.0.test.predictions.step.4700.csv",
    1300: "~/sep-28/wikizsl/concat_run_1300/relation.concat.run.1300.epoch.0.test.predictions.step.4400.csv",
}
seeds = [12321, 943, 111, 300, 1300]

avg_f1 = 0.0
avg_p = 0.0
avg_r = 0.0
for seed in seeds:
    predictions = [prediction_files[seed]]
    max_f1 = 0.0
    max_file = None
    df = pd.read_csv(gold_files[seed], sep=',')

    gold_entity_relations = df["entity_relations"].tolist()
    gold_relations = [row.split("<SEP>")[1].strip() for row in gold_entity_relations][:15]

    label_to_id = {}
    with open("./relation_descriptions.json", "r") as fd:
        re_desc_data = json.load(fd)
        for row in re_desc_data:
            re_label = row["relation_label"]
            re_id = row["relation_id"]
            label_to_id[re_label] = re_id

    ids = {label_to_id[rel_label]: i for i, rel_label in enumerate(gold_relations)}


    #ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in predictions:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 15)), axis=1)
        prec, rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        avg_f1 += f1
        avg_p += prec
        avg_r += rec
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

print("f1: ", avg_f1/5.0)
print("p: ", avg_p/5.0)
print("r: ", avg_r/5.0)

12321 ~/sep-28/wikizsl/concat_run_12321/relation.concat.run.12321.epoch.0.test.predictions.step.1700.csv 0.287491358493333
943 ~/sep-28/wikizsl/concat_run_943/relation.concat.run.943.epoch.0.test.predictions.step.4000.csv 0.33523016814229356
111 ~/sep-28/wikizsl/concat_run_111/relation.concat.run.111.epoch.0.test.predictions.step.2000.csv 0.25736755828754043
300 ~/sep-28/wikizsl/concat_run_300/relation.concat.run.300.epoch.0.test.predictions.step.4700.csv 0.46728152259303596
1300 ~/sep-28/wikizsl/concat_run_1300/relation.concat.run.1300.epoch.0.test.predictions.step.4400.csv 0.34425972150396533
f1:  0.3383260658040336
p:  0.3310861160390882
r:  0.3464293234111697


In [3]:
# Dev prediction for the model on the fewrel dataset using the offmml-g model without the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/val_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/val_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/val_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/val_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/val_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/val_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/val_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/val_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/val_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/val_ids_1300.csv",
}

seeds = [12321, 943, 111, 300, 1300]

for seed in seeds:
    predictions = ["~/sep-1/fewrel/run_{}/relation.offmml-pgg.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 106, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
        pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 5, 8)), axis=2))
        pred_ids = np.argmax(pred_log_ps, axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

12321 ~/sep-1/fewrel/run_12321/relation.offmml-pgg.run.epoch.0.dev.predictions.step.1800.csv 0.6498331456500441
943 ~/sep-1/fewrel/run_943/relation.offmml-pgg.run.epoch.0.dev.predictions.step.1900.csv 0.5659540894916784
111 ~/sep-1/fewrel/run_111/relation.offmml-pgg.run.epoch.0.dev.predictions.step.8800.csv 0.6217527243186586
300 ~/sep-1/fewrel/run_300/relation.offmml-pgg.run.epoch.0.dev.predictions.step.1900.csv 0.7017740927559872
1300 ~/sep-1/fewrel/run_1300/relation.offmml-pgg.run.epoch.0.dev.predictions.step.7900.csv 0.6402843822685743


In [6]:
# Dev prediction for the model on the wikizsl dataset using the offmml-g model without the negative examples.
from re import I
import pandas as pd
import numpy as np
import json

gold_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data/val_data_12321.csv.sampled.csv",
    943: "~/codes/QA-ZRE/wikizsl_data/val_data_943.csv.sampled.csv",
    111: "~/codes/QA-ZRE/wikizsl_data/val_data_111.csv.sampled.csv",
    300: "~/codes/QA-ZRE/wikizsl_data/val_data_300.csv.sampled.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data/val_data_1300.csv.sampled.csv",
}

seeds = [12321, 943, 111, 300, 1300]

for seed in seeds:
    predictions = ["~/sep-28/wikizsl/run_{}/relation.offmml-pgg.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 46, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    gold_entity_relations = df["entity_relations"].tolist()
    gold_relations = [row.split("<SEP>")[1].strip() for row in gold_entity_relations][:5]

    label_to_id = {}
    with open("./relation_descriptions.json", "r") as fd:
        re_desc_data = json.load(fd)
        for row in re_desc_data:
            re_label = row["relation_label"]
            re_id = row["relation_id"]
            label_to_id[re_label] = re_id

    ids = {label_to_id[rel_label]: i for i, rel_label in enumerate(gold_relations)}

    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
        pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 5, 8)), axis=2))
        pred_ids = np.argmax(pred_log_ps, axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

12321 ~/sep-28/wikizsl/run_12321/relation.offmml-pgg.run.epoch.0.dev.predictions.step.800.csv 0.4203692048623931
943 ~/sep-28/wikizsl/run_943/relation.offmml-pgg.run.epoch.0.dev.predictions.step.3000.csv 0.6375243450837228
111 ~/sep-28/wikizsl/run_111/relation.offmml-pgg.run.epoch.0.dev.predictions.step.500.csv 0.5169302461492905
300 ~/sep-28/wikizsl/run_300/relation.offmml-pgg.run.epoch.0.dev.predictions.step.1500.csv 0.6790189264600169
1300 ~/sep-28/wikizsl/run_1300/relation.offmml-pgg.run.epoch.0.dev.predictions.step.2400.csv 0.4979349584311499


In [8]:
# Dev prediction for the model on the wikizsl dataset using the offmml-g model with the negative examples.
from re import I
import pandas as pd
import numpy as np
import json

gold_files = {
    12321: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_12321.csv.sampled.csv",
    943: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_943.csv.sampled.csv",
    111: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_111.csv.sampled.csv",
    300: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_300.csv.sampled.csv",
    1300: "~/codes/QA-ZRE/wikizsl_data_unks/val_data_1300.csv.sampled.csv",
}

seeds = [12321, 943, 111, 300, 1300]

for seed in seeds:
    predictions = ["~/sep-28/wikizsl/run_{}_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 94, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    gold_entity_relations = df["entity_relations"].tolist()
    gold_relations = [row.split("<SEP>")[1].strip() for row in gold_entity_relations][:5]

    label_to_id = {}
    with open("./relation_descriptions.json", "r") as fd:
        re_desc_data = json.load(fd)
        for row in re_desc_data:
            re_label = row["relation_label"]
            re_id = row["relation_id"]
            label_to_id[re_label] = re_id

    ids = {label_to_id[rel_label]: i for i, rel_label in enumerate(gold_relations)}

    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
        pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 5, 8)), axis=2))
        pred_ids = np.argmax(pred_log_ps, axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

12321 ~/sep-28/wikizsl/run_12321_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.8900.csv 0.8487020087551942
943 ~/sep-28/wikizsl/run_943_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.3700.csv 0.7728171239157858
111 ~/sep-28/wikizsl/run_111_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.6900.csv 0.7598997604137664
300 ~/sep-28/wikizsl/run_300_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.600.csv 0.7626723708239196
1300 ~/sep-28/wikizsl/run_1300_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.4800.csv 0.7242507938718629


In [5]:
# Test prediction for the model on the fewrel dataset using the offmml-g model without the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/test_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/test_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/test_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/test_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/test_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data/test_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data/test_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data/test_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data/test_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data/test_ids_1300.csv",
}

prediction_files = {
    12321: "~/sep-1/fewrel/run_12321/relation.offmml-pgg.run.epoch.0.test.predictions.step.1800.csv",
    943: "~/sep-1/fewrel/run_943/relation.offmml-pgg.run.epoch.0.test.predictions.step.1900.csv",
    111: "~/sep-1/fewrel/run_111/relation.offmml-pgg.run.epoch.0.test.predictions.step.8800.csv",
    300: "~/sep-1/fewrel/run_300/relation.offmml-pgg.run.epoch.0.test.predictions.step.1900.csv",
    1300: "~/sep-1/fewrel/run_1300/relation.offmml-pgg.run.epoch.0.test.predictions.step.7900.csv",
}
seeds = [12321, 943, 111, 300, 1300]

avg_f1 = 0.0
avg_p = 0.0
avg_r = 0.0
for seed in seeds:
    predictions = [prediction_files[seed]]
    max_f1 = 0.0
    max_file = None
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in predictions:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
        pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 15, 8)), axis=2))
        pred_ids = np.argmax(pred_log_ps, axis=1)
        prec, rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        avg_f1 += f1
        avg_p += prec
        avg_r += rec
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

print("f:", avg_f1/5.0)
print("p:", avg_p/5.0)
print("r:", avg_r/5.0)

12321 ~/sep-1/fewrel/run_12321/relation.offmml-pgg.run.epoch.0.test.predictions.step.1800.csv 0.32358858387038925
943 ~/sep-1/fewrel/run_943/relation.offmml-pgg.run.epoch.0.test.predictions.step.1900.csv 0.3172183349253462
111 ~/sep-1/fewrel/run_111/relation.offmml-pgg.run.epoch.0.test.predictions.step.8800.csv 0.284561011851722
300 ~/sep-1/fewrel/run_300/relation.offmml-pgg.run.epoch.0.test.predictions.step.1900.csv 0.2525921583604101
1300 ~/sep-1/fewrel/run_1300/relation.offmml-pgg.run.epoch.0.test.predictions.step.7900.csv 0.3175353309188262
f: 0.2990990839853388
p: 0.2939819684227859
r: 0.3044952380952381


In [5]:
# Dev prediction for the model on the fewrel dataset using the offmml-g model with the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/val_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/val_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/val_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/val_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/val_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/val_ids_1300.csv",
}

seeds = [12321, 943, 111, 300, 1300]

avg_f1_dev = 0.0
for seed in seeds:
    predictions = ["~/sep-1/fewrel/run_{}_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.{}.csv".format(seed, step * 100) for step in range(1, 202, 1)]
    max_f1 = 0.0
    max_file = None
    prediction_files = predictions
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
        pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 5, 8)), axis=2))
        pred_ids = np.argmax(pred_log_ps, axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    avg_f1_dev += max_f1
    print(seed, max_file, max_f1)

print(avg_f1_dev / 5.0)

12321 ~/sep-1/fewrel/run_12321_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.7700.csv 0.8984147957805454
943 ~/sep-1/fewrel/run_943_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.5900.csv 0.8066627814635562
111 ~/sep-1/fewrel/run_111_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.11000.csv 0.9040280878278296
300 ~/sep-1/fewrel/run_300_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.10700.csv 0.912398403495852
1300 ~/sep-1/fewrel/run_1300_with_unks/relation.offmml-pgg.run.epoch.0.dev.predictions.step.15200.csv 0.8778187449916541
0.8798645627118875


In [3]:
# Test prediction for the model on the fewrel dataset using the offmml-g model with the negative examples.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/test_data_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/test_data_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/test_data_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/test_data_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/test_data_1300.csv",
}
id_files = {
    12321: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_12321.csv",
    943: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_943.csv",
    111: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_111.csv",
    300: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_300.csv",
    1300: "~/codes/QA-ZRE/fewrl_data_unks/test_ids_1300.csv",
}

prediction_files = {
    12321: "~/sep-1/fewrel/run_12321_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.7700.csv",
    943: "~/sep-1/fewrel/run_943_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.5900.csv",
    111: "~/sep-1/fewrel/run_111_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.11000.csv",
    300: "~/sep-1/fewrel/run_300_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.10700.csv",
    1300: "~/sep-1/fewrel/run_1300_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.15200.csv",
}
seeds = [12321, 943, 111, 300, 1300]

avg_f1 = 0.0
avg_p = 0.0
avg_r = 0.0
for seed in seeds:
    predictions = [prediction_files[seed]]
    max_f1 = 0.0
    max_file = None
    df = pd.read_csv(gold_files[seed], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[seed], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in predictions:
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
        pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 15, 8)), axis=2))
        pred_ids = np.argmax(pred_log_ps, axis=1)
        prec, rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        avg_f1 += f1
        avg_p += prec
        avg_r += rec
        if max_f1 <= f1:
            max_f1 = f1
            max_file = prediction_file

    print(seed, max_file, max_f1)

print("f:", avg_f1/5.0)
print("p:", avg_p/5.0)
print("r:", avg_r/5.0)

12321 ~/sep-1/fewrel/run_12321_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.7700.csv 0.6539281780314811
943 ~/sep-1/fewrel/run_943_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.5900.csv 0.6459036663934058
111 ~/sep-1/fewrel/run_111_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.11000.csv 0.6397429356638897
300 ~/sep-1/fewrel/run_300_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.10700.csv 0.5094849804267173
1300 ~/sep-1/fewrel/run_1300_with_unks/relation.offmml-pgg.run.epoch.0.test.predictions.step.15200.csv 0.6165207370401674
f: 0.6131160995111322
p: 0.637325315840368
r: 0.5915238095238096


In [None]:
# Eval of the RE-QA for relation extraction using the concat model.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.concat.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        prediction_file = "~/reqa-predictions/fold_{}/concat/relation.concat.dev.predictions.fold.{}.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 12)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if f1 >= max_f1:
            max_f1 = f1
            max_file = prediction_file

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

In [None]:
# Test prediction for the RelationPrompt on the RE-QA dataset.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    1: "~/codes/RelationPrompt/train_reqa_models/fold_1/extractor/pred_in_single.jsonl",
}

prediction_arrs = {
    1: ["~/codes/RelationPrompt/train_reqa_models/fold_1/extractor/pred_out_single.jsonl"]
}

for fold_id in range(1, 2, 1):
    prediction_files = prediction_arrs[fold_id]
    df = pd.read_csv(gold_files[fold_id], sep=',')
    answers = [ans.replace("</s>", "").strip() for ans in df["answers"].tolist()]
    all_classes = set(answers)
    ids = {val:i for i, val in enumerate(list(all_classes))}
    actual_ids = [ids[ans] for ans in answers]
    gold_indices = np.array(actual_ids)
    for prediction_file in prediction_files:
        prediction_ids = []
        for pred_class in pd.read_csv(prediction_file, sep=',')["predictions_str"].tolist():
            if pred_class.strip() in ids:
                prediction_ids.append(ids[pred_class.strip()])
            else:
                prediction_ids.append(-1)
        pred_ids = np.array(prediction_ids)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if f1 > max_f1:
            max_f1 = f1
            max_file = prediction_file
        print(prediction_file, avg_prec, avg_rec, f1)

print(max_f1, max_file)

In [6]:
# Eval of the RE-QA for relation extraction using the concat model.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.concat.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        prediction_file = "~/reqa-predictions/fold_{}/concat/relation.concat.dev.predictions.fold.{}.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 12)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if f1 >= max_f1:
            max_f1 = f1
            max_file = prediction_file

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

1
0.6645079529607614
~/reqa-predictions/fold_1/concat/relation.concat.dev.predictions.fold.1.step.3600.csv


2
0.7355692801363015
~/reqa-predictions/fold_2/concat/relation.concat.dev.predictions.fold.2.step.4300.csv


3
0.816466070295921
~/reqa-predictions/fold_3/concat/relation.concat.dev.predictions.fold.3.step.5200.csv


4
0.820538067780762
~/reqa-predictions/fold_4/concat/relation.concat.dev.predictions.fold.4.step.1600.csv


5
0.7970665456384882
~/reqa-predictions/fold_5/concat/relation.concat.dev.predictions.fold.5.step.2900.csv


6
0.9100498471715361
~/reqa-predictions/fold_6/concat/relation.concat.dev.predictions.fold.6.step.1400.csv


7
0.7789862082105365
~/reqa-predictions/fold_7/concat/relation.concat.dev.predictions.fold.7.step.2500.csv


8
0.7585498509710629
~/reqa-predictions/fold_8/concat/relation.concat.dev.predictions.fold.8.step.400.csv


9
0.7690369496615103
~/reqa-predictions/fold_9/concat/relation.concat.dev.predictions.fold.9.step.2600.csv


10
0.7394406760740089


In [2]:
# Eval of the RE-QA using the Gold Templates on the dev data over all the folds.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        try:
            prediction_file = "~/reqa-predictions/fold_{}/gold/relation.gold.dev.predictions.fold.{}.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 12)), axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if f1 >= max_f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(checkpoint_i, fold_i)

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

157 1
159 1
162 1
163 1
164 1
165 1
167 1
168 1
169 1
171 1
1
0.7958029874558785
~/reqa-predictions/fold_1/gold/relation.gold.dev.predictions.fold.1.step.600.csv


2
0.8055480874453981
~/reqa-predictions/fold_2/gold/relation.gold.dev.predictions.fold.2.step.1900.csv


3
0.8088784230714176
~/reqa-predictions/fold_3/gold/relation.gold.dev.predictions.fold.3.step.200.csv


4
0.7950547270773886
~/reqa-predictions/fold_4/gold/relation.gold.dev.predictions.fold.4.step.9500.csv


5
0.8218222460881007
~/reqa-predictions/fold_5/gold/relation.gold.dev.predictions.fold.5.step.15300.csv


6
0.9343882793208368
~/reqa-predictions/fold_6/gold/relation.gold.dev.predictions.fold.6.step.1100.csv


7
0.7977715930389874
~/reqa-predictions/fold_7/gold/relation.gold.dev.predictions.fold.7.step.2600.csv


8
0.8869445616734918
~/reqa-predictions/fold_8/gold/relation.gold.dev.predictions.fold.8.step.1000.csv


9
0.8353253359450881
~/reqa-predictions/fold_9/gold/relation.gold.dev.predictions.fold.9.step.1900.cs

In [3]:
# MML-OFF-PGG performance for Relation Extraction on all the dev folds.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.qq.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        try:
            prediction_file = "~/reqa-predictions/fold_{}/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_{}.dev.predictions.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
            pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 12, 8)), axis=2))
            pred_ids = np.argmax(pred_log_ps, axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if f1 >= max_f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(checkpoint_i)

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

1
0.7021862131500355
~/reqa-predictions/fold_1/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_1.dev.predictions.step.4700.csv


2
0.7318462713898469
~/reqa-predictions/fold_2/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_2.dev.predictions.step.400.csv


3
0.7766533200558716
~/reqa-predictions/fold_3/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_3.dev.predictions.step.3600.csv


4
0.8437707696480834
~/reqa-predictions/fold_4/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_4.dev.predictions.step.800.csv


5
0.8300206299665337
~/reqa-predictions/fold_5/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_5.dev.predictions.step.7900.csv


6
0.8906375171815566
~/reqa-predictions/fold_6/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_6.dev.predictions.step.700.csv


197
198
199
7
0.7827607798234402
~/reqa-predictions/fold_7/mml-pgg-off-sim/relation.mml-pgg-off-sim.run.fold_7.dev.predictions.step.2100.csv


198
199
8
0.795102231532206
~/reqa-predictions/fold_8/mml-pgg-off-sim/rela

In [11]:
# Test set performance over the 10 folds of the RE-QA dataset for the concat and gold models.
import pandas as pd
import numpy as np
import json

gold_files = {
    1: "relation.gold.test.predictions.fold.1.step.600.csv",
    2: "relation.gold.test.predictions.fold.2.step.1900.csv",
    3: "relation.gold.test.predictions.fold.3.step.200.csv",
    4: "relation.gold.test.predictions.fold.4.step.9500.csv",
    5: "relation.gold.test.predictions.fold.5.step.15300.csv",
    6: "relation.gold.test.predictions.fold.6.step.1100.csv",
    7: "relation.gold.test.predictions.fold.7.step.2600.csv",
    8: "relation.gold.test.predictions.fold.8.step.1000.csv",
    9: "relation.gold.test.predictions.fold.9.step.1900.csv",
    10: "relation.gold.test.predictions.fold.10.step.4000.csv"
}

concat_files = {
    1: "relation.concat.test.predictions.fold.1.step.3600.csv",
    2: "relation.concat.test.predictions.fold.2.step.4300.csv",
    3: "relation.concat.test.predictions.fold.3.step.5200.csv",
    4: "relation.concat.test.predictions.fold.4.step.1600.csv",
    5: "relation.concat.test.predictions.fold.5.step.2900.csv",
    6: "relation.concat.test.predictions.fold.6.step.1400.csv",
    7: "relation.concat.test.predictions.fold.7.step.2500.csv",
    8: "relation.concat.test.predictions.fold.8.step.400.csv",
    9: "relation.concat.test.predictions.fold.9.step.2600.csv",
    10: "relation.concat.test.predictions.fold.10.step.800.csv"
}

gold_alone_p_r_f = {'f': [], 'r': [], 'p': []}
concat_alone_p_r_f = {'f': [], 'r': [], 'p': []}

for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/test.{}.relation_data.csv".format(str(fold_i-1))
    fewrel_file = "./zero-shot-extraction/relation_splits/test.{}.fewrel_format.json".format(str(fold_i-1))

    example_indices_to_consider = set()
    with open(fewrel_file, 'r') as fin:
        short_examples = json.load(fin)
        for key, val in short_examples.items():
            for row in val:
                example_indices_to_consider.add(row["example_index"])


    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 24))

    gold_indices_to_consider = []
    for i, index in enumerate(gold_indices):
        if i in example_indices_to_consider:
            gold_indices_to_consider.append(index)

    num_examples = len(correct_indices) // 24
    gold_indices_to_consider = np.array(gold_indices_to_consider)
    
    concat_prediction_file = "~/may-20/fold_{}/concat/{}".format(fold_i, concat_files[fold_i])
    concat_pred_log_ps = pd.read_csv(concat_prediction_file, sep=',')["relation_log_p"].tolist()
    concat_pred_log_ps = np.reshape(np.array(concat_pred_log_ps), (num_examples, 24))
    concat_pred_ids = np.argmax(concat_pred_log_ps, axis=1)
    concat_pred_ids_to_consider = []
    for i, index in enumerate(concat_pred_ids):
        if i in example_indices_to_consider:
            concat_pred_ids_to_consider.append(index)
    
    concat_pred_ids_to_consider = np.array(concat_pred_ids_to_consider)
    avg_prec, avg_rec, f1 = compute_macro_PRF(concat_pred_ids_to_consider, gold_indices_to_consider)
    concat_alone_p_r_f["f"].append(f1)
    concat_alone_p_r_f["r"].append(avg_rec)
    concat_alone_p_r_f["p"].append(avg_prec)
    print(fold_i, "concat alone", f1, avg_prec, avg_rec)
    
    gold_prediction_file = "~/may-20/fold_{}/gold/{}".format(fold_i, gold_files[fold_i])
    gold_pred_log_ps = pd.read_csv(gold_prediction_file, sep=',')["relation_log_p"].tolist()
    gold_pred_log_ps = np.reshape(np.array(gold_pred_log_ps), (num_examples, 24))
    gold_pred_ids = np.argmax(gold_pred_log_ps, axis=1)

    gold_pred_ids_to_consider = []
    for i, index in enumerate(gold_pred_ids):
        if i in example_indices_to_consider:
            gold_pred_ids_to_consider.append(index)
    
    gold_pred_ids_to_consider = np.array(gold_pred_ids_to_consider)
    avg_prec, avg_rec, f1 = compute_macro_PRF(gold_pred_ids_to_consider, gold_indices_to_consider)
    print(fold_i, "gold alone", f1, avg_prec, avg_rec)
    gold_alone_p_r_f["f"].append(f1)
    gold_alone_p_r_f["r"].append(avg_rec)
    gold_alone_p_r_f["p"].append(avg_prec)
    print("\n")

print("gold alone p:", np.mean(np.array(gold_alone_p_r_f["p"])))
print("gold alone r:", np.mean(np.array(gold_alone_p_r_f["r"])))
print("gold alone f:", np.mean(np.array(gold_alone_p_r_f["f"])))

print("concat alone p:", np.mean(np.array(concat_alone_p_r_f["p"])))
print("concat alone r:", np.mean(np.array(concat_alone_p_r_f["r"])))
print("concat alone f:", np.mean(np.array(concat_alone_p_r_f["f"])))

1 concat alone 0.6868200485458184 0.6926748847477643 0.6810633587961235
1 gold alone 0.7404285767936459 0.7531358825049224 0.728142964768295


2 concat alone 0.5913023110477719 0.5975156340730365 0.585216877806697
2 gold alone 0.6586987557834644 0.6757540315314027 0.6424831986356715


3 concat alone 0.6324106523285894 0.6578301488771372 0.60888256050775
3 gold alone 0.676931710052006 0.6806536959107133 0.6732502083282599


4 concat alone 0.5794697498339418 0.6063706954690022 0.5548542716952589
4 gold alone 0.6728400404740734 0.6895175863911674 0.656950210374016


5 concat alone 0.6191377271586156 0.6268067958412001 0.6116540543838874
5 gold alone 0.49349253129168735 0.4986497121305824 0.48844093262952004


6 concat alone 0.5782347137534999 0.6001076131310151 0.5579002025330327
6 gold alone 0.6607776835391415 0.6691998506324075 0.6525648747509227


7 concat alone 0.6735910798575685 0.6934384410876097 0.6548482345074617
7 gold alone 0.6474800154241827 0.6618318212248525 0.633737435642919

In [4]:
import pandas as pd
import numpy as np

mml_files = {
    1: "relation.mml-pgg-off-sim.run.fold_1.test.predictions.step.4700.csv",
    2: "relation.mml-pgg-off-sim.run.fold_2.test.predictions.step.400.csv",
    3: "relation.mml-pgg-off-sim.run.fold_3.test.predictions.step.3600.csv",
    4: "relation.mml-pgg-off-sim.run.fold_4.test.predictions.step.800.csv",
    5: "relation.mml-pgg-off-sim.run.fold_5.test.predictions.step.7900.csv",
    6: "relation.mml-pgg-off-sim.run.fold_6.test.predictions.step.700.csv",
    7: "relation.mml-pgg-off-sim.run.fold_7.test.predictions.step.2100.csv",
    8: "relation.mml-pgg-off-sim.run.fold_8.test.predictions.step.6800.csv",
    9: "relation.mml-pgg-off-sim.run.fold_9.test.predictions.step.4300.csv",
    10: "relation.mml-pgg-off-sim.run.fold_10.test.predictions.step.1600.csv"
}

mml_pgg_p_r_f = {'f': [], 'r': [], 'p': []}

for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/test.{}.qq.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 24))

    num_examples = len(correct_indices) // 24
    gold_indices = np.array(gold_indices)
    
    mml_prediction_file = "~/may-20/fold_{}/{}".format(fold_i, mml_files[fold_i])
    mml_pred_log_ps = pd.read_csv(mml_prediction_file, sep=',')["answer_log_p"].tolist()

    pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(mml_pred_log_ps)), (num_examples, 24, 8)), axis=2))
    pred_ids = np.argmax(pred_log_ps, axis=1)
    avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
    mml_pgg_p_r_f["f"].append(f1)
    mml_pgg_p_r_f["r"].append(avg_rec)
    mml_pgg_p_r_f["p"].append(avg_prec)
    print(fold_i, "mml pgg off", f1, avg_prec, avg_rec)
    print("\n")

print("mml pgg off p:", np.mean(np.array(mml_pgg_p_r_f["p"])))
print("mml pgg off r:", np.mean(np.array(mml_pgg_p_r_f["r"])))
print("mml pgg off f:", np.mean(np.array(mml_pgg_p_r_f["f"])))

1 mml 0.7324313594407584 0.7347101443139104 0.7301666666666667




ValueError: cannot reshape array of size 692139 into shape (6000,24,8)