In [1]:
def compute_macro_PRF(predicted_idx, gold_idx, i=-1, empty_label=None):
    '''
    This evaluation function follows work from Sorokin and Gurevych(https://www.aclweb.org/anthology/D17-1188.pdf)
    code borrowed from the following link:
    https://github.com/UKPLab/emnlp2017-relation-extraction/blob/master/relation_extraction/evaluation/metrics.py
    '''
    if i == -1:
        i = len(predicted_idx)

    complete_rel_set = set(gold_idx) - {empty_label}
    avg_prec = 0.0
    avg_rec = 0.0

    for r in complete_rel_set:
        r_indices = (predicted_idx[:i] == r)
        tp = len((predicted_idx[:i][r_indices] == gold_idx[:i][r_indices]).nonzero()[0])
        tp_fp = len(r_indices.nonzero()[0])
        tp_fn = len((gold_idx == r).nonzero()[0])
        prec = (tp / tp_fp) if tp_fp > 0 else 0
        rec = tp / tp_fn
        avg_prec += prec
        avg_rec += rec
    f1 = 0
    avg_prec = avg_prec / len(set(predicted_idx[:i]))
    avg_rec = avg_rec / len(complete_rel_set)
    if (avg_rec+avg_prec) > 0:
        f1 = 2.0 * avg_prec * avg_rec / (avg_prec + avg_rec)

    return avg_prec, avg_rec, f1

In [12]:
# Test prediction for the prompt model on the RE-QA dataset.
from re import I
import pandas as pd
import numpy as np

gold_files = {
    1: "~/codes/QA-ZRE/zero-shot-extraction/relation_splits/dev.0.prompt_data.csv",
}

prediction_arrs = {
    1: ["~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.{}.csv".format(step * 100) for step in range(1, 100, 1)]
}

max_f1 = 0.0
max_file = None
for fold_id in range(1, 2, 1):
    prediction_files = prediction_arrs[fold_id]
    df = pd.read_csv(gold_files[fold_id], sep=',')
    answers = [ans.replace("</s>", "").strip() for ans in df["answers"].tolist()]
    all_classes = set(answers)
    ids = {val:i for i, val in enumerate(list(all_classes))}
    actual_ids = [ids[ans] for ans in answers]
    gold_indices = np.array(actual_ids)
    for prediction_file in prediction_files:
        prediction_ids = []
        for pred_class in pd.read_csv(prediction_file, sep=',')["predictions_str"].tolist():
            if pred_class.strip() in ids:
                prediction_ids.append(ids[pred_class.strip()])
            else:
                prediction_ids.append(-1)
        pred_ids = np.array(prediction_ids)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if f1 > max_f1:
            max_f1 = f1
            max_file = prediction_file
        print(prediction_file, avg_prec, avg_rec, f1)

print(max_f1, max_file)

~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.100.csv 0.522874895000289 0.4833333333333334 0.5023271700636752
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.200.csv 0.5541007180264474 0.48333333333333334 0.5163033673226487
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.300.csv 0.5348535919442112 0.4566666666666667 0.49267739083828904
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.400.csv 0.5294456032134146 0.47333333333333333 0.49981953759556286
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.500.csv 0.6019570158523647 0.5166666666666667 0.5560603261262776
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.600.csv 0.5664639894419307 0.48666666666666675 0.5235421453726604
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.700.csv 0.6018064633260711 0.4833333333333334 0.5361025829784979
~/codes/prompt_models/fold_1/dev.0.prompt_data.predicted.step.800.csv 0.5454121021649936 0.5 0.521719713245

In [3]:
# Eval of the RE-QA using the Concat Templates on the dev data over all the folds.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.concat.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        prediction_file = "~/may-20/fold_{}/concat/relation.concat.dev.predictions.fold.{}.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
        pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
        pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 12)), axis=1)
        avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
        if f1 >= max_f1:
            max_f1 = f1
            max_file = prediction_file

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

1
0.6645079529607614
~/may-20/fold_1/concat/relation.concat.dev.predictions.fold.1.step.3600.csv


2
0.7355692801363015
~/may-20/fold_2/concat/relation.concat.dev.predictions.fold.2.step.4300.csv


3
0.816466070295921
~/may-20/fold_3/concat/relation.concat.dev.predictions.fold.3.step.5200.csv


4
0.820538067780762
~/may-20/fold_4/concat/relation.concat.dev.predictions.fold.4.step.1600.csv


5
0.7970665456384882
~/may-20/fold_5/concat/relation.concat.dev.predictions.fold.5.step.2900.csv


6
0.9100498471715361
~/may-20/fold_6/concat/relation.concat.dev.predictions.fold.6.step.1400.csv


7
0.7789862082105365
~/may-20/fold_7/concat/relation.concat.dev.predictions.fold.7.step.2500.csv


8
0.7585498509710629
~/may-20/fold_8/concat/relation.concat.dev.predictions.fold.8.step.400.csv


9
0.7690369496615103
~/may-20/fold_9/concat/relation.concat.dev.predictions.fold.9.step.2600.csv


10
0.7394406760740089
~/may-20/fold_10/concat/relation.concat.dev.predictions.fold.10.step.800.csv


0.779021144

In [3]:
# Eval of the RE-QA using the Gold Templates on the dev data over all the folds.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        try:
            prediction_file = "~/may-20/fold_{}/gold/relation.gold.dev.predictions.fold.{}.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 12)), axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if f1 >= max_f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(checkpoint_i, fold_i)

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

157 1
159 1
162 1
163 1
164 1
165 1
167 1
168 1
169 1
171 1
1
0.7958029874558785
~/may-20/fold_1/gold/relation.gold.dev.predictions.fold.1.step.600.csv


2
0.8055480874453981
~/may-20/fold_2/gold/relation.gold.dev.predictions.fold.2.step.1900.csv


3
0.8088784230714176
~/may-20/fold_3/gold/relation.gold.dev.predictions.fold.3.step.200.csv


4
0.7950547270773886
~/may-20/fold_4/gold/relation.gold.dev.predictions.fold.4.step.9500.csv


5
0.8218222460881007
~/may-20/fold_5/gold/relation.gold.dev.predictions.fold.5.step.15300.csv


6
0.9343882793208368
~/may-20/fold_6/gold/relation.gold.dev.predictions.fold.6.step.1100.csv


7
0.7977715930389874
~/may-20/fold_7/gold/relation.gold.dev.predictions.fold.7.step.2600.csv


8
0.8869445616734918
~/may-20/fold_8/gold/relation.gold.dev.predictions.fold.8.step.1000.csv


9
0.8353253359450881
~/may-20/fold_9/gold/relation.gold.dev.predictions.fold.9.step.1900.csv


10
0.8742971188094225
~/may-20/fold_10/gold/relation.gold.dev.predictions.fold.10.step

In [3]:
# MML-OFF-PGG performance for Relation Extraction on all the dev folds.
import pandas as pd
import numpy as np

mean_f1 = 0.0
for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/dev.{}.qq.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 12))

    num_examples = len(correct_indices) // 12
    gold_indices = np.array(gold_indices)

    max_file = None
    max_f1 = 0.0
    for checkpoint_i in range(1, 200, 1):
        try:
            prediction_file = "~/may-20/fold_{}/relation.mml-pgg-off-sim.run.fold_{}.dev.predictions.step.{}.csv".format(str(fold_i), str(fold_i), str(100 * checkpoint_i))
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
            pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(pred_log_ps)), (num_examples, 12, 8)), axis=2))
            pred_ids = np.argmax(pred_log_ps, axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if f1 >= max_f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(checkpoint_i)

    print(fold_i)
    print(max_f1)
    print(max_file)
    print("\r\n")
    mean_f1 += max_f1

print(mean_f1/10.0)

1
0.7021862131500355
~/may-20/fold_1/relation.mml-pgg-off-sim.run.fold_1.dev.predictions.step.4700.csv


2
0.7318462713898469
~/may-20/fold_2/relation.mml-pgg-off-sim.run.fold_2.dev.predictions.step.400.csv


3
0.7766533200558716
~/may-20/fold_3/relation.mml-pgg-off-sim.run.fold_3.dev.predictions.step.3600.csv


4
0.8437707696480834
~/may-20/fold_4/relation.mml-pgg-off-sim.run.fold_4.dev.predictions.step.800.csv


5
0.8300206299665337
~/may-20/fold_5/relation.mml-pgg-off-sim.run.fold_5.dev.predictions.step.7900.csv


6
0.8906375171815566
~/may-20/fold_6/relation.mml-pgg-off-sim.run.fold_6.dev.predictions.step.700.csv


197
198
199
7
0.7827607798234402
~/may-20/fold_7/relation.mml-pgg-off-sim.run.fold_7.dev.predictions.step.2100.csv


198
199
8
0.795102231532206
~/may-20/fold_8/relation.mml-pgg-off-sim.run.fold_8.dev.predictions.step.6800.csv


185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
9
0.7819016572177454
~/may-20/fold_9/relation.mml-pgg-off-sim.run.fold_9.dev.predict

In [5]:
# Test set performance over the 10 folds of the RE-QA dataset for the concat and gold models.
import pandas as pd
import numpy as np

gold_files = {
    1: "relation.gold.test.predictions.fold.1.step.600.csv",
    2: "relation.gold.test.predictions.fold.2.step.1900.csv",
    3: "relation.gold.test.predictions.fold.3.step.200.csv",
    4: "relation.gold.test.predictions.fold.4.step.9500.csv",
    5: "relation.gold.test.predictions.fold.5.step.15300.csv",
    6: "relation.gold.test.predictions.fold.6.step.1100.csv",
    7: "relation.gold.test.predictions.fold.7.step.2600.csv",
    8: "relation.gold.test.predictions.fold.8.step.1000.csv",
    9: "relation.gold.test.predictions.fold.9.step.1900.csv",
    10: "relation.gold.test.predictions.fold.10.step.4000.csv"
}

concat_files = {
    1: "relation.concat.test.predictions.fold.1.step.3600.csv",
    2: "relation.concat.test.predictions.fold.2.step.4300.csv",
    3: "relation.concat.test.predictions.fold.3.step.5200.csv",
    4: "relation.concat.test.predictions.fold.4.step.1600.csv",
    5: "relation.concat.test.predictions.fold.5.step.2900.csv",
    6: "relation.concat.test.predictions.fold.6.step.1400.csv",
    7: "relation.concat.test.predictions.fold.7.step.2500.csv",
    8: "relation.concat.test.predictions.fold.8.step.400.csv",
    9: "relation.concat.test.predictions.fold.9.step.2600.csv",
    10: "relation.concat.test.predictions.fold.10.step.800.csv"
}

gold_alone_p_r_f = {'f': [], 'r': [], 'p': []}
concat_alone_p_r_f = {'f': [], 'r': [], 'p': []}

for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/test.{}.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 24))

    num_examples = len(correct_indices) // 24
    gold_indices = np.array(gold_indices)
    
    concat_prediction_file = "~/may-20/fold_{}/concat/{}".format(fold_i, concat_files[fold_i])
    concat_pred_log_ps = pd.read_csv(concat_prediction_file, sep=',')["relation_log_p"].tolist()
    concat_pred_log_ps = np.reshape(np.array(concat_pred_log_ps), (num_examples, 24))
    concat_pred_ids = np.argmax(concat_pred_log_ps, axis=1)
    avg_prec, avg_rec, f1 = compute_macro_PRF(concat_pred_ids, gold_indices)
    concat_alone_p_r_f["f"].append(f1)
    concat_alone_p_r_f["r"].append(avg_rec)
    concat_alone_p_r_f["p"].append(avg_prec)
    print(fold_i, "concat alone", f1, avg_prec, avg_rec)
    
    gold_prediction_file = "~/may-20/fold_{}/gold/{}".format(fold_i, gold_files[fold_i])
    gold_pred_log_ps = pd.read_csv(gold_prediction_file, sep=',')["relation_log_p"].tolist()
    gold_pred_log_ps = np.reshape(np.array(gold_pred_log_ps), (num_examples, 24))
    gold_pred_ids = np.argmax(gold_pred_log_ps, axis=1)
    avg_prec, avg_rec, f1 = compute_macro_PRF(gold_pred_ids, gold_indices)
    print(fold_i, "gold alone", f1, avg_prec, avg_rec)
    gold_alone_p_r_f["f"].append(f1)
    gold_alone_p_r_f["r"].append(avg_rec)
    gold_alone_p_r_f["p"].append(avg_prec)
    print("\n")

print("gold alone p:", np.mean(np.array(gold_alone_p_r_f["p"])))
print("gold alone r:", np.mean(np.array(gold_alone_p_r_f["r"])))
print("gold alone f:", np.mean(np.array(gold_alone_p_r_f["f"])))

print("concat alone p:", np.mean(np.array(concat_alone_p_r_f["p"])))
print("concat alone r:", np.mean(np.array(concat_alone_p_r_f["r"])))
print("concat alone f:", np.mean(np.array(concat_alone_p_r_f["f"])))

1 concat alone 0.6851779135044868 0.689066127748157 0.6813333333333333
1 gold alone 0.7433432938934399 0.7562889888849941 0.7308333333333333


2 concat alone 0.5901669782971629 0.5989194167218461 0.5816666666666667
2 gold alone 0.6642954078337273 0.6842181964463796 0.6455000000000001


3 concat alone 0.6373039703281808 0.6626408187363189 0.6138333333333333
3 gold alone 0.681873757976071 0.6871616100666138 0.6766666666666667


4 concat alone 0.5858742474657468 0.6150594189021233 0.5593333333333333
4 gold alone 0.6824352310400019 0.7000704484368208 0.6656666666666667


5 concat alone 0.622864337200164 0.6281502269876816 0.6176666666666667
5 gold alone 0.5005601199498438 0.5060723602043087 0.4951666666666666


6 concat alone 0.5752377444829445 0.5954721054962936 0.5563333333333333
6 gold alone 0.6651754396604047 0.6719876078130175 0.6585


7 concat alone 0.6683183924580341 0.6834597978850874 0.6538333333333333
7 gold alone 0.6543720994745615 0.6683140048618551 0.641


8 concat alone 0.597

In [4]:
import pandas as pd
import numpy as np

mml_files = {
    1: "relation.mml-pgg-off-sim.run.fold_1.test.predictions.step.4700.csv",
    2: "relation.mml-pgg-off-sim.run.fold_2.test.predictions.step.400.csv",
    3: "relation.mml-pgg-off-sim.run.fold_3.test.predictions.step.3600.csv",
    4: "relation.mml-pgg-off-sim.run.fold_4.test.predictions.step.800.csv",
    5: "relation.mml-pgg-off-sim.run.fold_5.test.predictions.step.7900.csv",
    6: "relation.mml-pgg-off-sim.run.fold_6.test.predictions.step.700.csv",
    7: "relation.mml-pgg-off-sim.run.fold_7.test.predictions.step.2100.csv",
    8: "relation.mml-pgg-off-sim.run.fold_8.test.predictions.step.6800.csv",
    9: "relation.mml-pgg-off-sim.run.fold_9.test.predictions.step.4300.csv",
    10: "relation.mml-pgg-off-sim.run.fold_10.test.predictions.step.1600.csv"
}

mml_mml_p_r_f = {'f': [], 'r': [], 'p': []}

for fold_i in range(1, 11, 1):
    gold_file = "./zero-shot-extraction/relation_splits/test.{}.qq.relation_data.csv".format(str(fold_i-1))
    gold_indices = []
    df = pd.read_csv(gold_file, sep=',')
    correct_indices = df["correct_indices"].tolist()
    for i, index in enumerate(correct_indices):
        if index:
            gold_indices.append(int(i % 24))

    num_examples = len(correct_indices) // 24
    gold_indices = np.array(gold_indices)
    
    mml_prediction_file = "~/may-20/fold_{}/{}".format(fold_i, mml_files[fold_i])
    mml_pred_log_ps = pd.read_csv(mml_prediction_file, sep=',')["answer_log_p"].tolist()

    pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(mml_pred_log_ps)), (num_examples, 24, 8)), axis=2))
    pred_ids = np.argmax(pred_log_ps, axis=1)
    avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
    mml_mml_p_r_f["f"].append(f1)
    mml_mml_p_r_f["r"].append(avg_rec)
    mml_mml_p_r_f["p"].append(avg_prec)
    print(fold_i, "mml", f1, avg_prec, avg_rec)
    print("\n")

print("mml p:", np.mean(np.array(mml_mml_p_r_f["p"])))
print("mml r:", np.mean(np.array(mml_mml_p_r_f["r"])))
print("mml f:", np.mean(np.array(mml_mml_p_r_f["f"])))

1 mml 0.7324313594407584 0.7347101443139104 0.7301666666666667




ValueError: cannot reshape array of size 692139 into shape (6000,24,8)

In [3]:
# Dev prediction for concat model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/fewrl_data/val_ids_12321.csv",
    2: "~/codes/QA-ZRE/fewrl_data/val_ids_943.csv",
    3: "~/codes/QA-ZRE/fewrl_data/val_ids_111.csv",
    4: "~/codes/QA-ZRE/fewrl_data/val_ids_300.csv",
    5: "~/codes/QA-ZRE/fewrl_data/val_ids_1300.csv"
}
gold_files = {   
    1: "~/codes/QA-ZRE/fewrl_data/val_data_12321.csv",
    2: "~/codes/QA-ZRE/fewrl_data/val_data_943.csv",
    3: "~/codes/QA-ZRE/fewrl_data/val_data_111.csv",
    4: "~/codes/QA-ZRE/fewrl_data/val_data_300.csv",
    5: "~/codes/QA-ZRE/fewrl_data/val_data_1300.csv",
}

for run_id in range(1, 6, 1):
    prediction_files = ["~/may-29/fewrl/concat_run_{}/relation.concat.run.0.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]
    prediction_files += ["~/may-29/fewrl/concat_run_{}/relation.concat.run.1.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]
    prediction_files += ["~/may-29/fewrl/concat_run_{}/relation.concat.run.2.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]
    prediction_files += ["~/may-29/fewrl/concat_run_{}/relation.concat.run.3.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]

    df = pd.read_csv(gold_files[run_id], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[run_id], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        try:
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if max_f1 <= f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(prediction_file)

    print(run_id, max_file, max_f1)

1 ~/may-29/fewrl/concat_run_1/relation.concat.run.0.dev.predictions.step.600.csv 0.5478163622378379
2 ~/may-29/fewrl/concat_run_2/relation.concat.run.0.dev.predictions.step.200.csv 0.4564517056171136
3 ~/may-29/fewrl/concat_run_3/relation.concat.run.0.dev.predictions.step.800.csv 0.6302340196983017
4 ~/may-29/fewrl/concat_run_4/relation.concat.run.1.dev.predictions.step.1000.csv 0.7054889125319359
5 ~/may-29/fewrl/concat_run_5/relation.concat.run.0.dev.predictions.step.800.csv 0.6725607501052342


In [3]:
# Test prediction for concat model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/fewrl_data/test_ids_12321.csv",
    2: "~/codes/QA-ZRE/fewrl_data/test_ids_943.csv",
    3: "~/codes/QA-ZRE/fewrl_data/test_ids_111.csv",
    4: "~/codes/QA-ZRE/fewrl_data/test_ids_300.csv",
    5: "~/codes/QA-ZRE/fewrl_data/test_ids_1300.csv"
}
gold_files = {
    1: "~/codes/QA-ZRE/fewrl_data/test_data_12321.csv",
    2: "~/codes/QA-ZRE/fewrl_data/test_data_943.csv",
    3: "~/codes/QA-ZRE/fewrl_data/test_data_111.csv",
    4: "~/codes/QA-ZRE/fewrl_data/test_data_300.csv",
    5: "~/codes/QA-ZRE/fewrl_data/test_data_1300.csv",
}
test_files = {
    1: "~/june-12/fewrl/concat_run_1/relation.concat.run.0.test.predictions.step.2000.csv",
    2: "~/june-12/fewrl/concat_run_2/relation.concat.run.0.test.predictions.step.400.csv",
    3: "~/june-12/fewrl/concat_run_3/relation.concat.run.1.test.predictions.step.800.csv",
    4: "~/june-12/fewrl/concat_run_4/relation.concat.run.0.test.predictions.step.2400.csv",
    5: "~/june-12/fewrl/concat_run_5/relation.concat.run.0.test.predictions.step.200.csv",
}

mean_f1 = 0.0
mean_p = 0.0
mean_r = 0.0
for run_id in range(1, 6, 1):
    prediction_files = [test_files[run_id]]

    df = pd.read_csv(gold_files[run_id], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[run_id], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)

    for prediction_file in prediction_files:
        try:
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 15)), axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            print(run_id, avg_prec, avg_rec, f1)
            mean_f1 += f1
            mean_p += avg_prec
            mean_r += avg_rec
        except:
            print(prediction_file)

mean_f1 /= 5
mean_p /= 5
mean_r /= 5
print("mean_p", mean_p)
print("mean_r", mean_r)
print("mean_f", mean_f1)

1 0.36408830731147435 0.37638095238095237 0.37013259379216645
2 0.4049919630602685 0.40514285714285714 0.4050673960489486
3 0.34249083430854455 0.3451428571428572 0.34381173164737605
4 0.2948596112396407 0.3235238095238095 0.30852736829582333
5 0.3796415763339939 0.40228571428571425 0.39063576509021747
mean_p 0.3572144584507844
mean_r 0.3704952380952381
mean_f 0.3636349709749064


In [9]:
# Dev prediction for off-mml-pgg model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/small_fewrl_data/val_ids_12321.csv",
    2: "~/codes/QA-ZRE/small_fewrl_data/val_ids_943.csv",
    3: "~/codes/QA-ZRE/small_fewrl_data/val_ids_111.csv",
    4: "~/codes/QA-ZRE/small_fewrl_data/val_ids_300.csv",
    5: "~/codes/QA-ZRE/small_fewrl_data/val_ids_1300.csv"
}
gold_files = {   
    1: "~/codes/QA-ZRE/small_fewrl_data/val_data_12321.csv",
    2: "~/codes/QA-ZRE/small_fewrl_data/val_data_943.csv",
    3: "~/codes/QA-ZRE/small_fewrl_data/val_data_111.csv",
    4: "~/codes/QA-ZRE/small_fewrl_data/val_data_300.csv",
    5: "~/codes/QA-ZRE/small_fewrl_data/val_data_1300.csv",
}

for run_id in range(1, 6, 1):
    prediction_files = ["~/june-16/fewrl/run_{}/relation.mml-pgg-off-sim.run.0.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]
    prediction_files += ["~/june-16/fewrl/run_{}/relation.mml-pgg-off-sim.run.1.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]
    prediction_files += ["~/june-16/fewrl/run_{}/relation.mml-pgg-off-sim.run.2.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]
    prediction_files += ["~/june-16/fewrl/run_{}/relation.mml-pgg-off-sim.run.3.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 13, 1)]

    df = pd.read_csv(gold_files[run_id], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[run_id], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        try:
            mml_pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
            pred_log_ps = np.log(np.mean(np.reshape(np.exp(np.array(mml_pred_log_ps)), (num_examples, 5, 8)), axis=2))
            pred_ids = np.argmax(pred_log_ps, axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if max_f1 <= f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(prediction_file)

    print(run_id, max_file, max_f1)

1 ~/june-16/fewrl/run_1/relation.mml-pgg-off-sim.run.0.dev.predictions.step.1000.csv 0.6866699556484906
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.1000.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.1200.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.1400.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.1600.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.1800.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.2000.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.2200.csv
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.3.dev.predictions.step.2400.csv
2 ~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.2.dev.predictions.step.2200.csv 0.5213218904876981
3 ~/june-16/fewrl/run_3/relation.mml-pgg-off-sim.run.2.dev.predictions.step.1200.csv 0.6660949360255928
4 ~/june-16/fewrl/run_4/

In [2]:
# Test prediction for off-mml-pgg model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/small_fewrl_data/test_ids_12321.csv",
    2: "~/codes/QA-ZRE/small_fewrl_data/test_ids_943.csv",
    3: "~/codes/QA-ZRE/small_fewrl_data/test_ids_111.csv",
    4: "~/codes/QA-ZRE/small_fewrl_data/test_ids_300.csv",
    5: "~/codes/QA-ZRE/small_fewrl_data/test_ids_1300.csv"
}
gold_files = {   
    1: "~/codes/QA-ZRE/small_fewrl_data/test_data_12321.csv",
    2: "~/codes/QA-ZRE/small_fewrl_data/test_data_943.csv",
    3: "~/codes/QA-ZRE/small_fewrl_data/test_data_111.csv",
    4: "~/codes/QA-ZRE/small_fewrl_data/test_data_300.csv",
    5: "~/codes/QA-ZRE/small_fewrl_data/test_data_1300.csv",
}

test_files = {
    1: "~/june-16/fewrl/run_1/relation.mml-pgg-off-sim.run.0.test.predictions.step.1000.csv",
    2: "~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.2.test.predictions.step.2200.csv",
    3: "~/june-16/fewrl/run_3/relation.mml-pgg-off-sim.run.2.test.predictions.step.1200.csv",
    4: "~/june-16/fewrl/run_4/relation.mml-pgg-off-sim.run.1.test.predictions.step.1400.csv",
    5: "~/june-16/fewrl/run_5/relation.mml-pgg-off-sim.run.0.test.predictions.step.2400.csv",
}

mean_f1 = 0.0
mean_p = 0.0
mean_r = 0.0
for run_id in range(1, 6, 1):
    prediction_files = [test_files[run_id]]
    df = pd.read_csv(gold_files[run_id], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[run_id], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)
    for prediction_file in prediction_files:
        try:
            mml_pred_log_ps = pd.read_csv(prediction_file, sep=',')["answer_log_p"].tolist()
            pred_log_ps = np.log(np.sum(np.reshape(np.exp(np.array(mml_pred_log_ps)), (num_examples, 15, 8)), axis=2))
            pred_ids = np.argmax(pred_log_ps, axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            mean_f1 += f1
            mean_p += avg_prec
            mean_r += avg_rec
            print(prediction_file, avg_prec, avg_rec, f1)
        except:
            print(prediction_file)

mean_f1 /= 5
mean_p /= 5
mean_r /= 5
print("mean_p", mean_p)
print("mean_r", mean_r)
print("mean_f", mean_f1)

~/june-16/fewrl/run_1/relation.mml-pgg-off-sim.run.0.test.predictions.step.1000.csv 0.39090262621490907 0.3941904761904762 0.39253966669598334
~/june-16/fewrl/run_2/relation.mml-pgg-off-sim.run.2.test.predictions.step.2200.csv 0.38823952774766773 0.396 0.39208136684878714
~/june-16/fewrl/run_3/relation.mml-pgg-off-sim.run.2.test.predictions.step.1200.csv 0.33109424738588195 0.338 0.33451148639718353
~/june-16/fewrl/run_4/relation.mml-pgg-off-sim.run.1.test.predictions.step.1400.csv 0.29031332227659645 0.31657142857142856 0.3028743201670405
~/june-16/fewrl/run_5/relation.mml-pgg-off-sim.run.0.test.predictions.step.2400.csv 0.3921389098579965 0.42447619047619045 0.4076672854222021
mean_p 0.3585377266966104
mean_r 0.37384761904761904
mean_f 0.3659348251062393


In [2]:
# Dev prediction for off-mml-pgg model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/wikizsl_dataset/val_ids_12321.csv",
    2: "~/codes/QA-ZRE/wikizsl_dataset/val_ids_943.csv",
    3: "~/codes/QA-ZRE/wikizsl_dataset/val_ids_111.csv",
    4: "~/codes/QA-ZRE/wikizsl_dataset/val_ids_300.csv",
    5: "~/codes/QA-ZRE/wikizsl_dataset/val_ids_1300.csv"
}

gold_files = {
    1: "~/codes/QA-ZRE/wikizsl_dataset/small.val_data_12321.csv",
    2: "~/codes/QA-ZRE/wikizsl_dataset/small.val_data_943.csv",
    3: "~/codes/QA-ZRE/wikizsl_dataset/small.val_data_111.csv",
    4: "~/codes/QA-ZRE/wikizsl_dataset/small.val_data_300.csv",
    5: "~/codes/QA-ZRE/wikizsl_dataset/small.val_data_1300.csv",
}

for run_id in range(1, 6, 1):
    prediction_files = ["~/june-19/wikizsl/concat_run_{}/relation.concat.run.0.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 23, 1)]
    prediction_files += ["~/june-19/wikizsl/concat_run_{}/relation.concat.run.1.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 23, 1)]
    prediction_files += ["~/june-19/wikizsl/concat_run_{}/relation.concat.run.2.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 23, 1)]
    prediction_files += ["~/june-19/wikizsl/concat_run_{}/relation.concat.run.3.dev.predictions.step.{}.csv".format(run_id, 200 * i) for i in range(1, 23, 1)]

    df = pd.read_csv(gold_files[run_id], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[run_id], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        try:
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if max_f1 <= f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(prediction_file)

    print(run_id, max_file, max_f1)

1 ~/june-19/wikizsl/concat_run_1/relation.concat.run.1.dev.predictions.step.4000.csv 0.2950228237597085
~/june-19/wikizsl/concat_run_2/relation.concat.run.3.dev.predictions.step.1200.csv
2 ~/june-19/wikizsl/concat_run_2/relation.concat.run.0.dev.predictions.step.2400.csv 0.37060460613882945
3 ~/june-19/wikizsl/concat_run_3/relation.concat.run.0.dev.predictions.step.1400.csv 0.3002193280315908
4 ~/june-19/wikizsl/concat_run_4/relation.concat.run.1.dev.predictions.step.1400.csv 0.25564976384906496
~/june-19/wikizsl/concat_run_5/relation.concat.run.3.dev.predictions.step.3600.csv
5 ~/june-19/wikizsl/concat_run_5/relation.concat.run.0.dev.predictions.step.2200.csv 0.40583545137171356


In [None]:
# Test prediction for concat model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/wikizsl_dataset/test_ids_12321.csv",
    2: "~/codes/QA-ZRE/wikizsl_dataset/test_ids_943.csv",
    3: "~/codes/QA-ZRE/wikizsl_dataset/test_ids_111.csv",
    4: "~/codes/QA-ZRE/wikizsl_dataset/test_ids_300.csv",
    5: "~/codes/QA-ZRE/wikizsl_dataset/test_ids_1300.csv"
}
gold_files = {
    1: "~/codes/QA-ZRE/wikizsl_dataset/test_data_12321.csv",
    2: "~/codes/QA-ZRE/wikizsl_dataset/test_data_943.csv",
    3: "~/codes/QA-ZRE/wikizsl_dataset/test_data_111.csv",
    4: "~/codes/QA-ZRE/wikizsl_dataset/test_data_300.csv",
    5: "~/codes/QA-ZRE/wikizsl_dataset/test_data_1300.csv",
}
test_files = {
    1: "~/june-19/wikizsl/concat_run_1/relation.concat.run.1.test.predictions.step.4000.csv",
    2: "~/june-19/wikizsl/concat_run_2/relation.concat.run.0.test.predictions.step.2400.csv",
    3: "~/june-19/wikizsl/concat_run_3/relation.concat.run.0.test.predictions.step.1400.csv",
    4: "~/june-19/wikizsl/concat_run_4/relation.concat.run.1.test.predictions.step.1400.csv",
    5: "~/june-19/wikizsl/concat_run_5/relation.concat.run.0.test.predictions.step.2200.csv",
}

mean_f1 = 0.0
mean_p = 0.0
mean_r = 0.0
for run_id in range(1, 6, 1):
    prediction_files = [test_files[run_id]]
    df = pd.read_csv(gold_files[run_id], sep=',')
    ids = {val:i for i, val in enumerate(pd.read_csv(id_files[run_id], sep=',')["relation_ids"].tolist())}
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 15

    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 15)), axis=1)


    for prediction_file in prediction_files:
        try:
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 15)), axis=1)
            print("Samosh")
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            print(run_id, avg_prec, avg_rec, f1)
            mean_f1 += f1
            mean_p += avg_prec
            mean_r += avg_rec
        except:
            print(prediction_file)

mean_f1 /= 5
mean_p /= 5
mean_r /= 5
print("mean_p", mean_p)
print("mean_r", mean_r)
print("mean_f", mean_f1)

[ 6  2 12 12  0  0  0 12 10  6  6 10 14  8  5  0 11  2 11 14  1  8  0  2
  2  4 13  8  3  6  8 12  7  3  9 10  5  6  2  7 11 11  7 14 12  8 10  9
  9  2  3 11  0  9  0  5 13  9 11  1  8 11 11 10 12  3  8  9 14 14  8  0
  5 14 12  3 11 12  9 12  6  4  9 10 12  6 13  3 11  0  0  1  6  1  7  9
  0 12  9  0]
[ 4 10  8  8 11 10 14  8 13  2  4 10  6 13  5 10 13  0 14  5  4 14 10  2
 12  3  0 14  2  4 14  4  2  2 12 12  4  4  0  8 13  0  2 10  6 11 12  7
 10 11  6  0 10 12  7 12  3 11  7 11  0 13  5 12  8  5 11  1 12 10 14  0
  4 10 11  6 13 10 12  8  8  3  7 12  6  4  0  6 13  2 12  9 11  4  2 11
 10 13 12  7]
Samosh
~/june-19/wikizsl/concat_run_1/relation.concat.run.1.test.predictions.step.4000.csv
[ 9  5  5 10 14 14 13  9 13 14  5 11 10  3 13  3  9  9  2  9  5  7  8 13
  2  8 13  4 13 10 11  8 11  8  4  3 10  9 12 12  7 13  6  2  2  7 11  7
  0  0  5  9  8 10 13  9 14 12 10  4  4  4  9  9 13 12  3 13 10  5  6  5
  2  8 10 14 10  7 13  6  8 10 14 12  0  2 14 11  9 10  9  9  6  5  6  2
  0 1

In [15]:
def compute_prediction_f1s(prediction_files, gold_file, id_file):
    df = pd.read_csv(gold_file, sep=',')
    actual_ids = df["actual_ids"].tolist()
    num_examples = len(actual_ids) // 5

    ids = {val:i for i, val in enumerate(pd.read_csv(id_file, sep=',')["relation_ids"].tolist())}
    gold_indices = []
    for each_relation_id in actual_ids:
        gold_indices.append(ids[each_relation_id])

    gold_indices = np.max(np.reshape(np.array(gold_indices), (num_examples, 5)), axis=1)

    max_f1 = 0.0
    max_file = "None"
    for prediction_file in prediction_files:
        try:
            pred_log_ps = pd.read_csv(prediction_file, sep=',')["relation_log_p"].tolist()
            pred_ids = np.argmax(np.reshape(np.array(pred_log_ps), (num_examples, 5)), axis=1)
            avg_prec, avg_rec, f1 = compute_macro_PRF(pred_ids, gold_indices)
            if max_f1 < f1:
                max_f1 = f1
                max_file = prediction_file
        except:
            print(prediction_file)

    print(run_id, max_file, max_f1)

In [16]:
# Investigate Ada-factor with different learning rates using the concat model on the fewrel dataset.
import pandas as pd
import numpy as np

id_files = {
    1: "~/codes/QA-ZRE/fewrl_data/val_ids_12321.csv",
}
gold_files = {   
    1: "~/codes/QA-ZRE/fewrl_data/val_data_12321.csv",
}

for run_id in range(1, 2, 1):
    prediction_files = ["~/august_29/fewrel/concat_run_12321/relation.concat.run.12321.epoch.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 105, 1)]
    df = pd.read_csv(gold_files[run_id], sep=',')
    compute_prediction_f1s(prediction_files, gold_files[run_id], id_files[run_id])

for run_id in range(1, 2, 1):
    prediction_files = ["~/august_29/fewrel/concat_run_12321_lr_0.00005/relation.concat.run.12321.epoch.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 105, 1)]
    df = pd.read_csv(gold_files[run_id], sep=',')
    compute_prediction_f1s(prediction_files, gold_files[run_id], id_files[run_id])

for run_id in range(1, 2, 1):
    prediction_files = ["~/august_29/fewrel/concat_run_12321_adam/relation.concat.run.12321.epoch.0.dev.predictions.step.{}.csv".format(100 * i) for i in range(1, 105, 1)]
    df = pd.read_csv(gold_files[run_id], sep=',')
    compute_prediction_f1s(prediction_files, gold_files[run_id], id_files[run_id])

1 ~/august_29/fewrel/concat_run_12321/relation.concat.run.12321.epoch.0.dev.predictions.step.9900.csv 0.6225757946913466
1 ~/august_29/fewrel/concat_run_12321_lr_0.00005/relation.concat.run.12321.epoch.0.dev.predictions.step.3700.csv 0.598789217937264
1 ~/august_29/fewrel/concat_run_12321_adam/relation.concat.run.12321.epoch.0.dev.predictions.step.700.csv 0.5939651344395138
