In [1]:
import pandas as pd
import os
import codecs
import re
import string
import sys
import numpy as np
PUNCTUATION = set(string.punctuation)

def eval_zero_re(test_file, answer_file):
    precision = read_results(test_file, answer_file)
    return precision

def read_results(test_set, answer_file):
    with codecs.open(test_set, 'r', 'utf-8') as fin:
        data = [line.strip().split('\t') for line in fin]
    metadata = [x[:4] for x in data]
    gold = [x[4:] for x in data]

    new_gold = []
    new_metadata = []
    for i in range(len(gold)):
        if len(gold[i]) > 0:
            new_gold.append(simplify(' and '.join(gold[i])))
            new_metadata.append(metadata[i])

    with codecs.open(answer_file, 'r', 'utf-8') as fin:
        answers = [line.strip() for line in fin]

    new_answers = [simplify(answer) for answer in answers[1:]]
    
    correct = 0.0
    for i in range(len(new_gold)):
        ref = ' '.join(new_gold[i])
        candidate = ' '.join(new_answers[i])
        if (ref == candidate):# or (ref in candidate) or (candidate in ref):
            correct += 1
        #else:
        #    print("Ref: " + ref)
        #    print("Candidate: " + candidate)
        #    print(new_metadata[i])
        #    print("\nnext\n")
    return correct / len(new_gold)

def simplify(answer):
    return [''.join(c for c in t if c not in PUNCTUATION) for t in answer.strip().lower().split()]

In [3]:
def preprocess_the_prediction_files(main_path, list_of_files):
    for file in list_of_files:
        df = pd.read_csv(os.path.join(main_path, file), sep=',')
        df["predictions_str"].to_csv(os.path.join("/tmp/", file), sep='\t', header=True, index=False)

def eval_the_prediction_files(list_of_files, gold_file):
    scores = {}
    scores_list = []
    for file in list_of_files:
        score = eval_zero_re(gold_file, os.path.join("/tmp/", file))
        scores[score] = file
        scores_list.append(score)
    
    f1s = np.array(scores_list)
    max_f1 = max(scores.keys())
    return scores[max_f1],  max_f1 * 100, f1s * 100, scores

In [15]:
# Gold Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["gold_fold_1.dev.predictions.0.step.0.csv"]
fold_files += ["gold_fold_1.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 66, 1)]
fold_files.append("gold_fold_1.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/nov-6/gold_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)


print("\n\nTest Predictions\n\n")
# Gold Predictions on the test 1 fold 1. Only on positive samples.
fold_files = ["gold_fold_1.test.predictions.0.step.900.csv", "gold_fold_1.test.predictions.0.step.0.csv"]
fold_0_path = "~/t5-small-exps/nov-6/gold_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

[40.72, 53.22, 51.25, 53.53, 50.4, 50.11, 51.81, 51.78, 55.67, 50.46, 46.48, 49.0, 42.7, 47.66, 47.41, 45.59, 44.3, 51.36, 46.56, 49.21, 46.6, 41.39, 48.52, 36.95, 52.47, 48.94, 44.86, 43.05, 47.75, 39.19, 37.89, 42.11, 44.36, 38.7, 45.51, 38.96, 47.53, 34.8, 38.53, 43.12, 39.3, 39.29, 38.85, 49.9, 46.15, 40.8, 42.6, 43.41, 40.08, 40.42, 38.91, 41.43, 44.53, 43.2, 36.28, 38.22, 37.09, 43.84, 36.05, 38.32, 38.98, 38.67, 37.25, 38.98, 34.48, 37.09, 37.42, 40.56, 39.68, 36.0, 42.0, 40.41, 42.75, 43.76, 40.96, 42.35, 41.94, 42.4, 40.82, 40.83, 40.0, 37.64, 39.59, 38.48, 40.32, 40.5, 35.51, 42.54, 40.57, 40.48, 37.38, 36.99, 33.85, 37.11, 35.04, 37.76, 30.43, 33.85, 33.56, 34.39, 35.95, 40.48, 36.14, 40.67, 36.96, 38.25, 39.01, 37.3, 37.95, 36.03, 39.53, 32.67, 33.06, 31.12, 36.15, 33.0, 31.42, 28.57, 33.33, 31.91, 33.95, 37.05, 30.84, 36.73, 37.55, 36.44, 36.4, 36.51, 38.33, 38.16, 38.49, 38.52, 40.0, 33.27, 36.09, 37.66, 38.08, 39.55, 36.55, 39.05, 35.93, 40.71, 43.41, 37.08, 38.79, 36.51

In [8]:
# Concat Predictions on the dev 1 fold 1. Only on positive samples.
fold_files = ["concat_fold_1.dev.predictions.0.step.0.csv"]
fold_files += ["concat_fold_1.dev.predictions.0.step.{}.csv".format(100 * i) for i in range(1, 66, 1)]
fold_files.append("concat_fold_1.dev.predictions.0.step.full.csv")

fold_0_path = "~/t5-small-exps/nov-6/concat_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/dev.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

print("\n\nTest Predictions\n\n")

# Concat Predictions on the test 1 fold 1. Only on positive samples.
fold_files = ["concat_fold_1.test.predictions.0.step.100.csv", "concat_fold_1.test.predictions.0.step.0.csv"]
fold_0_path = "~/t5-small-exps/nov-6/concat_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
max_file,  max_f1, f1s, scores = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(max_file)
print(max_f1)
print(f1s)

[66.27, 65.0, 64.69, 63.98, 62.7, 64.73, 60.6, 64.29, 62.74, 61.91, 61.73, 62.61, 63.93, 64.2, 59.19, 60.9, 62.88, 60.62, 61.5, 63.31, 60.96, 64.14, 54.75, 61.92, 63.44, 59.55, 61.01, 58.36, 59.05, 62.32, 60.26, 60.85, 60.97, 59.67, 61.54, 59.26, 59.74, 59.31, 58.67, 59.89, 61.08, 62.24, 62.69, 60.59, 60.63, 61.8, 60.0, 61.21, 61.08, 59.3, 60.15, 60.65, 59.9, 59.97, 60.17, 61.15, 62.54, 59.87, 59.89, 60.55, 58.18, 58.74, 60.31, 57.27, 58.18, 59.49, 58.48, 60.47, 59.68, 59.21, 60.09, 59.52, 58.91, 59.29, 59.68, 59.46, 59.1, 59.53, 59.39, 58.42, 59.42, 60.11, 57.86, 62.03, 61.18, 58.49, 59.25, 60.71, 60.96, 60.01, 59.24, 57.48, 59.29, 58.68, 61.11, 58.83, 58.56, 56.03, 59.44, 59.19, 60.45, 58.8, 59.31, 60.1, 58.14, 58.91, 59.47, 57.43, 57.46, 61.11, 58.17, 60.52, 57.35, 59.73, 58.33, 58.08, 58.8, 57.13, 55.37, 58.06, 58.78, 57.95, 57.88, 59.04, 58.9, 59.82, 60.83, 58.85, 59.88, 58.44, 57.95]


In [14]:
# GOLD Predictions on the dev 1 fold 1.
fold_files = ["gold_fold_1.test.predictions.0.step.800.csv"]
fold_0_path = "~/gold_fold_1/"
fold_0_gold_file = "./zero-shot-extraction/relation_splits/test.0"

preprocess_the_prediction_files(fold_0_path, fold_files)
fold_0_gold_scores_test, scores_list = eval_the_prediction_files(fold_files, fold_0_gold_file)
print(fold_0_gold_scores_test)

{'Precision: 62.43% \t Recall: 66.25% \t F1: 64.28%': 'gold_fold_1.test.predictions.0.step.800.csv'}
