In [None]:
import os
import numpy as np
import pandas as pd

from sacremoses import MosesDetokenizer
from tqdm import tqdm


# English Moses detokenizer, shared by the text post-processing helper below.
md = MosesDetokenizer(lang="en")

# Attribute identifiers (as they appear in the type files) -> human-readable
# labels shown in the evaluation sheet.
type_map = dict(
    name="Name",
    eatType="Eat type",
    food="Food",
    customerRating="Customer rating",
    area="Area",
    familyFriendly="Family friendly",
    priceRange="Price range",
    near="Near",
)

In [None]:
def read_from_file(fname, strip=True, encoding="utf-8"):
    """Read a text file and return its lines as a list of strings.

    Args:
        fname: Path of the file to read.
        strip: If True, leading/trailing whitespace (including the newline)
            is removed from every line; if False, lines are returned exactly
            as read, newline included.
        encoding: Text encoding used to decode the file. Passed explicitly so
            the result does not depend on the platform's default locale.

    Returns:
        A list with one string per line of the file.
    """
    with open(fname, "r", encoding=encoding) as f:
        if strip:
            return [line.strip() for line in f]
        return list(f)

def postprocess_e2e_preds(preds):
    """Turn tokenized E2E output lines into detokenized text.

    For every line, underscores are replaced by spaces, the line is split on
    whitespace, and the resulting tokens are joined by the module-level Moses
    detokenizer ``md``. A tqdm progress bar is shown while iterating.

    Args:
        preds: Iterable of strings, one generated/reference text per item.

    Returns:
        A list of detokenized strings, in the same order as ``preds``.
    """
    return [
        md.detokenize(raw_text.replace("_", " ").split())
        for raw_text in tqdm(preds)
    ]

def process_type_lines(type_lines):
    """Translate attribute identifiers into their display labels.

    Each line is split on whitespace and every token is looked up in
    ``type_map``.

    Args:
        type_lines: Iterable of strings holding whitespace-separated
            attribute identifiers.

    Returns:
        A list (one entry per input line) of lists of display labels.

    Raises:
        KeyError: If a token is not a key of ``type_map``.
    """
    return [
        [type_map[attr_name] for attr_name in line.split()]
        for line in type_lines
    ]

def process_value(value):
    """Convert one raw attribute value into its display form.

    The two family-friendliness markers become "yes" / "no"; any other value
    simply has its underscores replaced by spaces.

    Args:
        value: Raw attribute value string.

    Returns:
        The display string for ``value``.
    """
    special_cases = (
        ("family_friendly", "yes"),
        ("not_family_friendly", "no"),
    )
    for marker, answer in special_cases:
        if value == marker:
            return answer
    return value.replace("_", " ")

def process_value_lines(value_lines):
    """Apply ``process_value`` to every whitespace-separated token of each line.

    Args:
        value_lines: Iterable of strings holding whitespace-separated raw
            attribute values.

    Returns:
        A list (one entry per input line) of lists of display values.
    """
    return [
        [process_value(raw_value) for raw_value in line.split()]
        for line in value_lines
    ]


In [None]:
# Locations of the validation tables/references and of each system's predictions.
data_dir = "/projects/ogma2/users/andrewsi/control-data2text/DTG-SI/e2e_data/val"
baseline_preds_file = "/projects/ogma2/users/andrewsi/control-data2text/transformers/examples/seq2seq/exp/e2e/e2e_t5_small_01/checkpoint-10000/validation_preds.txt"
dtg_si_preds_file = "/projects/ogma2/users/andrewsi/control-data2text/DTG-SI/e2e_dtg_si/ckpt/hypos.step1200.val.txt"
our_preds_file = "/projects/ogma2/users/andrewsi/control-data2text/transformers/examples/seq2seq/exp/e2e/e2e_k3_t5_small_01/checkpoint-8295/validation_preds.txt"

# Table side: attribute labels and attribute values, one list per example.
x_types = process_type_lines(read_from_file(f"{data_dir}/x_type.valid.txt"))
x_values = process_value_lines(read_from_file(f"{data_dir}/x_value.valid.txt"))
# Reference texts, detokenized the same way as the model outputs.
y_refs = postprocess_e2e_preds(read_from_file(f"{data_dir}/y_ref.valid.txt"))

# Detokenized outputs of the three systems being compared.
baseline_preds = postprocess_e2e_preds(read_from_file(baseline_preds_file))
dtg_si_preds = postprocess_e2e_preds(read_from_file(dtg_si_preds_file))
our_preds = postprocess_e2e_preds(read_from_file(our_preds_file))

# Every list above is later indexed with the same example id, so they must be
# line-aligned. Fail here rather than silently pairing a table with the wrong
# generation (or hitting an IndexError mid-way through sheet construction).
parallel_lengths = {
    "x_types": len(x_types),
    "x_values": len(x_values),
    "y_refs": len(y_refs),
    "baseline_preds": len(baseline_preds),
    "dtg_si_preds": len(dtg_si_preds),
    "our_preds": len(our_preds),
}
assert len(set(parallel_lengths.values())) == 1, (
    f"Parallel files are not line-aligned: {parallel_lengths}"
)


In [None]:
# Build the human-evaluation sheet as a flat list of 9-column rows.
# Layout per sampled example:
#   * one header row (the last cell holds the example id),
#   * one row per attribute of the table; the first row also carries the
#     exemplar text, and the first three rows carry the three shuffled
#     generations plus, in the "ID" column, the index of the system
#     (0 = baseline, 1 = DTG-SI, 2 = ours) that produced each of them,
#   * two blank separator rows.
N_EVAL_SAMPLES = 100
EVAL_SEED = 0  # fixed seed so the sampled examples and shuffles are reproducible

# Local generator: reproducible without touching NumPy's global random state.
eval_rng = np.random.RandomState(EVAL_SEED)

eval_sheet = []
# Sample from the loaded examples (the original referenced an undefined
# `x_type_lines`); cap the sample size so small datasets do not raise.
eval_sample = eval_rng.choice(
    len(x_types), min(N_EVAL_SAMPLES, len(x_types)), replace=False
)
for i in eval_sample:
    eval_sheet.append(["Table", "", "Exemplar", "Generations", "Factual Consistency", "Style Embodiment", "Fluency", "ID", str(i)])
    gens = [baseline_preds[i], dtg_si_preds[i], our_preds[i]]
    # Shuffle the systems so the rater cannot infer the source from position.
    gen_idx_perm = eval_rng.permutation(len(gens))
    shuffled_gens = [gens[k] for k in gen_idx_perm]
    n_attrs = len(x_types[i])
    # Emit enough rows for both the attributes and the generations, so a
    # table with fewer attributes than systems still shows every generation.
    for j in range(max(n_attrs, len(gens))):
        if j < n_attrs:
            attr_type, attr_value = x_types[i][j], x_values[i][j]
        else:
            attr_type, attr_value = "", ""
        exemplar = y_refs[i] if j == 0 else ""
        if j < len(gens):
            generation, gen_id = shuffled_gens[j], str(gen_idx_perm[j])
        else:
            generation, gen_id = "", ""
        eval_sheet.append([attr_type, attr_value, exemplar, generation, "", "", "", gen_id, ""])
    eval_sheet.append([""] * 9)
    eval_sheet.append([""] * 9)


In [None]:
# Tabulate the accumulated rows; no column names are given, so columns are positional.
eval_df = pd.DataFrame(eval_sheet)

In [None]:
# Preview the first 20 rows of the sheet.
eval_df.head(20)

In [None]:
# Export the sheet without pandas' index/header: the rows already embed their own header lines.
eval_df.to_csv("e2e_human_eval.csv", index=False, header=False)