In [None]:
import pandas as pd
import os

def read_prompt_dict(filename: str) -> dict:
    """Load the prompt table and index it by task prefix.

    The file is read as a header-less, tab-separated table whose columns are
    named ``task_name``, ``task_prefix``, ``prompt``, ``prompt_len`` and
    ``io_sep`` (in that order).

    Args:
        filename: Path of the tab-separated prompt file.

    Returns:
        A dict mapping each ``task_prefix`` to a ``(prompt, io_sep)`` tuple.
        When a prefix occurs on several rows, the last row wins.
    """
    column_names = ["task_name", "task_prefix", "prompt", "prompt_len", "io_sep"]
    prompt_table = pd.read_csv(filename, sep="\t", header=None, names=column_names)
    # Rows are visited in file order, so later duplicates overwrite earlier ones.
    return {
        record.task_prefix: (record.prompt, record.io_sep)
        for record in prompt_table.itertuples(index=False)
    }

PROMPT_DICT = read_prompt_dict("data/prompt/prompt.tsv")

TEST_FILE = "data/test-train_clf_test_clf.tsv"
TASK_NAME = "superglue-cb"
# Number of leading rows of TEST_FILE that are matched, by position, against
# the lines of the prediction file.
NUM_EXAMPLES = 267

# Load the test set, keep only its first 4 columns, and name them.
df = pd.read_csv(TEST_FILE, header=None, sep="\t")
df = df[range(4)]
df.columns = ["task_name", "task_prefix", "input", "target"]
df = df[:NUM_EXAMPLES].reset_index(drop=True)

# Read the model outputs, one per line, dropping the line terminator.
with open(os.path.join("data/predictions/gptj", TASK_NAME)) as fin:
    predictions = [line.replace("\n", "") for line in fin]

# Rows and predictions are paired purely by position, so a length mismatch
# would either raise an opaque IndexError or silently leave a misaligned tail.
# Fail fast with an explicit message instead.
if len(predictions) != len(df):
    raise ValueError(
        "{}: found {} prediction lines for {} test rows".format(
            TASK_NAME, len(predictions), len(df)
        )
    )

# For each row, look up the io_sep registered for its task_prefix and keep only
# the (whitespace-stripped) text that follows the last occurrence of io_sep.
predictions = [
    raw.split(PROMPT_DICT[prefix][1])[-1].strip()
    for raw, prefix in zip(predictions, df["task_prefix"])
]

In [None]:
# Display the post-processed predictions for manual inspection.
predictions

In [None]:
# Write the post-processed predictions back to the file they were read from
# (this overwrites the original contents), one prediction per line.
# Every record, including the last, is newline-terminated: joining with "\n"
# would leave an empty final prediction with no line of its own, so it would
# disappear when the file is read back line by line.
with open(os.path.join("data/predictions/gptj", TASK_NAME), "w") as f:
    f.writelines(prediction + "\n" for prediction in predictions)

In [None]:
from sklearn.metrics import f1_score
import metrics

# Attach the predictions to the test rows by position.
df["prediction"] = predictions

# Collapse to one row per (task_name, task_prefix); the targets and the
# predictions of each group are gathered into Python lists.
df = (
    df.groupby(["task_name", "task_prefix"])
    .agg({"target": list, "prediction": list})
    .reset_index()
)

def compute_f1(row):
    """Return the macro-averaged F1 score for one grouped row.

    Args:
        row: Object exposing ``target`` and ``prediction`` (the true and
            predicted labels) and ``task_name``.

    Returns:
        The result of ``f1_score`` with ``average='macro'``, restricted to the
        labels listed under ``metrics.LABELS[row.task_name]``.
    """
    y_true = row.target
    y_pred = row.prediction
    task_labels = metrics.LABELS[row.task_name]
    return f1_score(y_true, y_pred, average="macro", labels=task_labels)

# Score every grouped row, then report the mean and variance of those scores
# for each task_name.
df["f1_score"] = df.apply(compute_f1, axis="columns")
df.groupby(["task_name"]).agg(
    {"f1_score": ["mean", "var"]}
)