In [4]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the tokenizer and the seq2seq model from the same checkpoint so their
# vocabularies match. The tokenizer is told to accept inputs up to 1024 tokens.
# NOTE(review): per the T5 v1.1 documentation these checkpoints are pre-trained
# only (no supervised fine-tuning); the predictions displayed further down
# (e.g. ": premise:") look consistent with that -- confirm whether a fine-tuned
# checkpoint was intended here.
MODEL_NAME = "google/t5-v1_1-base"

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME, model_max_length=1024)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm
2022-12-19 10:08:18.371331: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [1]:
import pandas as pd

# Read the header-less, tab-separated evaluation file. Because there is no
# header row, pandas labels the columns 0..n_col-1.
df = pd.read_csv("data/test-train_non_nli_test_nli.tsv", sep="\t", header=None)
n_col = df.shape[1]

# Columns 4 and onwards are folded into one list per row, skipping empty cells
# (presumably every acceptable gold answer for the example -- verify against
# the data file).
all_targets = df[list(range(4, n_col))].apply(
    lambda answers: answers.dropna().tolist(), axis=1)

# Keep the first four columns under readable names and attach the answer lists.
df = df[[0, 1, 2, 3]].rename(
    columns={0: "task_name", 1: "task_prefix", 2: "input", 3: "target"})
df["all_targets"] = all_targets

In [2]:
def read_prompt_dict(filename: str) -> dict:
    """Build a lookup of prompts keyed by task prefix.

    The file is read as a header-less, tab-separated table with five columns:
    task_name, task_prefix, prompt, prompt_len, io_sep.

    Args:
        filename: Path of the prompt TSV file.

    Returns:
        A dict mapping each ``task_prefix`` to a ``(prompt, io_sep)`` tuple.
        When a prefix occurs on several rows, the last row wins.
    """
    column_names = ["task_name", "task_prefix", "prompt", "prompt_len", "io_sep"]
    table = pd.read_csv(filename, sep="\t", header=None, names=column_names)
    return {
        prefix: (prompt, separator)
        for prefix, prompt, separator in zip(
            table["task_prefix"], table["prompt"], table["io_sep"])
    }

# Notebook-level prompt table: task_prefix -> (prompt, io_sep), read once here
# and looked up by apply_prompt when building model inputs.
PROMPT_DICT = read_prompt_dict("data/prompt/prompt.tsv")

In [3]:
# Work on the first 10 rows only (quick smoke run). A later cell adds a
# "prediction" column to this frame, so take an explicit copy: writing a new
# column into a bare row slice of the full frame is the pattern pandas flags
# with SettingWithCopyWarning.
df = df[0:10].copy()

In [9]:
def apply_prompt(row):
    """Return the full model input for one example row.

    The prompt and input/output separator registered in ``PROMPT_DICT`` for
    ``row.task_prefix`` are wrapped around ``row.input``, with single spaces
    between the three parts: ``"<prompt> <input> <io_sep>"``.
    """
    prefix, separator = PROMPT_DICT[row.task_prefix]
    return " ".join([prefix, row.input, separator])

BSZ = 8  # number of prompts sent through the model per generate() call
predictions = []

# Walk the frame in consecutive windows of BSZ rows: build the prompts,
# tokenize them as one padded batch, generate, and collect the decoded text.
for start in range(0, df.shape[0], BSZ):
    prompts = df[start:start + BSZ].apply(apply_prompt, axis=1).tolist()
    encoded = tokenizer(
        prompts,
        padding=True,
        truncation=True,
        max_length=1024,
        return_tensors="pt")
    generated = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=8,  # generated sequences are capped at 8 tokens
        early_stopping=True)
    # One decoded string per generated sequence, special tokens removed.
    predictions += tokenizer.batch_decode(generated, skip_special_tokens=True)

# Predictions were collected in row order, so they line up with df positionally.
df["prediction"] = predictions

In [10]:
# Show the examples with their gold targets next to the generated predictions.
df

Unnamed: 0,task_name,task_prefix,input,target,all_targets,prediction
0,anli,anli_16_100,premise: Linguistics is the scientific study o...,contradiction,[contradiction],: premise:
1,anli,anli_16_100,"premise: Franco Zeffirelli, KBE Grande Ufficia...",entailment,[entailment],: premise:
2,anli,anli_16_100,premise: Eme 15 is the self-titled debut studi...,contradiction,[contradiction],: premise:
3,anli,anli_16_100,premise: Almost Sunrise is a 2016 American doc...,neutral,[neutral],: a. answer:
4,anli,anli_16_100,premise: Sergei Mikhailovich Grinkov (Russian:...,contradiction,[contradiction],: premise:
5,anli,anli_16_100,premise: Lee Hong-gi (; ] ; Japanese:i*hongi) ...,neutral,[neutral],: premise:
6,anli,anli_16_100,premise: Lost Moon: The Perilous Voyage of Apo...,entailment,[entailment],: premise:
7,anli,anli_16_100,"premise: Will Wheaton, born Willie Mack Wheato...",entailment,[entailment],: premise:
8,anli,anli_16_100,"premise: La Cygne (pronounced ""luh SEEN"") is a...",contradiction,[contradiction],: premise:
9,anli,anli_16_100,premise: 3096 (3096 Tage) is a 2013 German dra...,neutral,[neutral],: premise:


In [None]:
import metrics

# Score every distinct (task_name, task_prefix) pair found in df. Rows are
# selected by task_prefix; the metric is looked up by task_name.
# Note: `predictions` is rebound here to a per-task array, replacing the list
# built by the generation cell.
for task in df[["task_name", "task_prefix"]].drop_duplicates().itertuples(index=False):
    in_task = df.task_prefix == task.task_prefix
    targets = df.loc[in_task, "all_targets"].values
    predictions = df.loc[in_task, "prediction"].values
    metric = metrics.METRICS[task.task_name]
    test_performance = metrics.evaluate(predictions, targets, metric)
    print('Task: {}; Test score: {}; Metric: {}'.format(task.task_prefix, test_performance, metric))