Get task instructions from [PromptSource](https://github.com/bigscience-workshop/promptsource).

To install the library, run `pip install promptsource`.

# See existing templates

In [None]:
from promptsource.templates import TemplateCollection
# Build the PromptSource template collection.
# NOTE: `collection` is also read by the "Get instructions" cell further down,
# so this cell has to run first.
collection = TemplateCollection()
print(f"Num of existing templates: {len(collection.datasets_templates)}")
# Only the keys are printed, so iterate the mapping directly instead of
# unpacking values that are never used.
for key in collection.datasets_templates:
    print(key)

# Get instructions

In [None]:
import pandas as pd
import re
from datasets import load_dataset
from promptsource.templates import DatasetTemplates


# Headerless tab-separated input, one task per row, with the columns
# task name, dataset name and subset name (in that order).
INPUT_FILE = "data/prompt/dataset_list.tsv"
# Tab-separated output, one line per task: the task name followed by the
# whitespace-normalized rendering of each of its templates.
OUTPUT_FILE = "data/prompt/promptsource_templates.tsv"


def read_input(filename: str) -> pd.DataFrame:
    """Load the task list from a headerless tab-separated file.

    Args:
        filename: Path to a TSV file whose columns are, in order, the task
            name, the dataset name and the subset name.

    Returns:
        A DataFrame with the columns ``task_name``, ``dataset_name`` and
        ``subset_name`` (all of object dtype) in which every missing cell
        is ``None`` rather than NaN.
    """
    df = pd.read_csv(
        filename,
        sep="\t",
        header=None,
        names=["task_name", "dataset_name", "subset_name"],
    )
    # Cast to object before masking: on a non-object column (e.g. a
    # subset_name column that is empty in every row and is therefore read as
    # float64) `where(..., None)` keeps NaN instead of storing None, and a
    # NaN subset would never equal the None used for "no subset".
    return df.astype(object).where(df.notna(), None)

# Render every PromptSource template of each listed task on one example and
# write the results to OUTPUT_FILE. Relies on `collection` from the
# "See existing templates" cell.

# Tasks whose example is taken from a split other than "train".
SPLIT_OVERRIDES = {
    "anli": "train_r1",
    "mc_taco": "validation",
}
DEFAULT_SPLIT = "train"

# Raw-string pattern, compiled once: the non-raw "\s+" is an invalid string
# escape that newer Python versions warn about.
WHITESPACE_RE = re.compile(r"\s+")

dataset_df = read_input(INPUT_FILE)
has_no_templates = []
results = []

for row in dataset_df.itertuples(index=False):
    # Skip tasks with no defined templates.
    if (row.dataset_name, row.subset_name) not in collection.datasets_templates:
        has_no_templates.append(row.task_name)
        continue

    templates = DatasetTemplates(row.dataset_name, row.subset_name).templates

    # Load the HuggingFace dataset; its first example is used to render the templates.
    split = SPLIT_OVERRIDES.get(row.task_name, DEFAULT_SPLIT)
    dataset = load_dataset(row.dataset_name, row.subset_name, split=split)
    example = dataset[0]

    # Keep only element 0 of each rendering (presumably the input/prompt side
    # of the template -- confirm against promptsource) and collapse every
    # whitespace run so that each rendering fits in a single TSV cell.
    rendered = [
        WHITESPACE_RE.sub(" ", template.apply(example)[0])
        for template in templates.values()
    ]
    results.append(row.task_name + "\t" + "\t".join(rendered))

# Save results. The encoding is pinned because rendered prompts may contain
# non-ASCII text that the platform default encoding cannot represent.
with open(OUTPUT_FILE, "w", encoding="utf-8") as fout:
    fout.write("\n".join(results))

print("Tasks with no defined templates:")
print(has_no_templates)