diff --git a/API_DOCUMENTATION.md b/API_DOCUMENTATION.md new file mode 100644 index 000000000..394b11bf8 --- /dev/null +++ b/API_DOCUMENTATION.md @@ -0,0 +1,40 @@ +# Manipulating prompts +PromptSource implements 4 classes to store, manipulate and use prompts and their metadata: `Template`, `Metadata`, `DatasetTemplates` and `TemplateCollection`. All of them are implemented in [`templates.py`](promptsource/templates.py). + +## Class `Template` and `Metadata` +`Template` is a class that wraps a prompt, its associated metadata, and implements the helper functions to use the prompt. + +Instances of `Template` have the following main methods that will come in handy: +* `apply(example, truncate=True, highlight_variables=False)`: Create a prompted example by applying the template to the given example + - `example` (Dict): the dataset example to create a prompt for + - `truncate` (Bool, defaults to `True`): if True, example fields will be truncated to `TEXT_VAR_LENGTH` chars + - `highlight_variables` (Bool, defaults to `False`): highlight the added variables (internal use for the app rendering) +* `get_id()`: Get the uuid of the prompt +* `get_name()`: Get the name of the prompt +* `get_reference()`: Get any additional information about the prompt (such as a bibliographic reference) +* `get_answer_choices_list(example)`: If applicable, returns a list of answer choices for a given example. + +Each `Template` also has a `metadata` attribute, an instance of the class `Metadata` that encapsulates the following 3 attributes: +* `original_task`: If True, this prompt asks a model to perform the original task designed for this dataset. +* `choices_in_prompt`: If True, the answer choices are included in the templates such that models see those choices in the input. Only applicable to classification tasks. +* `metrics`: List of strings denoting metrics to use for evaluation + +## Class `DatasetTemplates` +`DatasetTemplates` is a class that wraps all the prompts (each of which is an instance of `Template`) for a specific dataset/subset and implements all the helper functions necessary to read/write to the YAML file in which the prompts are saved. + +You will likely mainly be interested in getting the existing prompts and their names for a given dataset. You can do that with the following instantiation: +```python +>>> template_key = f"{dataset_name}/{subset_name}" if subset_name is not None else dataset_name +>>> prompts = DatasetTemplates(template_key) +>>> len(prompts) # Returns the number of prompts for the given dataset +>>> prompts.all_template_names # Returns a sorted list of all template names for this dataset +``` + +## Class `TemplateCollection` +`TemplateCollection` is a class that encapsulates all the prompts available under PromptSource by wrapping the `DatasetTemplates` class. It initializes the `DatasetTemplates` for all existing template folders, gives access to each `DatasetTemplates`, and provides aggregated counts over all `DatasetTemplates`. + +The main methods are: +* `get_dataset(dataset_name, subset_name)`: Return the `DatasetTemplates` object corresponding to the dataset name + - `dataset_name` (Str): name of the dataset to get + - `subset_name` (Str, defaults to `None`): name of the subset +* `get_templates_count()`: Return the overall prompt count over all datasets. NB: we don't break down datasets into subsets for the count, i.e., subset counts are included in the dataset count.
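+Putting these pieces together, here is a minimal usage sketch, assuming the `ag_news` prompts shipped with PromptSource (including the `classify_question_first` prompt shown in the README) are installed; exact outputs depend on your local prompt collection:
+```python
+>>> from datasets import load_dataset
+>>> from promptsource.templates import DatasetTemplates, TemplateCollection
+
+# Prompts for a single dataset
+>>> ag_news_prompts = DatasetTemplates("ag_news")
+>>> template = ag_news_prompts["classify_question_first"]
+
+# Metadata and helper methods of a `Template`
+>>> template.get_name(), template.get_id(), template.get_reference()
+>>> template.metadata.original_task, template.metadata.choices_in_prompt, template.metadata.metrics
+
+# Apply the template to a dataset example (a dict from the Hugging Face datasets library)
+>>> example = load_dataset("ag_news", split="train")[0]
+>>> result = template.apply(example)  # result[0] is the prompted input, result[1] is the target
+>>> template.get_answer_choices_list(example)  # answer choices for this example, if the prompt defines them
+
+# Aggregated view over all datasets
+>>> collection = TemplateCollection()
+>>> collection.get_templates_count()
+```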
diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..2157a3faf --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,118 @@ +cff-version: "0.2.0" +date-released: 2022-02 +message: "If you use this software, please cite it using these metadata." +title: "PromptSource" +url: "https://github.com/bigscience-workshop/promptsource" +authors: + - family-names: Bach + given-names: "Stephen H." + - family-names: Sanh + given-names: Victor + - family-names: Yong + given-names: Zheng-Xin + - family-names: Webson + given-names: Albert + - family-names: Raffel + given-names: Colin + - family-names: Nayak + given-names: "Nihal V." + - family-names: Sharma + given-names: Abheesht + - family-names: Kim + given-names: Taewoon + - family-names: Bari + given-names: "M Saiful" + - family-names: Fevry + given-names: Thibault + - family-names: Alyafeai + given-names: Zaid + - family-names: Dey + given-names: Manan + - family-names: Santilli + given-names: Andrea + - family-names: Sun + given-names: Zhiqing + - family-names: Ben-David + given-names: Srulik + - family-names: Xu + given-names: Canwen + - family-names: Chhablani + given-names: Gunjan + - family-names: Wang + given-names: Han + - family-names: Fries + given-names: "Jason Alan" + - family-names: Al-shaibani + given-names: "Maged S." + - family-names: Sharma + given-names: Shanya + - family-names: Thakker + given-names: Urmish + - family-names: Almubarak + given-names: Khalid + - family-names: Tang + given-names: Xiangru + - family-names: Tian-Jian + given-names: Mike + - family-names: Rush + given-names: "Alexander M." +preferred-citation: + type: article + authors: + - family-names: Bach + given-names: "Stephen H." + - family-names: Sanh + given-names: Victor + - family-names: Yong + given-names: Zheng-Xin + - family-names: Webson + given-names: Albert + - family-names: Raffel + given-names: Colin + - family-names: Nayak + given-names: "Nihal V." + - family-names: Sharma + given-names: Abheesht + - family-names: Kim + given-names: Taewoon + - family-names: Bari + given-names: "M Saiful" + - family-names: Fevry + given-names: Thibault + - family-names: Alyafeai + given-names: Zaid + - family-names: Dey + given-names: Manan + - family-names: Santilli + given-names: Andrea + - family-names: Sun + given-names: Zhiqing + - family-names: Ben-David + given-names: Srulik + - family-names: Xu + given-names: Canwen + - family-names: Chhablani + given-names: Gunjan + - family-names: Wang + given-names: Han + - family-names: Fries + given-names: "Jason Alan" + - family-names: Al-shaibani + given-names: "Maged S." + - family-names: Sharma + given-names: Shanya + - family-names: Thakker + given-names: Urmish + - family-names: Almubarak + given-names: Khalid + - family-names: Tang + given-names: Xiangru + - family-names: Tian-Jian + given-names: Mike + - family-names: Rush + given-names: "Alexander M." + title: "PromptSource: An Integrated Development Environment and Repository for Natural Language Prompts" + year: 2022 + publisher: "arXiv" + url: "https://arxiv.org/abs/2202.01279" + address: "Online" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e11e719e2..6285fb61b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,10 +1,10 @@ # Contributing -One of the best ways to contribute is by writing prompts! +The best way to contribute to growing P3 is by writing prompts for new datasets! ### What are Prompts? 
-A prompt consists of a template(input template and target template, along with collection of associated metadata. A template is a piece of code written in a templating language called +A prompt consists of a template: an input template and a target template, along with a collection of associated metadata. A template is a piece of code written in a templating language called [Jinja](https://jinja.palletsprojects.com/en/3.0.x/). A template defines a function that maps an example from a dataset in the [Hugging Face datasets library](https://huggingface.co/datasets) to two strings of @@ -17,7 +17,7 @@ prompt. 1. **Set up the app.** Fork the app and set up using the [README](https://github.com/bigscience-workshop/promptsource/blob/main/README.md). -1. **Examine the dataset.** Select or type the dataset into the dropdown in the app. +1. **Examine the dataset.** In the "Sourcing" mode, select or type the dataset into the dropdown. If the dataset has subsets (subsets are not the same as splits), you can select which one to work on. Note that prompts are subset-specific. You can find out background information on the dataset by reading the information in the @@ -29,15 +29,17 @@ You can always update the name later. If you want to cancel the prompt, select 1. **Write the prompt**. In the box labeled "Template," enter a Jinja expression. See the [getting started guide](#getting-started-using-jinja-to-write-prompts) and [cookbook](#jinja-cookbook) for details on how to write templates. +1. **Fill in metadata**. Fill in the metadata for the current prompt: reference, original task, choices in templates, and answer choices. +See [Metadata](#metadata) for more details about these fields. 1. **Save the prompt**. Hit the "Save" button. The output of the prompt applied to the current example will appear in the right sidebar. 1. **Verify the prompt**. Check that you didn't miss any case by scrolling through a handful of examples of the prompted dataset using the "Prompted dataset viewer" mode. -1. **Write between 5 and 10 prompts**. Repeat the steps 4 to 8 to create between 5 +1. **Write between 5 and 10 prompts**. Repeat steps 4 to 9 to create between 5 and 10 (more if you want!) prompts per dataset/subset. Feel free to introduce a mix of formats, some that follow the templates listed in the [best practices](#best-practices) -and some that are more diverse in the format and the formulation. +and some that are more diverse in the format and the formulation. 1. **Duplicate the prompts(s).** If the dataset you have chosen bear the same format as other datasets (for instance, `MNLI` and `SNLI` have identical formats), you can simply duplicate the prompts you have written to these additional datasets. @@ -108,8 +110,9 @@ it has the answer. Can you tell me the answer? {{answers["text"][0]}}' ``` -## Options -In addition to the template itself, you can fill out several other fields in the app. +## Metadata +In addition to the template itself, you need to fill out several other fields. +These metadata facilitate finding and using the prompts. * **Prompt Reference.** If your template was inspired by a paper, note the reference in the "Prompt Reference" section. You can also add a description of what your template does. @@ -166,8 +169,7 @@ introduce some diversity by prompting a given dataset into multiple tasks and pr description in the "Template Reference" text box. An example is given in the already prompted `movie_rationales`. 
* **Filtering prompts.** If a prompt is applied to an example and produces an -empty string, that prompt/example pair will be skipped. (Either the entire target -is whitespace or the text on either side of the separator `|||` is whitespace. +empty string, that prompt/example pair will be skipped. You can therefore create prompts that only apply to a subset of the examples by wrapping them in Jinja if statements. For example, in the `TREC` dataset, there are fine-grained categories that are only applicable to certain coarse-grained categories. @@ -180,6 +182,17 @@ Is this question asking for a {{"definition"}}, a {{"description"}}, a {{"manner {{ {0: "Manner", 7: "Defintion", 9: "Reason", 12: "Description"}[label_fine] }} {% endif %} ``` +For datasets that have splits with no labels (for instance, a test split without ground-truth labels), you can wrap the target side in a conditional statement. +For instance, for `super_glue/boolq`, the following prompt would return an empty target on the test split, but not an empty prompted example: +```jinja2 +{{ passage }} +Question: {{ question }} +Answer: +||| +{% if label != -1 %} +{{ answer_choices[label] }} +{% endif %} +``` * **Conditional generation format.** Always specify the target and separate it from the prompt by indicating the vertical bars `|||`. The target will be generated by a generative model conditioned on the input you wrote. You can always transform an "infix" prompt format @@ -226,15 +239,15 @@ First, {{ ctx_a.lower() }} Then, {{ ctx_b.lower() }}... Complete the above description with a chosen ending: -Ending 1: {{ endings[0] }} +(a) {{ answer_choices[0] }} -Ending 2: {{ endings[1] }} +(b) {{ answer_choices[1] }} -Ending 3: {{ endings[2] }} +(c) {{ answer_choices[2] }} -Ending 4: {{ endings[3] }} +(d) {{ answer_choices[3] }} -||| {{ {"0": "Ending 1", "1": "Ending 2", "2": "Ending 3", "3": "Ending 4"}[label] }} +||| {{ answer_choices[label | int()] }} ``` Notice how it uses functions to consistently capitalize the information and provides lots of context (referring explicitly to "description" and "chosen ending.") @@ -251,26 +264,17 @@ Which one is the most appropriate answer/completion for the paragraph that follo {%- endfor %} ``` Like above, it uses functions to present the choices in a readable way. Also, it -uses a for loop with conditions to handle the more intricate dataset schema. +uses a for loop with conditions to handle the more intricate dataset schema. Here's one for `paws`: ```jinja2 -{% if label == 0 or label == 1 %} Sentence 1: {{sentence1}} Sentence 2: {{sentence2}} Question: Does Sentence 1 paraphrase Sentence 2? Yes or No? -{% endif %} -||| -{% if label == 0 %} -No -{% elif label == 1 %} -Yes -{% endif %} - +||| +{{answer_choices[label]}} ``` -This template has to do a few things, even though it's a yes no question. First, -the label might be unknown, so the pieces are wrapped in if statements. -Second, notice that the choices `Yes or No` are not escaped. Yes/no, true/false +Notice that the choices `Yes or No` are not escaped. Yes/no, true/false are choices that do not need to be escaped (unlike categories). ## Uploading Prompts @@ -307,7 +311,7 @@ do_something_else ```jinja {% for a, b in zip(list_A, list_B) %} do_something_with_a_and_b -{% endfor %} +{% endfor %} ``` diff --git a/README.md b/README.md index 13ae843ea..6e77d12ce 100644 --- a/README.md +++ b/README.md @@ -1,119 +1,140 @@ # PromptSource -Promptsource is a toolkit for collecting and applying prompts to NLP datasets. 
+**PromptSource is a toolkit for creating, sharing and using natural language prompts.** -Promptsource uses a simple templating language to programatically map an example of a dataset into a text input and a text target. +Recent work has shown that large language models exhibit the ability to perform reasonable zero-shot generalization to new tasks. For instance, [GPT-3](https://arxiv.org/abs/2005.14165) demonstrated that large language models have strong zero- and few-shot abilities. [FLAN](https://arxiv.org/abs/2109.01652) and [T0](https://arxiv.org/abs/2110.08207) then demonstrated that pre-trained language models fine-tuned in a massively multitask fashion yield even stronger zero-shot performance. A common denominator in these works is the use of prompts, which have gathered interest among NLP researchers and engineers. This emphasizes the need for new tools to create, share and use natural language prompts. -Promptsource contains a growing collection of prompts (which we call **P3**: **P**ublic **P**ool of **P**rompts). As of October 18th, there are ~2'000 prompts for 170+ datasets in [P3](https://huggingface.co/datasets/bigscience/P3). -Feel free to use these prompts as they are (you'll find citation details [here](#Citation)). +Prompts are functions that map an example from a dataset to a natural language input and target output. PromptSource contains a growing collection of prompts (which we call **P3**: **P**ublic **P**ool of **P**rompts). As of January 20, 2022, there are ~2'000 English prompts for 170+ English datasets in [P3](https://huggingface.co/datasets/bigscience/P3). -Note that a subset of the prompts are still *Work in Progress*. You'll find the list of the prompts which will potentially be modified in the near future [here](WIP.md). Modifications will in majority consist of metadata collection, but in some cases, will impact the templates themselves. To facilitate traceability, Promptsource is currently pinned at version `0.1.0`. +
-Propmtsource and P3 were originally developed as part of the paper [Multitask Prompted Training Enables Zero-Shot Task Generalization](https://arxiv.org/abs/2110.08207). We release T0* (pronounce "T Zero"), a series of model trained on [P3](https://huggingface.co/datasets/bigscience/P3). Checkpoints are available [here](https://huggingface.co/bigscience/T0pp). In particular, we recommend using T0++ (pronounce "T Zero Plus Plus") as it leads (on average) to the best performances on a variety of NLP tasks. +PromptSource provides the tools to create and share natural language prompts (see [How to create prompts](#how-to-create-prompts)), and then to use the thousands of existing and newly created prompts through a simple API (see [How to use prompts](#how-to-use-prompts)). Prompts are saved in standalone structured files and are written in a simple templating language called Jinja. An example of a prompt available in PromptSource for [SNLI](https://huggingface.co/datasets/snli) is: +```jinja2 +{{premise}} -**You will find the official repository to reproduce the results of T Zero here: https://github.com/bigscience-workshop/t-zero.** +Question: Does this imply that "{{hypothesis}}"? Yes, no, or maybe? ||| {{answer_choices[label]}} +``` + +**You can browse through existing prompts on the [hosted version of PromptSource](https://bigscience.huggingface.co/promptsource).** ## Setup 1. Download the repo -2. Navigate to root directory of the repo -3. Install requirements with `pip install -r requirements.txt` in a Python 3.7 environment -4. Run `pip install -e .` to install the `promptsource` module -## Running -You can browse through existing prompts on the [hosted version of Promptsource](https://bigscience.huggingface.co/promptsource). +1. Navigate to the root directory of the repo +1. Run `pip install -e .` to install the `promptsource` module -If you want to launch a local version (in particular to write propmts, from the root directory of the repo, launch the editor with: +*Note: for stability reasons, you will currently need a Python 3.7 environment to run the last step. However, if you only intend to use the prompts, and not create new prompts through the interface, you can remove this constraint in the [`setup.py`](setup.py).* +If you do not intend to modify prompts, you can simply run: +```bash +pip install promptsource ``` - -streamlit run promptsource/app.py + +## How to use prompts +You can apply prompts to examples from datasets of the [Hugging Face Datasets library](https://github.com/huggingface/datasets). +```python +# Load an example from the ag_news dataset +>>> from datasets import load_dataset +>>> dataset = load_dataset("ag_news", split="train") +>>> example = dataset[1] + +# Load prompts for this dataset +>>> from promptsource.templates import DatasetTemplates +>>> ag_news_prompts = DatasetTemplates('ag_news') + +# Print all the prompts available for this dataset. 
The keys of the dict are the uuids that uniquely identify each of the prompts, and the values are instances of `Template` which wrap the prompts +>>> print(ag_news_prompts.templates) +{'24e44a81-a18a-42dd-a71c-5b31b2d2cb39': <promptsource.templates.Template object at 0x...>, '8fdc1056-1029-41a1-9c67-354fc2b8ceaf': <promptsource.templates.Template object at 0x...>, '918267e0-af68-4117-892d-2dbe66a58ce9': <promptsource.templates.Template object at 0x...>, '9345df33-4f23-4944-a33c-eef94e626862': <promptsource.templates.Template object at 0x...>, '98534347-fff7-4c39-a795-4e69a44791f7': <promptsource.templates.Template object at 0x...>, 'b401b0ee-6ffe-4a91-8e15-77ee073cd858': <promptsource.templates.Template object at 0x...>, 'cb355f33-7e8c-4455-a72b-48d315bd4f60': <promptsource.templates.Template object at 0x...>} + +# Select a prompt by its name +>>> prompt = ag_news_prompts["classify_question_first"] + +# Apply the prompt to the example +>>> result = prompt.apply(example) +>>> print("INPUT: ", result[0]) +INPUT: What label best describes this news article? +Carlyle Looks Toward Commercial Aerospace (Reuters) Reuters - Private investment firm Carlyle Group,\which has a reputation for making well-timed and occasionally\controversial plays in the defense industry, has quietly placed\its bets on another part of the market. +>>> print("TARGET: ", result[1]) +TARGET: Business ``` -There are 3 modes in the app: -- **Helicopter view**: aggregate high level metrics on the current state of the sourcing -- **Prompted dataset viewer**: check the templates you wrote or already written on entire dataset -- **Sourcing**: write new prompts - +If you are looking for the prompts available for a particular subset of a dataset, you should use the following syntax: +```python +dataset_name, subset_name = "super_glue", "rte" - +dataset = load_dataset(dataset_name, subset_name, split="train") +example = dataset[0] -## Running (read-only) -To host a public streamlit app, launch it with -```bash -streamlit run promptsource/app.py -- -r +prompts = DatasetTemplates(f"{dataset_name}/{subset_name}") ``` -## Prompting an Example: -You can use Promptsource with [Datasets](https://huggingface.co/docs/datasets/) to create -prompted examples: +You can also collect all the available prompts for their associated datasets: + ```python -# Get an example -from datasets import load_dataset -dataset = load_dataset("ag_news") -example = dataset["train"][0] - -# Prompt it -from promptsource.templates import DatasetTemplates -# Get all the AG News prompts -ag_news_prompts = DatasetTemplates('ag_news') -# Select a prompt by name -prompt = ag_news_prompts["classify_question_first"] -# Apply the prompt on the example -result = prompt.apply(example) -print("INPUT: ", result[0]) -print("TARGET: ", result[1]) +>>> from promptsource.templates import TemplateCollection + +# Get all the prompts available in PromptSource +>>> collection = TemplateCollection() + +# Print a dict where the key is the pair (dataset name, subset name) +# and the value is an instance of DatasetTemplates +>>> print(collection.datasets_templates) +{('poem_sentiment', None): <promptsource.templates.DatasetTemplates object at 0x...>, ('common_gen', None): <promptsource.templates.DatasetTemplates object at 0x...>, ('anli', None): <promptsource.templates.DatasetTemplates object at 0x...>, ('cc_news', None): <promptsource.templates.DatasetTemplates object at 0x...>, ('craigslist_bargains', None): <promptsource.templates.DatasetTemplates object at 0x...>,...} ``` -You can collect all the available datasets and their associated prompts +You can learn more about PromptSource's API to store, manipulate and use prompts in the [documentation](API_DOCUMENTATION.md). -```python -from promptsource.templates import TemplateCollection +## How to create prompts +PromptSource provides a Web-based GUI that enables developers to write prompts in a templating language and immediately view their outputs on different examples. 
-# Get all the prompts -collection = TemplateCollection() +There are 3 modes in the app: +- **Sourcing**: create and write new prompts +- **Prompted dataset viewer**: check the prompts you wrote (or the existing ones) on the entire dataset +- **Helicopter view**: aggregate high-level metrics on the current state of P3 -# Return a dict where the key is the pair (dataset name, subset name) -# and the value is an instance of DatasetTemplates -print(collection.datasets_templates) +
+ +To launch the app locally, please first make sure you have followed the steps in [Setup](#setup), and from the root directory of the repo, run: +```bash +streamlit run promptsource/app.py ``` -## Running datasets that need manual download +You can also browse through existing prompts on the [hosted version of PromptSource](https://bigscience.huggingface.co/promptsource). Note the hosted version disables the Sourcing mode (`streamlit run promptsource/app.py -- --read-only`). -Some datasets are not handled automatically by `datasets` and require users to download the dataset manually. +### Writing prompts +Before creating new prompts, you should read the [contribution guidelines](CONTRIBUTING.md), which give a step-by-step description of how to contribute to the collection of prompts. -In order to handle those datasets as well, we require users to download the dataset and put it in `~/.cache/promptsource`. This is the root directory containing all manually downloaded datasets. +### Datasets that require manual downloads +Some datasets are not handled automatically by `datasets` and require users to download the dataset manually (`story_cloze`, for instance). -You can override this default path using `PROMPTSOURCE_MANUAL_DATASET_DIR` environment variable. This should point to the root directory. +To handle those datasets as well, we require users to download the dataset and put it in `~/.cache/promptsource`. This is the root directory containing all manually downloaded datasets. -## Contributing -Contribution guidelines and step-by-step *HOW TO* are described [here](CONTRIBUTING.md). +You can override this default path using the `PROMPTSOURCE_MANUAL_DATASET_DIR` environment variable. This should point to the root directory. -## Writing Prompts -A prompt is expressed in [Jinja](https://jinja.palletsprojects.com/en/3.0.x/). +## Development structure +PromptSource and P3 were originally developed as part of the [BigScience project for open research 🌸](https://bigscience.huggingface.co/), a year-long initiative targeting the study of large models and datasets. The goal of the project is to research language models in a public environment outside large technology companies. The project has 600 researchers from 50 countries and more than 250 institutions. -It is rendered using an example from the corresponding Hugging Face datasets library -(a dictionary). The separator ||| should appear once to divide the template into input -and target. Generally, the prompt should provide information on the desired behavior, -e.g., text passage and instructions, and the output should be a desired response. +In particular, PromptSource and P3 were the first steps for the paper [Multitask Prompted Training Enables Zero-Shot Task Generalization](https://arxiv.org/abs/2110.08207). -For more information, read the [Contribution guidelines](CONTRIBUTING.md). +**You will find the official repository to reproduce the results of the paper here: https://github.com/bigscience-workshop/t-zero.** We also released T0* (pronounce "T Zero"), a series of models trained on [P3](https://huggingface.co/datasets/bigscience/P3) and presented in the paper. Checkpoints are available [here](https://huggingface.co/bigscience/T0pp). ## Known Issues - **Warning or Error about Darwin on OS X:** Try downgrading PyArrow to 3.0.0. **ConnectionRefusedError: [Errno 61] Connection refused:** Happens occasionally. Try restarting the app. 
-## Development structure - -Promptsource was developed as part of the [BigScience project for open research 🌸](https://bigscience.huggingface.co/), a year-long initiative targeting the study of large models and datasets. The goal of the project is to research language models in a public environment outside large technology companies. The project has 600 researchers from 50 countries and more than 250 institutions. - ## Citation - -If you want to cite this P3 or Promptsource, you can use this bibtex: +If you find P3 or PromptSource useful, please cite the following reference: ```bibtex -@misc{sanh2021multitask, - title={Multitask Prompted Training Enables Zero-Shot Task Generalization}, - author={Victor Sanh and Albert Webson and Colin Raffel and Stephen H. Bach and Lintang Sutawika and Zaid Alyafeai and Antoine Chaffin and Arnaud Stiegler and Teven Le Scao and Arun Raja and Manan Dey and M Saiful Bari and Canwen Xu and Urmish Thakker and Shanya Sharma Sharma and Eliza Szczechla and Taewoon Kim and Gunjan Chhablani and Nihal Nayak and Debajyoti Datta and Jonathan Chang and Mike Tian-Jian Jiang and Han Wang and Matteo Manica and Sheng Shen and Zheng Xin Yong and Harshit Pandey and Rachel Bawden and Thomas Wang and Trishala Neeraj and Jos Rozen and Abheesht Sharma and Andrea Santilli and Thibault Fevry and Jason Alan Fries and Ryan Teehan and Stella Biderman and Leo Gao and Tali Bers and Thomas Wolf and Alexander M. Rush}, - year={2021}, - eprint={2110.08207}, +@misc{bach2022promptsource, + title={PromptSource: An Integrated Development Environment and Repository for Natural Language Prompts}, + author={Stephen H. Bach and Victor Sanh and Zheng-Xin Yong and Albert Webson and Colin Raffel and Nihal V. Nayak and Abheesht Sharma and Taewoon Kim and M Saiful Bari and Thibault Fevry and Zaid Alyafeai and Manan Dey and Andrea Santilli and Zhiqing Sun and Srulik Ben-David and Canwen Xu and Gunjan Chhablani and Han Wang and Jason Alan Fries and Maged S. Al-shaibani and Shanya Sharma and Urmish Thakker and Khalid Almubarak and Xiangru Tang and Mike Tian-Jian Jiang and Alexander M. Rush}, + year={2022}, + eprint={2202.01279}, archivePrefix={arXiv}, primaryClass={cs.LG} } ``` +#TODO: Update citation once we have the paper uploaded on arxiv + create a citation.cff file diff --git a/WIP.md b/WIP.md deleted file mode 100644 index 0a76a0048..000000000 --- a/WIP.md +++ /dev/null @@ -1,286 +0,0 @@ -# Which prompts are finalized? - -A subset of the prompts in P3 are still *Work in Progress*. For information, we provide the lists of the datasets for which prompts have been finalized and datasets for which prompts are suceptible to be modified in the near future. Modifications will in majority consist of metadata collection, but in some cases, will impact the templates themselves. - -To facilitate traceability, Promptsource is currently pinned at version `0.1.0`. 
- -# Finalized datasets - -|Dataset|Subset (optional)| -|-|-| -|adversarial_qa|dbert| -|adversarial_qa|dbidaf| -|adversarial_qa|droberta| -|adversarial_qa|adversarialQA| -|ag_news|| -|ai2_arc|ARC-Challenge| -|ai2_arc|ARC-Easy| -|amazon_polarity|| -|anli|| -|app_reviews|| -|circa|| -|cnn_dailymail|3.0.0| -|common_gen|| -|coqa|| -|cos_e|v1.11| -|cos_e|v1.0| -|cosmos_qa|| -|crows_pairs|| -|craffel/openai_lambada|| -|dbpedia_14|| -|dream|| -|drop|| -|duorc|ParaphraseRC| -|duorc|SelfRC| -|emo|| -|gigaword|| -|glue|cola| -|glue|mrpc| -|glue|qqp| -|glue|sst2| -|glue|stsb| -|hans|| -|hellaswag|| -|imdb|| -|jeopardy|| -|jigsaw_toxicity_pred|| -|kilt_tasks|nq| -|lambada|| -|mc_taco|| -|multi_news|| -|nq_open|| -|openbookqa|main| -|openbookqa|additional| -|paws|labeled_final| -|paws|labeled_swap| -|paws|unlabeled_final| -|paws-x|en| -|piqa|| -|qa_srl|| -|qasc|| -|quac|| -|quail|| -|quarel|| -|quartz|| -|quoref|| -|race|high| -|race|middle| -|race|all| -|ropes|| -|rotten_tomatoes|| -|samsum|| -|sciq|| -|scitail|snli_format| -|scitail|tsv_format| -|social_i_qa|| -|squad_v2|| -|super_glue|wsc.fixed| -|super_glue|boolq| -|super_glue|cb| -|super_glue|copa| -|super_glue|multirc| -|super_glue|record| -|super_glue|rte| -|super_glue|wic| -|swag|regular| -|trec|| -|trivia_qa|rc| -|tydiqa|| -|web_questions|| -|wiki_bio|| -|wiki_hop|original| -|wiki_qa|| -|winobias|*| -|winogender|| -|winogrande|winogrande_debiased| -|winogrande|winogrande_l| -|winogrande|winogrande_m| -|winogrande|winogrande_s| -|winogrande|winogrande_xl| -|winogrande|winogrande_xs| -|wiqa|| -|xsum|| -|yelp_review_full|| -|Zaid/coqa_expanded|| -|Zaid/quac_expanded|| - -# Work in Progress datasets - -|Dataset|Subset (optional)| -|-|-| -|acronym_identification|| -|ade_corpus_v2|Ade_corpus_v2_classification| -|ade_corpus_v2|Ade_corpus_v2_drug_ade_relation| -|ade_corpus_v2|Ade_corpus_v2_drug_dosage_relation| -|aeslc|| -|amazon_reviews_multi|en| -|amazon_us_reviews|Wireless_v1_00| -|ambig_qa|light| -|aqua_rat|raw| -|art|| -|asnq|| -|asset|ratings| -|asset|simplification| -|banking77|| -|billsum|| -|bing_coronavirus_query_set|| -|blended_skill_talk|| -|boolq|| -|cbt|CN| -|cbt|NE| -|cbt|P| -|cbt|raw| -|cbt|V| -|cc_news|| -|climate_fever|| -|codah|codah| -|codah|fold_0| -|codah|fold_1| -|codah|fold_2| -|codah|fold_3| -|codah|fold_4| -|commonsense_qa|| -|conv_ai|| -|conv_ai_2|| -|conv_ai_3|| -|coqa|| -|cord19|metadata| -|covid_qa_castorini|| -|craigslist_bargains|| -|discofuse|discofuse-sport| -|discofuse|discofuse-wikipedia| -|discovery|discovery| -|docred|| -|e2e_nlg_cleaned|| -|ecthr_cases|alleged-violation-prediction| -|emotion|| -|esnli|| -|evidence_infer_treatment|1.1| -|evidence_infer_treatment|2| -|fever|v1.0| -|fever|v2.0| -|financial_phrasebank|sentences_allagree| -|freebase_qa|| -|generated_reviews_enth|| -|glue|ax| -|glue|mnli| -|glue|mnli_matched| -|glue|mnli_mismatched| -|glue|qnli| -|glue|rte| -|glue|wnli| -|google_wellformed_query|| -|guardian_authorship|cross_genre_1| -|guardian_authorship|cross_topic_1| -|guardian_authorship|cross_topic_4| -|guardian_authorship|cross_topic_7| -|gutenberg_time|| -|head_qa|en| -|health_fact|| -|hlgd|| -|hotpot_qa|distractor| -|hotpot_qa|fullwiki| -|humicroedit|subtask-1| -|humicroedit|subtask-2| -|hyperpartisan_news_detection|byarticle| -|hyperpartisan_news_detection|bypublisher| -|jfleg|| -|kelm|| -|liar|| -|limit|| -|math_dataset|algebra__linear_1d| -|math_dataset|algebra__linear_1d_composed| -|math_dataset|algebra__linear_2d| -|math_dataset|algebra__linear_2d_composed| -|math_qa|| -|mdd|task1_qa| 
-|mdd|task2_recs| -|mdd|task3_qarecs| -|medical_questions_pairs|| -|meta_woz|dialogues| -|mocha|| -|movie_rationales|| -|multi_nli|| -|multi_nli_mismatch|| -|multi_x_science_sum|| -|mwsc|| -|narrativeqa|| -|ncbi_disease|| -|neural_code_search|evaluation_dataset| -|newspop|| -|nlu_evaluation_data|| -|numer_sense|| -|onestop_english|| -|poem_sentiment|| -|pubmed_qa|pqa_labeled| -|qa_zre|| -|qed|| -|quora|| -|samsum|| -|scan|addprim_jump| -|scan|addprim_turn_left| -|scan|filler_num0| -|scan|filler_num1| -|scan|filler_num2| -|scan|filler_num3| -|scan|length| -|scan|simple| -|scan|template_around_right| -|scan|template_jump_around_right| -|scan|template_opposite_right| -|scan|template_right| -|scicite|| -|scientific_papers|arxiv| -|scientific_papers|pubmed| -|scitldr|Abstract| -|selqa|answer_selection_analysis| -|sem_eval_2010_task_8|| -|sem_eval_2014_task_1|| -|sent_comp|| -|sick|| -|sms_spam|| -|snips_built_in_intents|| -|snli|| -|species_800|| -|spider|| -|squad|| -|squad_adversarial|AddSent| -|squadshifts|amazon| -|squadshifts|new_wiki| -|squadshifts|nyt| -|sst|default| -|stsb_multi_mt|en| -|subjqa|books| -|subjqa|electronics| -|subjqa|grocery| -|subjqa|movies| -|subjqa|restaurants| -|subjqa|tripadvisor| -|tab_fact|tab_fact| -|tmu_gfm_dataset|| -|turk|| -|tweet_eval|emoji| -|tweet_eval|emotion| -|tweet_eval|hate| -|tweet_eval|irony| -|tweet_eval|offensive| -|tweet_eval|sentiment| -|tweet_eval|stance_abortion| -|tweet_eval|stance_atheism| -|tweet_eval|stance_climate| -|tweet_eval|stance_feminist| -|tweet_eval|stance_hillary| -|tydiqa|primary_task| -|tydiqa|secondary_task| -|wiki_hop|masked| -|wiki_qa|| -|wiki_split|| -|winograd_wsc|wsc273| -|winograd_wsc|wsc285| -|xnli|en| -|xquad|xquad.en| -|xquad_r|en| -|yahoo_answers_qa|| -|yahoo_answers_topics|| -|yelp_polarity|| -|zest|| \ No newline at end of file diff --git a/assets/PromptSource ACL Demo Figure.png b/assets/PromptSource ACL Demo Figure.png new file mode 100644 index 000000000..f9309d276 Binary files /dev/null and b/assets/PromptSource ACL Demo Figure.png differ diff --git a/promptsource/templates.py b/promptsource/templates.py index 52425f266..33d63bfd4 100644 --- a/promptsource/templates.py +++ b/promptsource/templates.py @@ -8,7 +8,7 @@ import pandas as pd import pkg_resources import yaml -from jinja2 import BaseLoader, Environment, meta +from jinja2 import BaseLoader, Environment # Truncation of jinja template variables @@ -134,25 +134,6 @@ def get_answer_choices_list(self, example): rendered_choices = rtemplate.render(**protected_example) return [self._unescape_pipe(answer_choice.strip()) for answer_choice in rendered_choices.split("|||")] - def get_fixed_answer_choices_list(self): - """ - Returns a list of answer choices that is static across examples, if possible - - :return: list of strings, or None if no static list exists - """ - jinja = self.get_answer_choices_expr() - if jinja is None: - return None - - parse = env.parse(jinja) - variables = meta.find_undeclared_variables(parse) - if len(variables) == 0: - rtemplate = env.from_string(jinja) - rendered_choices = rtemplate.render() - return [answer_choice.strip() for answer_choice in rendered_choices.split("|||")] - else: - return None - def apply(self, example, truncate=True, highlight_variables=False): """ Creates a prompt by applying this template to an example