New prompts for Shades FR/EN #742

Closed · wants to merge 7 commits
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
assets/PromptSource[[:space:]]ACL[[:space:]]Demo[[:space:]]Figure.png filter=lfs diff=lfs merge=lfs -text
3 changes: 2 additions & 1 deletion CONTRIBUTING.md
@@ -29,7 +29,7 @@ You can always update the name later. If you want to cancel the prompt, select
1. **Write the prompt**. In the box labeled "Template," enter a Jinja expression.
See the [getting started guide](#getting-started-using-jinja-to-write-prompts)
and [cookbook](#jinja-cookbook) for details on how to write templates.
1. **Fill in metadata**. Fill in the metadata for the current prompt: reference, original task, choices in templates, and answer choices.
1. **Fill in metadata**. Fill in the metadata for the current prompt: reference, original task, choices in templates, metrics, languages, and answer choices.
See [Metadata](#metadata) for more details about these fields.
1. **Save the prompt**. Hit the "Save" button. The output of the prompt
applied to the current example will appear in the right sidebar.
@@ -124,6 +124,7 @@ to generate a question for a given answer would not.
the options for the possible outputs (regardless of whether `answer_choices` is used).
* **Metrics.** Use the multiselect widget to select all metrics commonly used to evaluate
this task. Choose “Other” if there is one that is not included in the list.
* **Languages.** Use the multiselect widget to select all languages used in the prompt. This is independent of what languages are used in the underlying dataset. For example, you could have an English prompt for a Spanish dataset.
* **Answer Choices.** If the prompt has a small set of possible outputs (e.g., Yes/No,
class labels, entailment judgements, etc.), then the prompt should define and use answer
choices as follows. This allows evaluation to consider just the possible targets for
2 changes: 1 addition & 1 deletion README.md
@@ -16,7 +16,7 @@ PromptSource provides the tools to create, and share natural language prompts (s
Question: Does this imply that "{{hypothesis}}"? Yes, no, or maybe? ||| {{answer_choices[label]}}
```
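
For readers new to the syntax: the `|||` separates the prompt's input from its target, and the rest is plain Jinja. A rough standalone rendering with jinja2 directly, rather than PromptSource's own Template class (the field values below are made up for illustration):

```python
from jinja2 import Template

prompt = (
    'Question: Does this imply that "{{hypothesis}}"? '
    "Yes, no, or maybe? ||| {{answer_choices[label]}}"
)
rendered = Template(prompt).render(
    hypothesis="The cat sat on the mat.",
    answer_choices=["Yes", "Maybe", "No"],
    label=0,
)
# PromptSource splits on ||| for you; done by hand here for illustration.
input_text, target = (part.strip() for part in rendered.split("|||"))
print(input_text)  # Question: Does this imply that "The cat sat on the mat."? Yes, no, or maybe?
print(target)      # Yes
```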

**You can browse through existing prompts on the [hosted version of PromptSource](https://bigscience.huggingface.co/promptsource).**
**You can browse through existing prompts on the [hosted version of PromptSource](https://huggingface.co/spaces/bigscience/promptsource).**

## Setup
If you do not intend to modify prompts, you can simply run:
Binary file modified assets/PromptSource ACL Demo Figure.png
Binary file modified assets/promptsource_app.png
5 changes: 4 additions & 1 deletion promptsource/__init__.py
@@ -1 +1,4 @@
DEFAULT_PROMPTSOURCE_CACHE_HOME = "~/.cache/promptsource"
from pathlib import Path


DEFAULT_PROMPTSOURCE_CACHE_HOME = str(Path("~/.cache/promptsource").expanduser())
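
The change matters because `~` is not expanded automatically by filesystem calls; a quick illustration:

```python
from pathlib import Path

raw = "~/.cache/promptsource"
print(Path(raw).expanduser())  # e.g. /home/alice/.cache/promptsource
# os.makedirs(raw) would instead create a literal "~" directory under the
# current working directory.
```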
90 changes: 59 additions & 31 deletions promptsource/app.py
@@ -1,20 +1,23 @@
import argparse
import functools
import multiprocessing
import os
import textwrap
from hashlib import sha256
from multiprocessing import Manager, Pool

import pandas as pd
import plotly.express as px
import streamlit as st
from datasets import get_dataset_infos
from datasets.info import DatasetInfosDict
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import DjangoLexer
from templates import INCLUDED_USERS

from promptsource import DEFAULT_PROMPTSOURCE_CACHE_HOME
from promptsource.session import _get_state
from promptsource.templates import DatasetTemplates, Template, TemplateCollection
from promptsource.templates import INCLUDED_USERS, LANGUAGES, METRICS, DatasetTemplates, Template, TemplateCollection
from promptsource.utils import (
get_dataset,
get_dataset_confs,
@@ -25,6 +28,9 @@
)


DATASET_INFOS_CACHE_DIR = os.path.join(DEFAULT_PROMPTSOURCE_CACHE_HOME, "DATASET_INFOS")
os.makedirs(DATASET_INFOS_CACHE_DIR, exist_ok=True)

# Python 3.8 switched the default start method from fork to spawn. OS X also has
# some issues related to fork, see, e.g., https://github.com/bigscience-workshop/promptsource/issues/572
# so we make sure we always use spawn for consistency
@@ -38,7 +44,28 @@ def get_infos(all_infos, d_name):
:param all_infos: multiprocess-safe dictionary
:param d_name: dataset name
"""
all_infos[d_name] = get_dataset_infos(d_name)
d_name_bytes = d_name.encode("utf-8")
d_name_hash = sha256(d_name_bytes)
foldername = os.path.join(DATASET_INFOS_CACHE_DIR, d_name_hash.hexdigest())
if os.path.isdir(foldername):
infos_dict = DatasetInfosDict.from_directory(foldername)
else:
infos = get_dataset_infos(d_name)
infos_dict = DatasetInfosDict(infos)
os.makedirs(foldername)
infos_dict.write_to_directory(foldername)
all_infos[d_name] = infos_dict
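
For reference, a minimal sketch of the cache layout this function builds, assuming the paths and hashing scheme shown in the diff ("squad" is an arbitrary example dataset name):

```python
import os
from hashlib import sha256

# Mirrors DATASET_INFOS_CACHE_DIR above: one folder per dataset, named by
# the SHA-256 hex digest of the dataset name.
cache_dir = os.path.expanduser("~/.cache/promptsource/DATASET_INFOS")
folder = os.path.join(cache_dir, sha256("squad".encode("utf-8")).hexdigest())
print(folder)  # .../DATASET_INFOS/<64-char hex digest>
```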


def format_language(tag):
"""
Formats a language tag for display in the UI.

For example, if the tag is "en", then the function returns "en (English)"
:param tag: language tag
:return: formatted language name
"""
return tag + " (" + LANGUAGES[tag] + ")"


# add an argument for read-only
@@ -181,11 +208,13 @@ def show_text(t, width=WIDTH, with_markdown=False):
else:
subset_infos = infos[subset_name]

split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
try:
split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
except Exception:
# Fixing bug in some community datasets.
# For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
split_sizes = {}
else:
# Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json
# so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error
# For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
split_sizes = {}
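
To make the shape of `split_sizes` concrete, a small sketch with stand-in split objects (real entries come from `datasets` SplitInfo values; numbers are made up):

```python
from types import SimpleNamespace

# Stand-ins for datasets.SplitInfo; only num_examples matters here.
splits = {
    "train": SimpleNamespace(num_examples=87599),
    "validation": SimpleNamespace(num_examples=10570),
}
split_sizes = {k: v.num_examples for k, v in splits.items()}
print(split_sizes)  # {'train': 87599, 'validation': 10570}
```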

# Collect template counts, original task counts and names
@@ -284,13 +313,18 @@ def show_text(t, width=WIDTH, with_markdown=False):
except OSError as e:
st.error(
f"Some datasets are not handled automatically by `datasets` and require users to download the "
f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
f"dataset manually. It is possibly the case for {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
f"\n\nIf so, please download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment "
f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`"
f"\n\nOriginal error:\n{str(e)}"
)
st.stop()
except Exception as e:
st.error(
f"An error occured while loading the dataset {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
f"\\n\nOriginal error:\n{str(e)}"
)

splits = list(dataset.keys())
index = 0
@@ -403,6 +437,11 @@ def show_text(t, width=WIDTH, with_markdown=False):
st.text(template.metadata.choices_in_prompt)
st.markdown("##### Metrics")
st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None)
st.markdown("##### Prompt Languages")
if template.metadata.languages:
st.text(", ".join([format_language(tag) for tag in template.metadata.languages]))
else:
st.text(None)
st.markdown("##### Answer Choices")
if template.get_answer_choices_expr() is not None:
show_jinja(template.get_answer_choices_expr())
@@ -539,35 +578,24 @@ def show_text(t, width=WIDTH, with_markdown=False):
help="Prompt explicitly lists choices in the template for the output.",
)

# Metrics from here:
# https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
metrics_choices = [
"BLEU",
"ROUGE",
"Squad",
"Trivia QA",
"Accuracy",
"Pearson Correlation",
"Spearman Correlation",
"MultiRC",
"AUC",
"COQA F1",
"Edit Distance",
]
# Add mean reciprocal rank
metrics_choices.append("Mean Reciprocal Rank")
# Add generic other
metrics_choices.append("Other")
# Sort alphabetically
metrics_choices = sorted(metrics_choices)
state.metadata.metrics = st.multiselect(
"Metrics",
metrics_choices,
sorted(METRICS),
default=template.metadata.metrics,
help="Select all metrics that are commonly used (or should "
"be used if a new task) to evaluate this prompt.",
)

state.metadata.languages = st.multiselect(
"Prompt Languages",
sorted(LANGUAGES.keys()),
default=template.metadata.languages,
format_func=format_language,
help="Select all languages used in this prompt. "
"This annotation is independent from the language(s) "
"of the dataset.",
)
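
A self-contained sketch of how `format_func` behaves in the new multiselect (illustrative `LANGUAGES` subset; run with `streamlit run`):

```python
import streamlit as st

LANGUAGES = {"en": "English", "fr": "French"}  # illustrative subset

selected = st.multiselect(
    "Prompt Languages",
    sorted(LANGUAGES.keys()),
    format_func=lambda tag: f"{tag} ({LANGUAGES[tag]})",
)
# The widget displays "fr (French)", but `selected` holds the raw tags,
# e.g. ["fr"], which is the form stored in the template metadata.
```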

# Answer choices
if template.get_answer_choices_expr() is not None:
answer_choices = template.get_answer_choices_expr()