Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Commit

Permalink
unify turn annotation tasks (model chat and turn annotations static t…
Browse files Browse the repository at this point in the history
…asks) (#4162)

* annotation_buckets param to annotations_config_path to match mc

* unify the annotation buckets config file format in model chat and turn annotations static task

* remove task_description flag (not used)

* chat_title->task_title

* move task description to html file

* sample annotation buckets match previous ones for model chat task

* update tests

* add task config files

* PR fixes

* model chat task only supports checkboxes right now
  • Loading branch information
meganung committed Nov 24, 2021
1 parent 1fa8f7a commit 81494bb
Show file tree
Hide file tree
Showing 17 changed files with 85 additions and 72 deletions.
Expand Up @@ -28,11 +28,12 @@ function Checkboxes({
if (!askReason) {
reasonComponent = '';
}
let input_type = annotationBuckets.type !== undefined ? annotationBuckets.type : "checkbox";
// TODO: add support for radio input type
let input_type = "checkbox";
return (
<div key={'checkboxes_' + turnIdx}>
{
Object.keys(annotationBuckets).map(c => (
Object.keys(annotationBuckets.config).map(c => (
<span key={'span_' + c + '_' + turnIdx}>
<input
type={input_type}
Expand All @@ -41,13 +42,13 @@ function Checkboxes({
onChange={(evt) => {
let newVal = evt.target.checked;
let oldAnnotations = Object.assign({}, annotations);
oldAnnotations[annotationBuckets[c].value] = newVal;
oldAnnotations[c] = newVal;
onUpdateAnnotations(oldAnnotations);
}}
disabled={!enabled}
/>
<span style={{ marginRight: '15px' }}>
{annotationBuckets[c].name}
{annotationBuckets.config[c].name}
</span>
</span>
))
Expand Down
Expand Up @@ -44,7 +44,7 @@ function RenderChatMessage({ message, mephistoContext, appContext, idx }) {
let thisBoxAnnotations = checkboxValues[idx];
if (!thisBoxAnnotations) {
thisBoxAnnotations = Object.fromEntries(
annotationBuckets.map(bucket => [bucket.value, false])
Object.keys(annotationBuckets.config).map(bucket => [bucket, false])
)
}
checkboxes = <div style={{"fontStyle": "italic"}}>
Expand Down
Expand Up @@ -127,7 +127,7 @@ function OnboardingComponent({ onboardingData, annotationBuckets, annotationQues
} else {
const [currentTurnAnnotations, setCurrentAnnotations] = React.useState(
Array.from(Array(onboardingData.dialog.length), () => Object.fromEntries(
annotationBuckets.map(bucket => [bucket.value, false]))
Object.keys(annotationBuckets.config).map(bucket => [bucket, false]))
)
);
return (
Expand Down
2 changes: 1 addition & 1 deletion parlai/crowdsourcing/tasks/model_chat/frontend/main.js
Expand Up @@ -30,7 +30,7 @@ function MainApp() {
)}
renderSidePane={({ mephistoContext: { taskConfig }, appContext: { taskContext } }) => (
<DefaultTaskDescription
chatTitle={taskConfig.chat_title}
chatTitle={taskConfig.task_title}
taskDescriptionHtml={taskConfig.left_pane_text}
>
{(taskContext.hasOwnProperty('image_src') && taskContext['image_src']) ? (
Expand Down
Expand Up @@ -22,13 +22,6 @@ mephisto:
assignment_duration_in_seconds: 600
max_num_concurrent_units: 0 # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes
maximum_units_per_worker: 3
task_description: |
<b><h4>Task Description</h4></b>
<br>
Dummy Task Description.
Lorem ipsum.
<br><br>
task_name: model_chat
task_reward: 3
task_tags: "chat,conversation,dialog,partner"
Expand Down
Expand Up @@ -18,13 +18,6 @@ mephisto:
assignment_duration_in_seconds: 600
max_num_concurrent_units: 0 # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes
maximum_units_per_worker: 3
task_description: |
<b><h4>Task Description</h4></b>
<br>
Dummy Task Description.
Lorem ipsum.
<br><br>
task_name: model_image_chat
task_reward: 3
task_tags: "chat,conversation,dialog,partner,image"
Expand Down
@@ -1,32 +1,28 @@
[
{
"value": "bucket_0",
"name": "Bucket 0",
"description": "this response implies something...0"
},
{
"value": "bucket_1",
"name": "Bucket 1",
"description": "this response implies something...1"
},
{
"value": "bucket_2",
"name": "Bucket 2",
"description": "this response implies something...2"
},
{
"value": "bucket_3",
"name": "Bucket 3",
"description": "this response implies something...3"
},
{
"value": "bucket_4",
"name": "Bucket 4",
"description": "this response implies something...4"
},
{
"value": "none_all_good",
"name": "None, all good",
"description": "This response implies that there are no problems with the data"
{
"config": {
"bucket_0": {
"name": "Bucket 0",
"description": "this response implies something...0"
},
"bucket_1": {
"name": "Bucket 1",
"description": "this response implies something...1"
},
"bucket_2": {
"name": "Bucket 2",
"description": "this response implies something...2"
},
"bucket_3": {
"name": "Bucket 3",
"description": "this response implies something...3"
},
"bucket_4": {
"name": "Bucket 4",
"description": "this response implies something...4"
},
"none_all_good": {
"name": "None, all good",
"description": "This response implies that there are no problems with the data"
}
}
]
}
Expand Up @@ -11,7 +11,7 @@ Two variants of the blueprint are supported:

For both variants of the blueprint, it is required to pass in your own file of conversations with `mephisto.blueprint.data_jsonl=${PATH_TO_CONVERSATIONS}`.

See `turn_annotations_blueprint.py` for various parameters of this task, including passing in custom annotation bucket definitions using the `annotation_buckets` YAML flag, being able to group multiple conversations into one HIT using the `subtasks_per_unit` flag, passing in onboarding data with answers, and being able to ask only for the final utterance as an annotation.
See `turn_annotations_blueprint.py` for the various parameters of this task, including passing in custom annotation bucket definitions using the `annotations_config_path` YAML flag, grouping multiple conversations into one HIT using the `subtasks_per_unit` flag, passing in onboarding data with answers, and asking for an annotation of only the final utterance.

Validation of the response field is handled by the `validateFreetextResponse` function in `task_components.jsx`, which checks for a minimum number of characters, words, and vowels, as specified by its parameters. To change this, modify the values passed in to the function call or override the function to set your own validation requirements.

Expand Down
Expand Up @@ -20,7 +20,7 @@ class TurnAnnotationsStaticResultsCompiler(AbstractTurnAnnotationResultsCompiler
"""
Class to compile results from static turn annotations.
Change PROBLEM_BUCKETS in task_config/annotation_buckets.json to be the buckets that
Change PROBLEM_BUCKETS in task_config/annotations_config.json to be the buckets that
you are asking crowdsource workers to annotate with.
"""

Expand Down
Expand Up @@ -11,20 +11,14 @@ mephisto:
extra_source_dir: ${task_dir}/webapp/src/static
units_per_assignment: 5
onboarding_qualification: turn_annotations_static
annotation_buckets: ${task_dir}/task_config/annotation_buckets.json
annotations_config_path: ${task_dir}/task_config/annotations_config.json
response_field: False
task_description_file: ${task_dir}/task_config/task_description.html
task:
allowed_concurrent: 1
assignment_duration_in_seconds: 600
max_num_concurrent_units: 0 # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes
maximum_units_per_worker: 5
task_description: |
<b><h4>Task Description</h4></b>
<br>
Dummy Task Description.
Lorem ipsum.
<br><br>
task_name: turn_annotations_static
task_reward: 0.3
task_tags: "chat,conversation,dialog,partner"
Expand Down
5 changes: 5 additions & 0 deletions parlai/crowdsourcing/tasks/turn_annotations_static/run.py
Expand Up @@ -12,10 +12,15 @@
from mephisto.operations.hydra_config import register_script_config
from omegaconf import DictConfig

from parlai.crowdsourcing.tasks.turn_annotations_static.turn_annotations_blueprint import (
STATIC_BLUEPRINT_TYPE,
)
from parlai.crowdsourcing.tasks.turn_annotations_static.util import run_static_task
from parlai.crowdsourcing.utils.mturk import MTurkRunScriptConfig


_ = STATIC_BLUEPRINT_TYPE

TASK_DIRECTORY = os.path.dirname(os.path.abspath(__file__))

# To run the task with your own config outside this folder (recommended!)
Expand Down
@@ -0,0 +1,7 @@
<br>
<b><h4>Task Description</h4></b>
<br>
Dummy Task Description.
Lorem ipsum.
turnannotations
<br><br>
Expand Up @@ -84,10 +84,11 @@ class TurnAnnotationsStaticBlueprintArgs(StaticReactBlueprintArgs):
"help": "Path to data and answers for onboarding task in JSON format"
},
)
annotation_buckets: Optional[str] = field(
default=None,
annotations_config_path: str = field(
default="",
metadata={
"help": "As per Turn Annotations task, path to annotation buckets which will be checkboxes in the frontend for worker to annotate an utterance. If none provided, no checkboxes."
"help": "As per Turn Annotations task, path to annotation buckets which will be checkboxes in the frontend for worker to annotate an utterance. Set to "
" to disable checkboxes."
},
)
response_field: bool = field(
Expand All @@ -96,6 +97,10 @@ class TurnAnnotationsStaticBlueprintArgs(StaticReactBlueprintArgs):
"help": "If we want a freeform textbox input for the crowdworker to respond to the message."
},
)
task_description_file: str = field(
default=os.path.join(get_task_path(), 'task_config/task_description.html'),
metadata={"help": "Path to file of HTML to show on the task-description page"},
)


@register_mephisto_abstraction()
Expand Down Expand Up @@ -184,18 +189,28 @@ def get_frontend_args(self) -> Dict[str, Any]:
for use by the task's frontend.
"""

# Load task description from file
task_description = "<h1>" "You didn't specify a task_description_file" "</h1>"
if self.args.blueprint.get("task_description_file", None) is not None:
full_path = os.path.expanduser(self.args.blueprint.task_description_file)
assert os.path.exists(
full_path
), f"Target task description path {full_path} doesn't exist"
with open(full_path, "r") as description_fp:
task_description = description_fp.read()

with open(self.args.blueprint.onboarding_data, "r", encoding="utf-8-sig") as f:
onboarding_data = json.loads(f.read())

annotation_buckets = None
if self.args.blueprint.annotation_buckets:
if self.args.blueprint.get('annotations_config_path', ''):
with open(
self.args.blueprint.annotation_buckets, "r", encoding="utf-8-sig"
self.args.blueprint.annotations_config_path, "r", encoding="utf-8-sig"
) as f:
annotation_buckets = json.loads(f.read())

return {
"task_description": self.args.task.get('task_description', None),
"task_description": task_description,
"task_title": self.args.task.get('task_title', None),
"annotation_question": self.args.blueprint.annotation_question,
"onboarding_data": onboarding_data,
Expand Down
Expand Up @@ -44,7 +44,16 @@ function Checkboxes({ annotationBuckets, turnIdx, onUserInputUpdate, askReason }
<div key={'checkboxes_' + turnIdx}>
{
Object.keys(annotationBuckets.config).map(c => (
<span key={'span_' + c + '_' + turnIdx}><input type={input_type} id={c + '_' + turnIdx} name={'checkbox_group_' + turnIdx} onChange={(evt) => handleCheckboxChange(evt, annotationBuckets, onUserInputUpdate)} /><span style={{ marginRight: '15px' }}>{annotationBuckets.config[c].name}</span>
<span key={'span_' + c + '_' + turnIdx}>
<input
type={input_type}
id={c + '_' + turnIdx}
name={'checkbox_group_' + turnIdx}
onChange={(evt) => handleCheckboxChange(evt, annotationBuckets, onUserInputUpdate)}
/>
<span style={{ marginRight: '15px' }}>
{annotationBuckets.config[c].name}
</span>
</span>
))
}
Expand Down
Expand Up @@ -132,7 +132,7 @@
"1"
],
"task_description": {
"annotations_config": "[\n {\n \"value\": \"bucket_0\",\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n {\n \"value\": \"bucket_1\",\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n {\n \"value\": \"bucket_2\",\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n {\n \"value\": \"bucket_3\",\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n {\n \"value\": \"bucket_4\",\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n {\n \"value\": \"none_all_good\",\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n]\n",
"annotations_config": "{\n \"config\": {\n \"bucket_0\": {\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n \"bucket_1\": {\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n \"bucket_2\": {\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n \"bucket_3\": {\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n \"bucket_4\": {\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n \"none_all_good\": {\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n }\n}\n",
"model_nickname": "fixed_response",
"model_file": "/private/home/ems/GitHub/facebookresearch/ParlAI_dev/data/models/fixed_response/model",
"model_opt": {
Expand Down
Expand Up @@ -357,7 +357,7 @@
"person2_seed_utterance": null,
"personas": null,
"task_description": {
"annotations_config": "[\n {\n \"value\": \"bucket_0\",\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n {\n \"value\": \"bucket_1\",\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n {\n \"value\": \"bucket_2\",\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n {\n \"value\": \"bucket_3\",\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n {\n \"value\": \"bucket_4\",\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n {\n \"value\": \"none_all_good\",\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n]\n",
"annotations_config": "{\n \"config\": {\n \"bucket_0\": {\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n \"bucket_1\": {\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n \"bucket_2\": {\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n \"bucket_3\": {\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n \"bucket_4\": {\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n \"none_all_good\": {\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n }\n}\n",
"model_nickname": "fixed_response",
"model_opt": {
"init_opt": null,
Expand Down Expand Up @@ -515,7 +515,7 @@
"hit_ids": ["1"],
"assignment_ids": ["1"],
"task_description": {
"annotations_config": "[\n {\n \"value\": \"bucket_0\",\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n {\n \"value\": \"bucket_1\",\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n {\n \"value\": \"bucket_2\",\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n {\n \"value\": \"bucket_3\",\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n {\n \"value\": \"bucket_4\",\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n {\n \"value\": \"none_all_good\",\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n]\n",
"annotations_config": "{\n \"config\": {\n \"bucket_0\": {\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n \"bucket_1\": {\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n \"bucket_2\": {\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n \"bucket_3\": {\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n \"bucket_4\": {\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n \"none_all_good\": {\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n }\n}\n",
"model_nickname": "fixed_response",
"model_file": null,
"model_opt": {
Expand Down

0 comments on commit 81494bb

Please sign in to comment.