From 81494bbe6cec8599de4e4cb522d805ee55f9676a Mon Sep 17 00:00:00 2001 From: Megan Ung <20617868+meganung@users.noreply.github.com> Date: Tue, 23 Nov 2021 16:45:44 -0800 Subject: [PATCH] unify turn annotation tasks (model chat and turn annotations static tasks) (#4162) * annotation_buckets param to annotations_config_path to match mc * unify the annotation buckets config file format in model chat and turn annotations static task * remove task_description flag (not used) * chat_title->task_title * move task description to html file * sample annotation buckets match previous ones for model chat task * update tests * add task config files * PR fixes * model chat task only supports checkboxes right now --- .../frontend/components/checkboxes.jsx | 9 +-- .../frontend/components/message.jsx | 2 +- .../components/onboarding_components.jsx | 2 +- .../tasks/model_chat/frontend/main.js | 2 +- .../hydra_configs/conf/example.yaml | 7 --- .../conf/example_image_chat.yaml | 7 --- .../task_config/annotations_config.json | 58 +++++++++---------- .../tasks/turn_annotations_static/README.md | 2 +- .../analysis/compile_results.py | 2 +- .../hydra_configs/conf/example.yaml | 10 +--- .../tasks/turn_annotations_static/run.py | 5 ++ ...n_buckets.json => annotations_config.json} | 0 .../task_config/task_description.html | 7 +++ .../turn_annotations_blueprint.py | 27 +++++++-- .../webapp/src/components/checkboxes.jsx | 11 +++- .../expected_states/final_chat_data.json | 2 +- .../model_chat/expected_states/state.json | 4 +- 17 files changed, 85 insertions(+), 72 deletions(-) rename parlai/crowdsourcing/tasks/turn_annotations_static/task_config/{annotation_buckets.json => annotations_config.json} (100%) create mode 100644 parlai/crowdsourcing/tasks/turn_annotations_static/task_config/task_description.html diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/components/checkboxes.jsx b/parlai/crowdsourcing/tasks/model_chat/frontend/components/checkboxes.jsx index 745473ae513..79379d77854 100644 --- a/parlai/crowdsourcing/tasks/model_chat/frontend/components/checkboxes.jsx +++ b/parlai/crowdsourcing/tasks/model_chat/frontend/components/checkboxes.jsx @@ -28,11 +28,12 @@ function Checkboxes({ if (!askReason) { reasonComponent = ''; } - let input_type = annotationBuckets.type !== undefined ? annotationBuckets.type : "checkbox"; + // TODO: add support for radio input type + let input_type = "checkbox"; return (
{ - Object.keys(annotationBuckets).map(c => ( + Object.keys(annotationBuckets.config).map(c => ( { let newVal = evt.target.checked; let oldAnnotations = Object.assign({}, annotations); - oldAnnotations[annotationBuckets[c].value] = newVal; + oldAnnotations[c] = newVal; onUpdateAnnotations(oldAnnotations); }} disabled={!enabled} /> - {annotationBuckets[c].name} + {annotationBuckets.config[c].name} )) diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/components/message.jsx b/parlai/crowdsourcing/tasks/model_chat/frontend/components/message.jsx index 064bd8b5ad7..fc37d9a6860 100644 --- a/parlai/crowdsourcing/tasks/model_chat/frontend/components/message.jsx +++ b/parlai/crowdsourcing/tasks/model_chat/frontend/components/message.jsx @@ -44,7 +44,7 @@ function RenderChatMessage({ message, mephistoContext, appContext, idx }) { let thisBoxAnnotations = checkboxValues[idx]; if (!thisBoxAnnotations) { thisBoxAnnotations = Object.fromEntries( - annotationBuckets.map(bucket => [bucket.value, false]) + Object.keys(annotationBuckets.config).map(bucket => [bucket, false]) ) } checkboxes =
diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/components/onboarding_components.jsx b/parlai/crowdsourcing/tasks/model_chat/frontend/components/onboarding_components.jsx index c704df71d29..f197bcf33b5 100644 --- a/parlai/crowdsourcing/tasks/model_chat/frontend/components/onboarding_components.jsx +++ b/parlai/crowdsourcing/tasks/model_chat/frontend/components/onboarding_components.jsx @@ -127,7 +127,7 @@ function OnboardingComponent({ onboardingData, annotationBuckets, annotationQues } else { const [currentTurnAnnotations, setCurrentAnnotations] = React.useState( Array.from(Array(onboardingData.dialog.length), () => Object.fromEntries( - annotationBuckets.map(bucket => [bucket.value, false])) + Object.keys(annotationBuckets.config).map(bucket => [bucket, false])) ) ); return ( diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/main.js b/parlai/crowdsourcing/tasks/model_chat/frontend/main.js index f7fbe0d3be3..0c66c2861b2 100644 --- a/parlai/crowdsourcing/tasks/model_chat/frontend/main.js +++ b/parlai/crowdsourcing/tasks/model_chat/frontend/main.js @@ -30,7 +30,7 @@ function MainApp() { )} renderSidePane={({ mephistoContext: { taskConfig }, appContext: { taskContext } }) => ( {(taskContext.hasOwnProperty('image_src') && taskContext['image_src']) ? ( diff --git a/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example.yaml b/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example.yaml index abe1628aebd..3dad128ad8c 100644 --- a/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example.yaml +++ b/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example.yaml @@ -22,13 +22,6 @@ mephisto: assignment_duration_in_seconds: 600 max_num_concurrent_units: 0 # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes maximum_units_per_worker: 3 - task_description: | -

Task Description

-
- Dummy Task Description. - - Lorem ipsum. -

task_name: model_chat task_reward: 3 task_tags: "chat,conversation,dialog,partner" diff --git a/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example_image_chat.yaml b/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example_image_chat.yaml index f43e8c5516e..be2519643dc 100644 --- a/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example_image_chat.yaml +++ b/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example_image_chat.yaml @@ -18,13 +18,6 @@ mephisto: assignment_duration_in_seconds: 600 max_num_concurrent_units: 0 # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes maximum_units_per_worker: 3 - task_description: | -

Task Description

-
- Dummy Task Description. - - Lorem ipsum. -

task_name: model_image_chat task_reward: 3 task_tags: "chat,conversation,dialog,partner,image" diff --git a/parlai/crowdsourcing/tasks/model_chat/task_config/annotations_config.json b/parlai/crowdsourcing/tasks/model_chat/task_config/annotations_config.json index d933db83e2f..8d028c07a12 100644 --- a/parlai/crowdsourcing/tasks/model_chat/task_config/annotations_config.json +++ b/parlai/crowdsourcing/tasks/model_chat/task_config/annotations_config.json @@ -1,32 +1,28 @@ -[ - { - "value": "bucket_0", - "name": "Bucket 0", - "description": "this response implies something...0" - }, - { - "value": "bucket_1", - "name": "Bucket 1", - "description": "this response implies something...1" - }, - { - "value": "bucket_2", - "name": "Bucket 2", - "description": "this response implies something...2" - }, - { - "value": "bucket_3", - "name": "Bucket 3", - "description": "this response implies something...3" - }, - { - "value": "bucket_4", - "name": "Bucket 4", - "description": "this response implies something...4" - }, - { - "value": "none_all_good", - "name": "None, all good", - "description": "This response implies that there are no problems with the data" +{ + "config": { + "bucket_0": { + "name": "Bucket 0", + "description": "this response implies something...0" + }, + "bucket_1": { + "name": "Bucket 1", + "description": "this response implies something...1" + }, + "bucket_2": { + "name": "Bucket 2", + "description": "this response implies something...2" + }, + "bucket_3": { + "name": "Bucket 3", + "description": "this response implies something...3" + }, + "bucket_4": { + "name": "Bucket 4", + "description": "this response implies something...4" + }, + "none_all_good": { + "name": "None, all good", + "description": "This response implies that there are no problems with the data" + } } -] +} diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/README.md b/parlai/crowdsourcing/tasks/turn_annotations_static/README.md index 50d7a742762..cf9aa75bdf8 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/README.md +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/README.md @@ -11,7 +11,7 @@ Two variants of the blueprint are supported: For both variants of the blueprint, it is required to pass in your own file of conversations with `mephisto.blueprint.data_jsonl=${PATH_TO_CONVERSATIONS}`. -See `turn_annotations_blueprint.py` for various parameters of this task, including passing in custom annotation bucket definitions using the `annotation_buckets` YAML flag, being able to group multiple conversations into one HIT using the `subtasks_per_unit` flag, passing in onboarding data with answers, and being able to ask only for the final utterance as an annotation. +See `turn_annotations_blueprint.py` for various parameters of this task, including passing in custom annotation bucket definitions using the `annotations_config_path` YAML flag, being able to group multiple conversations into one HIT using the `subtasks_per_unit` flag, passing in onboarding data with answers, and being able to ask only for the final utterance as an annotation. The validation of the response field is handled by `validateFreetextResponse` function in `task_components.jsx` and checks for a minimum number of characters, words, and vowels specified by function parameters. To change this, modify the values passed in to the function call or override the function to set your own validation requirements. diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py b/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py index fbe1468476c..a976a8d4264 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py @@ -20,7 +20,7 @@ class TurnAnnotationsStaticResultsCompiler(AbstractTurnAnnotationResultsCompiler """ Class to compile results from static turn annotations. - Change PROBLEM_BUCKETS in task_config/annotation_buckets.json to be the buckets that + Change PROBLEM_BUCKETS in task_config/annotations_config.json to be the buckets that you are asking crowdsource workers to annotate with. """ diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/hydra_configs/conf/example.yaml b/parlai/crowdsourcing/tasks/turn_annotations_static/hydra_configs/conf/example.yaml index df3605759d7..cfccd034ca7 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/hydra_configs/conf/example.yaml +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/hydra_configs/conf/example.yaml @@ -11,20 +11,14 @@ mephisto: extra_source_dir: ${task_dir}/webapp/src/static units_per_assignment: 5 onboarding_qualification: turn_annotations_static - annotation_buckets: ${task_dir}/task_config/annotation_buckets.json + annotations_config_path: ${task_dir}/task_config/annotations_config.json response_field: False + task_description_file: ${task_dir}/task_config/task_description.html task: allowed_concurrent: 1 assignment_duration_in_seconds: 600 max_num_concurrent_units: 0 # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes maximum_units_per_worker: 5 - task_description: | -

Task Description

-
- Dummy Task Description. - - Lorem ipsum. -

task_name: turn_annotations_static task_reward: 0.3 task_tags: "chat,conversation,dialog,partner" diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/run.py b/parlai/crowdsourcing/tasks/turn_annotations_static/run.py index 102bf632667..d588274b39a 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/run.py +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/run.py @@ -12,10 +12,15 @@ from mephisto.operations.hydra_config import register_script_config from omegaconf import DictConfig +from parlai.crowdsourcing.tasks.turn_annotations_static.turn_annotations_blueprint import ( + STATIC_BLUEPRINT_TYPE, +) from parlai.crowdsourcing.tasks.turn_annotations_static.util import run_static_task from parlai.crowdsourcing.utils.mturk import MTurkRunScriptConfig +_ = STATIC_BLUEPRINT_TYPE + TASK_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) # To run the task with your own config outside this folder (recommended!) diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/annotation_buckets.json b/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/annotations_config.json similarity index 100% rename from parlai/crowdsourcing/tasks/turn_annotations_static/task_config/annotation_buckets.json rename to parlai/crowdsourcing/tasks/turn_annotations_static/task_config/annotations_config.json diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/task_description.html b/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/task_description.html new file mode 100644 index 00000000000..dc7bddcf2a6 --- /dev/null +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/task_description.html @@ -0,0 +1,7 @@ +
+

Task Description

+
+Dummy Task Description. +Lorem ipsum. +turnannotations +

diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/turn_annotations_blueprint.py b/parlai/crowdsourcing/tasks/turn_annotations_static/turn_annotations_blueprint.py index 9b3f387d7ca..5b573c0e7e6 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/turn_annotations_blueprint.py +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/turn_annotations_blueprint.py @@ -84,10 +84,11 @@ class TurnAnnotationsStaticBlueprintArgs(StaticReactBlueprintArgs): "help": "Path to data and answers for onboarding task in JSON format" }, ) - annotation_buckets: Optional[str] = field( - default=None, + annotations_config_path: str = field( + default="", metadata={ - "help": "As per Turn Annotations task, path to annotation buckets which will be checkboxes in the frontend for worker to annotate an utterance. If none provided, no checkboxes." + "help": "As per Turn Annotations task, path to annotation buckets which will be checkboxes in the frontend for worker to annotate an utterance. Set to " + " to disable checkboxes." }, ) response_field: bool = field( @@ -96,6 +97,10 @@ class TurnAnnotationsStaticBlueprintArgs(StaticReactBlueprintArgs): "help": "If we want a freeform textbox input for the crowdworker to respond to the message." }, ) + task_description_file: str = field( + default=os.path.join(get_task_path(), 'task_config/task_description.html'), + metadata={"help": "Path to file of HTML to show on the task-description page"}, + ) @register_mephisto_abstraction() @@ -184,18 +189,28 @@ def get_frontend_args(self) -> Dict[str, Any]: for use by the task's frontend. """ + # Load task description from file + task_description = "

" "You didn't specify a task_description_file" "

" + if self.args.blueprint.get("task_description_file", None) is not None: + full_path = os.path.expanduser(self.args.blueprint.task_description_file) + assert os.path.exists( + full_path + ), f"Target task description path {full_path} doesn't exist" + with open(full_path, "r") as description_fp: + task_description = description_fp.read() + with open(self.args.blueprint.onboarding_data, "r", encoding="utf-8-sig") as f: onboarding_data = json.loads(f.read()) annotation_buckets = None - if self.args.blueprint.annotation_buckets: + if self.args.blueprint.get('annotations_config_path', ''): with open( - self.args.blueprint.annotation_buckets, "r", encoding="utf-8-sig" + self.args.blueprint.annotations_config_path, "r", encoding="utf-8-sig" ) as f: annotation_buckets = json.loads(f.read()) return { - "task_description": self.args.task.get('task_description', None), + "task_description": task_description, "task_title": self.args.task.get('task_title', None), "annotation_question": self.args.blueprint.annotation_question, "onboarding_data": onboarding_data, diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/webapp/src/components/checkboxes.jsx b/parlai/crowdsourcing/tasks/turn_annotations_static/webapp/src/components/checkboxes.jsx index dea02734dd8..c328d633ecf 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/webapp/src/components/checkboxes.jsx +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/webapp/src/components/checkboxes.jsx @@ -44,7 +44,16 @@ function Checkboxes({ annotationBuckets, turnIdx, onUserInputUpdate, askReason }
{ Object.keys(annotationBuckets.config).map(c => ( - handleCheckboxChange(evt, annotationBuckets, onUserInputUpdate)} />{annotationBuckets.config[c].name} + + handleCheckboxChange(evt, annotationBuckets, onUserInputUpdate)} + /> + + {annotationBuckets.config[c].name} + )) } diff --git a/tests/crowdsourcing/tasks/model_chat/expected_states/final_chat_data.json b/tests/crowdsourcing/tasks/model_chat/expected_states/final_chat_data.json index 6b3f860ff5d..a8d43fb8bbd 100644 --- a/tests/crowdsourcing/tasks/model_chat/expected_states/final_chat_data.json +++ b/tests/crowdsourcing/tasks/model_chat/expected_states/final_chat_data.json @@ -132,7 +132,7 @@ "1" ], "task_description": { - "annotations_config": "[\n {\n \"value\": \"bucket_0\",\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n {\n \"value\": \"bucket_1\",\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n {\n \"value\": \"bucket_2\",\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n {\n \"value\": \"bucket_3\",\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n {\n \"value\": \"bucket_4\",\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n {\n \"value\": \"none_all_good\",\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n]\n", + "annotations_config": "{\n \"config\": {\n \"bucket_0\": {\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n \"bucket_1\": {\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n \"bucket_2\": {\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n \"bucket_3\": {\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n \"bucket_4\": {\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n \"none_all_good\": {\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n }\n}\n", "model_nickname": "fixed_response", "model_file": "/private/home/ems/GitHub/facebookresearch/ParlAI_dev/data/models/fixed_response/model", "model_opt": { diff --git a/tests/crowdsourcing/tasks/model_chat/expected_states/state.json b/tests/crowdsourcing/tasks/model_chat/expected_states/state.json index 20020318adf..8f4d83c082c 100644 --- a/tests/crowdsourcing/tasks/model_chat/expected_states/state.json +++ b/tests/crowdsourcing/tasks/model_chat/expected_states/state.json @@ -357,7 +357,7 @@ "person2_seed_utterance": null, "personas": null, "task_description": { - "annotations_config": "[\n {\n \"value\": \"bucket_0\",\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n {\n \"value\": \"bucket_1\",\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n {\n \"value\": \"bucket_2\",\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n {\n \"value\": \"bucket_3\",\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n {\n \"value\": \"bucket_4\",\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n {\n \"value\": \"none_all_good\",\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n]\n", + "annotations_config": "{\n \"config\": {\n \"bucket_0\": {\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n \"bucket_1\": {\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n \"bucket_2\": {\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n \"bucket_3\": {\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n \"bucket_4\": {\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n \"none_all_good\": {\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n }\n}\n", "model_nickname": "fixed_response", "model_opt": { "init_opt": null, @@ -515,7 +515,7 @@ "hit_ids": ["1"], "assignment_ids": ["1"], "task_description": { - "annotations_config": "[\n {\n \"value\": \"bucket_0\",\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n {\n \"value\": \"bucket_1\",\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n {\n \"value\": \"bucket_2\",\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n {\n \"value\": \"bucket_3\",\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n {\n \"value\": \"bucket_4\",\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n {\n \"value\": \"none_all_good\",\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n]\n", + "annotations_config": "{\n \"config\": {\n \"bucket_0\": {\n \"name\": \"Bucket 0\",\n \"description\": \"this response implies something...0\"\n },\n \"bucket_1\": {\n \"name\": \"Bucket 1\",\n \"description\": \"this response implies something...1\"\n },\n \"bucket_2\": {\n \"name\": \"Bucket 2\",\n \"description\": \"this response implies something...2\"\n },\n \"bucket_3\": {\n \"name\": \"Bucket 3\",\n \"description\": \"this response implies something...3\"\n },\n \"bucket_4\": {\n \"name\": \"Bucket 4\",\n \"description\": \"this response implies something...4\"\n },\n \"none_all_good\": {\n \"name\": \"None, all good\",\n \"description\": \"This response implies that there are no problems with the data\"\n }\n }\n}\n", "model_nickname": "fixed_response", "model_file": null, "model_opt": {