unify turn annotation tasks (model chat and turn annotations static tasks) (#4162)

* annotation_buckets param to annotations_config_path to match mc * unify the annotation buckets config file format in model chat and turn annotations static task * remove task_description flag (not used) * chat_title->task_title * move task description to html file * sample annotation buckets match previous ones for model chat task * update tests * add task config files * PR fixes * model chat task only supports checkboxes right now
facebookresearch · Nov 24, 2021 · 81494bb · 81494bb
1 parent 1fa8f7a
commit 81494bb
Show file tree

Hide file tree

Showing 17 changed files with 85 additions and 72 deletions.
diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/components/checkboxes.jsx b/parlai/crowdsourcing/tasks/model_chat/frontend/components/checkboxes.jsx
@@ -28,11 +28,12 @@ function Checkboxes({
   if (!askReason) {
     reasonComponent = '';
   }
-  let input_type = annotationBuckets.type !== undefined ? annotationBuckets.type : "checkbox";
+  // TODO: add support for radio input type
+  let input_type = "checkbox";
   return (
     <div key={'checkboxes_' + turnIdx}>
       {
-        Object.keys(annotationBuckets).map(c => (
+        Object.keys(annotationBuckets.config).map(c => (
           <span key={'span_' + c + '_' + turnIdx}>
             <input 
               type={input_type}
@@ -41,13 +42,13 @@ function Checkboxes({
               onChange={(evt) => {
                 let newVal = evt.target.checked;
                 let oldAnnotations = Object.assign({}, annotations);
-                oldAnnotations[annotationBuckets[c].value] = newVal;
+                oldAnnotations[c] = newVal;
                 onUpdateAnnotations(oldAnnotations);
               }} 
               disabled={!enabled}
             />
             <span style={{ marginRight: '15px' }}>
-              {annotationBuckets[c].name}
+              {annotationBuckets.config[c].name}
             </span>
           </span>
         ))

diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/components/message.jsx b/parlai/crowdsourcing/tasks/model_chat/frontend/components/message.jsx
@@ -44,7 +44,7 @@ function RenderChatMessage({ message, mephistoContext, appContext, idx }) {
     let thisBoxAnnotations = checkboxValues[idx];
     if (!thisBoxAnnotations) {
       thisBoxAnnotations = Object.fromEntries(
-        annotationBuckets.map(bucket => [bucket.value, false])
+        Object.keys(annotationBuckets.config).map(bucket => [bucket, false])
       )
     }
     checkboxes = <div style={{"fontStyle": "italic"}}>

diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/components/onboarding_components.jsx b/parlai/crowdsourcing/tasks/model_chat/frontend/components/onboarding_components.jsx
@@ -127,7 +127,7 @@ function OnboardingComponent({ onboardingData, annotationBuckets, annotationQues
     } else {
         const [currentTurnAnnotations, setCurrentAnnotations] = React.useState(
             Array.from(Array(onboardingData.dialog.length), () => Object.fromEntries(
-                annotationBuckets.map(bucket => [bucket.value, false]))
+                Object.keys(annotationBuckets.config).map(bucket => [bucket, false]))
             )
         );
         return (

diff --git a/parlai/crowdsourcing/tasks/model_chat/frontend/main.js b/parlai/crowdsourcing/tasks/model_chat/frontend/main.js
@@ -30,7 +30,7 @@ function MainApp() {
       )}
       renderSidePane={({ mephistoContext: { taskConfig }, appContext: { taskContext } }) => (
         <DefaultTaskDescription
-          chatTitle={taskConfig.chat_title}
+          chatTitle={taskConfig.task_title}
           taskDescriptionHtml={taskConfig.left_pane_text}
         >
           {(taskContext.hasOwnProperty('image_src') && taskContext['image_src']) ? (

diff --git a/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example.yaml b/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example.yaml
@@ -22,13 +22,6 @@ mephisto:
     assignment_duration_in_seconds: 600
     max_num_concurrent_units: 0  # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes
     maximum_units_per_worker: 3
-    task_description: |
-      <b><h4>Task Description</h4></b>
-      <br>
-      Dummy Task Description.
-
-      Lorem ipsum.
-      <br><br>
     task_name: model_chat
     task_reward: 3
     task_tags: "chat,conversation,dialog,partner"

diff --git a/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example_image_chat.yaml b/parlai/crowdsourcing/tasks/model_chat/hydra_configs/conf/example_image_chat.yaml
@@ -18,13 +18,6 @@ mephisto:
     assignment_duration_in_seconds: 600
     max_num_concurrent_units: 0  # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes
     maximum_units_per_worker: 3
-    task_description: |
-      <b><h4>Task Description</h4></b>
-      <br>
-      Dummy Task Description.
-
-      Lorem ipsum.
-      <br><br>
     task_name: model_image_chat
     task_reward: 3
     task_tags: "chat,conversation,dialog,partner,image"

diff --git a/parlai/crowdsourcing/tasks/model_chat/task_config/annotations_config.json b/parlai/crowdsourcing/tasks/model_chat/task_config/annotations_config.json
@@ -1,32 +1,28 @@
-[
-    {
-        "value": "bucket_0",
-        "name": "Bucket 0",
-        "description": "this response implies something...0"
-    },
-    {
-        "value": "bucket_1",
-        "name": "Bucket 1",
-        "description": "this response implies something...1"
-    },
-    {
-        "value": "bucket_2",
-        "name": "Bucket 2",
-        "description": "this response implies something...2"
-    },
-    {
-        "value": "bucket_3",
-        "name": "Bucket 3",
-        "description": "this response implies something...3"
-    },
-    {
-        "value": "bucket_4",
-        "name": "Bucket 4",
-        "description": "this response implies something...4"
-    },
-    {
-        "value": "none_all_good",
-        "name": "None, all good",
-        "description": "This response implies that there are no problems with the data"
+{
+    "config": {
+        "bucket_0": {
+            "name": "Bucket 0",
+            "description": "this response implies something...0"
+        },
+        "bucket_1": {
+            "name": "Bucket 1",
+            "description": "this response implies something...1"
+        },
+        "bucket_2": {
+            "name": "Bucket 2",
+            "description": "this response implies something...2"
+        },
+        "bucket_3": {
+            "name": "Bucket 3",
+            "description": "this response implies something...3"
+        },
+        "bucket_4": {
+            "name": "Bucket 4",
+            "description": "this response implies something...4"
+        },
+        "none_all_good": {
+            "name": "None, all good",
+            "description": "This response implies that there are no problems with the data"
+        }
     }
-]
+}
diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/README.md b/parlai/crowdsourcing/tasks/turn_annotations_static/README.md
@@ -11,7 +11,7 @@ Two variants of the blueprint are supported:
 
 For both variants of the blueprint, it is required to pass in your own file of conversations with `mephisto.blueprint.data_jsonl=${PATH_TO_CONVERSATIONS}`.
 
-See `turn_annotations_blueprint.py` for various parameters of this task, including passing in custom annotation bucket definitions using the `annotation_buckets` YAML flag, being able to group multiple conversations into one HIT using the `subtasks_per_unit` flag, passing in onboarding data with answers, and being able to ask only for the final utterance as an annotation.
+See `turn_annotations_blueprint.py` for various parameters of this task, including passing in custom annotation bucket definitions using the `annotations_config_path` YAML flag, being able to group multiple conversations into one HIT using the `subtasks_per_unit` flag, passing in onboarding data with answers, and being able to ask only for the final utterance as an annotation.
 
 The validation of the response field is handled by `validateFreetextResponse` function in `task_components.jsx` and checks for a minimum number of characters, words, and vowels specified by function parameters. To change this, modify the values passed in to the function call or override the function to set your own validation requirements.
 

diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py b/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py
@@ -20,7 +20,7 @@ class TurnAnnotationsStaticResultsCompiler(AbstractTurnAnnotationResultsCompiler
     """
     Class to compile results from static turn annotations.
 
-    Change PROBLEM_BUCKETS in task_config/annotation_buckets.json to be the buckets that
+    Change PROBLEM_BUCKETS in task_config/annotations_config.json to be the buckets that
     you are asking crowdsource workers to annotate with.
     """
 

diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/hydra_configs/conf/example.yaml b/parlai/crowdsourcing/tasks/turn_annotations_static/hydra_configs/conf/example.yaml
@@ -11,20 +11,14 @@ mephisto:
     extra_source_dir: ${task_dir}/webapp/src/static
     units_per_assignment: 5
     onboarding_qualification: turn_annotations_static
-    annotation_buckets: ${task_dir}/task_config/annotation_buckets.json
+    annotations_config_path: ${task_dir}/task_config/annotations_config.json
     response_field: False
+    task_description_file: ${task_dir}/task_config/task_description.html
   task:
     allowed_concurrent: 1
     assignment_duration_in_seconds: 600
     max_num_concurrent_units: 0  # 0 means infinite; set this to a positive integer to limit concurrent HITs and prevent crashes
     maximum_units_per_worker: 5
-    task_description: |
-      <b><h4>Task Description</h4></b>
-      <br>
-      Dummy Task Description.
-
-      Lorem ipsum.
-      <br><br>
     task_name: turn_annotations_static
     task_reward: 0.3
     task_tags: "chat,conversation,dialog,partner"

diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/run.py b/parlai/crowdsourcing/tasks/turn_annotations_static/run.py
@@ -12,10 +12,15 @@
 from mephisto.operations.hydra_config import register_script_config
 from omegaconf import DictConfig
 
+from parlai.crowdsourcing.tasks.turn_annotations_static.turn_annotations_blueprint import (
+    STATIC_BLUEPRINT_TYPE,
+)
 from parlai.crowdsourcing.tasks.turn_annotations_static.util import run_static_task
 from parlai.crowdsourcing.utils.mturk import MTurkRunScriptConfig
 
 
+_ = STATIC_BLUEPRINT_TYPE
+
 TASK_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
 
 # To run the task with your own config outside this folder (recommended!)

diff --git a/...tatic/task_config/annotation_buckets.json → ...tatic/task_config/annotations_config.json b/...tatic/task_config/annotation_buckets.json → ...tatic/task_config/annotations_config.json
diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/task_description.html b/parlai/crowdsourcing/tasks/turn_annotations_static/task_config/task_description.html
@@ -0,0 +1,7 @@
+<br>
+<b><h4>Task Description</h4></b>
+<br>
+Dummy Task Description.
+Lorem ipsum.
+turnannotations
+<br><br>
diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/turn_annotations_blueprint.py b/parlai/crowdsourcing/tasks/turn_annotations_static/turn_annotations_blueprint.py
@@ -84,10 +84,11 @@ class TurnAnnotationsStaticBlueprintArgs(StaticReactBlueprintArgs):
             "help": "Path to data and answers for onboarding task in JSON format"
         },
     )
-    annotation_buckets: Optional[str] = field(
-        default=None,
+    annotations_config_path: str = field(
+        default="",
         metadata={
-            "help": "As per Turn Annotations task, path to annotation buckets which will be checkboxes in the frontend for worker to annotate an utterance. If none provided, no checkboxes."
+            "help": "As per Turn Annotations task, path to annotation buckets which will be checkboxes in the frontend for worker to annotate an utterance. Set to "
+            "'' (an empty string) to disable checkboxes."
         },
     )
     response_field: bool = field(
@@ -96,6 +97,10 @@ class TurnAnnotationsStaticBlueprintArgs(StaticReactBlueprintArgs):
             "help": "If we want a freeform textbox input for the crowdworker to respond to the message."
         },
     )
+    task_description_file: str = field(
+        default=os.path.join(get_task_path(), 'task_config/task_description.html'),
+        metadata={"help": "Path to file of HTML to show on the task-description page"},
+    )
 
 
 @register_mephisto_abstraction()
@@ -184,18 +189,28 @@ def get_frontend_args(self) -> Dict[str, Any]:
         for use by the task's frontend.
         """
 
+        # Load task description from file
+        task_description = "<h1>" "You didn't specify a task_description_file" "</h1>"
+        if self.args.blueprint.get("task_description_file", None) is not None:
+            full_path = os.path.expanduser(self.args.blueprint.task_description_file)
+            assert os.path.exists(
+                full_path
+            ), f"Target task description path {full_path} doesn't exist"
+            with open(full_path, "r") as description_fp:
+                task_description = description_fp.read()
+
         with open(self.args.blueprint.onboarding_data, "r", encoding="utf-8-sig") as f:
             onboarding_data = json.loads(f.read())
 
         annotation_buckets = None
-        if self.args.blueprint.annotation_buckets:
+        if self.args.blueprint.get('annotations_config_path', ''):
             with open(
-                self.args.blueprint.annotation_buckets, "r", encoding="utf-8-sig"
+                self.args.blueprint.annotations_config_path, "r", encoding="utf-8-sig"
             ) as f:
                 annotation_buckets = json.loads(f.read())
 
         return {
-            "task_description": self.args.task.get('task_description', None),
+            "task_description": task_description,
             "task_title": self.args.task.get('task_title', None),
             "annotation_question": self.args.blueprint.annotation_question,
             "onboarding_data": onboarding_data,

diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/webapp/src/components/checkboxes.jsx b/parlai/crowdsourcing/tasks/turn_annotations_static/webapp/src/components/checkboxes.jsx
@@ -44,7 +44,16 @@ function Checkboxes({ annotationBuckets, turnIdx, onUserInputUpdate, askReason }
     <div key={'checkboxes_' + turnIdx}>
       {
         Object.keys(annotationBuckets.config).map(c => (
-          <span key={'span_' + c + '_' + turnIdx}><input type={input_type} id={c + '_' + turnIdx} name={'checkbox_group_' + turnIdx} onChange={(evt) => handleCheckboxChange(evt, annotationBuckets, onUserInputUpdate)} /><span style={{ marginRight: '15px' }}>{annotationBuckets.config[c].name}</span>
+          <span key={'span_' + c + '_' + turnIdx}>
+            <input
+              type={input_type}
+              id={c + '_' + turnIdx}
+              name={'checkbox_group_' + turnIdx}
+              onChange={(evt) => handleCheckboxChange(evt, annotationBuckets, onUserInputUpdate)}
+            />
+            <span style={{ marginRight: '15px' }}>
+              {annotationBuckets.config[c].name}
+            </span>
           </span>
         ))
       }

diff --git a/tests/crowdsourcing/tasks/model_chat/expected_states/final_chat_data.json b/tests/crowdsourcing/tasks/model_chat/expected_states/final_chat_data.json
@@ -132,7 +132,7 @@
         "1"
     ],
     "task_description": {
-        "annotations_config": "[\n    {\n        \"value\": \"bucket_0\",\n        \"name\": \"Bucket 0\",\n        \"description\": \"this response implies something...0\"\n    },\n    {\n        \"value\": \"bucket_1\",\n        \"name\": \"Bucket 1\",\n        \"description\": \"this response implies something...1\"\n    },\n    {\n        \"value\": \"bucket_2\",\n        \"name\": \"Bucket 2\",\n        \"description\": \"this response implies something...2\"\n    },\n    {\n        \"value\": \"bucket_3\",\n        \"name\": \"Bucket 3\",\n        \"description\": \"this response implies something...3\"\n    },\n    {\n        \"value\": \"bucket_4\",\n        \"name\": \"Bucket 4\",\n        \"description\": \"this response implies something...4\"\n    },\n    {\n        \"value\": \"none_all_good\",\n        \"name\": \"None, all good\",\n        \"description\": \"This response implies that there are no problems with the data\"\n    }\n]\n",
+        "annotations_config": "{\n    \"config\": {\n        \"bucket_0\": {\n            \"name\": \"Bucket 0\",\n            \"description\": \"this response implies something...0\"\n        },\n        \"bucket_1\": {\n            \"name\": \"Bucket 1\",\n            \"description\": \"this response implies something...1\"\n        },\n        \"bucket_2\": {\n            \"name\": \"Bucket 2\",\n            \"description\": \"this response implies something...2\"\n        },\n        \"bucket_3\": {\n            \"name\": \"Bucket 3\",\n            \"description\": \"this response implies something...3\"\n        },\n        \"bucket_4\": {\n            \"name\": \"Bucket 4\",\n            \"description\": \"this response implies something...4\"\n        },\n        \"none_all_good\": {\n            \"name\": \"None, all good\",\n            \"description\": \"This response implies that there are no problems with the data\"\n        }\n    }\n}\n",
         "model_nickname": "fixed_response",
         "model_file": "/private/home/ems/GitHub/facebookresearch/ParlAI_dev/data/models/fixed_response/model",
         "model_opt": {

diff --git a/tests/crowdsourcing/tasks/model_chat/expected_states/state.json b/tests/crowdsourcing/tasks/model_chat/expected_states/state.json
@@ -357,7 +357,7 @@
                     "person2_seed_utterance": null,
                     "personas": null,
                     "task_description": {
-                        "annotations_config": "[\n    {\n        \"value\": \"bucket_0\",\n        \"name\": \"Bucket 0\",\n        \"description\": \"this response implies something...0\"\n    },\n    {\n        \"value\": \"bucket_1\",\n        \"name\": \"Bucket 1\",\n        \"description\": \"this response implies something...1\"\n    },\n    {\n        \"value\": \"bucket_2\",\n        \"name\": \"Bucket 2\",\n        \"description\": \"this response implies something...2\"\n    },\n    {\n        \"value\": \"bucket_3\",\n        \"name\": \"Bucket 3\",\n        \"description\": \"this response implies something...3\"\n    },\n    {\n        \"value\": \"bucket_4\",\n        \"name\": \"Bucket 4\",\n        \"description\": \"this response implies something...4\"\n    },\n    {\n        \"value\": \"none_all_good\",\n        \"name\": \"None, all good\",\n        \"description\": \"This response implies that there are no problems with the data\"\n    }\n]\n",
+                        "annotations_config": "{\n    \"config\": {\n        \"bucket_0\": {\n            \"name\": \"Bucket 0\",\n            \"description\": \"this response implies something...0\"\n        },\n        \"bucket_1\": {\n            \"name\": \"Bucket 1\",\n            \"description\": \"this response implies something...1\"\n        },\n        \"bucket_2\": {\n            \"name\": \"Bucket 2\",\n            \"description\": \"this response implies something...2\"\n        },\n        \"bucket_3\": {\n            \"name\": \"Bucket 3\",\n            \"description\": \"this response implies something...3\"\n        },\n        \"bucket_4\": {\n            \"name\": \"Bucket 4\",\n            \"description\": \"this response implies something...4\"\n        },\n        \"none_all_good\": {\n            \"name\": \"None, all good\",\n            \"description\": \"This response implies that there are no problems with the data\"\n        }\n    }\n}\n",
                         "model_nickname": "fixed_response",
                         "model_opt": {
                             "init_opt": null,
@@ -515,7 +515,7 @@
                             "hit_ids": ["1"],
                             "assignment_ids": ["1"],
                             "task_description": {
-                                "annotations_config": "[\n    {\n        \"value\": \"bucket_0\",\n        \"name\": \"Bucket 0\",\n        \"description\": \"this response implies something...0\"\n    },\n    {\n        \"value\": \"bucket_1\",\n        \"name\": \"Bucket 1\",\n        \"description\": \"this response implies something...1\"\n    },\n    {\n        \"value\": \"bucket_2\",\n        \"name\": \"Bucket 2\",\n        \"description\": \"this response implies something...2\"\n    },\n    {\n        \"value\": \"bucket_3\",\n        \"name\": \"Bucket 3\",\n        \"description\": \"this response implies something...3\"\n    },\n    {\n        \"value\": \"bucket_4\",\n        \"name\": \"Bucket 4\",\n        \"description\": \"this response implies something...4\"\n    },\n    {\n        \"value\": \"none_all_good\",\n        \"name\": \"None, all good\",\n        \"description\": \"This response implies that there are no problems with the data\"\n    }\n]\n",
+                                "annotations_config": "{\n    \"config\": {\n        \"bucket_0\": {\n            \"name\": \"Bucket 0\",\n            \"description\": \"this response implies something...0\"\n        },\n        \"bucket_1\": {\n            \"name\": \"Bucket 1\",\n            \"description\": \"this response implies something...1\"\n        },\n        \"bucket_2\": {\n            \"name\": \"Bucket 2\",\n            \"description\": \"this response implies something...2\"\n        },\n        \"bucket_3\": {\n            \"name\": \"Bucket 3\",\n            \"description\": \"this response implies something...3\"\n        },\n        \"bucket_4\": {\n            \"name\": \"Bucket 4\",\n            \"description\": \"this response implies something...4\"\n        },\n        \"none_all_good\": {\n            \"name\": \"None, all good\",\n            \"description\": \"This response implies that there are no problems with the data\"\n        }\n    }\n}\n",
                                 "model_nickname": "fixed_response",
                                 "model_file": null,
                                 "model_opt": {