diff --git a/promptsource/templates/app_reviews/templates.yaml b/promptsource/templates/app_reviews/templates.yaml
index 91437804f..08cd8fdbb 100644
--- a/promptsource/templates/app_reviews/templates.yaml
+++ b/promptsource/templates/app_reviews/templates.yaml
@@ -30,8 +30,10 @@ templates:
       {{review}}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
-      original_task: null
+      metrics:
+      - Accuracy
+      - Spearman Correlation
+      original_task: false
     name: generate_review
     reference: Generate a review from the rating.
   9746ce4b-ac58-4dfb-9783-d77c95cb62cf: !Template
diff --git a/promptsource/templates/dbpedia_14/templates.yaml b/promptsource/templates/dbpedia_14/templates.yaml
index 92c85aa89..651b7eefd 100644
--- a/promptsource/templates/dbpedia_14/templates.yaml
+++ b/promptsource/templates/dbpedia_14/templates.yaml
@@ -30,7 +30,8 @@ templates:
       {{title}} - {{content}} ||| {{ answer_choices[label] }}
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: pick_one_category_for_the_following_text
     reference: ''
@@ -45,7 +46,8 @@ templates:
       refers to which one? ||| {{ answer_choices[label] }}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: 'given_a_choice_of_categories '
     reference: ''
diff --git a/promptsource/templates/glue/ax/templates.yaml b/promptsource/templates/glue/ax/templates.yaml
index 9caefc8c4..4712ae9d2 100644
--- a/promptsource/templates/glue/ax/templates.yaml
+++ b/promptsource/templates/glue/ax/templates.yaml
@@ -68,7 +68,8 @@ templates:
       {{answer_choices[label]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: does_this_imply
     reference: ''
@@ -90,7 +91,8 @@ templates:
       {{answer_choices[label]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: does_this_support
     reference: ''
diff --git a/promptsource/templates/jigsaw_unintended_bias/templates.yaml b/promptsource/templates/jigsaw_unintended_bias/templates.yaml
index b1a2e9ec4..3a47e830f 100644
--- a/promptsource/templates/jigsaw_unintended_bias/templates.yaml
+++ b/promptsource/templates/jigsaw_unintended_bias/templates.yaml
@@ -131,7 +131,7 @@ templates:
       choices_in_prompt: null
       metrics:
       - Other
-      original_task: null
+      original_task: true
     name: threatening
     reference: ''
   71508d0b-7fd9-4810-933c-c3c59f0c5329: !Template
diff --git a/promptsource/templates/mdd/task2_recs/templates.yaml b/promptsource/templates/mdd/task2_recs/templates.yaml
index 93d4c6acd..ffd7c3379 100644
--- a/promptsource/templates/mdd/task2_recs/templates.yaml
+++ b/promptsource/templates/mdd/task2_recs/templates.yaml
@@ -43,7 +43,8 @@ templates:
       {{dialogue_turns.utterance[1]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: recommend_movies_dialogue
     reference: Given the likes, recommend a movie as a dialogue
diff --git a/promptsource/templates/scitail/snli_format/templates.yaml b/promptsource/templates/scitail/snli_format/templates.yaml
index 7cffcc870..46649a81f 100644
--- a/promptsource/templates/scitail/snli_format/templates.yaml
+++ b/promptsource/templates/scitail/snli_format/templates.yaml
@@ -19,7 +19,8 @@ templates:
       {% endif %}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: Another Yes/No Entailment Framing
     reference: ''
diff --git a/promptsource/templates/tydiqa/primary_task/templates.yaml b/promptsource/templates/tydiqa/primary_task/templates.yaml
index d60cc2a86..e4cb527ad 100644
--- a/promptsource/templates/tydiqa/primary_task/templates.yaml
+++ b/promptsource/templates/tydiqa/primary_task/templates.yaml
@@ -150,7 +150,8 @@ templates:
       \ | capitalize}}\n {% endif %}\n{% endif %}"
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: based_on_the_text
     reference: Binary question without mentioning KB
diff --git a/promptsource/templates/tydiqa/secondary_task/templates.yaml b/promptsource/templates/tydiqa/secondary_task/templates.yaml
index bcdaf87a5..215499e23 100644
--- a/promptsource/templates/tydiqa/secondary_task/templates.yaml
+++ b/promptsource/templates/tydiqa/secondary_task/templates.yaml
@@ -51,7 +51,9 @@ templates:
       {% endif %}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - BLEU
+      - ROUGE
       original_task: false
     name: title_generation
     reference: Generate title from a passage
diff --git a/promptsource/templates/wiki_bio/templates.yaml b/promptsource/templates/wiki_bio/templates.yaml
index 149d2160f..355e63795 100644
--- a/promptsource/templates/wiki_bio/templates.yaml
+++ b/promptsource/templates/wiki_bio/templates.yaml
@@ -22,7 +22,10 @@ templates:
       {{target_text}}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - BLEU
+      - ROUGE
+      - Other
       original_task: true
     name: who
     reference: ''
@@ -40,7 +43,8 @@ templates:
       content\"][n] }}\n{% endif %}\n{% endfor %}\n"
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: comprehension
     reference: ''
@@ -54,7 +58,8 @@ templates:
       _\",\" \") }} \n{% endif %}\n{% endfor %}"
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: what_content
     reference: ''
@@ -81,7 +86,8 @@ templates:
       '
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: guess_person
     reference: ''
@@ -106,7 +112,8 @@ templates:
       {% endfor %}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: key_content
     reference: ''
diff --git a/promptsource/templates/wiki_hop/masked/templates.yaml b/promptsource/templates/wiki_hop/masked/templates.yaml
index 9cb72cad4..ce6da0a71 100644
--- a/promptsource/templates/wiki_hop/masked/templates.yaml
+++ b/promptsource/templates/wiki_hop/masked/templates.yaml
@@ -2,7 +2,7 @@ dataset: wiki_hop
 subset: masked
 templates:
   08f2d1cf-c026-4b65-96d0-a28ff91affb5: !Template
-    answer_choices: null
+    answer_choices: '{{candidates | join("|||")}}'
     id: 08f2d1cf-c026-4b65-96d0-a28ff91affb5
     jinja: '{% set question_split = question.split('' '') %}

@@ -52,7 +52,8 @@ templates:
       {% endif %}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: false
     name: Indirect Question about Birthplace / Citizenship / Place of Death
     reference: Ask about place of birth, citizenship, or place of death for the subject
@@ -75,13 +76,14 @@ templates:
       {{ question_split[0] | replace("_", " ") }}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: Explain Relation
     reference: Given information, explain the relation between the subject entity and
       the object entity in a fact triple.
   3181f711-a376-4d6e-9fca-a34e1d048585: !Template
-    answer_choices: null
+    answer_choices: '{{candidates | join("|||")}}'
     id: 3181f711-a376-4d6e-9fca-a34e1d048585
     jinja: 'Information:

@@ -99,12 +101,13 @@ templates:
       {{answer}}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: false
     name: Generate Object Answer
     reference: Given information, generate the best object entity for the fact triple.
   639fa83f-14fd-457a-886e-a65334cb7e66: !Template
-    answer_choices: null
+    answer_choices: '{{candidates | join("|||")}}'
     id: 639fa83f-14fd-457a-886e-a65334cb7e66
     jinja: "Information:\n- {{ supports | join(\"\\n- \") }}\n\n{% set question_split\
       \ = question.split(' ') %}\nQuestion: ({{ question_split[1:] | join(\" \")}},\
@@ -112,7 +115,8 @@ templates:
       \ \n- {{ candidates | join(\"\\n- \") }}\n|||\n{{answer}}"
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: Choose Best Object Candidate
     reference: Given information and possible object candidates, choose the best object
@@ -136,7 +140,8 @@ templates:
       ") }}, {{answer}})'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: Generate Fact Triple
     reference: Given information, generate a fact triple.
@@ -158,7 +163,8 @@ templates:
       {{ question_split[1:] | join(" ")}}'
     metadata: !TemplateMetadata
       choices_in_prompt: null
-      metrics: []
+      metrics:
+      - Other
       original_task: false
     name: Generate Subject Answer
     reference: Given information, generate the best subject entity for the fact triple.
diff --git a/promptsource/templates/wiqa/templates.yaml b/promptsource/templates/wiqa/templates.yaml
index 6fb3f8e36..fbcf818b9 100644
--- a/promptsource/templates/wiqa/templates.yaml
+++ b/promptsource/templates/wiqa/templates.yaml
@@ -17,7 +17,9 @@ templates:
       '
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - BLEU
+      - ROUGE
       original_task: false
     name: what_might_be_the_first_step_of_the_process
     reference: ''
@@ -43,7 +45,9 @@ templates:
       '
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - BLEU
+      - ROUGE
       original_task: false
     name: what_might_be_the_last_step_of_the_process
     reference: ''
@@ -62,7 +66,9 @@ templates:
       {{ question_para_step | first }}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - BLEU
+      - ROUGE
       original_task: false
     name: what_is_the_missing_first_step
     reference: ''
@@ -85,12 +91,14 @@ templates:
       '
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - BLEU
+      - ROUGE
       original_task: false
     name: what_is_the_final_step_of_the_following_process
     reference: ''
   5dfee2c2-9742-4003-8ab6-dfe0ce5a745b: !Template
-    answer_choices: null
+    answer_choices: '{{choices.text | join("|||")}}'
     id: 5dfee2c2-9742-4003-8ab6-dfe0ce5a745b
     jinja: 'Process:

@@ -112,12 +120,13 @@ templates:
       {{answer_label|replace("_", " ")}}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: effect_with_string_answer
     reference: ''
   667c291f-6a36-4334-aa49-804c9e72500b: !Template
-    answer_choices: null
+    answer_choices: 'indirectly impacting a step of the process ||| not impacting any step of the process'
     id: 667c291f-6a36-4334-aa49-804c9e72500b
     jinja: 'Process:

@@ -147,12 +156,13 @@ templates:
       a step of the process"}[metadata_question_type]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: false
     name: which_of_the_following_is_the_supposed_perturbation
     reference: ''
   6cf2b300-6790-4120-9592-9db63bec221b: !Template
-    answer_choices: null
+    answer_choices: 'A ||| B ||| C'
     id: 6cf2b300-6790-4120-9592-9db63bec221b
     jinja: 'Process:

@@ -177,12 +187,13 @@ templates:
       {{answer_label_as_choice}}'
     metadata: !TemplateMetadata
       choices_in_prompt: true
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: true
     name: effect_with_label_answer
     reference: ''
   a17313bd-94bb-47ab-82bf-538df1b1ad5f: !Template
-    answer_choices: null
+    answer_choices: 'yes ||| no'
     id: a17313bd-94bb-47ab-82bf-538df1b1ad5f
     jinja: 'Process:

@@ -205,7 +216,8 @@ templates:
       "yes"}[metadata_question_type]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
-      metrics: []
+      metrics:
+      - Accuracy
       original_task: false
     name: does_the_supposed_perturbation_have_an_effect
     reference: ''
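Reviewer note: the newly filled `answer_choices` fields are themselves Jinja expressions; promptsource renders them against each example and splits the result on `|||` to obtain the per-example candidate list that the listed metrics (e.g. Accuracy via rank classification over the choices) are scored against. The snippet below is a minimal sketch of that behaviour using plain `jinja2` rather than the promptsource `Template` API, with a made-up wiki_hop-style example for illustration only.

```python
# Minimal sketch (not the promptsource API): render an answer_choices
# expression against one example and split it on "|||", mirroring how
# promptsource turns the field into a per-example list of candidates.
from jinja2 import Environment

# The expression added for the wiki_hop/masked templates in this diff.
answer_choices_expr = '{{candidates | join("|||")}}'

# Hypothetical example fields, for illustration only.
example = {"candidates": ["paris", "london", "rome"], "answer": "paris"}

rendered = Environment().from_string(answer_choices_expr).render(**example)
choices = [c.strip() for c in rendered.split("|||")]

print(choices)                       # ['paris', 'london', 'rome']
print(example["answer"] in choices)  # True -> usable as an Accuracy target
```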