From 5cd7e6bfdab94a8783dbb1aa10bfbb796b6f2f70 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 14:36:03 -0400 Subject: [PATCH 01/15] add custom datasets (story_cloze) Co-authored-by: zaidalyafeai --- .../story_cloze/dataset_infos.json | 1 + .../story_cloze/story_cloze.py | 127 ++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 promptsource/custom_datasets/story_cloze/dataset_infos.json create mode 100644 promptsource/custom_datasets/story_cloze/story_cloze.py diff --git a/promptsource/custom_datasets/story_cloze/dataset_infos.json b/promptsource/custom_datasets/story_cloze/dataset_infos.json new file mode 100644 index 000000000..f95933711 --- /dev/null +++ b/promptsource/custom_datasets/story_cloze/dataset_infos.json @@ -0,0 +1 @@ +{"2016": {"description": "", "citation": "", "homepage": "", "license": "", "features": {"story_id": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_1": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_2": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_3": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_4": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz1": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz2": {"dtype": "string", "id": null, "_type": "Value"}, "answer_right_ending": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "story_cloze", "config_name": "2016", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"validation": {"name": "validation", "num_bytes": 614084, "num_examples": 1871, "dataset_name": "story_cloze"}, "test": {"name": "test", "num_bytes": 613184, "num_examples": 1871, "dataset_name": "story_cloze"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1227268, "size_in_bytes": 
1227268}, "2018": {"description": "", "citation": "", "homepage": "", "license": "", "features": {"story_id": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_1": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_2": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_3": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_4": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz1": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz2": {"dtype": "string", "id": null, "_type": "Value"}, "answer_right_ending": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "story_cloze", "config_name": "2018", "version": "0.0.0", "splits": {"validation": {"name": "validation", "num_bytes": 515439, "num_examples": 1571, "dataset_name": "story_cloze"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 515439, "size_in_bytes": 515439}} \ No newline at end of file diff --git a/promptsource/custom_datasets/story_cloze/story_cloze.py b/promptsource/custom_datasets/story_cloze/story_cloze.py new file mode 100644 index 000000000..68475dafe --- /dev/null +++ b/promptsource/custom_datasets/story_cloze/story_cloze.py @@ -0,0 +1,127 @@ +# coding=utf-8 +# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Story Cloze datasets.""" + + +import csv +import os + +import datasets + + +_DESCRIPTION = """ +Story Cloze Test' is a commonsense reasoning framework for evaluating story understanding, +story generation, and script learning.This test requires a system to choose the correct ending +to a four-sentence story. +""" + +_CITATION = """\ +@inproceedings{mostafazadeh2017lsdsem, + title={Lsdsem 2017 shared task: The story cloze test}, + author={Mostafazadeh, Nasrin and Roth, Michael and Louis, Annie and Chambers, Nathanael and Allen, James}, + booktitle={Proceedings of the 2nd Workshop on Linking Models of Lexical, Sentential and Discourse-level Semantics}, + pages={46--51}, + year={2017} +} +""" + + +class StoryCloze(datasets.GeneratorBasedBuilder): + """Dataset builder for the Story Cloze Test (Spring 2016 and Winter 2018 configs).""" + + BUILDER_CONFIGS = [ + datasets.BuilderConfig(name="2016", description="Story Cloze Test Spring 2016 set"), + datasets.BuilderConfig(name="2018", description="Story Cloze Test Winter 2018 set"), + ] + + @property + def manual_download_instructions(self): + return ( + "To use Sotry Cloze you have to download it manually. Please fill this " + "google form (http://goo.gl/forms/aQz39sdDrO). complete the form. " + "Then you will recieve a a download link for the dataset. 
Load it using : " + "`datasets.load_dataset('story_cloze', data_dir='path/to/folder/folder_name')`" + ) + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "story_id": datasets.Value("string"), + "input_sentence_1": datasets.Value("string"), + "input_sentence_2": datasets.Value("string"), + "input_sentence_3": datasets.Value("string"), + "input_sentence_4": datasets.Value("string"), + "sentence_quiz1": datasets.Value("string"), + "sentence_quiz2": datasets.Value("string"), + "answer_right_ending": datasets.Value("int32"), + } + ), + homepage="https://cs.rochester.edu/nlp/rocstories/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + path_to_manual_folder = os.path.abspath(os.path.expanduser(dl_manager.manual_dir)) + if self.config.name == "2016": + test_file = os.path.join(path_to_manual_folder, "cloze_test_test__spring2016 - cloze_test_ALL_test.csv") + val_file = os.path.join(path_to_manual_folder, "cloze_test_val__spring2016 - cloze_test_ALL_val.csv") + return [ + datasets.SplitGenerator( + name=datasets.Split.VALIDATION, + gen_kwargs={ + "filepath": val_file, + }, + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, + gen_kwargs={ + "filepath": test_file, + }, + ), + ] + + else: + val_file = os.path.join(path_to_manual_folder, "cloze_test_val__winter2018-cloze_test_ALL_val - 1 - 1.csv") + + return [ + datasets.SplitGenerator( + name=datasets.Split.VALIDATION, + gen_kwargs={ + "filepath": val_file, + }, + ), + ] + + def _generate_examples(self, filepath): + """Yield (story_id, example) pairs read from a Story Cloze CSV file.""" + with open(filepath, encoding="utf-8") as csv_file: + csv_reader = csv.reader( + csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True + ) + _ = next(csv_reader) + for id_, row in enumerate(csv_reader): + if row and len(row) == 8: + yield row[0], { + "story_id": row[0], + "input_sentence_1": row[1], + "input_sentence_2": row[2], + "input_sentence_3": 
row[3], + "input_sentence_4": row[4], + "sentence_quiz1": row[5], + "sentence_quiz2": row[6], + "answer_right_ending": int(row[7]), + } From ff0e3d2fd0f2526187a7a7a705418e9503a6c28b Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 14:36:37 -0400 Subject: [PATCH 02/15] add story_cloze template Co-authored-by: zaidalyafeai --- .../templates/story_cloze/2016/templates.yaml | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 promptsource/templates/story_cloze/2016/templates.yaml diff --git a/promptsource/templates/story_cloze/2016/templates.yaml b/promptsource/templates/story_cloze/2016/templates.yaml new file mode 100644 index 000000000..f0fea9629 --- /dev/null +++ b/promptsource/templates/story_cloze/2016/templates.yaml @@ -0,0 +1,112 @@ +dataset: story_cloze +subset: '2016' +templates: + 1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8: !Template + answer_choices: null + answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}' + id: 1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8 + jinja: '{{input_sentence_1}} + {{input_sentence_2}} + {{input_sentence_3}} + {{input_sentence_4}} + What is a possible continuation for the story given the following options ? 
+ - {{answer_choices | join("\n- ")}} + ||| + {{answer_choices[answer_right_ending -1]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + metrics: + - Accuracy + original_task: true + name: Answer Given options + reference: '' + 1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1: !Template + answer_choices: null + answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}' + id: 1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1 + jinja: "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n\ + {{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the\ + \ previous story from the following options: \n- {{answer_choices | join(\"\\\ + n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}" + metadata: !TemplateMetadata + choices_in_prompt: true + metrics: + - Accuracy + original_task: true + name: Choose Story Ending + reference: '' + 9dab69d1-cad0-4d2f-a7cc-120df233571c: !Template + answer_choices: null + answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}' + id: 9dab69d1-cad0-4d2f-a7cc-120df233571c + jinja: 'Yesterday, I watched a movie. Here''s what happened: + {{input_sentence_1}} + {{input_sentence_2}} + {{input_sentence_3}} + {{input_sentence_4}} + What happens next? + - {{answer_choices | join("\n- ")}} + ||| + {{answer_choices[answer_right_ending -1]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + metrics: + - Accuracy + original_task: true + name: Movie What Happens Next + reference: '' + b5c8445f-2d3a-4691-bdd5-58956816702f: !Template + answer_choices: null + answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}' + id: b5c8445f-2d3a-4691-bdd5-58956816702f + jinja: "What is a possible continuation for the following story ? 
\n\n{{input_sentence_1}}\n\ + {{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from\ + \ the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending\ + \ -1]}}" + metadata: !TemplateMetadata + choices_in_prompt: true + metrics: + - Accuracy + original_task: true + name: Story Continuation and Options + reference: '' + baffa716-43cf-4954-a35c-655d775321e6: !Template + answer_choices: null + answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}' + id: baffa716-43cf-4954-a35c-655d775321e6 + jinja: 'Generate a possible ending for the following story: + {{input_sentence_1}} + {{input_sentence_2}} + {{input_sentence_3}} + {{input_sentence_4}} + ||| + {{answer_choices[answer_right_ending -1]}}' + metadata: !TemplateMetadata + choices_in_prompt: false + metrics: + - BLEU + - ROUGE + original_task: false + name: Generate Ending + reference: '' + c6f3d802-4f97-449f-a911-03470d418f7d: !Template + answer_choices: null + answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}' + id: c6f3d802-4f97-449f-a911-03470d418f7d + jinja: 'I read the following novel: + {{input_sentence_1}} + {{input_sentence_2}} + {{input_sentence_3}} + {{input_sentence_4}} + What do you think is the most probable ending? 
You can choose from the following + options: + - {{answer_choices | join("\n- ")}} + ||| + {{answer_choices[answer_right_ending -1]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + metrics: + - Accuracy + original_task: true + name: Novel Correct Ending + reference: '' From 028a1e1e391f051c7cdd1b23e0c31a40310c60bb Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 14:36:57 -0400 Subject: [PATCH 03/15] modify `get_dataset_splits` Co-authored-by: zaidalyafeai --- promptsource/seqio_tasks/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index 8feed3439..d5c83baa1 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -1,4 +1,6 @@ +import os import re +from pathlib import Path import datasets import tensorflow as tf @@ -60,6 +62,12 @@ def filter_fn(ex): def get_dataset_splits(dataset_name, subset_name=None): + # `datasets.get_dataset_infos` pulls infos from hf/datasets's master. 
+ # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907) + # This is a temporary fix to be able to do `import promptsource.seqio_tasks` + # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder) + if dataset_name == "story_cloze": + dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/") info = datasets.get_dataset_infos(dataset_name) subset_name = subset_name or list(info.keys())[0] return info[subset_name].splits From 920a9bd4d9e4bf634e5747c6e8009326018c29f1 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 14:40:02 -0400 Subject: [PATCH 04/15] add gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index b6467cec0..0df936d26 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,6 @@ dmypy.json # Pyre type checker .pyre/ + +# Locked files +*.lock \ No newline at end of file From f87d2475226080ee8dc40562e48622ea5bf27685 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 15:16:25 -0400 Subject: [PATCH 05/15] fix tests Co-authored-by: zaidalyafeai --- promptsource/seqio_tasks/utils.py | 1 + promptsource/utils.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index d5c83baa1..a05db0c7a 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -66,6 +66,7 @@ def get_dataset_splits(dataset_name, subset_name=None): # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907) # This is a temporary fix to be able to do `import promptsource.seqio_tasks` # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder) + # Also see `promptsource.utils.get_dataset_builder` if dataset_name == "story_cloze": dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/") info = 
datasets.get_dataset_infos(dataset_name) diff --git a/promptsource/utils.py b/promptsource/utils.py index 0d55591e9..ed5bd3bf3 100644 --- a/promptsource/utils.py +++ b/promptsource/utils.py @@ -1,5 +1,6 @@ # coding=utf-8 +import os import datasets import requests @@ -31,6 +32,13 @@ def renameDatasetColumn(dataset): def get_dataset_builder(path, conf=None): "Get a dataset builder from name and conf." + # `datasets.load.prepare_module` pulls infos from hf/datasets's master. + # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907) + # This is a temporary fix for the tests (more specifically test_templates.py) + # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder) + # Also see `promptsource.seqio_tasks.utils.get_dataset_splits` + if path == "story_cloze": + path = os.path.join(os.path.dirname(__file__), "custom_datasets/story_cloze/") module_path = datasets.load.prepare_module(path, dataset=True) builder_cls = datasets.load.import_main_class(module_path[0], dataset=True) if conf: From caf3295159d1ef9c49cdce2640dc25d616e14d61 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 15:19:20 -0400 Subject: [PATCH 06/15] style and comment --- promptsource/seqio_tasks/utils.py | 2 +- promptsource/utils.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index a05db0c7a..d9425b6f3 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -65,7 +65,7 @@ def get_dataset_splits(dataset_name, subset_name=None): # `datasets.get_dataset_infos` pulls infos from hf/datasets's master. 
# story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907) # This is a temporary fix to be able to do `import promptsource.seqio_tasks` - # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder) + # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.utils.get_dataset_builder` if dataset_name == "story_cloze": dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/") diff --git a/promptsource/utils.py b/promptsource/utils.py index ed5bd3bf3..0d1df1162 100644 --- a/promptsource/utils.py +++ b/promptsource/utils.py @@ -1,6 +1,7 @@ # coding=utf-8 import os + import datasets import requests @@ -35,7 +36,7 @@ def get_dataset_builder(path, conf=None): # `datasets.load.prepare_module` pulls infos from hf/datasets's master. # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907) # This is a temporary fix for the tests (more specifically test_templates.py) - # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder) + # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.seqio_tasks.utils.get_dataset_splits` if path == "story_cloze": path = os.path.join(os.path.dirname(__file__), "custom_datasets/story_cloze/") From 5ac27dd3ad4d5311124c4454b421d34c295f65a0 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 15:34:19 -0400 Subject: [PATCH 07/15] accelerate test --- test/test_templates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_templates.py b/test/test_templates.py index 730fb37b0..b850d93fa 100644 --- a/test/test_templates.py +++ b/test/test_templates.py @@ -37,7 +37,8 @@ def test_uuids(): all_uuids[uuid] = (dataset_name, subset_name) -@pytest.mark.parametrize("dataset", template_collection.keys) +# 
@pytest.mark.parametrize("dataset", template_collection.keys) +@pytest.mark.parametrize("dataset", [("story_cloze", "2016")]) def test_dataset(dataset): """ Validates all the templates in the repository with simple syntactic checks: From c9db4e21e2a309d5b31ad4322acbf1eed7b519e2 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 16:00:14 -0400 Subject: [PATCH 08/15] fix --- promptsource/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/promptsource/utils.py b/promptsource/utils.py index 0d1df1162..ea4310c46 100644 --- a/promptsource/utils.py +++ b/promptsource/utils.py @@ -1,6 +1,7 @@ # coding=utf-8 import os +from pathlib import Path import datasets import requests @@ -39,7 +40,7 @@ def get_dataset_builder(path, conf=None): # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.seqio_tasks.utils.get_dataset_splits` if path == "story_cloze": - path = os.path.join(os.path.dirname(__file__), "custom_datasets/story_cloze/") + path = os.path.join(Path(__file__).parents[0], "custom_datasets/story_cloze/") module_path = datasets.load.prepare_module(path, dataset=True) builder_cls = datasets.load.import_main_class(module_path[0], dataset=True) if conf: From 48280b2d759a8f3fba51fa975517233faefd8cd3 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 16:12:08 -0400 Subject: [PATCH 09/15] include story_cloze into pkg_resources --- promptsource/seqio_tasks/utils.py | 5 ++--- promptsource/utils.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index d9425b6f3..ac92f5769 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -1,6 +1,5 @@ -import os +import pkg_resources import re -from pathlib import Path import datasets import tensorflow as tf @@ -68,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None): # Once PR 
2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.utils.get_dataset_builder` if dataset_name == "story_cloze": - dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/") + dataset_name = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze") info = datasets.get_dataset_infos(dataset_name) subset_name = subset_name or list(info.keys())[0] return info[subset_name].splits diff --git a/promptsource/utils.py b/promptsource/utils.py index ea4310c46..4e6b60f84 100644 --- a/promptsource/utils.py +++ b/promptsource/utils.py @@ -1,7 +1,6 @@ # coding=utf-8 -import os -from pathlib import Path +import pkg_resources import datasets import requests @@ -40,7 +39,7 @@ def get_dataset_builder(path, conf=None): # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.seqio_tasks.utils.get_dataset_splits` if path == "story_cloze": - path = os.path.join(Path(__file__).parents[0], "custom_datasets/story_cloze/") + path = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze") module_path = datasets.load.prepare_module(path, dataset=True) builder_cls = datasets.load.import_main_class(module_path[0], dataset=True) if conf: From f3d7640a6c0c6dfd81285f7f81a511fcaae83313 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 16:17:57 -0400 Subject: [PATCH 10/15] forgot the setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d6b0e2bcb..671488132 100644 --- a/setup.py +++ b/setup.py @@ -27,5 +27,6 @@ "templates/*/*/*.yaml", "seqio_tasks/experiment_D3.csv", # Experiment D3 "seqio_tasks/experiment_D4.csv", + "custom_datasets/*" ]} ) From 76fd1d4524cecc386d5a0bb72b9916f296bdc661 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 16:18:11 -0400 Subject: [PATCH 11/15] style --- promptsource/seqio_tasks/utils.py | 2 +- 
promptsource/utils.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index ac92f5769..da039b0f0 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -1,7 +1,7 @@ -import pkg_resources import re import datasets +import pkg_resources import tensorflow as tf import promptsource.utils diff --git a/promptsource/utils.py b/promptsource/utils.py index 4e6b60f84..d709c76dc 100644 --- a/promptsource/utils.py +++ b/promptsource/utils.py @@ -1,8 +1,7 @@ # coding=utf-8 -import pkg_resources - import datasets +import pkg_resources import requests From f91d8e369ca2d55196b0886b3e7eb178b6349e2d Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 16:41:14 -0400 Subject: [PATCH 12/15] f --- .gitignore | 2 +- promptsource/seqio_tasks/utils.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 0df936d26..9423edd27 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,4 @@ dmypy.json .pyre/ # Locked files -*.lock \ No newline at end of file +*.lock diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index da039b0f0..cee883c0d 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -67,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None): # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.utils.get_dataset_builder` if dataset_name == "story_cloze": - dataset_name = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze") + dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze") info = datasets.get_dataset_infos(dataset_name) subset_name = subset_name or list(info.keys())[0] return info[subset_name].splits diff --git a/setup.py b/setup.py index 671488132..640c7c83c 100644 --- 
a/setup.py +++ b/setup.py @@ -27,6 +27,6 @@ "templates/*/*/*.yaml", "seqio_tasks/experiment_D3.csv", # Experiment D3 "seqio_tasks/experiment_D4.csv", - "custom_datasets/*" + "custom_datasets/*/*" ]} ) From 35e2e64b9e4d64844f2e6e9de717342514bfdef3 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 16:55:21 -0400 Subject: [PATCH 13/15] test --- promptsource/seqio_tasks/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index cee883c0d..1f08f0495 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -67,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None): # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.utils.get_dataset_builder` if dataset_name == "story_cloze": - dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze") + dataset_name = pkg_resources.resource_filename(__name__, "story_cloze") info = datasets.get_dataset_infos(dataset_name) subset_name = subset_name or list(info.keys())[0] return info[subset_name].splits From 9a7f8e3c79d1d2659a4da4048b5f06ec6337ce41 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 17:09:54 -0400 Subject: [PATCH 14/15] final --- promptsource/seqio_tasks/utils.py | 2 +- promptsource/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py index 1f08f0495..cee883c0d 100644 --- a/promptsource/seqio_tasks/utils.py +++ b/promptsource/seqio_tasks/utils.py @@ -67,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None): # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.utils.get_dataset_builder` if dataset_name == "story_cloze": - dataset_name = pkg_resources.resource_filename(__name__, 
"story_cloze") + dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze") info = datasets.get_dataset_infos(dataset_name) subset_name = subset_name or list(info.keys())[0] return info[subset_name].splits diff --git a/promptsource/utils.py b/promptsource/utils.py index d709c76dc..ab090dfc5 100644 --- a/promptsource/utils.py +++ b/promptsource/utils.py @@ -38,7 +38,7 @@ def get_dataset_builder(path, conf=None): # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder) # Also see `promptsource.seqio_tasks.utils.get_dataset_splits` if path == "story_cloze": - path = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze") + path = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze") module_path = datasets.load.prepare_module(path, dataset=True) builder_cls = datasets.load.import_main_class(module_path[0], dataset=True) if conf: From 2a92cd04496977bb2c49f19f2d173e721f412fdd Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 20 Sep 2021 17:17:15 -0400 Subject: [PATCH 15/15] re-activate all tests --- test/test_templates.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_templates.py b/test/test_templates.py index b850d93fa..730fb37b0 100644 --- a/test/test_templates.py +++ b/test/test_templates.py @@ -37,8 +37,7 @@ def test_uuids(): all_uuids[uuid] = (dataset_name, subset_name) -# @pytest.mark.parametrize("dataset", template_collection.keys) -@pytest.mark.parametrize("dataset", [("story_cloze", "2016")]) +@pytest.mark.parametrize("dataset", template_collection.keys) def test_dataset(dataset): """ Validates all the templates in the repository with simple syntactic checks: