From 5cd7e6bfdab94a8783dbb1aa10bfbb796b6f2f70 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 14:36:03 -0400
Subject: [PATCH 01/15] add custom datasets (story_cloze)

Co-authored-by: zaidalyafeai <alyafey22@gmail.com>
---
 .../story_cloze/dataset_infos.json            |   1 +
 .../story_cloze/story_cloze.py                | 127 ++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 promptsource/custom_datasets/story_cloze/dataset_infos.json
 create mode 100644 promptsource/custom_datasets/story_cloze/story_cloze.py

diff --git a/promptsource/custom_datasets/story_cloze/dataset_infos.json b/promptsource/custom_datasets/story_cloze/dataset_infos.json
new file mode 100644
index 000000000..f95933711
--- /dev/null
+++ b/promptsource/custom_datasets/story_cloze/dataset_infos.json
@@ -0,0 +1 @@
+{"2016": {"description": "", "citation": "", "homepage": "", "license": "", "features": {"story_id": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_1": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_2": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_3": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_4": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz1": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz2": {"dtype": "string", "id": null, "_type": "Value"}, "answer_right_ending": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "story_cloze", "config_name": "2016", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"validation": {"name": "validation", "num_bytes": 614084, "num_examples": 1871, "dataset_name": "story_cloze"}, "test": {"name": "test", "num_bytes": 613184, "num_examples": 1871, "dataset_name": "story_cloze"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1227268, "size_in_bytes": 1227268}, "2018": {"description": "", "citation": "", "homepage": "", "license": "", "features": {"story_id": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_1": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_2": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_3": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_4": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz1": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz2": {"dtype": "string", "id": null, "_type": "Value"}, "answer_right_ending": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "story_cloze", "config_name": "2018", 
"version": "0.0.0", "splits": {"validation": {"name": "validation", "num_bytes": 515439, "num_examples": 1571, "dataset_name": "story_cloze"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 515439, "size_in_bytes": 515439}}
\ No newline at end of file
diff --git a/promptsource/custom_datasets/story_cloze/story_cloze.py b/promptsource/custom_datasets/story_cloze/story_cloze.py
new file mode 100644
index 000000000..68475dafe
--- /dev/null
+++ b/promptsource/custom_datasets/story_cloze/story_cloze.py
@@ -0,0 +1,127 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Story Cloze datasets."""
+
+
+import csv
+import os
+
+import datasets
+
+
+_DESCRIPTION = """
+Story Cloze Test is a commonsense reasoning framework for evaluating story understanding,
+story generation, and script learning. This test requires a system to choose the correct ending
+to a four-sentence story.
+"""
+
+_CITATION = """\
+@inproceedings{mostafazadeh2017lsdsem,
+  title={Lsdsem 2017 shared task: The story cloze test},
+  author={Mostafazadeh, Nasrin and Roth, Michael and Louis, Annie and Chambers, Nathanael and Allen, James},
+  booktitle={Proceedings of the 2nd Workshop on Linking Models of Lexical, Sentential and Discourse-level Semantics},
+  pages={46--51},
+  year={2017}
+}
+"""
+
+
+class StoryCloze(datasets.GeneratorBasedBuilder):
+    """Dataset builder for the Story Cloze Test, read from manually downloaded CSV files."""
+
+    BUILDER_CONFIGS = [
+        datasets.BuilderConfig(name="2016", description="Story Cloze Test Spring 2016 set"),
+        datasets.BuilderConfig(name="2018", description="Story Cloze Test Winter 2018 set"),
+    ]
+
+    @property
+    def manual_download_instructions(self):
+        """Return the message explaining how to obtain and load the data files."""
+        return (
+            "To use Story Cloze you have to download it manually. Please fill in and submit this "
+            "Google form (http://goo.gl/forms/aQz39sdDrO). "
+            "You will then receive a download link for the dataset. Load it using: "
+            "`datasets.load_dataset('story_cloze', data_dir='path/to/folder/folder_name')`"
+        )
+
+    def _info(self):
+        """Return the dataset metadata: description, feature schema, homepage and citation."""
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "story_id": datasets.Value("string"),
+                    "input_sentence_1": datasets.Value("string"),
+                    "input_sentence_2": datasets.Value("string"),
+                    "input_sentence_3": datasets.Value("string"),
+                    "input_sentence_4": datasets.Value("string"),
+                    "sentence_quiz1": datasets.Value("string"),
+                    "sentence_quiz2": datasets.Value("string"),
+                    "answer_right_ending": datasets.Value("int32"),
+                }
+            ),
+            homepage="https://cs.rochester.edu/nlp/rocstories/",
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Build the split generators for the selected config.
+
+        The CSV files are looked up, under fixed file names, in the manual
+        download folder (`dl_manager.manual_dir`).
+        """
+        path_to_manual_folder = os.path.abspath(os.path.expanduser(dl_manager.manual_dir))
+        if self.config.name == "2016":
+            test_file = os.path.join(path_to_manual_folder, "cloze_test_test__spring2016 - cloze_test_ALL_test.csv")
+            val_file = os.path.join(path_to_manual_folder, "cloze_test_val__spring2016 - cloze_test_ALL_val.csv")
+            return [
+                datasets.SplitGenerator(name=datasets.Split.VALIDATION, gen_kwargs={"filepath": val_file}),
+                datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepath": test_file}),
+            ]
+
+        # Any other config is handled as the Winter 2018 set, for which only a validation file is read.
+        val_file = os.path.join(path_to_manual_folder, "cloze_test_val__winter2018-cloze_test_ALL_val - 1 - 1.csv")
+        return [
+            datasets.SplitGenerator(name=datasets.Split.VALIDATION, gen_kwargs={"filepath": val_file}),
+        ]
+
+    def _generate_examples(self, filepath):
+        """Yield `(story_id, example)` pairs from the CSV file at `filepath`.
+
+        The first row is treated as a header and skipped. The key of each example
+        is the value of its first column, which is also stored as `story_id`.
+        """
+        # `newline=""` lets the csv module handle line breaks inside quoted fields itself.
+        with open(filepath, encoding="utf-8", newline="") as csv_file:
+            csv_reader = csv.reader(
+                csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True
+            )
+            # Skip the header row. The default value avoids a StopIteration (turned into a
+            # RuntimeError inside a generator, see PEP 479) when the file is empty.
+            next(csv_reader, None)
+            for row in csv_reader:
+                # Keep only well-formed rows: exactly 8 columns (story id, 4 context
+                # sentences, 2 candidate endings, right-ending label); others are skipped.
+                if len(row) == 8:
+                    yield row[0], {
+                        "story_id": row[0],
+                        "input_sentence_1": row[1],
+                        "input_sentence_2": row[2],
+                        "input_sentence_3": row[3],
+                        "input_sentence_4": row[4],
+                        "sentence_quiz1": row[5],
+                        "sentence_quiz2": row[6],
+                        "answer_right_ending": int(row[7]),
+                    }

From ff0e3d2fd0f2526187a7a7a705418e9503a6c28b Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 14:36:37 -0400
Subject: [PATCH 02/15] add story_cloze template

Co-authored-by: zaidalyafeai <alyafey22@gmail.com>
---
 .../templates/story_cloze/2016/templates.yaml | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 promptsource/templates/story_cloze/2016/templates.yaml

diff --git a/promptsource/templates/story_cloze/2016/templates.yaml b/promptsource/templates/story_cloze/2016/templates.yaml
new file mode 100644
index 000000000..f0fea9629
--- /dev/null
+++ b/promptsource/templates/story_cloze/2016/templates.yaml
@@ -0,0 +1,112 @@
+dataset: story_cloze
+subset: '2016'
+templates:
+  1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8: !Template
+    answer_choices: null
+    answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}'
+    id: 1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8
+    jinja: '{{input_sentence_1}}
+      {{input_sentence_2}}
+      {{input_sentence_3}}
+      {{input_sentence_4}}
+      What is a possible continuation for the story given the following options ?
+      - {{answer_choices | join("\n- ")}}
+      |||
+      {{answer_choices[answer_right_ending -1]}}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      metrics:
+      - Accuracy
+      original_task: true
+    name: Answer Given options
+    reference: ''
+  1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1: !Template
+    answer_choices: null
+    answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}'
+    id: 1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1
+    jinja: "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n\
+      {{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the\
+      \ previous story from the following options: \n- {{answer_choices | join(\"\\\
+      n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}"
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      metrics:
+      - Accuracy
+      original_task: true
+    name: Choose Story Ending
+    reference: ''
+  9dab69d1-cad0-4d2f-a7cc-120df233571c: !Template
+    answer_choices: null
+    answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}'
+    id: 9dab69d1-cad0-4d2f-a7cc-120df233571c
+    jinja: 'Yesterday, I watched a movie. Here''s what happened:
+      {{input_sentence_1}}
+      {{input_sentence_2}}
+      {{input_sentence_3}}
+      {{input_sentence_4}}
+      What happens next?
+      - {{answer_choices | join("\n- ")}}
+      |||
+      {{answer_choices[answer_right_ending -1]}}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      metrics:
+      - Accuracy
+      original_task: true
+    name: Movie What Happens Next
+    reference: ''
+  b5c8445f-2d3a-4691-bdd5-58956816702f: !Template
+    answer_choices: null
+    answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}'
+    id: b5c8445f-2d3a-4691-bdd5-58956816702f
+    jinja: "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n\
+      {{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from\
+      \ the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending\
+      \ -1]}}"
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      metrics:
+      - Accuracy
+      original_task: true
+    name: Story Continuation and Options
+    reference: ''
+  baffa716-43cf-4954-a35c-655d775321e6: !Template
+    answer_choices: null
+    answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}'
+    id: baffa716-43cf-4954-a35c-655d775321e6
+    jinja: 'Generate a possible ending for the following story:
+      {{input_sentence_1}}
+      {{input_sentence_2}}
+      {{input_sentence_3}}
+      {{input_sentence_4}}
+      |||
+      {{answer_choices[answer_right_ending -1]}}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      original_task: false
+    name: Generate Ending
+    reference: ''
+  c6f3d802-4f97-449f-a911-03470d418f7d: !Template
+    answer_choices: null
+    answer_choices_key: '{{sentence_quiz1}} ||| {{sentence_quiz2}}'
+    id: c6f3d802-4f97-449f-a911-03470d418f7d
+    jinja: 'I read the following novel:
+      {{input_sentence_1}}
+      {{input_sentence_2}}
+      {{input_sentence_3}}
+      {{input_sentence_4}}
+      What do you think is the most probable ending? You can choose from the following
+      options:
+      - {{answer_choices | join("\n- ")}}
+      |||
+      {{answer_choices[answer_right_ending -1]}}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      metrics:
+      - Accuracy
+      original_task: true
+    name: Novel Correct Ending
+    reference: ''

From 028a1e1e391f051c7cdd1b23e0c31a40310c60bb Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 14:36:57 -0400
Subject: [PATCH 03/15] modify `get_dataset_splits`

Co-authored-by: zaidalyafeai <alyafey22@gmail.com>
---
 promptsource/seqio_tasks/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index 8feed3439..d5c83baa1 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -1,4 +1,6 @@
+import os
 import re
+from pathlib import Path
 
 import datasets
 import tensorflow as tf
@@ -60,6 +62,12 @@ def filter_fn(ex):
 
 
 def get_dataset_splits(dataset_name, subset_name=None):
+    # `datasets.get_dataset_infos` pulls infos from hf/datasets's master.
+    # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
+    # This is a temporary fix to be able to do `import promptsource.seqio_tasks`
+    # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder)
+    if dataset_name == "story_cloze":
+        dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/")
     info = datasets.get_dataset_infos(dataset_name)
     subset_name = subset_name or list(info.keys())[0]
     return info[subset_name].splits

From 920a9bd4d9e4bf634e5747c6e8009326018c29f1 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 14:40:02 -0400
Subject: [PATCH 04/15] add gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index b6467cec0..0df936d26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,3 +133,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# Locked files
+*.lock
\ No newline at end of file

From f87d2475226080ee8dc40562e48622ea5bf27685 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 15:16:25 -0400
Subject: [PATCH 05/15] fix tests

Co-authored-by: zaidalyafeai <alyafey22@gmail.com>
---
 promptsource/seqio_tasks/utils.py | 1 +
 promptsource/utils.py             | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index d5c83baa1..a05db0c7a 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -66,6 +66,7 @@ def get_dataset_splits(dataset_name, subset_name=None):
     # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
     # This is a temporary fix to be able to do `import promptsource.seqio_tasks`
     # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder)
+    # Also see `promptsource.utils.get_dataset_builder`
     if dataset_name == "story_cloze":
         dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/")
     info = datasets.get_dataset_infos(dataset_name)
diff --git a/promptsource/utils.py b/promptsource/utils.py
index 0d55591e9..ed5bd3bf3 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -1,5 +1,6 @@
 # coding=utf-8
 
+import os
 import datasets
 import requests
 
@@ -31,6 +32,13 @@ def renameDatasetColumn(dataset):
 
 def get_dataset_builder(path, conf=None):
     "Get a dataset builder from name and conf."
+    # `datasets.load.prepare_module` pulls infos from hf/datasets's master.
+    # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
+    # This is a temporary fix for the tests (more specifically test_templates.py)
+    # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder)
+    # Also see `promptsource.seqio_tasks.utils.get_dataset_splits`
+    if path == "story_cloze":
+        path = os.path.join(os.path.dirname(__file__), "custom_datasets/story_cloze/")
     module_path = datasets.load.prepare_module(path, dataset=True)
     builder_cls = datasets.load.import_main_class(module_path[0], dataset=True)
     if conf:

From caf3295159d1ef9c49cdce2640dc25d616e14d61 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 15:19:20 -0400
Subject: [PATCH 06/15] style and comment

---
 promptsource/seqio_tasks/utils.py | 2 +-
 promptsource/utils.py             | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index a05db0c7a..d9425b6f3 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -65,7 +65,7 @@ def get_dataset_splits(dataset_name, subset_name=None):
     # `datasets.get_dataset_infos` pulls infos from hf/datasets's master.
     # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
     # This is a temporary fix to be able to do `import promptsource.seqio_tasks`
-    # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder)
+    # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.utils.get_dataset_builder`
     if dataset_name == "story_cloze":
         dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/")
diff --git a/promptsource/utils.py b/promptsource/utils.py
index ed5bd3bf3..0d1df1162 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -1,6 +1,7 @@
 # coding=utf-8
 
 import os
+
 import datasets
 import requests
 
@@ -35,7 +36,7 @@ def get_dataset_builder(path, conf=None):
     # `datasets.load.prepare_module` pulls infos from hf/datasets's master.
     # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
     # This is a temporary fix for the tests (more specifically test_templates.py)
-    # Once PR 2907 is merged, we can remove these lines (along with the `custom_datasets` folder)
+    # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.seqio_tasks.utils.get_dataset_splits`
     if path == "story_cloze":
         path = os.path.join(os.path.dirname(__file__), "custom_datasets/story_cloze/")

From 5ac27dd3ad4d5311124c4454b421d34c295f65a0 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 15:34:19 -0400
Subject: [PATCH 07/15] accelerate test

---
 test/test_templates.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_templates.py b/test/test_templates.py
index 730fb37b0..b850d93fa 100644
--- a/test/test_templates.py
+++ b/test/test_templates.py
@@ -37,7 +37,8 @@ def test_uuids():
             all_uuids[uuid] = (dataset_name, subset_name)
 
 
-@pytest.mark.parametrize("dataset", template_collection.keys)
+# @pytest.mark.parametrize("dataset", template_collection.keys)
+@pytest.mark.parametrize("dataset", [("story_cloze", "2016")])
 def test_dataset(dataset):
     """
     Validates all the templates in the repository with simple syntactic checks:

From c9db4e21e2a309d5b31ad4322acbf1eed7b519e2 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 16:00:14 -0400
Subject: [PATCH 08/15] fix

---
 promptsource/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/promptsource/utils.py b/promptsource/utils.py
index 0d1df1162..ea4310c46 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -1,6 +1,7 @@
 # coding=utf-8
 
 import os
+from pathlib import Path
 
 import datasets
 import requests
@@ -39,7 +40,7 @@ def get_dataset_builder(path, conf=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.seqio_tasks.utils.get_dataset_splits`
     if path == "story_cloze":
-        path = os.path.join(os.path.dirname(__file__), "custom_datasets/story_cloze/")
+        path = os.path.join(Path(__file__).parents[0], "custom_datasets/story_cloze/")
     module_path = datasets.load.prepare_module(path, dataset=True)
     builder_cls = datasets.load.import_main_class(module_path[0], dataset=True)
     if conf:

From 48280b2d759a8f3fba51fa975517233faefd8cd3 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 16:12:08 -0400
Subject: [PATCH 09/15] include story_cloze into pkg_resources

---
 promptsource/seqio_tasks/utils.py | 5 ++---
 promptsource/utils.py             | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index d9425b6f3..ac92f5769 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -1,6 +1,5 @@
-import os
+import pkg_resources
 import re
-from pathlib import Path
 
 import datasets
 import tensorflow as tf
@@ -68,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.utils.get_dataset_builder`
     if dataset_name == "story_cloze":
-        dataset_name = os.path.join(Path(__file__).parents[1], "custom_datasets/story_cloze/")
+        dataset_name = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze")
     info = datasets.get_dataset_infos(dataset_name)
     subset_name = subset_name or list(info.keys())[0]
     return info[subset_name].splits
diff --git a/promptsource/utils.py b/promptsource/utils.py
index ea4310c46..4e6b60f84 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -1,7 +1,6 @@
 # coding=utf-8
 
-import os
-from pathlib import Path
+import pkg_resources
 
 import datasets
 import requests
@@ -40,7 +39,7 @@ def get_dataset_builder(path, conf=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.seqio_tasks.utils.get_dataset_splits`
     if path == "story_cloze":
-        path = os.path.join(Path(__file__).parents[0], "custom_datasets/story_cloze/")
+        path = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze")
     module_path = datasets.load.prepare_module(path, dataset=True)
     builder_cls = datasets.load.import_main_class(module_path[0], dataset=True)
     if conf:

From f3d7640a6c0c6dfd81285f7f81a511fcaae83313 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 16:17:57 -0400
Subject: [PATCH 10/15] forgot the setup.py

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index d6b0e2bcb..671488132 100644
--- a/setup.py
+++ b/setup.py
@@ -27,5 +27,6 @@
         "templates/*/*/*.yaml",
         "seqio_tasks/experiment_D3.csv",  # Experiment D3
         "seqio_tasks/experiment_D4.csv",
+        "custom_datasets/*"
     ]}
 )

From 76fd1d4524cecc386d5a0bb72b9916f296bdc661 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 16:18:11 -0400
Subject: [PATCH 11/15] style

---
 promptsource/seqio_tasks/utils.py | 2 +-
 promptsource/utils.py             | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index ac92f5769..da039b0f0 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -1,7 +1,7 @@
-import pkg_resources
 import re
 
 import datasets
+import pkg_resources
 import tensorflow as tf
 
 import promptsource.utils
diff --git a/promptsource/utils.py b/promptsource/utils.py
index 4e6b60f84..d709c76dc 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -1,8 +1,7 @@
 # coding=utf-8
 
-import pkg_resources
-
 import datasets
+import pkg_resources
 import requests
 
 

From f91d8e369ca2d55196b0886b3e7eb178b6349e2d Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 16:41:14 -0400
Subject: [PATCH 12/15] f

---
 .gitignore                        | 2 +-
 promptsource/seqio_tasks/utils.py | 2 +-
 setup.py                          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0df936d26..9423edd27 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,4 +135,4 @@ dmypy.json
 .pyre/
 
 # Locked files
-*.lock
\ No newline at end of file
+*.lock
diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index da039b0f0..cee883c0d 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -67,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.utils.get_dataset_builder`
     if dataset_name == "story_cloze":
-        dataset_name = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze")
+        dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze")
     info = datasets.get_dataset_infos(dataset_name)
     subset_name = subset_name or list(info.keys())[0]
     return info[subset_name].splits
diff --git a/setup.py b/setup.py
index 671488132..640c7c83c 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,6 @@
         "templates/*/*/*.yaml",
         "seqio_tasks/experiment_D3.csv",  # Experiment D3
         "seqio_tasks/experiment_D4.csv",
-        "custom_datasets/*"
+        "custom_datasets/*/*"
     ]}
 )

From 35e2e64b9e4d64844f2e6e9de717342514bfdef3 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 16:55:21 -0400
Subject: [PATCH 13/15] test

---
 promptsource/seqio_tasks/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index cee883c0d..1f08f0495 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -67,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.utils.get_dataset_builder`
     if dataset_name == "story_cloze":
-        dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze")
+        dataset_name = pkg_resources.resource_filename(__name__, "story_cloze")
     info = datasets.get_dataset_infos(dataset_name)
     subset_name = subset_name or list(info.keys())[0]
     return info[subset_name].splits

From 9a7f8e3c79d1d2659a4da4048b5f06ec6337ce41 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 17:09:54 -0400
Subject: [PATCH 14/15] final

---
 promptsource/seqio_tasks/utils.py | 2 +-
 promptsource/utils.py             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index 1f08f0495..cee883c0d 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -67,7 +67,7 @@ def get_dataset_splits(dataset_name, subset_name=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.utils.get_dataset_builder`
     if dataset_name == "story_cloze":
-        dataset_name = pkg_resources.resource_filename(__name__, "story_cloze")
+        dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze")
     info = datasets.get_dataset_infos(dataset_name)
     subset_name = subset_name or list(info.keys())[0]
     return info[subset_name].splits
diff --git a/promptsource/utils.py b/promptsource/utils.py
index d709c76dc..ab090dfc5 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -38,7 +38,7 @@ def get_dataset_builder(path, conf=None):
     # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
     # Also see `promptsource.seqio_tasks.utils.get_dataset_splits`
     if path == "story_cloze":
-        path = pkg_resources.resource_filename(__name__, "custom_datasets/story_cloze")
+        path = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze")
     module_path = datasets.load.prepare_module(path, dataset=True)
     builder_cls = datasets.load.import_main_class(module_path[0], dataset=True)
     if conf:

From 2a92cd04496977bb2c49f19f2d173e721f412fdd Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Mon, 20 Sep 2021 17:17:15 -0400
Subject: [PATCH 15/15] re-activate all tests

---
 test/test_templates.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/test_templates.py b/test/test_templates.py
index b850d93fa..730fb37b0 100644
--- a/test/test_templates.py
+++ b/test/test_templates.py
@@ -37,8 +37,7 @@ def test_uuids():
             all_uuids[uuid] = (dataset_name, subset_name)
 
 
-# @pytest.mark.parametrize("dataset", template_collection.keys)
-@pytest.mark.parametrize("dataset", [("story_cloze", "2016")])
+@pytest.mark.parametrize("dataset", template_collection.keys)
 def test_dataset(dataset):
     """
     Validates all the templates in the repository with simple syntactic checks: