From ec32ce11cff6ee712e75ac20bbd1e47b5b21a2b3 Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Fri, 15 Oct 2021 16:56:13 -0400
Subject: [PATCH 1/2] remove custom datasets (story_cloze) and their local path overrides

---
 .../story_cloze/dataset_infos.json            |   1 -
 .../story_cloze/story_cloze.py                | 127 ------------------
 promptsource/seqio_tasks/utils.py             |   7 -
 promptsource/utils.py                         |   7 -
 4 files changed, 142 deletions(-)
 delete mode 100644 promptsource/custom_datasets/story_cloze/dataset_infos.json
 delete mode 100644 promptsource/custom_datasets/story_cloze/story_cloze.py

diff --git a/promptsource/custom_datasets/story_cloze/dataset_infos.json b/promptsource/custom_datasets/story_cloze/dataset_infos.json
deleted file mode 100644
index f95933711..000000000
--- a/promptsource/custom_datasets/story_cloze/dataset_infos.json
+++ /dev/null
@@ -1 +0,0 @@
-{"2016": {"description": "", "citation": "", "homepage": "", "license": "", "features": {"story_id": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_1": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_2": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_3": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_4": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz1": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz2": {"dtype": "string", "id": null, "_type": "Value"}, "answer_right_ending": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "story_cloze", "config_name": "2016", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"validation": {"name": "validation", "num_bytes": 614084, "num_examples": 1871, "dataset_name": "story_cloze"}, "test": {"name": "test", "num_bytes": 613184, "num_examples": 1871, "dataset_name": "story_cloze"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1227268, "size_in_bytes": 1227268}, "2018": {"description": "", "citation": "", "homepage": "", "license": "", "features": {"story_id": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_1": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_2": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_3": {"dtype": "string", "id": null, "_type": "Value"}, "input_sentence_4": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz1": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_quiz2": {"dtype": "string", "id": null, "_type": "Value"}, "answer_right_ending": {"dtype": "int32", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "story_cloze", "config_name": "2018", 
"version": "0.0.0", "splits": {"validation": {"name": "validation", "num_bytes": 515439, "num_examples": 1571, "dataset_name": "story_cloze"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 515439, "size_in_bytes": 515439}}
\ No newline at end of file
diff --git a/promptsource/custom_datasets/story_cloze/story_cloze.py b/promptsource/custom_datasets/story_cloze/story_cloze.py
deleted file mode 100644
index 68475dafe..000000000
--- a/promptsource/custom_datasets/story_cloze/story_cloze.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# coding=utf-8
-# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Story Cloze datasets."""
-
-
-import csv
-import os
-
-import datasets
-
-
-_DESCRIPTION = """
-Story Cloze Test' is a commonsense reasoning framework for evaluating story understanding,
-story generation, and script learning.This test requires a system to choose the correct ending
-to a four-sentence story.
-"""
-
-_CITATION = """\
-@inproceedings{mostafazadeh2017lsdsem,
-  title={Lsdsem 2017 shared task: The story cloze test},
-  author={Mostafazadeh, Nasrin and Roth, Michael and Louis, Annie and Chambers, Nathanael and Allen, James},
-  booktitle={Proceedings of the 2nd Workshop on Linking Models of Lexical, Sentential and Discourse-level Semantics},
-  pages={46--51},
-  year={2017}
-}
-"""
-
-
-class StoryCloze(datasets.GeneratorBasedBuilder):
-    """."""
-
-    BUILDER_CONFIGS = [
-        datasets.BuilderConfig(name="2016", description="Story Cloze Test Spring 2016 set"),
-        datasets.BuilderConfig(name="2018", description="Story Cloze Test Winter 2018 set"),
-    ]
-
-    @property
-    def manual_download_instructions(self):
-        return (
-            "To use Sotry Cloze you have to download it manually. Please fill this "
-            "google form (http://goo.gl/forms/aQz39sdDrO). complete the form. "
-            "Then you will recieve a a download link for the dataset. Load it using : "
-            "`datasets.load_dataset('story_cloze', data_dir='path/to/folder/folder_name')`"
-        )
-
-    def _info(self):
-        return datasets.DatasetInfo(
-            description=_DESCRIPTION,
-            features=datasets.Features(
-                {
-                    "story_id": datasets.Value("string"),
-                    "input_sentence_1": datasets.Value("string"),
-                    "input_sentence_2": datasets.Value("string"),
-                    "input_sentence_3": datasets.Value("string"),
-                    "input_sentence_4": datasets.Value("string"),
-                    "sentence_quiz1": datasets.Value("string"),
-                    "sentence_quiz2": datasets.Value("string"),
-                    "answer_right_ending": datasets.Value("int32"),
-                }
-            ),
-            homepage="https://cs.rochester.edu/nlp/rocstories/",
-            citation=_CITATION,
-        )
-
-    def _split_generators(self, dl_manager):
-        path_to_manual_folder = os.path.abspath(os.path.expanduser(dl_manager.manual_dir))
-        if self.config.name == "2016":
-            test_file = os.path.join(path_to_manual_folder, "cloze_test_test__spring2016 - cloze_test_ALL_test.csv")
-            val_file = os.path.join(path_to_manual_folder, "cloze_test_val__spring2016 - cloze_test_ALL_val.csv")
-            return [
-                datasets.SplitGenerator(
-                    name=datasets.Split.VALIDATION,
-                    gen_kwargs={
-                        "filepath": val_file,
-                    },
-                ),
-                datasets.SplitGenerator(
-                    name=datasets.Split.TEST,
-                    gen_kwargs={
-                        "filepath": test_file,
-                    },
-                ),
-            ]
-
-        else:
-            val_file = os.path.join(path_to_manual_folder, "cloze_test_val__winter2018-cloze_test_ALL_val - 1 - 1.csv")
-
-            return [
-                datasets.SplitGenerator(
-                    name=datasets.Split.VALIDATION,
-                    gen_kwargs={
-                        "filepath": val_file,
-                    },
-                ),
-            ]
-
-    def _generate_examples(self, filepath):
-        """Generate Eduge news examples."""
-        with open(filepath, encoding="utf-8") as csv_file:
-            csv_reader = csv.reader(
-                csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True
-            )
-            _ = next(csv_reader)
-            for id_, row in enumerate(csv_reader):
-                if row and len(row) == 8:
-                    yield row[0], {
-                        "story_id": row[0],
-                        "input_sentence_1": row[1],
-                        "input_sentence_2": row[2],
-                        "input_sentence_3": row[3],
-                        "input_sentence_4": row[4],
-                        "sentence_quiz1": row[5],
-                        "sentence_quiz2": row[6],
-                        "answer_right_ending": int(row[7]),
-                    }
diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index 350464bed..a97591cad 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -64,13 +64,6 @@ def filter_fn(ex):
 
 
 def get_dataset_splits(dataset_name, subset_name=None):
-    # `datasets.get_dataset_infos` pulls infos from hf/datasets's master.
-    # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
-    # This is a temporary fix to be able to do `import promptsource.seqio_tasks`
-    # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
-    # Also see `promptsource.utils.get_dataset_builder`
-    if dataset_name == "story_cloze":
-        dataset_name = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze")
     info = datasets.get_dataset_infos(dataset_name)
     subset_name = subset_name or list(info.keys())[0]
     return info[subset_name].splits
diff --git a/promptsource/utils.py b/promptsource/utils.py
index efd4e06f3..0101ef01a 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -34,13 +34,6 @@ def renameDatasetColumn(dataset):
 
 def get_dataset_builder(path, conf=None):
     "Get a dataset builder from name and conf."
-    # `datasets.load.prepare_module` pulls infos from hf/datasets's master.
-    # story_cloze hasn't been merged yet (https://github.com/huggingface/datasets/pull/2907)
-    # This is a temporary fix for the tests (more specifically test_templates.py)
-    # Once PR 2907 is merged, we can remove this if condition (along with the `custom_datasets` folder)
-    # Also see `promptsource.seqio_tasks.utils.get_dataset_splits`
-    if path == "story_cloze":
-        path = pkg_resources.resource_filename("promptsource", "custom_datasets/story_cloze")
     module_path = datasets.load.prepare_module(path, dataset=True)
     builder_cls = datasets.load.import_main_class(module_path[0], dataset=True)
     if conf:

From a92ccbcb319f778ffbedd97635d5b85f15d48ece Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Fri, 15 Oct 2021 17:02:46 -0400
Subject: [PATCH 2/2] remove unused pkg_resources imports

---
 promptsource/seqio_tasks/utils.py | 1 -
 promptsource/utils.py             | 1 -
 2 files changed, 2 deletions(-)

diff --git a/promptsource/seqio_tasks/utils.py b/promptsource/seqio_tasks/utils.py
index a97591cad..1b4df95aa 100644
--- a/promptsource/seqio_tasks/utils.py
+++ b/promptsource/seqio_tasks/utils.py
@@ -1,7 +1,6 @@
 import re
 
 import datasets
-import pkg_resources
 import tensorflow as tf
 
 import promptsource.utils
diff --git a/promptsource/utils.py b/promptsource/utils.py
index 0101ef01a..1ecf3a45b 100644
--- a/promptsource/utils.py
+++ b/promptsource/utils.py
@@ -1,7 +1,6 @@
 # coding=utf-8
 
 import datasets
-import pkg_resources
 import requests
 
 from promptsource.templates import INCLUDED_USERS