From c2107c3e42af713fcee2a610931f55fc6d60ec39 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 26 Apr 2021 11:32:24 +0200 Subject: [PATCH 1/2] update in setup.py black>=21.4b0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 817c06189a9..bdebbb730a3 100644 --- a/setup.py +++ b/setup.py @@ -173,7 +173,7 @@ ) -QUALITY_REQUIRE = ["black", "flake8==3.7.9", "isort", "pyyaml>=5.3.1"] +QUALITY_REQUIRE = ["black>=21.4b0", "flake8==3.7.9", "isort", "pyyaml>=5.3.1"] EXTRAS_REQUIRE = { From 570d7d2aa46d9beb1a96c7569ba04e448410527d Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 26 Apr 2021 11:32:29 +0200 Subject: [PATCH 2/2] run black --- .../afrikaans_ner_corpus/afrikaans_ner_corpus.py | 2 +- datasets/air_dialogue/air_dialogue.py | 2 +- datasets/allegro_reviews/allegro_reviews.py | 2 +- datasets/amazon_polarity/amazon_polarity.py | 2 +- datasets/aqua_rat/aqua_rat.py | 2 +- datasets/aquamuse/aquamuse.py | 2 +- datasets/ar_cov19/ar_cov19.py | 2 +- .../arabic_billion_words/arabic_billion_words.py | 2 +- datasets/arabic_pos_dialect/arabic_pos_dialect.py | 4 ++-- datasets/arsentd_lev/arsentd_lev.py | 2 +- datasets/arxiv_dataset/arxiv_dataset.py | 2 +- datasets/aslg_pc12/aslg_pc12.py | 2 +- datasets/asset/asset.py | 2 +- datasets/assin/assin.py | 2 +- datasets/assin2/assin2.py | 2 +- datasets/atomic/atomic.py | 2 +- datasets/autshumato/autshumato.py | 4 ++-- datasets/banking77/banking77.py | 2 +- datasets/bbc_hindi_nli/bbc_hindi_nli.py | 2 +- datasets/bprec/bprec.py | 2 +- datasets/brwac/brwac.py | 2 +- datasets/bsd_ja_en/bsd_ja_en.py | 4 ++-- datasets/c3/c3.py | 4 ++-- datasets/caner/caner.py | 2 +- datasets/cbt/cbt.py | 2 +- .../ccaligned_multilingual.py | 2 +- datasets/cdsc/cdsc.py | 2 +- datasets/cdt/cdt.py | 2 +- datasets/clickbait_news_bg/clickbait_news_bg.py | 2 +- datasets/climate_fever/climate_fever.py | 2 +- datasets/coached_conv_pref/coached_conv_pref.py | 2 +- datasets/code_search_net/code_search_net.py | 2 +- datasets/common_voice/common_voice.py | 2 +- datasets/compguesswhat/compguesswhat.py | 2 +- datasets/conceptnet5/conceptnet5.py | 2 +- datasets/cord19/cord19.py | 2 +- datasets/cos_e/cos_e.py | 2 +- datasets/counter/counter.py | 2 +- .../covid_tweets_japanese/covid_tweets_japanese.py | 2 +- .../craigslist_bargains/craigslist_bargains.py | 2 +- datasets/crawl_domain/crawl_domain.py | 2 +- datasets/cryptonite/cryptonite.py | 2 +- datasets/cs_restaurants/cs_restaurants.py | 2 +- datasets/cuad/cuad.py | 2 +- datasets/curiosity_dialogs/curiosity_dialogs.py | 2 +- datasets/daily_dialog/daily_dialog.py | 2 +- datasets/dane/dane.py | 2 +- datasets/dbpedia_14/dbpedia_14.py | 2 +- datasets/deal_or_no_dialog/deal_or_no_dialog.py | 2 +- datasets/dengue_filipino/dengue_filipino.py | 2 +- datasets/dialog_re/dialog_re.py | 2 +- .../diplomacy_detection/diplomacy_detection.py | 4 ++-- datasets/discofuse/discofuse.py | 2 +- datasets/dutch_social/dutch_social.py | 2 +- datasets/dyk/dyk.py | 2 +- datasets/ecthr_cases/ecthr_cases.py | 2 +- datasets/ehealth_kd/ehealth_kd.py | 2 +- datasets/emo/emo.py | 4 ++-- datasets/enriched_web_nlg/enriched_web_nlg.py | 2 +- datasets/eth_py150_open/eth_py150_open.py | 2 +- datasets/eu_regulatory_ir/eu_regulatory_ir.py | 2 +- datasets/eurlex/eurlex.py | 2 +- datasets/europarl_bilingual/europarl_bilingual.py | 6 +++--- .../evidence_infer_treatment.py | 4 ++-- datasets/factckbr/factckbr.py | 2 +- datasets/fake_news_english/fake_news_english.py | 2 +- datasets/fake_news_filipino/fake_news_filipino.py | 2 
+- datasets/few_rel/few_rel.py | 2 +- .../financial_phrasebank/financial_phrasebank.py | 2 +- datasets/flue/flue.py | 14 +++++++------- datasets/freebase_qa/freebase_qa.py | 2 +- datasets/gem/gem.py | 2 +- datasets/generics_kb/generics_kb.py | 4 ++-- datasets/glucose/glucose.py | 2 +- datasets/gnad10/gnad10.py | 2 +- .../google_wellformed_query.py | 2 +- .../guardian_authorship/guardian_authorship.py | 4 ++-- datasets/harem/harem.py | 4 ++-- datasets/has_part/has_part.py | 2 +- datasets/hate_offensive/hate_offensive.py | 4 ++-- .../hate_speech_filipino/hate_speech_filipino.py | 2 +- .../hate_speech_offensive/hate_speech_offensive.py | 2 +- datasets/hate_speech_pl/hate_speech_pl.py | 2 +- .../hate_speech_portuguese.py | 2 +- datasets/hda_nli_hindi/hda_nli_hindi.py | 2 +- datasets/head_qa/head_qa.py | 2 +- datasets/hind_encorp/hind_encorp.py | 2 +- datasets/hindi_discourse/hindi_discourse.py | 2 +- datasets/hkcancor/hkcancor.py | 2 +- datasets/humicroedit/humicroedit.py | 2 +- datasets/igbo_monolingual/igbo_monolingual.py | 2 +- datasets/igbo_ner/igbo_ner.py | 2 +- datasets/ilist/ilist.py | 2 +- datasets/imppres/imppres.py | 2 +- datasets/indonlu/indonlu.py | 2 +- datasets/irc_disentangle/irc_disentangle.py | 2 +- .../isixhosa_ner_corpus/isixhosa_ner_corpus.py | 2 +- datasets/isizulu_ner_corpus/isizulu_ner_corpus.py | 2 +- datasets/iwslt2017/iwslt2017.py | 4 ++-- datasets/jfleg/jfleg.py | 2 +- .../jigsaw_toxicity_pred/jigsaw_toxicity_pred.py | 2 +- datasets/kd_conv/kd_conv.py | 2 +- datasets/kinnews_kirnews/kinnews_kirnews.py | 2 +- datasets/lama/lama.py | 2 +- datasets/lambada/lambada.py | 2 +- datasets/lener_br/lener_br.py | 2 +- datasets/liar/liar.py | 2 +- datasets/liveqa/liveqa.py | 2 +- datasets/m_lama/m_lama.py | 2 +- datasets/mac_morpho/mac_morpho.py | 2 +- datasets/makhzan/makhzan.py | 2 +- datasets/mc_taco/mc_taco.py | 2 +- .../medical_questions_pairs.py | 2 +- datasets/meta_woz/meta_woz.py | 2 +- datasets/metooma/metooma.py | 2 +- datasets/mkb/mkb.py | 2 +- datasets/msr_sqa/msr_sqa.py | 2 +- datasets/multi_re_qa/multi_re_qa.py | 2 +- .../multi_x_science_sum/multi_x_science_sum.py | 2 +- datasets/mutual_friends/mutual_friends.py | 2 +- datasets/narrativeqa_manual/narrativeqa_manual.py | 2 +- datasets/natural_questions/natural_questions.py | 2 +- datasets/ncslgr/ncslgr.py | 2 +- datasets/nell/nell.py | 2 +- datasets/neural_code_search/neural_code_search.py | 2 +- datasets/newsph_nli/newsph_nli.py | 2 +- datasets/newspop/newspop.py | 2 +- datasets/newsqa/newsqa.py | 2 +- datasets/nkjp-ner/nkjp-ner.py | 2 +- datasets/nli_tr/nli_tr.py | 4 ++-- datasets/nq_open/nq_open.py | 2 +- datasets/numeric_fused_head/numeric_fused_head.py | 2 +- datasets/oclar/oclar.py | 2 +- datasets/offcombr/offcombr.py | 2 +- datasets/ohsumed/ohsumed.py | 2 +- datasets/ollie/ollie.py | 2 +- datasets/omp/omp.py | 2 +- datasets/openslr/openslr.py | 2 +- datasets/openwebtext/openwebtext.py | 2 +- datasets/opus100/opus100.py | 2 +- datasets/opus_dogc/opus_dogc.py | 2 +- datasets/opus_rf/opus_rf.py | 4 ++-- datasets/orange_sum/orange_sum.py | 2 +- datasets/para_pat/para_pat.py | 2 +- datasets/paws-x/paws-x.py | 2 +- datasets/paws/paws.py | 2 +- datasets/pec/pec.py | 4 ++-- datasets/peer_read/peer_read.py | 2 +- datasets/persian_ner/persian_ner.py | 2 +- datasets/pib/pib.py | 2 +- datasets/piqa/piqa.py | 2 +- datasets/polemo2/polemo2.py | 2 +- .../poleval2019_cyberbullying.py | 2 +- datasets/polsum/polsum.py | 2 +- datasets/proto_qa/proto_qa.py | 2 +- datasets/psc/psc.py | 2 +- datasets/ptb_text_only/ptb_text_only.py 
| 2 +- datasets/pubmed/pubmed.py | 2 +- datasets/py_ast/py_ast.py | 2 +- datasets/qa_srl/qa_srl.py | 2 +- datasets/quac/quac.py | 2 +- datasets/re_dial/re_dial.py | 2 +- datasets/reasoning_bg/reasoning_bg.py | 2 +- datasets/refresd/refresd.py | 2 +- datasets/ro_sent/ro_sent.py | 2 +- datasets/ronec/ronec.py | 2 +- datasets/ropes/ropes.py | 2 +- datasets/rotten_tomatoes/rotten_tomatoes.py | 2 +- datasets/s2orc/s2orc.py | 2 +- datasets/samsum/samsum.py | 2 +- datasets/sanskrit_classic/sanskrit_classic.py | 2 +- datasets/scitail/scitail.py | 2 +- datasets/scitldr/scitldr.py | 2 +- datasets/selqa/selqa.py | 4 ++-- .../sem_eval_2010_task_8/sem_eval_2010_task_8.py | 2 +- .../sem_eval_2014_task_1/sem_eval_2014_task_1.py | 2 +- .../sem_eval_2020_task_11/sem_eval_2020_task_11.py | 2 +- datasets/sent_comp/sent_comp.py | 2 +- datasets/senti_lex/senti_lex.py | 2 +- datasets/senti_ws/senti_ws.py | 2 +- datasets/sepedi_ner/sepedi_ner.py | 2 +- datasets/sesotho_ner_corpus/sesotho_ner_corpus.py | 2 +- .../setswana_ner_corpus/setswana_ner_corpus.py | 2 +- datasets/siswati_ner_corpus/siswati_ner_corpus.py | 2 +- datasets/smartdata/smartdata.py | 2 +- .../snow_simplified_japanese_corpus.py | 2 +- datasets/so_stacksample/so_stacksample.py | 2 +- .../sofc_materials_articles.py | 4 ++-- .../spanish_billion_words/spanish_billion_words.py | 2 +- datasets/stsb_multi_mt/stsb_multi_mt.py | 2 +- datasets/swag/swag.py | 2 +- datasets/swahili/swahili.py | 2 +- datasets/tapaco/tapaco.py | 2 +- datasets/ted_talks_iwslt/ted_talks_iwslt.py | 4 ++-- datasets/telugu_books/telugu_books.py | 2 +- datasets/telugu_news/telugu_news.py | 2 +- .../times_of_india_news_headlines.py | 2 +- datasets/tmu_gfm_dataset/tmu_gfm_dataset.py | 2 +- datasets/trec/trec.py | 2 +- datasets/tuple_ie/tuple_ie.py | 2 +- datasets/turk/turk.py | 2 +- datasets/turkish_ner/turkish_ner.py | 2 +- .../turkish_shrinked_ner/turkish_shrinked_ner.py | 2 +- datasets/tweet_eval/tweet_eval.py | 2 +- datasets/tweet_qa/tweet_qa.py | 2 +- datasets/tydiqa/tydiqa.py | 2 +- datasets/web_nlg/web_nlg.py | 2 +- datasets/wi_locness/wi_locness.py | 2 +- datasets/wiki_asp/wiki_asp.py | 2 +- datasets/wiki_bio/wiki_bio.py | 2 +- datasets/wiki_lingua/wiki_lingua.py | 2 +- datasets/wikicorpus/wikicorpus.py | 2 +- datasets/wino_bias/wino_bias.py | 2 +- datasets/winograd_wsc/winograd_wsc.py | 6 +++--- datasets/winogrande/winogrande.py | 2 +- datasets/wmt20_mlqe_task1/wmt20_mlqe_task1.py | 2 +- datasets/wmt20_mlqe_task2/wmt20_mlqe_task2.py | 2 +- datasets/wmt20_mlqe_task3/wmt20_mlqe_task3.py | 2 +- datasets/wrbsc/wrbsc.py | 2 +- datasets/xed_en_fi/xed_en_fi.py | 2 +- datasets/xquad/xquad.py | 2 +- datasets/xquad_r/xquad_r.py | 2 +- datasets/xsum_factuality/xsum_factuality.py | 2 +- datasets/yelp_review_full/yelp_review_full.py | 2 +- .../youtube_caption_corrections.py | 2 +- src/datasets/arrow_dataset.py | 12 ++++++------ src/datasets/arrow_writer.py | 2 +- src/datasets/dataset_dict.py | 2 +- src/datasets/metric.py | 6 +++--- src/datasets/utils/filelock.py | 6 +++--- src/datasets/utils/metadata.py | 2 +- 231 files changed, 268 insertions(+), 268 deletions(-) diff --git a/datasets/afrikaans_ner_corpus/afrikaans_ner_corpus.py b/datasets/afrikaans_ner_corpus/afrikaans_ner_corpus.py index 0a62238b66a..7bd246f6db8 100644 --- a/datasets/afrikaans_ner_corpus/afrikaans_ner_corpus.py +++ b/datasets/afrikaans_ner_corpus/afrikaans_ner_corpus.py @@ -59,7 +59,7 @@ def __init__(self, **kwargs): class AfrikaansNerCorpus(datasets.GeneratorBasedBuilder): - """ Afrikaans Ner dataset""" + """Afrikaans 
Ner dataset""" BUILDER_CONFIGS = [ AfrikaansNerCorpusConfig( diff --git a/datasets/air_dialogue/air_dialogue.py b/datasets/air_dialogue/air_dialogue.py index 31f5c237986..9a1425efa8a 100644 --- a/datasets/air_dialogue/air_dialogue.py +++ b/datasets/air_dialogue/air_dialogue.py @@ -213,7 +213,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/allegro_reviews/allegro_reviews.py b/datasets/allegro_reviews/allegro_reviews.py index 7a6941866e4..7420f570731 100644 --- a/datasets/allegro_reviews/allegro_reviews.py +++ b/datasets/allegro_reviews/allegro_reviews.py @@ -99,7 +99,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/amazon_polarity/amazon_polarity.py b/datasets/amazon_polarity/amazon_polarity.py index 300b2c5eadd..cff8b820639 100644 --- a/datasets/amazon_polarity/amazon_polarity.py +++ b/datasets/amazon_polarity/amazon_polarity.py @@ -113,7 +113,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.reader(f, delimiter=",", quoting=csv.QUOTE_ALL) diff --git a/datasets/aqua_rat/aqua_rat.py b/datasets/aqua_rat/aqua_rat.py index 1484cf1071e..3c3c02b12ae 100644 --- a/datasets/aqua_rat/aqua_rat.py +++ b/datasets/aqua_rat/aqua_rat.py @@ -118,7 +118,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): data = json.loads(row) diff --git a/datasets/aquamuse/aquamuse.py b/datasets/aquamuse/aquamuse.py index 59d0bfba19c..17302f36f0c 100644 --- a/datasets/aquamuse/aquamuse.py +++ b/datasets/aquamuse/aquamuse.py @@ -140,7 +140,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" filepath = [join(filepath, f) for f in listdir(filepath) if isfile(join(filepath, f))] filepath = sorted(filepath) raw_dataset = tf.data.TFRecordDataset(filepath) diff --git a/datasets/ar_cov19/ar_cov19.py b/datasets/ar_cov19/ar_cov19.py index d876cd49928..afa11f7bcae 100644 --- a/datasets/ar_cov19/ar_cov19.py +++ b/datasets/ar_cov19/ar_cov19.py @@ -124,7 +124,7 @@ def _split_generators(self, dl_manager): return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"data_dir": data_dir})] def _generate_examples(self, data_dir): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. 
# It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/arabic_billion_words/arabic_billion_words.py b/datasets/arabic_billion_words/arabic_billion_words.py index 77e5b2cdb64..5b0a671450f 100644 --- a/datasets/arabic_billion_words/arabic_billion_words.py +++ b/datasets/arabic_billion_words/arabic_billion_words.py @@ -144,7 +144,7 @@ def _clean_text(self, text): return text.replace("?", "") def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" current_multi_line = "" _idx = 0 data_tag = self.config.name diff --git a/datasets/arabic_pos_dialect/arabic_pos_dialect.py b/datasets/arabic_pos_dialect/arabic_pos_dialect.py index 14bb14cfb47..69b5d290a6c 100644 --- a/datasets/arabic_pos_dialect/arabic_pos_dialect.py +++ b/datasets/arabic_pos_dialect/arabic_pos_dialect.py @@ -45,7 +45,7 @@ class ArabicPosDialectConfig(datasets.BuilderConfig): - """ BuilderConfig for ArabicPosDialect""" + """BuilderConfig for ArabicPosDialect""" def __init__(self, dialect=None, **kwargs): """ @@ -112,7 +112,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples in the raw (text) form. """ + """Yields examples in the raw (text) form.""" with open(filepath, encoding="utf-8") as csv_file: reader = csv.DictReader(csv_file, delimiter="\t", quoting=csv.QUOTE_NONE) fold = -1 diff --git a/datasets/arsentd_lev/arsentd_lev.py b/datasets/arsentd_lev/arsentd_lev.py index 47dba268e5c..91c4e8eb2fc 100644 --- a/datasets/arsentd_lev/arsentd_lev.py +++ b/datasets/arsentd_lev/arsentd_lev.py @@ -40,7 +40,7 @@ class ArsentdLev(datasets.GeneratorBasedBuilder): - """"ArSenTD-Lev Dataset""" + """ "ArSenTD-Lev Dataset""" VERSION = datasets.Version("1.1.0") diff --git a/datasets/arxiv_dataset/arxiv_dataset.py b/datasets/arxiv_dataset/arxiv_dataset.py index 1d6cd785cfa..33e96dd944e 100644 --- a/datasets/arxiv_dataset/arxiv_dataset.py +++ b/datasets/arxiv_dataset/arxiv_dataset.py @@ -109,7 +109,7 @@ def _split_generators(self, dl_manager): return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"path": path_to_manual_file})] def _generate_examples(self, path=None, title_set=None): - """ Yields examples. """ + """Yields examples.""" with open(path, encoding="utf8") as f: for i, entry in enumerate(f): data = dict(json.loads(entry)) diff --git a/datasets/aslg_pc12/aslg_pc12.py b/datasets/aslg_pc12/aslg_pc12.py index 9a647c9a4c4..8a5cec5721c 100644 --- a/datasets/aslg_pc12/aslg_pc12.py +++ b/datasets/aslg_pc12/aslg_pc12.py @@ -70,7 +70,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, gloss_path, text_path): - """ Yields examples. """ + """Yields examples.""" gloss_f = open(gloss_path, "r", encoding="utf-8") text_f = open(text_path, "r", encoding="utf-8") diff --git a/datasets/asset/asset.py b/datasets/asset/asset.py index 6fb9352803c..07087c12729 100644 --- a/datasets/asset/asset.py +++ b/datasets/asset/asset.py @@ -140,7 +140,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths, split): - """ Yields examples. 
""" + """Yields examples.""" if self.config.name == "simplification": files = [open(filepaths[f"asset.{split}.orig"], encoding="utf-8")] + [ open(filepaths[f"asset.{split}.simp.{i}"], encoding="utf-8") for i in range(10) diff --git a/datasets/assin/assin.py b/datasets/assin/assin.py index 7f3d9528b04..4b250dbbcac 100644 --- a/datasets/assin/assin.py +++ b/datasets/assin/assin.py @@ -151,7 +151,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths, split): - """ Yields examples. """ + """Yields examples.""" id_ = 0 diff --git a/datasets/assin2/assin2.py b/datasets/assin2/assin2.py index 956e7a2e48e..5bd400a832c 100644 --- a/datasets/assin2/assin2.py +++ b/datasets/assin2/assin2.py @@ -103,7 +103,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" id_ = 0 diff --git a/datasets/atomic/atomic.py b/datasets/atomic/atomic.py index 4015ef41639..004c0d86d70 100755 --- a/datasets/atomic/atomic.py +++ b/datasets/atomic/atomic.py @@ -116,7 +116,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples from the Atomic dataset. """ + """Yields examples from the Atomic dataset.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/autshumato/autshumato.py b/datasets/autshumato/autshumato.py index d2f69711dc0..3ec7a9940ba 100644 --- a/datasets/autshumato/autshumato.py +++ b/datasets/autshumato/autshumato.py @@ -39,7 +39,7 @@ class AutshumatoConfig(datasets.BuilderConfig): - """ BuilderConfig for NewDataset""" + """BuilderConfig for NewDataset""" def __init__(self, langs, zip_file, **kwargs): """ @@ -206,7 +206,7 @@ def _split_generators_translation(self, dl_manager): ] def _generate_examples(self, source_files, target_files, split): - """ Yields examples. """ + """Yields examples.""" if len(self.config.langs) == 2: return self._generate_examples_translation(source_files, target_files, split) elif len(self.config.langs) == 1: diff --git a/datasets/banking77/banking77.py b/datasets/banking77/banking77.py index 5e5151393ca..d85618c2506 100644 --- a/datasets/banking77/banking77.py +++ b/datasets/banking77/banking77.py @@ -159,7 +159,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples as (key, example) tuples. """ + """Yields examples as (key, example) tuples.""" with open(filepath, encoding="utf-8") as f: csv_reader = csv.reader(f, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True) # call next to skip header diff --git a/datasets/bbc_hindi_nli/bbc_hindi_nli.py b/datasets/bbc_hindi_nli/bbc_hindi_nli.py index f642a143ada..d30151d3722 100644 --- a/datasets/bbc_hindi_nli/bbc_hindi_nli.py +++ b/datasets/bbc_hindi_nli/bbc_hindi_nli.py @@ -144,7 +144,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter="\t") diff --git a/datasets/bprec/bprec.py b/datasets/bprec/bprec.py index 0a4bbd8b385..f2f2fc8bde4 100644 --- a/datasets/bprec/bprec.py +++ b/datasets/bprec/bprec.py @@ -178,7 +178,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filedirs, split="tele"): - """ Yields examples. 
""" + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/brwac/brwac.py b/datasets/brwac/brwac.py index bd24d6c4468..b5cec9e779c 100644 --- a/datasets/brwac/brwac.py +++ b/datasets/brwac/brwac.py @@ -97,7 +97,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: diff --git a/datasets/bsd_ja_en/bsd_ja_en.py b/datasets/bsd_ja_en/bsd_ja_en.py index 0d0d5641789..4277a162fd8 100644 --- a/datasets/bsd_ja_en/bsd_ja_en.py +++ b/datasets/bsd_ja_en/bsd_ja_en.py @@ -78,7 +78,7 @@ class BsdJaEn(datasets.GeneratorBasedBuilder): - """Japanese-English Business Scene Dialogue (BSD) dataset. """ + """Japanese-English Business Scene Dialogue (BSD) dataset.""" VERSION = datasets.Version("1.0.0") @@ -131,7 +131,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = json.load(f) diff --git a/datasets/c3/c3.py b/datasets/c3/c3.py index 893011d6c9a..b3e3ee40900 100644 --- a/datasets/c3/c3.py +++ b/datasets/c3/c3.py @@ -39,7 +39,7 @@ class C3Config(datasets.BuilderConfig): - """ BuilderConfig for NewDataset""" + """BuilderConfig for NewDataset""" def __init__(self, type_, **kwargs): """ @@ -138,7 +138,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filename, split): - """ Yields examples. """ + """Yields examples.""" with open(filename, "r", encoding="utf-8") as sf: data = json.load(sf) for id_, (documents, questions, document_id) in enumerate(data): diff --git a/datasets/caner/caner.py b/datasets/caner/caner.py index d61e285928b..36a003283b5 100644 --- a/datasets/caner/caner.py +++ b/datasets/caner/caner.py @@ -112,7 +112,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: reader = csv.reader(csv_file, delimiter=",") diff --git a/datasets/cbt/cbt.py b/datasets/cbt/cbt.py index 80766533453..8d7114c1933 100644 --- a/datasets/cbt/cbt.py +++ b/datasets/cbt/cbt.py @@ -154,7 +154,7 @@ def _split_generators(self, dl_manager): def _generate_examples( self, filepath # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` ): - """ Yields examples as (key, example) tuples. """ + """Yields examples as (key, example) tuples.""" # This method handles input defined in _split_generators to yield (key, example) tuples from the dataset. # The `key` is here for legacy reason (tfds) and is not important in itself. diff --git a/datasets/ccaligned_multilingual/ccaligned_multilingual.py b/datasets/ccaligned_multilingual/ccaligned_multilingual.py index 9941e30e81f..897b08ee217 100644 --- a/datasets/ccaligned_multilingual/ccaligned_multilingual.py +++ b/datasets/ccaligned_multilingual/ccaligned_multilingual.py @@ -189,7 +189,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, from_english=False): - """ Yields examples. 
""" + """Yields examples.""" lc = self.config.language_code reverse = lc in reverse_mapped_sentences with open(filepath, encoding="utf-8") as f: diff --git a/datasets/cdsc/cdsc.py b/datasets/cdsc/cdsc.py index 9e4d3743385..8fea6601f04 100644 --- a/datasets/cdsc/cdsc.py +++ b/datasets/cdsc/cdsc.py @@ -123,7 +123,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/cdt/cdt.py b/datasets/cdt/cdt.py index 6ea60959c27..b09a400db41 100644 --- a/datasets/cdt/cdt.py +++ b/datasets/cdt/cdt.py @@ -81,7 +81,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/clickbait_news_bg/clickbait_news_bg.py b/datasets/clickbait_news_bg/clickbait_news_bg.py index 2c61ac5b466..3058cb9407e 100644 --- a/datasets/clickbait_news_bg/clickbait_news_bg.py +++ b/datasets/clickbait_news_bg/clickbait_news_bg.py @@ -96,7 +96,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" keys = [ "fake_news_score", "click_bait_score", diff --git a/datasets/climate_fever/climate_fever.py b/datasets/climate_fever/climate_fever.py index bbbf6d9c095..02e93a3436f 100644 --- a/datasets/climate_fever/climate_fever.py +++ b/datasets/climate_fever/climate_fever.py @@ -106,7 +106,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): doc = json.loads(row) diff --git a/datasets/coached_conv_pref/coached_conv_pref.py b/datasets/coached_conv_pref/coached_conv_pref.py index 9196fffcf3b..85bb5348c38 100644 --- a/datasets/coached_conv_pref/coached_conv_pref.py +++ b/datasets/coached_conv_pref/coached_conv_pref.py @@ -136,7 +136,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # Empty Segment list with annotations dictionary # First prompt of a conversation does not contain the segment dictionary diff --git a/datasets/code_search_net/code_search_net.py b/datasets/code_search_net/code_search_net.py index beeb950ad9b..f0f09bba65e 100644 --- a/datasets/code_search_net/code_search_net.py +++ b/datasets/code_search_net/code_search_net.py @@ -57,7 +57,7 @@ class CodeSearchNet(datasets.GeneratorBasedBuilder): - """"CodeSearchNet corpus: proxy dataset for semantic code search.""" + """ "CodeSearchNet corpus: proxy dataset for semantic code search.""" VERSION = datasets.Version("1.0.0", "Add CodeSearchNet corpus dataset") BUILDER_CONFIGS = [ diff --git a/datasets/common_voice/common_voice.py b/datasets/common_voice/common_voice.py index e483604d4d5..09a15ea2d48 100644 --- a/datasets/common_voice/common_voice.py +++ b/datasets/common_voice/common_voice.py @@ -696,7 +696,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, path_to_clips): - """ Yields examples. 
""" + """Yields examples.""" data_fields = list(self._info().features.keys()) path_idx = data_fields.index("path") diff --git a/datasets/compguesswhat/compguesswhat.py b/datasets/compguesswhat/compguesswhat.py index 070a3861a99..e49b677e8fb 100644 --- a/datasets/compguesswhat/compguesswhat.py +++ b/datasets/compguesswhat/compguesswhat.py @@ -6,7 +6,7 @@ class CompguesswhatConfig(datasets.BuilderConfig): - """ BuilderConfig for CompGuessWhat?!""" + """BuilderConfig for CompGuessWhat?!""" def __init__(self, data_url, splits, gameplay_scenario, **kwargs): """ diff --git a/datasets/conceptnet5/conceptnet5.py b/datasets/conceptnet5/conceptnet5.py index 11234b12bd5..84a019b3170 100755 --- a/datasets/conceptnet5/conceptnet5.py +++ b/datasets/conceptnet5/conceptnet5.py @@ -147,7 +147,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples from the conceptnet5 graph if the config is 'conceptnet5', otherwise yields the sentences for omcs. """ + """Yields examples from the conceptnet5 graph if the config is 'conceptnet5', otherwise yields the sentences for omcs.""" with open(filepath, "rb") as f: for id_, row in enumerate(f): diff --git a/datasets/cord19/cord19.py b/datasets/cord19/cord19.py index 00a4c23459d..25963eaefbb 100644 --- a/datasets/cord19/cord19.py +++ b/datasets/cord19/cord19.py @@ -147,7 +147,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" metadata_filepath = filepath["metadata"] diff --git a/datasets/cos_e/cos_e.py b/datasets/cos_e/cos_e.py index ea566927083..2b92584c2ba 100644 --- a/datasets/cos_e/cos_e.py +++ b/datasets/cos_e/cos_e.py @@ -94,7 +94,7 @@ def _get_choices_and_answer(cqa): class CosEConfig(datasets.BuilderConfig): - """ BuilderConfig for CosE""" + """BuilderConfig for CosE""" def __init__(self, **kwargs): """ diff --git a/datasets/counter/counter.py b/datasets/counter/counter.py index d76e2818508..9cbd217cbdd 100644 --- a/datasets/counter/counter.py +++ b/datasets/counter/counter.py @@ -126,7 +126,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir): - """ Yields examples. """ + """Yields examples.""" def parse_file(file): tree = ET.parse(file) diff --git a/datasets/covid_tweets_japanese/covid_tweets_japanese.py b/datasets/covid_tweets_japanese/covid_tweets_japanese.py index 84369949da1..9d5e3981fa4 100644 --- a/datasets/covid_tweets_japanese/covid_tweets_japanese.py +++ b/datasets/covid_tweets_japanese/covid_tweets_japanese.py @@ -79,7 +79,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with bz2.open(filepath, "rt") as f: data = csv.reader(f) diff --git a/datasets/craigslist_bargains/craigslist_bargains.py b/datasets/craigslist_bargains/craigslist_bargains.py index 9513e26a709..a36cdc6409b 100644 --- a/datasets/craigslist_bargains/craigslist_bargains.py +++ b/datasets/craigslist_bargains/craigslist_bargains.py @@ -133,7 +133,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" # Set default values for items when the information is missing # `items` is the description of the item advertised on craigslist diff --git a/datasets/crawl_domain/crawl_domain.py b/datasets/crawl_domain/crawl_domain.py index 2fefafec322..95dc84dc06c 100644 --- a/datasets/crawl_domain/crawl_domain.py +++ b/datasets/crawl_domain/crawl_domain.py @@ -97,7 +97,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): yield id_, { diff --git a/datasets/cryptonite/cryptonite.py b/datasets/cryptonite/cryptonite.py index e4a04f6ed51..ed9b30c832c 100644 --- a/datasets/cryptonite/cryptonite.py +++ b/datasets/cryptonite/cryptonite.py @@ -107,7 +107,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/cs_restaurants/cs_restaurants.py b/datasets/cs_restaurants/cs_restaurants.py index d9f8f38f945..88e79fbdedc 100644 --- a/datasets/cs_restaurants/cs_restaurants.py +++ b/datasets/cs_restaurants/cs_restaurants.py @@ -89,7 +89,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf8") as f: data = json.load(f) diff --git a/datasets/cuad/cuad.py b/datasets/cuad/cuad.py index 0c680568ca6..3ac74f70d98 100644 --- a/datasets/cuad/cuad.py +++ b/datasets/cuad/cuad.py @@ -104,7 +104,7 @@ def _split_generators(self, dl_manager): def _generate_examples( self, filepath, split # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` ): - """ Yields examples as (key, example) tuples. """ + """Yields examples as (key, example) tuples.""" with open(filepath, encoding="utf-8") as f: cuad = json.load(f) diff --git a/datasets/curiosity_dialogs/curiosity_dialogs.py b/datasets/curiosity_dialogs/curiosity_dialogs.py index b9228a59897..4b20ec01a44 100644 --- a/datasets/curiosity_dialogs/curiosity_dialogs.py +++ b/datasets/curiosity_dialogs/curiosity_dialogs.py @@ -156,7 +156,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # Bool entries are converted to string entries because of PyArrow error with open(filepath, encoding="utf-8") as f: diff --git a/datasets/daily_dialog/daily_dialog.py b/datasets/daily_dialog/daily_dialog.py index 6892873c861..a7277bb4e78 100644 --- a/datasets/daily_dialog/daily_dialog.py +++ b/datasets/daily_dialog/daily_dialog.py @@ -129,7 +129,7 @@ def _split_generators(self, dl_manager: datasets.DownloadManager): ] def _generate_examples(self, file_path, act_path, emotion_path, split): - """ Yields examples. """ + """Yields examples.""" # Yields (key, example) tuples from the dataset with open(file_path, "r", encoding="utf-8") as f, open(act_path, "r", encoding="utf-8") as act, open( emotion_path, "r", encoding="utf-8" diff --git a/datasets/dane/dane.py b/datasets/dane/dane.py index df45622cf35..e66b814a838 100644 --- a/datasets/dane/dane.py +++ b/datasets/dane/dane.py @@ -209,7 +209,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: guid = 0 diff --git a/datasets/dbpedia_14/dbpedia_14.py b/datasets/dbpedia_14/dbpedia_14.py index b959bf63b79..fd3acce6534 100644 --- a/datasets/dbpedia_14/dbpedia_14.py +++ b/datasets/dbpedia_14/dbpedia_14.py @@ -133,7 +133,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.reader(f, delimiter=",", quoting=csv.QUOTE_NONNUMERIC) diff --git a/datasets/deal_or_no_dialog/deal_or_no_dialog.py b/datasets/deal_or_no_dialog/deal_or_no_dialog.py index 608ffbfcfb6..f55f1b138cc 100644 --- a/datasets/deal_or_no_dialog/deal_or_no_dialog.py +++ b/datasets/deal_or_no_dialog/deal_or_no_dialog.py @@ -129,7 +129,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split="train"): - """ Yields examples. """ + """Yields examples.""" if self.config.name == "dialogues": with open(filepath, encoding="utf-8") as f: for idx, line in enumerate(f): diff --git a/datasets/dengue_filipino/dengue_filipino.py b/datasets/dengue_filipino/dengue_filipino.py index 86f521d0d14..974cc3cc477 100644 --- a/datasets/dengue_filipino/dengue_filipino.py +++ b/datasets/dengue_filipino/dengue_filipino.py @@ -102,7 +102,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True diff --git a/datasets/dialog_re/dialog_re.py b/datasets/dialog_re/dialog_re.py index 2a676b1ba7e..15d2ffe7d80 100644 --- a/datasets/dialog_re/dialog_re.py +++ b/datasets/dialog_re/dialog_re.py @@ -126,7 +126,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: dataset = json.load(f) diff --git a/datasets/diplomacy_detection/diplomacy_detection.py b/datasets/diplomacy_detection/diplomacy_detection.py index 0c798e9ce79..93157c2b2b1 100644 --- a/datasets/diplomacy_detection/diplomacy_detection.py +++ b/datasets/diplomacy_detection/diplomacy_detection.py @@ -115,7 +115,7 @@ class DiplomacyDetection(datasets.GeneratorBasedBuilder): - """Diplomacy: A Dataset for Deception Detection. """ + """Diplomacy: A Dataset for Deception Detection.""" VERSION = datasets.Version("1.1.0") @@ -162,7 +162,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): data = json.loads(row) diff --git a/datasets/discofuse/discofuse.py b/datasets/discofuse/discofuse.py index ffd5985b7b4..707b217fc4d 100644 --- a/datasets/discofuse/discofuse.py +++ b/datasets/discofuse/discofuse.py @@ -27,7 +27,7 @@ class DiscofuseConfig(datasets.BuilderConfig): - """ BuilderConfig for Discofuse""" + """BuilderConfig for Discofuse""" def __init__(self, data_url, balanced=False, **kwargs): """ diff --git a/datasets/dutch_social/dutch_social.py b/datasets/dutch_social/dutch_social.py index 4206f59bb2e..9fd31433a30 100644 --- a/datasets/dutch_social/dutch_social.py +++ b/datasets/dutch_social/dutch_social.py @@ -173,7 +173,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split, key=None): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/dyk/dyk.py b/datasets/dyk/dyk.py index eb095bcae36..377dccb53be 100644 --- a/datasets/dyk/dyk.py +++ b/datasets/dyk/dyk.py @@ -81,7 +81,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/ecthr_cases/ecthr_cases.py b/datasets/ecthr_cases/ecthr_cases.py index 0dee5468bd3..73677d8f4af 100644 --- a/datasets/ecthr_cases/ecthr_cases.py +++ b/datasets/ecthr_cases/ecthr_cases.py @@ -179,7 +179,7 @@ def _split_generators(self, dl_manager): def _generate_examples( self, filepath, split # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` ): - """ Yields examples as (key, example) tuples. """ + """Yields examples as (key, example) tuples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/ehealth_kd/ehealth_kd.py b/datasets/ehealth_kd/ehealth_kd.py index 65e3d1e0a36..90a45070a90 100644 --- a/datasets/ehealth_kd/ehealth_kd.py +++ b/datasets/ehealth_kd/ehealth_kd.py @@ -131,7 +131,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, txt_path, ann_path): - """ Yields examples. """ + """Yields examples.""" with open(txt_path, encoding="utf-8") as txt_file, open(ann_path, encoding="utf-8") as ann_file: _id = 0 entities = [] diff --git a/datasets/emo/emo.py b/datasets/emo/emo.py index 601ec21f500..38ec13d120b 100644 --- a/datasets/emo/emo.py +++ b/datasets/emo/emo.py @@ -56,7 +56,7 @@ def __init__(self, **kwargs): class Emo(datasets.GeneratorBasedBuilder): - """ SemEval-2019 Task 3: EmoContext Contextual Emotion Detection in Text. Version 1.0.0 """ + """SemEval-2019 Task 3: EmoContext Contextual Emotion Detection in Text. Version 1.0.0""" VERSION = datasets.Version("1.0.0") @@ -106,7 +106,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, "rb") as f: data = json.load(f) for id_, text, label in zip(data["text"].keys(), data["text"].values(), data["Label"].values()): diff --git a/datasets/enriched_web_nlg/enriched_web_nlg.py b/datasets/enriched_web_nlg/enriched_web_nlg.py index 37aab8e4adb..d29091f68b9 100644 --- a/datasets/enriched_web_nlg/enriched_web_nlg.py +++ b/datasets/enriched_web_nlg/enriched_web_nlg.py @@ -226,7 +226,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filedirs): - """ Yields examples. """ + """Yields examples.""" id_ = 0 for xml_location in filedirs: diff --git a/datasets/eth_py150_open/eth_py150_open.py b/datasets/eth_py150_open/eth_py150_open.py index d94d4ace8b9..f580eff687c 100644 --- a/datasets/eth_py150_open/eth_py150_open.py +++ b/datasets/eth_py150_open/eth_py150_open.py @@ -124,7 +124,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/eu_regulatory_ir/eu_regulatory_ir.py b/datasets/eu_regulatory_ir/eu_regulatory_ir.py index bd4a57565eb..9e30f21809e 100644 --- a/datasets/eu_regulatory_ir/eu_regulatory_ir.py +++ b/datasets/eu_regulatory_ir/eu_regulatory_ir.py @@ -132,7 +132,7 @@ def _split_generators(self, dl_manager): def _generate_examples( self, filepath, split # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` ): - """ Yields examples as (key, example) tuples. """ + """Yields examples as (key, example) tuples.""" # This method handles input defined in _split_generators to yield (key, example) tuples from the dataset. # The `key` is here for legacy reason (tfds) and is not important in itself. diff --git a/datasets/eurlex/eurlex.py b/datasets/eurlex/eurlex.py index 8b104c29fbc..17632453ff9 100644 --- a/datasets/eurlex/eurlex.py +++ b/datasets/eurlex/eurlex.py @@ -118,7 +118,7 @@ def _split_generators(self, dl_manager): def _generate_examples( self, filepath, split # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` ): - """ Yields examples as (key, example) tuples. """ + """Yields examples as (key, example) tuples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/europarl_bilingual/europarl_bilingual.py b/datasets/europarl_bilingual/europarl_bilingual.py index ccb4be5f963..8e838fce0c3 100644 --- a/datasets/europarl_bilingual/europarl_bilingual.py +++ b/datasets/europarl_bilingual/europarl_bilingual.py @@ -77,7 +77,7 @@ class EuroparlBilingualConfig(datasets.BuilderConfig): - """ Slightly custom config to require source and target languages. 
""" + """Slightly custom config to require source and target languages.""" def __init__(self, *args, lang1=None, lang2=None, **kwargs): super().__init__( @@ -96,7 +96,7 @@ def _is_valid(self): class EuroparlBilingual(datasets.GeneratorBasedBuilder): - """ Europarl contains aligned sentences in multiple west language pairs.""" + """Europarl contains aligned sentences in multiple west language pairs.""" VERSION = datasets.Version(_VERSION) @@ -107,7 +107,7 @@ class EuroparlBilingual(datasets.GeneratorBasedBuilder): ] def _info(self): - """ This method specifies the datasets.DatasetInfo object which contains informations and typings for the dataset. """ + """This method specifies the datasets.DatasetInfo object which contains informations and typings for the dataset.""" features = datasets.Features( { "translation": datasets.Translation(languages=(self.config.lang1, self.config.lang2)), diff --git a/datasets/evidence_infer_treatment/evidence_infer_treatment.py b/datasets/evidence_infer_treatment/evidence_infer_treatment.py index c68a4b55e25..3c12fecd26b 100644 --- a/datasets/evidence_infer_treatment/evidence_infer_treatment.py +++ b/datasets/evidence_infer_treatment/evidence_infer_treatment.py @@ -48,7 +48,7 @@ class EvidenceInferenceConfig(datasets.BuilderConfig): - """ BuilderConfig for NewDataset""" + """BuilderConfig for NewDataset""" def __init__(self, zip_file, **kwargs): """ @@ -219,7 +219,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, directory, items): - """ Yields examples. """ + """Yields examples.""" for id_, item in enumerate(items): pmcid = item["PMCID"] filename = os.path.join(directory, f"PMC{pmcid}.txt") diff --git a/datasets/factckbr/factckbr.py b/datasets/factckbr/factckbr.py index 9e16422dce6..259d49efa26 100644 --- a/datasets/factckbr/factckbr.py +++ b/datasets/factckbr/factckbr.py @@ -111,7 +111,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as tsv_file: reader = csv.reader(tsv_file, delimiter="\t") diff --git a/datasets/fake_news_english/fake_news_english.py b/datasets/fake_news_english/fake_news_english.py index e4734987aa3..8be2b3d099a 100644 --- a/datasets/fake_news_english/fake_news_english.py +++ b/datasets/fake_news_english/fake_news_english.py @@ -79,7 +79,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, "rb") as f: f = pd.read_excel(f, engine="openpyxl") for id_, row in f.iterrows(): diff --git a/datasets/fake_news_filipino/fake_news_filipino.py b/datasets/fake_news_filipino/fake_news_filipino.py index 59946eef5c7..5743acd10db 100644 --- a/datasets/fake_news_filipino/fake_news_filipino.py +++ b/datasets/fake_news_filipino/fake_news_filipino.py @@ -76,7 +76,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True diff --git a/datasets/few_rel/few_rel.py b/datasets/few_rel/few_rel.py index 828f7771ce2..b7cdbb53af9 100644 --- a/datasets/few_rel/few_rel.py +++ b/datasets/few_rel/few_rel.py @@ -131,7 +131,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, pid2name, return_names): - """ Yields examples. 
""" + """Yields examples.""" pid2name_dict = {} with open(pid2name, encoding="utf-8") as f: data = json.load(f) diff --git a/datasets/financial_phrasebank/financial_phrasebank.py b/datasets/financial_phrasebank/financial_phrasebank.py index 4b0abdc53d7..69f4fa1d681 100644 --- a/datasets/financial_phrasebank/financial_phrasebank.py +++ b/datasets/financial_phrasebank/financial_phrasebank.py @@ -141,7 +141,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="iso-8859-1") as f: for id_, line in enumerate(f): sentence, label = line.rsplit("@", 1) diff --git a/datasets/flue/flue.py b/datasets/flue/flue.py index b7ecb6d979e..30f6adc0b8b 100644 --- a/datasets/flue/flue.py +++ b/datasets/flue/flue.py @@ -496,7 +496,7 @@ def _cleaner(self, text): return text def _wsdv_prepare_data(self, dirpath): - """ Get data paths from FSE dir""" + """Get data paths from FSE dir""" paths = {} for f in os.listdir(dirpath): @@ -520,10 +520,10 @@ def _wsdv_prepare_data(self, dirpath): # The WSDDatasetReader classes come from https://github.com/getalp/Flaubert/blob/master/flue/wsd/verbs/modules/dataset.py class WSDDatasetReader: - """ Class to read a WSD data directory. The directory should contain .data.xml and .gold.key.txt files""" + """Class to read a WSD data directory. The directory should contain .data.xml and .gold.key.txt files""" def get_data_paths(self, indir): - """ Get file paths from WSD dir """ + """Get file paths from WSD dir""" xml_fpath, gold_fpath = None, None for f in os.listdir(indir): @@ -546,7 +546,7 @@ def read_gold(self, infile): } def read_from_data_dirs(self, data_dirs): - """ Read WSD data and return as WSDDataset """ + """Read WSD data and return as WSDDataset""" for d in data_dirs: xml_fpath, gold_fpath = self.get_data_paths(d) @@ -614,13 +614,13 @@ def read_from_data_dirs(self, data_dirs): ) def read_sentences(self, data_dir, keep_mwe=True): - """ Read sentences from WSD data""" + """Read sentences from WSD data""" xml_fpath, _ = self.get_data_paths(data_dir) return self.read_sentences_from_xml(xml_fpath, keep_mwe=keep_mwe) def read_sentences_from_xml(self, infile, keep_mwe=False): - """ Read sentences from xml file """ + """Read sentences from xml file""" # Parse xml tree = etree.parse(infile) @@ -635,5 +635,5 @@ def read_sentences_from_xml(self, infile, keep_mwe=False): yield sent def read_target_keys(self, infile): - """ Read target keys """ + """Read target keys""" return [x.rstrip("\n") for x in open(infile, encoding="utf-8").readlines()] diff --git a/datasets/freebase_qa/freebase_qa.py b/datasets/freebase_qa/freebase_qa.py index 7f277dd035d..5f2bb811bb5 100644 --- a/datasets/freebase_qa/freebase_qa.py +++ b/datasets/freebase_qa/freebase_qa.py @@ -107,7 +107,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: dataset = json.load(f) diff --git a/datasets/gem/gem.py b/datasets/gem/gem.py index 1920fc19432..d23d1868b89 100644 --- a/datasets/gem/gem.py +++ b/datasets/gem/gem.py @@ -922,7 +922,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split, filepaths=None, lang=None): - """ Yields examples. 
""" + """Yields examples.""" if self.config.name == "common_gen": if split.startswith("challenge"): exples = json.load(open(filepath, encoding="utf-8")) diff --git a/datasets/generics_kb/generics_kb.py b/datasets/generics_kb/generics_kb.py index 808db774697..7012e0a549b 100755 --- a/datasets/generics_kb/generics_kb.py +++ b/datasets/generics_kb/generics_kb.py @@ -52,7 +52,7 @@ class GenericsKb(datasets.GeneratorBasedBuilder): - """ The GenericsKB is the first large-scale resource containing naturally occurring generic sentences, and is rich in high-quality, general, semantically complete statements.""" + """The GenericsKB is the first large-scale resource containing naturally occurring generic sentences, and is rich in high-quality, general, semantically complete statements.""" VERSION = datasets.Version("1.0.0") @@ -178,7 +178,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" if self.config.name == "generics_kb_waterloo" or self.config.name == "generics_kb_simplewiki": diff --git a/datasets/glucose/glucose.py b/datasets/glucose/glucose.py index 31bc3f46d45..2c683c24604 100644 --- a/datasets/glucose/glucose.py +++ b/datasets/glucose/glucose.py @@ -50,7 +50,7 @@ class Glucose(datasets.GeneratorBasedBuilder): - """GLUCOSE: GeneraLized and COntextualized Story Explanations, is a novel conceptual framework and dataset for commonsense reasoning. """ + """GLUCOSE: GeneraLized and COntextualized Story Explanations, is a novel conceptual framework and dataset for commonsense reasoning.""" VERSION = datasets.Version("1.1.0") BUILDER_CONFIGS = [ diff --git a/datasets/gnad10/gnad10.py b/datasets/gnad10/gnad10.py index 9d3e9bd3409..db960f8c836 100644 --- a/datasets/gnad10/gnad10.py +++ b/datasets/gnad10/gnad10.py @@ -77,7 +77,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """Generate German news articles examples. """ + """Generate German news articles examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader(csv_file, delimiter=";", quotechar="'", quoting=csv.QUOTE_ALL) diff --git a/datasets/google_wellformed_query/google_wellformed_query.py b/datasets/google_wellformed_query/google_wellformed_query.py index b4dffbcf797..ed8086ab7c4 100644 --- a/datasets/google_wellformed_query/google_wellformed_query.py +++ b/datasets/google_wellformed_query/google_wellformed_query.py @@ -76,7 +76,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, "r", encoding="utf-8") as file: reader = file.read().split("\n") for idx, row in enumerate(reader): diff --git a/datasets/guardian_authorship/guardian_authorship.py b/datasets/guardian_authorship/guardian_authorship.py index 881130debd7..58f2d13fbbd 100644 --- a/datasets/guardian_authorship/guardian_authorship.py +++ b/datasets/guardian_authorship/guardian_authorship.py @@ -65,7 +65,7 @@ # to add specific attributes. # here we give an example for three sub-set of the dataset with difference sizes. class GuardianAuthorshipConfig(datasets.BuilderConfig): - """ BuilderConfig for NewDataset""" + """BuilderConfig for NewDataset""" def __init__(self, train_folder, valid_folder, test_folder, **kwargs): """ @@ -304,7 +304,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir, samples_folders, split): - """ Yields examples. 
""" + """Yields examples.""" # Yields (key, example) tuples from the dataset # Training and validation are on 1 topic/genre, while testing is on multiple topics diff --git a/datasets/harem/harem.py b/datasets/harem/harem.py index 809be23f812..940ad846c8a 100644 --- a/datasets/harem/harem.py +++ b/datasets/harem/harem.py @@ -125,7 +125,7 @@ def text_generator(tokens): def tokenize(text: str) -> Tuple[List[Token], List[int]]: - """ Perform whitespace and punctuation tokenization keeping track of char alignment""" + """Perform whitespace and punctuation tokenization keeping track of char alignment""" doc_tokens = [] char_to_word_offset = [] @@ -248,7 +248,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" logger.info("⏳ Generating examples from = %s", filepath) diff --git a/datasets/has_part/has_part.py b/datasets/has_part/has_part.py index 6609d25a4a6..e48ef2b3439 100644 --- a/datasets/has_part/has_part.py +++ b/datasets/has_part/has_part.py @@ -102,7 +102,7 @@ def _parse_metadata(self, md): return dd def _generate_examples(self, input_file, split): - """ Yields examples. """ + """Yields examples.""" with open(input_file, encoding="utf-8") as f: for id_, line in enumerate(f): _, arg1, arg2, score, metadata = line.split("\t") diff --git a/datasets/hate_offensive/hate_offensive.py b/datasets/hate_offensive/hate_offensive.py index ce59b4807d6..9a68d4a5b4e 100644 --- a/datasets/hate_offensive/hate_offensive.py +++ b/datasets/hate_offensive/hate_offensive.py @@ -41,7 +41,7 @@ class HateOffensive(datasets.GeneratorBasedBuilder): - """Automated Hate Speech Detection and the Problem of Offensive Language """ + """Automated Hate Speech Detection and the Problem of Offensive Language""" VERSION = datasets.Version("1.1.0") @@ -70,7 +70,7 @@ def _split_generators(self, dl_manager): return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": os.path.join(data_dir)})] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, lineterminator="\n", delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True diff --git a/datasets/hate_speech_filipino/hate_speech_filipino.py b/datasets/hate_speech_filipino/hate_speech_filipino.py index 88a4ae075e5..b526d116ab7 100644 --- a/datasets/hate_speech_filipino/hate_speech_filipino.py +++ b/datasets/hate_speech_filipino/hate_speech_filipino.py @@ -95,7 +95,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True diff --git a/datasets/hate_speech_offensive/hate_speech_offensive.py b/datasets/hate_speech_offensive/hate_speech_offensive.py index 79f1014c2ab..eaf5d4f2de0 100644 --- a/datasets/hate_speech_offensive/hate_speech_offensive.py +++ b/datasets/hate_speech_offensive/hate_speech_offensive.py @@ -87,7 +87,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.reader(f) diff --git a/datasets/hate_speech_pl/hate_speech_pl.py b/datasets/hate_speech_pl/hate_speech_pl.py index 78a79dac2c3..e54e175456b 100644 --- a/datasets/hate_speech_pl/hate_speech_pl.py +++ b/datasets/hate_speech_pl/hate_speech_pl.py @@ -91,7 +91,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths): - """ Yields examples. """ + """Yields examples.""" for file_id_, filepath in enumerate(filepaths): with open(filepath, encoding="utf-8") as f: csv_reader = csv.DictReader(f, delimiter=",", escapechar="\\") diff --git a/datasets/hate_speech_portuguese/hate_speech_portuguese.py b/datasets/hate_speech_portuguese/hate_speech_portuguese.py index c941b857527..577644e09ec 100644 --- a/datasets/hate_speech_portuguese/hate_speech_portuguese.py +++ b/datasets/hate_speech_portuguese/hate_speech_portuguese.py @@ -91,7 +91,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.reader(f) diff --git a/datasets/hda_nli_hindi/hda_nli_hindi.py b/datasets/hda_nli_hindi/hda_nli_hindi.py index 046536aedae..bb016547787 100644 --- a/datasets/hda_nli_hindi/hda_nli_hindi.py +++ b/datasets/hda_nli_hindi/hda_nli_hindi.py @@ -137,7 +137,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter="\t") diff --git a/datasets/head_qa/head_qa.py b/datasets/head_qa/head_qa.py index b9c7dfd63f7..b22de3cd9d3 100644 --- a/datasets/head_qa/head_qa.py +++ b/datasets/head_qa/head_qa.py @@ -117,7 +117,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: head_qa = json.load(f) for exam in head_qa["exams"]: diff --git a/datasets/hind_encorp/hind_encorp.py b/datasets/hind_encorp/hind_encorp.py index dcaccc2b943..a4b42fa3318 100644 --- a/datasets/hind_encorp/hind_encorp.py +++ b/datasets/hind_encorp/hind_encorp.py @@ -105,7 +105,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, line in enumerate(f): diff --git a/datasets/hindi_discourse/hindi_discourse.py b/datasets/hindi_discourse/hindi_discourse.py index 6768fc1db52..6fb44886c44 100644 --- a/datasets/hindi_discourse/hindi_discourse.py +++ b/datasets/hindi_discourse/hindi_discourse.py @@ -78,7 +78,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: hindiDiscourse = json.load(f) diff --git a/datasets/hkcancor/hkcancor.py b/datasets/hkcancor/hkcancor.py index d90ad2853a1..ded95c7f509 100644 --- a/datasets/hkcancor/hkcancor.py +++ b/datasets/hkcancor/hkcancor.py @@ -242,7 +242,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir, split): - """ Yields examples. 
""" + """Yields examples.""" downloaded_files = [os.path.join(data_dir, fn) for fn in sorted(os.listdir(data_dir))] for filepath in downloaded_files: diff --git a/datasets/humicroedit/humicroedit.py b/datasets/humicroedit/humicroedit.py index 539080ec220..bd6ec1bdb95 100644 --- a/datasets/humicroedit/humicroedit.py +++ b/datasets/humicroedit/humicroedit.py @@ -135,7 +135,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" label_names = ["equal", "sentence1", "sentence2"] with open(filepath, encoding="utf-8") as csv_file: diff --git a/datasets/igbo_monolingual/igbo_monolingual.py b/datasets/igbo_monolingual/igbo_monolingual.py index d08ce919632..c84274be4aa 100644 --- a/datasets/igbo_monolingual/igbo_monolingual.py +++ b/datasets/igbo_monolingual/igbo_monolingual.py @@ -271,7 +271,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" if self.config.name == "eze_goes_to_school": with open(filepath, "r", encoding="utf-8") as f: json_data = json.load(f) diff --git a/datasets/igbo_ner/igbo_ner.py b/datasets/igbo_ner/igbo_ner.py index 65d80a2dd4a..6d3aa5d4b32 100644 --- a/datasets/igbo_ner/igbo_ner.py +++ b/datasets/igbo_ner/igbo_ner.py @@ -97,7 +97,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" dictionary = {} with open(filepath, "r", encoding="utf-8-sig") as f: if self.config.name == "ner_data": diff --git a/datasets/ilist/ilist.py b/datasets/ilist/ilist.py index f7cea0a9041..91e4fd58034 100644 --- a/datasets/ilist/ilist.py +++ b/datasets/ilist/ilist.py @@ -92,7 +92,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, "r", encoding="utf-8") as file: for idx, row in enumerate(file): row = row.strip("\n").split("\t") diff --git a/datasets/imppres/imppres.py b/datasets/imppres/imppres.py index 41084241043..d6a8d97ff3f 100644 --- a/datasets/imppres/imppres.py +++ b/datasets/imppres/imppres.py @@ -224,7 +224,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/indonlu/indonlu.py b/datasets/indonlu/indonlu.py index 450d246da7f..75b236e467a 100644 --- a/datasets/indonlu/indonlu.py +++ b/datasets/indonlu/indonlu.py @@ -571,7 +571,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" csv_file = ["emot", "wrete", "facqa", "casa", "hoasa"] tsv_file = ["smsa"] txt_file = ["terma", "keps"] diff --git a/datasets/irc_disentangle/irc_disentangle.py b/datasets/irc_disentangle/irc_disentangle.py index 88fb266c559..526fa6ec220 100644 --- a/datasets/irc_disentangle/irc_disentangle.py +++ b/datasets/irc_disentangle/irc_disentangle.py @@ -182,7 +182,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" if self.config.name == "ubuntu": # run loop for each date diff --git a/datasets/isixhosa_ner_corpus/isixhosa_ner_corpus.py b/datasets/isixhosa_ner_corpus/isixhosa_ner_corpus.py index 05859eefe34..1bce70d3dc6 100644 --- a/datasets/isixhosa_ner_corpus/isixhosa_ner_corpus.py +++ b/datasets/isixhosa_ner_corpus/isixhosa_ner_corpus.py @@ -56,7 +56,7 @@ def __init__(self, **kwargs): class IsixhosaNerCorpus(datasets.GeneratorBasedBuilder): - """ Isizulu Ner dataset""" + """Isizulu Ner dataset""" BUILDER_CONFIGS = [ IsixhosaNerCorpusConfig( diff --git a/datasets/isizulu_ner_corpus/isizulu_ner_corpus.py b/datasets/isizulu_ner_corpus/isizulu_ner_corpus.py index e689263100b..8296efeba06 100644 --- a/datasets/isizulu_ner_corpus/isizulu_ner_corpus.py +++ b/datasets/isizulu_ner_corpus/isizulu_ner_corpus.py @@ -56,7 +56,7 @@ def __init__(self, **kwargs): class IsizuluNerCorpus(datasets.GeneratorBasedBuilder): - """ Isizulu Ner dataset""" + """Isizulu Ner dataset""" BUILDER_CONFIGS = [ IsizuluNerCorpusConfig( diff --git a/datasets/iwslt2017/iwslt2017.py b/datasets/iwslt2017/iwslt2017.py index 39366b04bcb..5253f164119 100644 --- a/datasets/iwslt2017/iwslt2017.py +++ b/datasets/iwslt2017/iwslt2017.py @@ -44,7 +44,7 @@ class IWSLT2017Config(datasets.BuilderConfig): - """ BuilderConfig for NewDataset""" + """BuilderConfig for NewDataset""" def __init__(self, pair, is_multilingual, **kwargs): """ @@ -185,7 +185,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, source_files, target_files, split): - """ Yields examples. """ + """Yields examples.""" id_ = 0 source, target = self.config.pair.split("-") for source_file, target_file in zip(source_files, target_files): diff --git a/datasets/jfleg/jfleg.py b/datasets/jfleg/jfleg.py index 29cd2f64c86..1ad7f3a423d 100644 --- a/datasets/jfleg/jfleg.py +++ b/datasets/jfleg/jfleg.py @@ -120,7 +120,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" source_file = filepath["src"] with open(source_file, encoding="utf-8") as f: diff --git a/datasets/jigsaw_toxicity_pred/jigsaw_toxicity_pred.py b/datasets/jigsaw_toxicity_pred/jigsaw_toxicity_pred.py index ad374ba5ef0..32abb5db4ec 100644 --- a/datasets/jigsaw_toxicity_pred/jigsaw_toxicity_pred.py +++ b/datasets/jigsaw_toxicity_pred/jigsaw_toxicity_pred.py @@ -103,7 +103,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, split="train", train_path=None, test_text_path=None, test_labels_path=None): - """ Yields examples. """ + """Yields examples.""" # This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/kd_conv/kd_conv.py b/datasets/kd_conv/kd_conv.py index ccd6f6e1c7b..45df343f898 100644 --- a/datasets/kd_conv/kd_conv.py +++ b/datasets/kd_conv/kd_conv.py @@ -156,7 +156,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir, split): - """ Yields examples. 
""" + """Yields examples.""" if "dialogues" in self.config.name: if "all" in self.config.name: file_dict = { diff --git a/datasets/kinnews_kirnews/kinnews_kirnews.py b/datasets/kinnews_kirnews/kinnews_kirnews.py index 8aa86cc77bb..4a0f779dde8 100644 --- a/datasets/kinnews_kirnews/kinnews_kirnews.py +++ b/datasets/kinnews_kirnews/kinnews_kirnews.py @@ -122,7 +122,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( diff --git a/datasets/lama/lama.py b/datasets/lama/lama.py index f81caca35db..319c89a0f9e 100755 --- a/datasets/lama/lama.py +++ b/datasets/lama/lama.py @@ -220,7 +220,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples from the LAMA dataset. """ + """Yields examples from the LAMA dataset.""" if self.config.name == "trex": paths = filepath relations_path = paths[0] diff --git a/datasets/lambada/lambada.py b/datasets/lambada/lambada.py index 93b30e7739a..4838e0cbb69 100644 --- a/datasets/lambada/lambada.py +++ b/datasets/lambada/lambada.py @@ -120,7 +120,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" if split == "train": recursion_pattern = f"{filepath}/*/*.txt" for idx, novel_path in enumerate(glob.iglob(recursion_pattern, recursive=True)): diff --git a/datasets/lener_br/lener_br.py b/datasets/lener_br/lener_br.py index 88066fd9bbd..e3b08efd606 100644 --- a/datasets/lener_br/lener_br.py +++ b/datasets/lener_br/lener_br.py @@ -126,7 +126,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" logger.info("⏳ Generating examples from = %s", filepath) diff --git a/datasets/liar/liar.py b/datasets/liar/liar.py index 86a3e5e633c..b2c3d2ed9f3 100644 --- a/datasets/liar/liar.py +++ b/datasets/liar/liar.py @@ -115,7 +115,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as tsv_file: reader = csv.reader(tsv_file, delimiter="\t", quoting=csv.QUOTE_NONE) diff --git a/datasets/liveqa/liveqa.py b/datasets/liveqa/liveqa.py index 5dae36e9d52..d2a5d16b184 100644 --- a/datasets/liveqa/liveqa.py +++ b/datasets/liveqa/liveqa.py @@ -93,7 +93,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths, split): - """ Yields examples. """ + """Yields examples.""" data_raw = "" for filepath in filepaths: diff --git a/datasets/m_lama/m_lama.py b/datasets/m_lama/m_lama.py index ae920bb4112..bc2ddef2fc2 100644 --- a/datasets/m_lama/m_lama.py +++ b/datasets/m_lama/m_lama.py @@ -219,7 +219,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples from the mLAMA dataset. """ + """Yields examples from the mLAMA dataset.""" id_ = -1 for language in self.config.languages: # load templates diff --git a/datasets/mac_morpho/mac_morpho.py b/datasets/mac_morpho/mac_morpho.py index 2c8f898a2fb..20246d3f8c0 100644 --- a/datasets/mac_morpho/mac_morpho.py +++ b/datasets/mac_morpho/mac_morpho.py @@ -144,7 +144,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" logger.info("⏳ Generating examples from = %s", filepath) with open(filepath, "r", encoding="utf-8") as f: diff --git a/datasets/makhzan/makhzan.py b/datasets/makhzan/makhzan.py index 14d46424dbd..43a2e8f6d4d 100644 --- a/datasets/makhzan/makhzan.py +++ b/datasets/makhzan/makhzan.py @@ -82,7 +82,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir): - """ Yields examples. """ + """Yields examples.""" data_dir_path = Path(data_dir) data_dir_path = data_dir_path / ("makhzan-" + _SHA) / "text" file_paths = sorted(data_dir_path.glob(r"*.xml")) diff --git a/datasets/mc_taco/mc_taco.py b/datasets/mc_taco/mc_taco.py index 8a60573fca1..be1a13ea8b4 100644 --- a/datasets/mc_taco/mc_taco.py +++ b/datasets/mc_taco/mc_taco.py @@ -99,7 +99,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, diff --git a/datasets/medical_questions_pairs/medical_questions_pairs.py b/datasets/medical_questions_pairs/medical_questions_pairs.py index 737c05ec725..6e28f44746f 100644 --- a/datasets/medical_questions_pairs/medical_questions_pairs.py +++ b/datasets/medical_questions_pairs/medical_questions_pairs.py @@ -71,7 +71,7 @@ def _split_generators(self, dl_manager): return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": data_file})] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.reader(f) for id_, row in enumerate(data): diff --git a/datasets/meta_woz/meta_woz.py b/datasets/meta_woz/meta_woz.py index 1d8909e3b0a..cb6de060678 100644 --- a/datasets/meta_woz/meta_woz.py +++ b/datasets/meta_woz/meta_woz.py @@ -118,7 +118,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir): - """ Yields examples. """ + """Yields examples.""" if self.config.name == "tasks": filepath = os.path.join(data_dir, "tasks.txt") with open(filepath, encoding="utf-8") as f: diff --git a/datasets/metooma/metooma.py b/datasets/metooma/metooma.py index a4754c9a46e..c7b71f521e1 100644 --- a/datasets/metooma/metooma.py +++ b/datasets/metooma/metooma.py @@ -93,7 +93,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, diff --git a/datasets/mkb/mkb.py b/datasets/mkb/mkb.py index 2d57a64ebf4..80739ddae41 100644 --- a/datasets/mkb/mkb.py +++ b/datasets/mkb/mkb.py @@ -136,7 +136,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, labelpath): - """ Yields examples. """ + """Yields examples.""" src_tag, tgt_tag = self.config.language_pair.split("-") with open(filepath, encoding="utf-8") as f1, open(labelpath, encoding="utf-8") as f2: src = f1.read().split("\n")[:-1] diff --git a/datasets/msr_sqa/msr_sqa.py b/datasets/msr_sqa/msr_sqa.py index e6566f6d4ba..b1d6173d6b6 100644 --- a/datasets/msr_sqa/msr_sqa.py +++ b/datasets/msr_sqa/msr_sqa.py @@ -152,7 +152,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, data_dir): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t") diff --git a/datasets/multi_re_qa/multi_re_qa.py b/datasets/multi_re_qa/multi_re_qa.py index 5c39a73e2f8..ab1f48d66a6 100644 --- a/datasets/multi_re_qa/multi_re_qa.py +++ b/datasets/multi_re_qa/multi_re_qa.py @@ -211,7 +211,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/multi_x_science_sum/multi_x_science_sum.py b/datasets/multi_x_science_sum/multi_x_science_sum.py index 34800f11af5..6978b052b4f 100644 --- a/datasets/multi_x_science_sum/multi_x_science_sum.py +++ b/datasets/multi_x_science_sum/multi_x_science_sum.py @@ -41,7 +41,7 @@ class MultiXScienceSum(datasets.GeneratorBasedBuilder): - """"Multi-XScience Dataset.""" + """ "Multi-XScience Dataset.""" VERSION = datasets.Version("1.1.0") diff --git a/datasets/mutual_friends/mutual_friends.py b/datasets/mutual_friends/mutual_friends.py index f9309368d5b..a434771325b 100644 --- a/datasets/mutual_friends/mutual_friends.py +++ b/datasets/mutual_friends/mutual_friends.py @@ -144,7 +144,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: mutualfriends = json.load(f) diff --git a/datasets/narrativeqa_manual/narrativeqa_manual.py b/datasets/narrativeqa_manual/narrativeqa_manual.py index 53b3dd9121b..0964bb65b1e 100644 --- a/datasets/narrativeqa_manual/narrativeqa_manual.py +++ b/datasets/narrativeqa_manual/narrativeqa_manual.py @@ -158,7 +158,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir, manual_dir, split): - """ Yields examples. """ + """Yields examples.""" documents = {} with open(data_dir["documents"], encoding="utf-8") as f: diff --git a/datasets/natural_questions/natural_questions.py b/datasets/natural_questions/natural_questions.py index ae260aa2e95..381ffeab106 100644 --- a/datasets/natural_questions/natural_questions.py +++ b/datasets/natural_questions/natural_questions.py @@ -133,7 +133,7 @@ def _parse_example(line): html_bytes = ex_json["document_html"].encode("utf-8") def _parse_short_answer(short_ans): - """"Extract text of short answer.""" + """ "Extract text of short answer.""" ans_bytes = html_bytes[short_ans["start_byte"] : short_ans["end_byte"]] # Remove non-breaking spaces. ans_bytes = ans_bytes.replace(b"\xc2\xa0", b" ") diff --git a/datasets/ncslgr/ncslgr.py b/datasets/ncslgr/ncslgr.py index d88bac85c5f..b1c583ae3a0 100644 --- a/datasets/ncslgr/ncslgr.py +++ b/datasets/ncslgr/ncslgr.py @@ -126,7 +126,7 @@ def get_tier_values(name: str): yield {"gloss": " ".join(relevant_gloss), "text": text} def _generate_examples(self, eaf_path: str, videos_path: str): - """ Yields examples. 
""" + """Yields examples.""" for i, eaf_file in enumerate(tqdm(os.listdir(eaf_path))): eaf_file_path = os.path.join(eaf_path, eaf_file) diff --git a/datasets/nell/nell.py b/datasets/nell/nell.py index 49da0ce9915..0f6cd8009b5 100755 --- a/datasets/nell/nell.py +++ b/datasets/nell/nell.py @@ -152,7 +152,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples from the NELL belief knowledge base and candidate bleifs knowledge base if the config is 'nell_belief' and 'nell_candidate', respectively, otherwise yields the sentences for two dataset if the config is 'nell_belief_sentences' and 'nell_candidate_sentences' respectively. """ + """Yields examples from the NELL belief knowledge base and candidate bleifs knowledge base if the config is 'nell_belief' and 'nell_candidate', respectively, otherwise yields the sentences for two dataset if the config is 'nell_belief_sentences' and 'nell_candidate_sentences' respectively.""" with open(filepath, encoding="utf-8") as f: id_ = -1 diff --git a/datasets/neural_code_search/neural_code_search.py b/datasets/neural_code_search/neural_code_search.py index 8d4fe75bc30..563cf32bdb8 100644 --- a/datasets/neural_code_search/neural_code_search.py +++ b/datasets/neural_code_search/neural_code_search.py @@ -132,7 +132,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, datapath, split): - """ Yields examples. """ + """Yields examples.""" id_ = 0 for dp in datapath: if self.config.name == "evaluation_dataset": diff --git a/datasets/newsph_nli/newsph_nli.py b/datasets/newsph_nli/newsph_nli.py index b149b971183..627e5005bcd 100644 --- a/datasets/newsph_nli/newsph_nli.py +++ b/datasets/newsph_nli/newsph_nli.py @@ -96,7 +96,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader( csv_file, quotechar='"', delimiter=",", quoting=csv.QUOTE_ALL, skipinitialspace=True diff --git a/datasets/newspop/newspop.py b/datasets/newspop/newspop.py index d2fdd92b80a..3eb80322f19 100644 --- a/datasets/newspop/newspop.py +++ b/datasets/newspop/newspop.py @@ -85,7 +85,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: csv_reader = csv.reader( f, diff --git a/datasets/newsqa/newsqa.py b/datasets/newsqa/newsqa.py index cf390b73dc8..ba090446914 100644 --- a/datasets/newsqa/newsqa.py +++ b/datasets/newsqa/newsqa.py @@ -236,7 +236,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" if self.config.name == "combined-csv": with open(filepath, encoding="utf-8") as csv_file: diff --git a/datasets/nkjp-ner/nkjp-ner.py b/datasets/nkjp-ner/nkjp-ner.py index ea38f9eea6c..37a6482b3c4 100644 --- a/datasets/nkjp-ner/nkjp-ner.py +++ b/datasets/nkjp-ner/nkjp-ner.py @@ -95,7 +95,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/nli_tr/nli_tr.py b/datasets/nli_tr/nli_tr.py index 709f6c63881..6e24bbb4a24 100644 --- a/datasets/nli_tr/nli_tr.py +++ b/datasets/nli_tr/nli_tr.py @@ -45,7 +45,7 @@ class NLITRConfig(datasets.BuilderConfig): - """ BuilderConfig for NLI-TR""" + """BuilderConfig for NLI-TR""" def __init__(self, version=None, data_url=None, **kwargs): super(NLITRConfig, self).__init__(version=datasets.Version(version, ""), **kwargs) @@ -150,7 +150,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with codecs.open(filepath, encoding="utf-8") as f: for idx, row in enumerate(f): diff --git a/datasets/nq_open/nq_open.py b/datasets/nq_open/nq_open.py index 030544d099f..111d7d23283 100644 --- a/datasets/nq_open/nq_open.py +++ b/datasets/nq_open/nq_open.py @@ -82,7 +82,7 @@ def __init__(self, **kwargs): class NQOpen(datasets.GeneratorBasedBuilder): - """ NQ_Open open domain question answering dataset. """ + """NQ_Open open domain question answering dataset.""" BUILDER_CONFIGS = [ NQOpenConfig( diff --git a/datasets/numeric_fused_head/numeric_fused_head.py b/datasets/numeric_fused_head/numeric_fused_head.py index df3d84e7ae9..1a9b704841f 100644 --- a/datasets/numeric_fused_head/numeric_fused_head.py +++ b/datasets/numeric_fused_head/numeric_fused_head.py @@ -116,7 +116,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: if self.config.name == "identification": r = csv.DictReader(f, delimiter="\t") diff --git a/datasets/oclar/oclar.py b/datasets/oclar/oclar.py index d7dc1fd01c6..3c9bdd51c63 100644 --- a/datasets/oclar/oclar.py +++ b/datasets/oclar/oclar.py @@ -88,7 +88,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader(csv_file, delimiter=",", skipinitialspace=True) next(csv_reader, None) # skipping headers diff --git a/datasets/offcombr/offcombr.py b/datasets/offcombr/offcombr.py index 8635d159968..8bd265fd516 100644 --- a/datasets/offcombr/offcombr.py +++ b/datasets/offcombr/offcombr.py @@ -89,7 +89,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/ohsumed/ohsumed.py b/datasets/ohsumed/ohsumed.py index 7cbaa85576e..f9ace033272 100644 --- a/datasets/ohsumed/ohsumed.py +++ b/datasets/ohsumed/ohsumed.py @@ -146,7 +146,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. 
         # It is in charge of opening the given file and yielding (key, example) tuples from the dataset
         # The key is not important, it's more here for legacy reason (legacy from tfds)
diff --git a/datasets/ollie/ollie.py b/datasets/ollie/ollie.py
index 579a4be4cf8..1f2b9c54d66 100755
--- a/datasets/ollie/ollie.py
+++ b/datasets/ollie/ollie.py
@@ -131,7 +131,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples from the Ollie predicates and sentences. """
+        """Yields examples from the Ollie predicates and sentences."""

         with bz2.open(filepath, "rt") as f:
             id_ = -1
diff --git a/datasets/omp/omp.py b/datasets/omp/omp.py
index 703c8fd0962..70a301b158a 100644
--- a/datasets/omp/omp.py
+++ b/datasets/omp/omp.py
@@ -246,7 +246,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""

         if self.config.name in ["posts_labeled", "posts_unlabeled"]:
             posts_labeled = pd.read_csv(
diff --git a/datasets/openslr/openslr.py b/datasets/openslr/openslr.py
index 3df03dcb633..c01c9692824 100644
--- a/datasets/openslr/openslr.py
+++ b/datasets/openslr/openslr.py
@@ -490,7 +490,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, path_to_indexs, path_to_datas):
-        """ Yields examples. """
+        """Yields examples."""
         counter = -1

         if self.config.name in ["SLR35", "SLR36"]:
diff --git a/datasets/openwebtext/openwebtext.py b/datasets/openwebtext/openwebtext.py
index 20d2e569a80..7d5ceaf479f 100644
--- a/datasets/openwebtext/openwebtext.py
+++ b/datasets/openwebtext/openwebtext.py
@@ -80,7 +80,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, txt_files):
-        """ Yields examples. """
+        """Yields examples."""
         for idx, filepath in enumerate(txt_files):
             with open(filepath, encoding="utf-8") as f:
                 yield idx, {"text": re.sub("\n\n\n+", "\n\n", f.read()).strip()}
diff --git a/datasets/opus100/opus100.py b/datasets/opus100/opus100.py
index 01522d4310e..833a1d286b8 100644
--- a/datasets/opus100/opus100.py
+++ b/datasets/opus100/opus100.py
@@ -255,7 +255,7 @@ def _split_generators(self, dl_manager):
         return output

     def _generate_examples(self, filepath, labelpath):
-        """ Yields examples. """
+        """Yields examples."""
         src_tag, tgt_tag = self.config.language_pair.split("-")
         with open(filepath, encoding="utf-8") as f1, open(labelpath, encoding="utf-8") as f2:
             src = f1.read().split("\n")[:-1]
diff --git a/datasets/opus_dogc/opus_dogc.py b/datasets/opus_dogc/opus_dogc.py
index 0ad93de327a..207f814f840 100644
--- a/datasets/opus_dogc/opus_dogc.py
+++ b/datasets/opus_dogc/opus_dogc.py
@@ -45,7 +45,7 @@

 class OpusDogcConfig(datasets.BuilderConfig):
-    """ BuilderConfig for OpusDogcConfig."""
+    """BuilderConfig for OpusDogcConfig."""

     def __init__(self, file_format=None, **kwargs):
         """
diff --git a/datasets/opus_rf/opus_rf.py b/datasets/opus_rf/opus_rf.py
index 4b8be9177db..d90661d3190 100644
--- a/datasets/opus_rf/opus_rf.py
+++ b/datasets/opus_rf/opus_rf.py
@@ -79,7 +79,7 @@ def __init__(self, *args, lang1=None, lang2=None, **kwargs):

 class OpusRF(datasets.GeneratorBasedBuilder):
-    """RF is a tiny parallel corpus of the Declarations of the Swedish Government and its translations. """
+    """RF is a tiny parallel corpus of the Declarations of the Swedish Government and its translations."""

     BUILDER_CONFIGS = [
         OpusRFTranslationsConfig(
@@ -118,7 +118,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, datapath):
-        """ Yields examples. """
""" + """Yields examples.""" l1, l2 = self.config.lang1, self.config.lang2 folder = l1 + "-" + l2 l1_file = _BASE_NAME.format(folder, l1) diff --git a/datasets/orange_sum/orange_sum.py b/datasets/orange_sum/orange_sum.py index 697fd14a97f..67f65b4a0c8 100644 --- a/datasets/orange_sum/orange_sum.py +++ b/datasets/orange_sum/orange_sum.py @@ -100,7 +100,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open( os.path.join(filepath, self.config.name, "{}.source".format(split)), encoding="utf-8" ) as f_source, open( diff --git a/datasets/para_pat/para_pat.py b/datasets/para_pat/para_pat.py index 2e0fc50b54d..4fa4f2ea7dc 100644 --- a/datasets/para_pat/para_pat.py +++ b/datasets/para_pat/para_pat.py @@ -256,7 +256,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" source, target = self.config.language_pair with open(filepath, encoding="utf-8") as f: if self.config.name in type1_datasets_features: diff --git a/datasets/paws-x/paws-x.py b/datasets/paws-x/paws-x.py index 9ac5d967ed8..2c3ea666428 100644 --- a/datasets/paws-x/paws-x.py +++ b/datasets/paws-x/paws-x.py @@ -156,7 +156,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) diff --git a/datasets/paws/paws.py b/datasets/paws/paws.py index 41900138919..fdba359de0b 100644 --- a/datasets/paws/paws.py +++ b/datasets/paws/paws.py @@ -183,7 +183,7 @@ def _split_generators(self, dl_manager): raise NotImplementedError("{} does not exist".format(self.config.name)) def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.DictReader(f, delimiter="\t") diff --git a/datasets/pec/pec.py b/datasets/pec/pec.py index b1aec84adcd..b01ec5358ce 100644 --- a/datasets/pec/pec.py +++ b/datasets/pec/pec.py @@ -35,7 +35,7 @@ class PECConfig(datasets.BuilderConfig): - """ BuilderConfig for PEC""" + """BuilderConfig for PEC""" def __init__(self, domain="all", **kwargs): """ @@ -142,7 +142,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split, persona): - """ Yields examples. """ + """Yields examples.""" # TODO: Yields (key, example) tuples from the dataset context_speakers = [] context = [] diff --git a/datasets/peer_read/peer_read.py b/datasets/peer_read/peer_read.py index f08425b24cc..e6ef7870d8e 100644 --- a/datasets/peer_read/peer_read.py +++ b/datasets/peer_read/peer_read.py @@ -235,7 +235,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths, split): - """ Yields examples. """ + """Yields examples.""" for id_, filepath in enumerate(sorted(filepaths)): with open(filepath, encoding="utf-8", errors="replace") as f: data = json.load(f) diff --git a/datasets/persian_ner/persian_ner.py b/datasets/persian_ner/persian_ner.py index 13154c58f61..8b404e7d7a1 100644 --- a/datasets/persian_ner/persian_ner.py +++ b/datasets/persian_ner/persian_ner.py @@ -137,7 +137,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, "r", encoding="utf-8") as f: id_ = 0 diff --git a/datasets/pib/pib.py b/datasets/pib/pib.py index 851edba5101..f976dadd7f1 100644 --- a/datasets/pib/pib.py +++ b/datasets/pib/pib.py @@ -145,7 +145,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, labelpath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f1, open(labelpath, encoding="utf-8") as f2: src = f1.read().split("\n")[:-1] tgt = f2.read().split("\n")[:-1] diff --git a/datasets/piqa/piqa.py b/datasets/piqa/piqa.py index c1c8bcf51ae..0f0d96172f9 100644 --- a/datasets/piqa/piqa.py +++ b/datasets/piqa/piqa.py @@ -116,7 +116,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, input_filepath, label_filepath=None): - """ Yields examples. """ + """Yields examples.""" with open(input_filepath, encoding="utf-8") as input_file: inputs = input_file.read().splitlines() diff --git a/datasets/polemo2/polemo2.py b/datasets/polemo2/polemo2.py index 8c6513c665b..3460bc5b34e 100644 --- a/datasets/polemo2/polemo2.py +++ b/datasets/polemo2/polemo2.py @@ -120,7 +120,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/poleval2019_cyberbullying/poleval2019_cyberbullying.py b/datasets/poleval2019_cyberbullying/poleval2019_cyberbullying.py index 9dc7c28f634..79eef6a9199 100644 --- a/datasets/poleval2019_cyberbullying/poleval2019_cyberbullying.py +++ b/datasets/poleval2019_cyberbullying/poleval2019_cyberbullying.py @@ -133,7 +133,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" if split == "train": text_path = os.path.join(filepath, "training_set_clean_only_text.txt") diff --git a/datasets/polsum/polsum.py b/datasets/polsum/polsum.py index 725340b385b..a9ed16781f6 100644 --- a/datasets/polsum/polsum.py +++ b/datasets/polsum/polsum.py @@ -130,7 +130,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/proto_qa/proto_qa.py b/datasets/proto_qa/proto_qa.py index ebc29cfef74..b3cff552054 100755 --- a/datasets/proto_qa/proto_qa.py +++ b/datasets/proto_qa/proto_qa.py @@ -160,7 +160,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" if self.config.name == "proto_qa_cs_assessments": with open(filepath, encoding="utf-8") as f: diff --git a/datasets/psc/psc.py b/datasets/psc/psc.py index 509f07e5ae7..8f8b2680e2a 100644 --- a/datasets/psc/psc.py +++ b/datasets/psc/psc.py @@ -80,7 +80,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for id_, row in enumerate(reader): diff --git a/datasets/ptb_text_only/ptb_text_only.py b/datasets/ptb_text_only/ptb_text_only.py index c1a74832803..a11ba347a25 100644 --- a/datasets/ptb_text_only/ptb_text_only.py +++ b/datasets/ptb_text_only/ptb_text_only.py @@ -136,7 +136,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/pubmed/pubmed.py b/datasets/pubmed/pubmed.py index 4a90d630ee3..59b3b2dfb5c 100644 --- a/datasets/pubmed/pubmed.py +++ b/datasets/pubmed/pubmed.py @@ -366,7 +366,7 @@ def update_citation(self, article): article["PubmedData"]["ReferenceList"] = citations def _generate_examples(self, filenames): - """ Yields examples. """ + """Yields examples.""" id_ = 0 for filename in filenames: try: diff --git a/datasets/py_ast/py_ast.py b/datasets/py_ast/py_ast.py index 094557f0745..ac4c70b9cf9 100644 --- a/datasets/py_ast/py_ast.py +++ b/datasets/py_ast/py_ast.py @@ -136,7 +136,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/qa_srl/qa_srl.py b/datasets/qa_srl/qa_srl.py index e5f47983ab3..b2a28d5c14d 100644 --- a/datasets/qa_srl/qa_srl.py +++ b/datasets/qa_srl/qa_srl.py @@ -127,7 +127,7 @@ def _split_generators(self, dl_manager): def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: diff --git a/datasets/quac/quac.py b/datasets/quac/quac.py index 57b88426196..45a06bad730 100644 --- a/datasets/quac/quac.py +++ b/datasets/quac/quac.py @@ -122,7 +122,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: squad = json.load(f) for section in squad["data"]: diff --git a/datasets/re_dial/re_dial.py b/datasets/re_dial/re_dial.py index bb27907b22a..753b1fe0132 100644 --- a/datasets/re_dial/re_dial.py +++ b/datasets/re_dial/re_dial.py @@ -119,7 +119,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: examples = f.readlines() diff --git a/datasets/reasoning_bg/reasoning_bg.py b/datasets/reasoning_bg/reasoning_bg.py index 7ce0b9a0eaf..0aa13de6689 100644 --- a/datasets/reasoning_bg/reasoning_bg.py +++ b/datasets/reasoning_bg/reasoning_bg.py @@ -100,7 +100,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: content = f.read() diff --git a/datasets/refresd/refresd.py b/datasets/refresd/refresd.py index 701323bd188..e5c889c5c79 100644 --- a/datasets/refresd/refresd.py +++ b/datasets/refresd/refresd.py @@ -80,7 +80,7 @@ def _split_generators(self, dl_manager): return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": data_file_path})] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE) for idx, row in enumerate(reader): diff --git a/datasets/ro_sent/ro_sent.py b/datasets/ro_sent/ro_sent.py index 5927bd8e5d3..60957d4907d 100644 --- a/datasets/ro_sent/ro_sent.py +++ b/datasets/ro_sent/ro_sent.py @@ -109,7 +109,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.DictReader(f, delimiter=",", quotechar='"') diff --git a/datasets/ronec/ronec.py b/datasets/ronec/ronec.py index d0420509a9b..ead1a689176 100644 --- a/datasets/ronec/ronec.py +++ b/datasets/ronec/ronec.py @@ -155,7 +155,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" logger.info("⏳ Generating examples from = %s", filepath) with open(filepath, encoding="utf-8") as f: diff --git a/datasets/ropes/ropes.py b/datasets/ropes/ropes.py index 954cc0ae466..5bf664b78d9 100644 --- a/datasets/ropes/ropes.py +++ b/datasets/ropes/ropes.py @@ -111,7 +111,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: ropes = json.load(f) for article in ropes["data"]: diff --git a/datasets/rotten_tomatoes/rotten_tomatoes.py b/datasets/rotten_tomatoes/rotten_tomatoes.py index f030e6f5a7b..f9f8edd38e1 100644 --- a/datasets/rotten_tomatoes/rotten_tomatoes.py +++ b/datasets/rotten_tomatoes/rotten_tomatoes.py @@ -65,7 +65,7 @@ def _vocab_text_gen(self, train_file): yield ex["text"] def _split_generators(self, dl_manager): - """ Downloads Rotten Tomatoes sentences. """ + """Downloads Rotten Tomatoes sentences.""" extracted_folder_path = dl_manager.download_and_extract(_DOWNLOAD_URL) return [ datasets.SplitGenerator( diff --git a/datasets/s2orc/s2orc.py b/datasets/s2orc/s2orc.py index 35613c7ed43..4918942e4a3 100644 --- a/datasets/s2orc/s2orc.py +++ b/datasets/s2orc/s2orc.py @@ -126,7 +126,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths, split): - """ Yields examples. """ + """Yields examples.""" for train_files in filepaths: with open(train_files, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/samsum/samsum.py b/datasets/samsum/samsum.py index 793951eae7d..98863a513d4 100644 --- a/datasets/samsum/samsum.py +++ b/datasets/samsum/samsum.py @@ -105,7 +105,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" path, fname = filepath diff --git a/datasets/sanskrit_classic/sanskrit_classic.py b/datasets/sanskrit_classic/sanskrit_classic.py index 1a916354733..9f2432a6e7b 100644 --- a/datasets/sanskrit_classic/sanskrit_classic.py +++ b/datasets/sanskrit_classic/sanskrit_classic.py @@ -86,7 +86,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): diff --git a/datasets/scitail/scitail.py b/datasets/scitail/scitail.py index cb3e70d9adb..7e66f3c5d7c 100644 --- a/datasets/scitail/scitail.py +++ b/datasets/scitail/scitail.py @@ -34,7 +34,7 @@ class ScitailConfig(datasets.BuilderConfig): - """ BuilderConfig for Xquad""" + """BuilderConfig for Xquad""" def __init__(self, **kwargs): """ diff --git a/datasets/scitldr/scitldr.py b/datasets/scitldr/scitldr.py index ccde4f342c9..f07473cbc0e 100644 --- a/datasets/scitldr/scitldr.py +++ b/datasets/scitldr/scitldr.py @@ -155,7 +155,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/selqa/selqa.py b/datasets/selqa/selqa.py index 3f2e8c6d8ea..e475eee0dd3 100644 --- a/datasets/selqa/selqa.py +++ b/datasets/selqa/selqa.py @@ -61,7 +61,7 @@ class SelqaConfig(datasets.BuilderConfig): - """"BuilderConfig for SelQA Dataset""" + """ "BuilderConfig for SelQA Dataset""" def __init__(self, mode, type_, **kwargs): super(SelqaConfig, self).__init__(**kwargs) @@ -249,7 +249,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/sem_eval_2010_task_8/sem_eval_2010_task_8.py b/datasets/sem_eval_2010_task_8/sem_eval_2010_task_8.py index 2e6f004589d..a49603c21af 100644 --- a/datasets/sem_eval_2010_task_8/sem_eval_2010_task_8.py +++ b/datasets/sem_eval_2010_task_8/sem_eval_2010_task_8.py @@ -123,7 +123,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" with open(filepath, "r", encoding="us-ascii") as file: lines = file.readlines() num_lines_per_sample = 4 diff --git a/datasets/sem_eval_2014_task_1/sem_eval_2014_task_1.py b/datasets/sem_eval_2014_task_1/sem_eval_2014_task_1.py index 4c7f57fc99d..2857368d3a1 100644 --- a/datasets/sem_eval_2014_task_1/sem_eval_2014_task_1.py +++ b/datasets/sem_eval_2014_task_1/sem_eval_2014_task_1.py @@ -108,7 +108,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, "r", encoding="us-ascii") as file: lines = file.readlines() diff --git a/datasets/sem_eval_2020_task_11/sem_eval_2020_task_11.py b/datasets/sem_eval_2020_task_11/sem_eval_2020_task_11.py index ab078763720..3a3a7f9e445 100644 --- a/datasets/sem_eval_2020_task_11/sem_eval_2020_task_11.py +++ b/datasets/sem_eval_2020_task_11/sem_eval_2020_task_11.py @@ -162,7 +162,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir, keys, split, labels, tc_test_template=None): - """ Yields examples. """ + """Yields examples.""" # Get the main path for the articles articles_path = os.path.join(data_dir, f"{split}-articles") diff --git a/datasets/sent_comp/sent_comp.py b/datasets/sent_comp/sent_comp.py index e956fa10b32..21e988339b0 100644 --- a/datasets/sent_comp/sent_comp.py +++ b/datasets/sent_comp/sent_comp.py @@ -130,7 +130,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths): - """ Yields examples. """ + """Yields examples.""" id_ = -1 for ix, filepath in enumerate(filepaths): with gzip.open(filepath, mode="rt", encoding="utf-8") as f: diff --git a/datasets/senti_lex/senti_lex.py b/datasets/senti_lex/senti_lex.py index 6f5d7ba087f..9405d5fd47c 100644 --- a/datasets/senti_lex/senti_lex.py +++ b/datasets/senti_lex/senti_lex.py @@ -197,7 +197,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir): - """ Yields examples. """ + """Yields examples.""" filepaths = [ os.path.join(data_dir, "sentiment-lexicons", "negative_words_" + self.config.name + ".txt"), diff --git a/datasets/senti_ws/senti_ws.py b/datasets/senti_ws/senti_ws.py index 32eca803d54..98277dc41d6 100644 --- a/datasets/senti_ws/senti_ws.py +++ b/datasets/senti_ws/senti_ws.py @@ -116,7 +116,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, sourcefiles, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. 
         # It is in charge of opening the given file and yielding (key, example) tuples from the dataset
         # The key is not important, it's more here for legacy reason (legacy from tfds)
diff --git a/datasets/sepedi_ner/sepedi_ner.py b/datasets/sepedi_ner/sepedi_ner.py
index 7ce005cf3ef..2941372248a 100644
--- a/datasets/sepedi_ner/sepedi_ner.py
+++ b/datasets/sepedi_ner/sepedi_ner.py
@@ -59,7 +59,7 @@ def __init__(self, **kwargs):

 class SepediNer(datasets.GeneratorBasedBuilder):
-    """ Sepedi Ner dataset"""
+    """Sepedi Ner dataset"""

     BUILDER_CONFIGS = [
         SepediNerConfig(
diff --git a/datasets/sesotho_ner_corpus/sesotho_ner_corpus.py b/datasets/sesotho_ner_corpus/sesotho_ner_corpus.py
index c1f7743fd60..ea6f40bb75e 100644
--- a/datasets/sesotho_ner_corpus/sesotho_ner_corpus.py
+++ b/datasets/sesotho_ner_corpus/sesotho_ner_corpus.py
@@ -65,7 +65,7 @@ def __init__(self, **kwargs):

 class SesothoNerCorpus(datasets.GeneratorBasedBuilder):
-    """ SesothoNerCorpus Ner dataset"""
+    """SesothoNerCorpus Ner dataset"""

     BUILDER_CONFIGS = [
         SesothoNerCorpusConfig(
diff --git a/datasets/setswana_ner_corpus/setswana_ner_corpus.py b/datasets/setswana_ner_corpus/setswana_ner_corpus.py
index 20ea8e6b05c..8c6cdf4d45f 100644
--- a/datasets/setswana_ner_corpus/setswana_ner_corpus.py
+++ b/datasets/setswana_ner_corpus/setswana_ner_corpus.py
@@ -56,7 +56,7 @@ def __init__(self, **kwargs):

 class SetswanaNerCorpus(datasets.GeneratorBasedBuilder):
-    """ Setswana Ner dataset"""
+    """Setswana Ner dataset"""

     BUILDER_CONFIGS = [
         SetswanaNerCorpusConfig(
diff --git a/datasets/siswati_ner_corpus/siswati_ner_corpus.py b/datasets/siswati_ner_corpus/siswati_ner_corpus.py
index 10945dc80d1..c865dd46604 100644
--- a/datasets/siswati_ner_corpus/siswati_ner_corpus.py
+++ b/datasets/siswati_ner_corpus/siswati_ner_corpus.py
@@ -68,7 +68,7 @@ def __init__(self, **kwargs):

 class SiswatiNerCorpus(datasets.GeneratorBasedBuilder):
-    """ SiswatiNerCorpus Ner dataset"""
+    """SiswatiNerCorpus Ner dataset"""

     BUILDER_CONFIGS = [
         SiswatiNerCorpusConfig(
diff --git a/datasets/smartdata/smartdata.py b/datasets/smartdata/smartdata.py
index 8ef79181d91..177fa02ec11 100644
--- a/datasets/smartdata/smartdata.py
+++ b/datasets/smartdata/smartdata.py
@@ -174,7 +174,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""

         NOT_WHITESPACE = re.compile(r"[^\s]")
diff --git a/datasets/snow_simplified_japanese_corpus/snow_simplified_japanese_corpus.py b/datasets/snow_simplified_japanese_corpus/snow_simplified_japanese_corpus.py
index 877e971b6a1..b5c65b6aad9 100644
--- a/datasets/snow_simplified_japanese_corpus/snow_simplified_japanese_corpus.py
+++ b/datasets/snow_simplified_japanese_corpus/snow_simplified_japanese_corpus.py
@@ -145,7 +145,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""

         with open(filepath, "rb") as f:
             df = pd.read_excel(f, engine="openpyxl").astype("str")
diff --git a/datasets/so_stacksample/so_stacksample.py b/datasets/so_stacksample/so_stacksample.py
index 7da9920016f..3a3f3bdd0c7 100644
--- a/datasets/so_stacksample/so_stacksample.py
+++ b/datasets/so_stacksample/so_stacksample.py
@@ -137,7 +137,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""
         # This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method.
         # It is in charge of opening the given file and yielding (key, example) tuples from the dataset
         # The key is not important, it's more here for legacy reason (legacy from tfds)
diff --git a/datasets/sofc_materials_articles/sofc_materials_articles.py b/datasets/sofc_materials_articles/sofc_materials_articles.py
index 4e2b622b426..08f029f52fb 100644
--- a/datasets/sofc_materials_articles/sofc_materials_articles.py
+++ b/datasets/sofc_materials_articles/sofc_materials_articles.py
@@ -49,7 +49,7 @@

 class SOFCMaterialsArticles(datasets.GeneratorBasedBuilder):
-    """"""
+    """ """

     VERSION = datasets.Version("1.1.0")
@@ -280,7 +280,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, names, data_dir, split):
-        """ Yields examples. """
+        """Yields examples."""
         # The dataset consists of the original article text as well as annotations
         textfile_base_path = os.path.join(data_dir, "texts")
         annotations_base_path = os.path.join(data_dir, "annotations")
diff --git a/datasets/spanish_billion_words/spanish_billion_words.py b/datasets/spanish_billion_words/spanish_billion_words.py
index 0d7a336bcce..dfa0c24e1d6 100644
--- a/datasets/spanish_billion_words/spanish_billion_words.py
+++ b/datasets/spanish_billion_words/spanish_billion_words.py
@@ -81,7 +81,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, directory):
-        """ Yields examples. """
+        """Yields examples."""
         files = os.listdir(directory)
         files = sorted(files)
         _id = 0
diff --git a/datasets/stsb_multi_mt/stsb_multi_mt.py b/datasets/stsb_multi_mt/stsb_multi_mt.py
index 1d9071c18c7..d18ff76083d 100644
--- a/datasets/stsb_multi_mt/stsb_multi_mt.py
+++ b/datasets/stsb_multi_mt/stsb_multi_mt.py
@@ -165,7 +165,7 @@ def _generate_examples(
         self,
         filepath,  # method parameters are unpacked from `gen_kwargs` as given in `_split_generators`
     ):
-        """ Yields examples as (key, example) tuples. """
+        """Yields examples as (key, example) tuples."""
         # This method handles input defined in _split_generators to yield (key, example) tuples from the dataset.
         # The `key` is here for legacy reason (tfds) and is not important in itself.
         with open(filepath, newline="", encoding="utf-8") as csvfile:
diff --git a/datasets/swag/swag.py b/datasets/swag/swag.py
index 2e45cf6eb0a..45e48794c0a 100644
--- a/datasets/swag/swag.py
+++ b/datasets/swag/swag.py
@@ -157,7 +157,7 @@ def _split_generators(self, dl_manager):
         return splits

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""
         with open(filepath, "r", encoding="utf-8") as f:
             lines = list(csv.reader(f, delimiter=","))
diff --git a/datasets/swahili/swahili.py b/datasets/swahili/swahili.py
index 2d24bc75fce..55c1df750e8 100644
--- a/datasets/swahili/swahili.py
+++ b/datasets/swahili/swahili.py
@@ -97,7 +97,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""
         _id = 0
         with open(filepath, mode="r", encoding="utf-8") as f:
             for line in f:
diff --git a/datasets/tapaco/tapaco.py b/datasets/tapaco/tapaco.py
index 5a5c90a62c4..473caeda562 100644
--- a/datasets/tapaco/tapaco.py
+++ b/datasets/tapaco/tapaco.py
@@ -199,7 +199,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, data_dir):
-        """ Yields examples. """
""" + """Yields examples.""" base_path = os.path.join(data_dir, "tapaco_v1.0") file_dict = {lang: os.path.join(base_path, lang + ".txt") for lang in self.config.languages} id_ = -1 diff --git a/datasets/ted_talks_iwslt/ted_talks_iwslt.py b/datasets/ted_talks_iwslt/ted_talks_iwslt.py index 0d08b5c3937..70c8dc4b1bb 100644 --- a/datasets/ted_talks_iwslt/ted_talks_iwslt.py +++ b/datasets/ted_talks_iwslt/ted_talks_iwslt.py @@ -193,7 +193,7 @@ class TedTalksIWSLTConfig(datasets.BuilderConfig): - """"Builder Config for the TedTalks IWSLT dataset""" + """ "Builder Config for the TedTalks IWSLT dataset""" def __init__(self, language_pair=(None, None), year=None, **kwargs): """BuilderConfig for TedTalks IWSLT dataset. @@ -290,7 +290,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. # It is in charge of opening the given file and yielding (key, example) tuples from the dataset # The key is not important, it's more here for legacy reason (legacy from tfds) diff --git a/datasets/telugu_books/telugu_books.py b/datasets/telugu_books/telugu_books.py index 93aeb983ab3..1b4d381a9e5 100644 --- a/datasets/telugu_books/telugu_books.py +++ b/datasets/telugu_books/telugu_books.py @@ -95,7 +95,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader(csv_file) for id_, row in enumerate(csv_reader): diff --git a/datasets/telugu_news/telugu_news.py b/datasets/telugu_news/telugu_news.py index 45a5e4d9d30..1a82cad5834 100644 --- a/datasets/telugu_news/telugu_news.py +++ b/datasets/telugu_news/telugu_news.py @@ -108,7 +108,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as csv_file: csv_reader = csv.reader(csv_file) diff --git a/datasets/times_of_india_news_headlines/times_of_india_news_headlines.py b/datasets/times_of_india_news_headlines/times_of_india_news_headlines.py index 209e222ca74..225a85f8196 100644 --- a/datasets/times_of_india_news_headlines/times_of_india_news_headlines.py +++ b/datasets/times_of_india_news_headlines/times_of_india_news_headlines.py @@ -81,7 +81,7 @@ def _split_generators(self, dl_manager): return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"path": path_to_manual_file})] def _generate_examples(self, path=None): - """ Yields examples. """ + """Yields examples.""" with open(path, encoding="utf8") as csv_file: csv_reader = csv.reader( csv_file, quotechar='"', delimiter=",", skipinitialspace=True, quoting=csv.QUOTE_ALL diff --git a/datasets/tmu_gfm_dataset/tmu_gfm_dataset.py b/datasets/tmu_gfm_dataset/tmu_gfm_dataset.py index 87232179877..1d35b23eb60 100644 --- a/datasets/tmu_gfm_dataset/tmu_gfm_dataset.py +++ b/datasets/tmu_gfm_dataset/tmu_gfm_dataset.py @@ -95,7 +95,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: data = csv.reader(f) diff --git a/datasets/trec/trec.py b/datasets/trec/trec.py index 83abb7b638e..da020925c29 100644 --- a/datasets/trec/trec.py +++ b/datasets/trec/trec.py @@ -155,7 +155,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. """ + """Yields examples.""" # TODO: Yields (key, example) tuples from the dataset with open(filepath, "rb") as f: for id_, row in enumerate(f): diff --git a/datasets/tuple_ie/tuple_ie.py b/datasets/tuple_ie/tuple_ie.py index 8b8a9a7bd6c..ac07e54ddd2 100644 --- a/datasets/tuple_ie/tuple_ie.py +++ b/datasets/tuple_ie/tuple_ie.py @@ -105,7 +105,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, data_dir): - """ Yields examples. """ + """Yields examples.""" id_ = -1 for domain in self.config.domains: with open(os.path.join(data_dir, _DOMAIN_FILES[domain]), encoding="utf-8") as f: diff --git a/datasets/turk/turk.py b/datasets/turk/turk.py index ebc8285de85..4d844f5a642 100644 --- a/datasets/turk/turk.py +++ b/datasets/turk/turk.py @@ -107,7 +107,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepaths, split): - """ Yields examples. """ + """Yields examples.""" if split == "valid": split = "tune" files = [open(filepaths[f"{split}.8turkers.tok.norm"], encoding="utf-8")] + [ diff --git a/datasets/turkish_ner/turkish_ner.py b/datasets/turkish_ner/turkish_ner.py index 2e57f6d8a74..c25c203ca4a 100644 --- a/datasets/turkish_ner/turkish_ner.py +++ b/datasets/turkish_ner/turkish_ner.py @@ -152,7 +152,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" logger.info("⏳ Generating examples from = %s", filepath) with open(filepath, encoding="utf-8") as f: diff --git a/datasets/turkish_shrinked_ner/turkish_shrinked_ner.py b/datasets/turkish_shrinked_ner/turkish_shrinked_ner.py index a03f22f33b3..52553cb741e 100644 --- a/datasets/turkish_shrinked_ner/turkish_shrinked_ner.py +++ b/datasets/turkish_shrinked_ner/turkish_shrinked_ner.py @@ -198,7 +198,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" logger.info("⏳ Generating examples from = %s", filepath) with open(filepath, encoding="utf-8") as f: diff --git a/datasets/tweet_eval/tweet_eval.py b/datasets/tweet_eval/tweet_eval.py index 94ad00e2bf5..ceccb463eaf 100644 --- a/datasets/tweet_eval/tweet_eval.py +++ b/datasets/tweet_eval/tweet_eval.py @@ -239,7 +239,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, text_path, labels_path): - """ Yields examples. """ + """Yields examples.""" with open(text_path, encoding="utf-8") as f: texts = f.readlines() diff --git a/datasets/tweet_qa/tweet_qa.py b/datasets/tweet_qa/tweet_qa.py index e5582da792f..c097408e261 100644 --- a/datasets/tweet_qa/tweet_qa.py +++ b/datasets/tweet_qa/tweet_qa.py @@ -97,7 +97,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: tweet_qa = json.load(f) diff --git a/datasets/tydiqa/tydiqa.py b/datasets/tydiqa/tydiqa.py index ff13ef85ad2..db9d8f6a6f7 100644 --- a/datasets/tydiqa/tydiqa.py +++ b/datasets/tydiqa/tydiqa.py @@ -41,7 +41,7 @@ class TydiqaConfig(datasets.BuilderConfig): - """ BuilderConfig for Tydiqa""" + """BuilderConfig for Tydiqa""" def __init__(self, **kwargs): """ diff --git a/datasets/web_nlg/web_nlg.py b/datasets/web_nlg/web_nlg.py index cb0142d6f2a..286d75ccf29 100644 --- a/datasets/web_nlg/web_nlg.py +++ b/datasets/web_nlg/web_nlg.py @@ -247,7 +247,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filedirs): - """ Yields examples. """ + """Yields examples.""" id_ = 0 for xml_location in filedirs: diff --git a/datasets/wi_locness/wi_locness.py b/datasets/wi_locness/wi_locness.py index cd711cb0e99..d8d35820df4 100644 --- a/datasets/wi_locness/wi_locness.py +++ b/datasets/wi_locness/wi_locness.py @@ -187,7 +187,7 @@ def _split_generators(self, dl_manager): assert False def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" if split == "validation": split = "dev" diff --git a/datasets/wiki_asp/wiki_asp.py b/datasets/wiki_asp/wiki_asp.py index 11b885ab353..9bdd5960b42 100644 --- a/datasets/wiki_asp/wiki_asp.py +++ b/datasets/wiki_asp/wiki_asp.py @@ -196,7 +196,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, encoding="utf-8") as f: for id_, row in enumerate(f): data = json.loads(row) diff --git a/datasets/wiki_bio/wiki_bio.py b/datasets/wiki_bio/wiki_bio.py index 0113585c1e6..86845cdc8b0 100644 --- a/datasets/wiki_bio/wiki_bio.py +++ b/datasets/wiki_bio/wiki_bio.py @@ -158,7 +158,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, id_file, infobox_file, nb_lines_file, sentences_file, article_title_file): - """ Yields examples.""" + """Yields examples.""" with open(id_file, "r", encoding="utf-8") as id_src, open( infobox_file, "r", encoding="utf-8" ) as infobox_src, open(nb_lines_file, "r", encoding="utf-8") as nb_lines_src, open( diff --git a/datasets/wiki_lingua/wiki_lingua.py b/datasets/wiki_lingua/wiki_lingua.py index a719e8b9914..58c839edf96 100644 --- a/datasets/wiki_lingua/wiki_lingua.py +++ b/datasets/wiki_lingua/wiki_lingua.py @@ -190,7 +190,7 @@ def _process_article(self, article): return processed_article def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" with open(filepath, "rb") as f: data = pickle.load(f) for id_, row in enumerate(data.items()): diff --git a/datasets/wikicorpus/wikicorpus.py b/datasets/wikicorpus/wikicorpus.py index 9f22c535d58..2b4ac26cc79 100644 --- a/datasets/wikicorpus/wikicorpus.py +++ b/datasets/wikicorpus/wikicorpus.py @@ -55,7 +55,7 @@ class WikicorpusConfig(datasets.BuilderConfig): - """ BuilderConfig for Wikicorpus.""" + """BuilderConfig for Wikicorpus.""" def __init__(self, form=None, language=None, **kwargs): """ diff --git a/datasets/wino_bias/wino_bias.py b/datasets/wino_bias/wino_bias.py index bb469ba251b..a9a12a32f99 100644 --- a/datasets/wino_bias/wino_bias.py +++ b/datasets/wino_bias/wino_bias.py @@ -254,7 +254,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath): - """ Yields examples. 
""" + """Yields examples.""" with open(filepath, encoding="utf-8") as f: id_ = 0 document_id = None diff --git a/datasets/winograd_wsc/winograd_wsc.py b/datasets/winograd_wsc/winograd_wsc.py index 4daf425f4a9..849a9a9c281 100644 --- a/datasets/winograd_wsc/winograd_wsc.py +++ b/datasets/winograd_wsc/winograd_wsc.py @@ -45,19 +45,19 @@ class WinogradWSCConfig(datasets.BuilderConfig): - """ BuilderConfig for WinogradWSC. """ + """BuilderConfig for WinogradWSC.""" def __init__(self, *args, language=None, inds=None, **kwargs): super().__init__(*args, **kwargs) self.inds = set(inds) if inds is not None else None def is_in_range(self, id): - """ Takes an index and tells you if it belongs to the configuration's subset """ + """Takes an index and tells you if it belongs to the configuration's subset""" return id in self.inds if self.inds is not None else True class WinogradWSC(datasets.GeneratorBasedBuilder): - """ The Winograd Schema Challenge Dataset """ + """The Winograd Schema Challenge Dataset""" BUILDER_CONFIG_CLASS = WinogradWSCConfig BUILDER_CONFIGS = [ diff --git a/datasets/winogrande/winogrande.py b/datasets/winogrande/winogrande.py index 1b863095d47..3296dcaf23b 100644 --- a/datasets/winogrande/winogrande.py +++ b/datasets/winogrande/winogrande.py @@ -31,7 +31,7 @@ class WinograndeConfig(datasets.BuilderConfig): - """ BuilderConfig for Discofuse""" + """BuilderConfig for Discofuse""" def __init__(self, data_size, **kwargs): """ diff --git a/datasets/wmt20_mlqe_task1/wmt20_mlqe_task1.py b/datasets/wmt20_mlqe_task1/wmt20_mlqe_task1.py index 34781e24960..b16a66025fc 100644 --- a/datasets/wmt20_mlqe_task1/wmt20_mlqe_task1.py +++ b/datasets/wmt20_mlqe_task1/wmt20_mlqe_task1.py @@ -156,7 +156,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split, source_lg, target_lg): - """ Yields examples. """ + """Yields examples.""" docids_filepath = os.path.join(filepath, f"{split}.doc_ids") with open(docids_filepath, encoding="utf-8") as f: docids = f.read().splitlines()[1:] diff --git a/datasets/wmt20_mlqe_task2/wmt20_mlqe_task2.py b/datasets/wmt20_mlqe_task2/wmt20_mlqe_task2.py index cfac69b6108..b2ab6bf3934 100644 --- a/datasets/wmt20_mlqe_task2/wmt20_mlqe_task2.py +++ b/datasets/wmt20_mlqe_task2/wmt20_mlqe_task2.py @@ -157,7 +157,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split, source_lg, target_lg): - """ Yields examples. """ + """Yields examples.""" def open_and_read(fp): with open(fp, encoding="utf-8") as f: diff --git a/datasets/wmt20_mlqe_task3/wmt20_mlqe_task3.py b/datasets/wmt20_mlqe_task3/wmt20_mlqe_task3.py index fe66c223f57..92b64f58e94 100644 --- a/datasets/wmt20_mlqe_task3/wmt20_mlqe_task3.py +++ b/datasets/wmt20_mlqe_task3/wmt20_mlqe_task3.py @@ -181,7 +181,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" def open_and_read(fp): with open(fp, encoding="utf-8") as f: diff --git a/datasets/wrbsc/wrbsc.py b/datasets/wrbsc/wrbsc.py index 21dd8e24e3e..e2dbf021eec 100644 --- a/datasets/wrbsc/wrbsc.py +++ b/datasets/wrbsc/wrbsc.py @@ -125,7 +125,7 @@ def _split_generators(self, dl_manager): ] def _generate_examples(self, filepath, split): - """ Yields examples. """ + """Yields examples.""" # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method. 
diff --git a/datasets/wrbsc/wrbsc.py b/datasets/wrbsc/wrbsc.py
index 21dd8e24e3e..e2dbf021eec 100644
--- a/datasets/wrbsc/wrbsc.py
+++ b/datasets/wrbsc/wrbsc.py
@@ -125,7 +125,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""
         # TODO: This method will receive as arguments the `gen_kwargs` defined in the previous `_split_generators` method.
         # It is in charge of opening the given file and yielding (key, example) tuples from the dataset
         # The key is not important, it's more here for legacy reason (legacy from tfds)
diff --git a/datasets/xed_en_fi/xed_en_fi.py b/datasets/xed_en_fi/xed_en_fi.py
index af6ce6b6086..2aac9b46d3a 100644
--- a/datasets/xed_en_fi/xed_en_fi.py
+++ b/datasets/xed_en_fi/xed_en_fi.py
@@ -127,7 +127,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath):
-        """ Yields examples. """
+        """Yields examples."""
         with open(filepath, encoding="utf-8") as f:
             for id_, line in enumerate(f):
                 if self.config.name == "en_neutral":
diff --git a/datasets/xquad/xquad.py b/datasets/xquad/xquad.py
index f3b8ddd737d..fbe70afe650 100644
--- a/datasets/xquad/xquad.py
+++ b/datasets/xquad/xquad.py
@@ -32,7 +32,7 @@


 class XquadConfig(datasets.BuilderConfig):
-    """ BuilderConfig for Xquad"""
+    """BuilderConfig for Xquad"""

     def __init__(self, lang, **kwargs):
         """
diff --git a/datasets/xquad_r/xquad_r.py b/datasets/xquad_r/xquad_r.py
index ab23e47ee3b..b92f660b585 100644
--- a/datasets/xquad_r/xquad_r.py
+++ b/datasets/xquad_r/xquad_r.py
@@ -49,7 +49,7 @@


 class XquadRConfig(datasets.BuilderConfig):
-    """ BuilderConfig for XquadR"""
+    """BuilderConfig for XquadR"""

     def __init__(self, lang, **kwargs):
         """
diff --git a/datasets/xsum_factuality/xsum_factuality.py b/datasets/xsum_factuality/xsum_factuality.py
index ad82ee3c6d9..9e47fbbae4a 100644
--- a/datasets/xsum_factuality/xsum_factuality.py
+++ b/datasets/xsum_factuality/xsum_factuality.py
@@ -142,7 +142,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""
         with open(filepath, encoding="utf-8") as f:
             f_csv = csv.reader(f, delimiter=",", quotechar='"')

diff --git a/datasets/yelp_review_full/yelp_review_full.py b/datasets/yelp_review_full/yelp_review_full.py
index 66c4cf35576..847c7609fbb 100644
--- a/datasets/yelp_review_full/yelp_review_full.py
+++ b/datasets/yelp_review_full/yelp_review_full.py
@@ -113,7 +113,7 @@ def _split_generators(self, dl_manager):
         ]

     def _generate_examples(self, filepath, split):
-        """ Yields examples. """
+        """Yields examples."""
         with open(filepath, encoding="utf-8") as f:
             data = csv.reader(f, delimiter=",", quoting=csv.QUOTE_NONNUMERIC)
""" + """Yields examples.""" for fp in filepaths: with open(fp, "r", encoding="utf-8") as json_file: json_lists = list(json_file) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 95585de6ac5..7ca3e52b043 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -87,12 +87,12 @@ def __init__(self, info: DatasetInfo, split: Optional[NamedSplit]): @property def info(self): - """ :class:`datasets.DatasetInfo` object containing all the metadata in the dataset.""" + """:class:`datasets.DatasetInfo` object containing all the metadata in the dataset.""" return self._info @property def split(self): - """ :class:`datasets.NamedSplit` object corresponding to a named dataset split.""" + """:class:`datasets.NamedSplit` object corresponding to a named dataset split.""" return self._split @property @@ -711,7 +711,7 @@ def num_rows(self) -> int: @property def column_names(self) -> List[str]: - """Names of the columns in the dataset. """ + """Names of the columns in the dataset.""" return self._data.column_names @property @@ -1114,7 +1114,7 @@ def rename(columns): return dataset def __len__(self): - """ Number of rows in the dataset.""" + """Number of rows in the dataset.""" return self.num_rows def __iter__(self): @@ -1675,7 +1675,7 @@ class NumExamplesMismatch(Exception): pass def validate_function_output(processed_inputs, indices): - """ Validate output of the map function. """ + """Validate output of the map function.""" if processed_inputs is not None and not isinstance(processed_inputs, (Mapping, pa.Table)): raise TypeError( "Provided `function` which is applied to all elements of table returns a variable of type {}. Make sure provided `function` returns a variable of type `dict` (or a pyarrow table) to update the dataset or `None` if you are only interested in side effects.".format( @@ -1695,7 +1695,7 @@ def validate_function_output(processed_inputs, indices): ) def apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples=False, offset=0): - """ Utility to apply the function on a selection of columns. """ + """Utility to apply the function on a selection of columns.""" nonlocal update_data fn_args = [inputs] if input_columns is None else [inputs[col] for col in input_columns] if offset == 0: diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py index c12495ed5e7..e8db1b555e1 100644 --- a/src/datasets/arrow_writer.py +++ b/src/datasets/arrow_writer.py @@ -198,7 +198,7 @@ def __init__( self.pa_writer: Optional[pa.RecordBatchStreamWriter] = None def __len__(self): - """ Return the number of writed and staged examples """ + """Return the number of writed and staged examples""" return self._num_examples + len(self.current_examples) + len(self.current_rows) def __enter__(self): diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index e5029639245..ca493b7ebca 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -53,7 +53,7 @@ def num_rows(self) -> Dict[str, int]: @property def column_names(self) -> Dict[str, List[str]]: - """Names of the columns in each split of the dataset. 
""" + """Names of the columns in each split of the dataset.""" self._check_values_type() return {k: dataset.column_names for k, dataset in self.items()} diff --git a/src/datasets/metric.py b/src/datasets/metric.py index 59fde07f83c..9950718cef8 100644 --- a/src/datasets/metric.py +++ b/src/datasets/metric.py @@ -72,7 +72,7 @@ def __init__(self, info: MetricInfo): @property def info(self): - """ :class:`datasets.MetricInfo` object containing all the metadata in the metric.""" + """:class:`datasets.MetricInfo` object containing all the metadata in the metric.""" return self._metric_info @property @@ -233,7 +233,7 @@ def _build_data_dir(self): return builder_data_dir def _create_cache_file(self, timeout=1) -> Tuple[str, FileLock]: - """ Create a new cache file. If the default cache file is used, we generated a new hash. """ + """Create a new cache file. If the default cache file is used, we generated a new hash.""" file_path = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{self.process_id}.arrow") filelock = None for i in range(self.max_concurrent_cache_files): @@ -547,7 +547,7 @@ def _download_and_prepare(self, dl_manager): return None def _compute(self, *, predictions=None, references=None, **kwargs) -> Dict[str, Any]: - """ This method defines the common API for all the metrics in the library """ + """This method defines the common API for all the metrics in the library""" raise NotImplementedError def __del__(self): diff --git a/src/datasets/utils/filelock.py b/src/datasets/utils/filelock.py index e0991142824..5d4061d2e24 100644 --- a/src/datasets/utils/filelock.py +++ b/src/datasets/utils/filelock.py @@ -86,7 +86,7 @@ class Timeout(TimeoutError): """ def __init__(self, lock_file): - """""" + """ """ #: The path of the file lock. self.lock_file = lock_file return None @@ -126,7 +126,7 @@ class BaseFileLock: """ def __init__(self, lock_file, timeout=-1): - """""" + """ """ # The path to the lock file. self._lock_file = lock_file @@ -173,7 +173,7 @@ def timeout(self): @timeout.setter def timeout(self, value): - """""" + """ """ self._timeout = float(value) return None diff --git a/src/datasets/utils/metadata.py b/src/datasets/utils/metadata.py index e94fe6abe0f..809b8dc07dc 100644 --- a/src/datasets/utils/metadata.py +++ b/src/datasets/utils/metadata.py @@ -50,7 +50,7 @@ def yaml_block_from_readme(path: Path) -> Optional[str]: def metadata_dict_from_readme(path: Path) -> Optional[Dict[str, List[str]]]: - """"Loads a dataset's metadata from the dataset card (REAMDE.md), as a Python dict""" + """ "Loads a dataset's metadata from the dataset card (REAMDE.md), as a Python dict""" yaml_block = yaml_block_from_readme(path=path) if yaml_block is None: return None