From 14d9afbb7ae1b787c450261ca0ff374551993031 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Wed, 7 Feb 2024 17:41:04 +0100 Subject: [PATCH] Fix missing info when loading some datasets from Parquet export (#6635) * set default config when single config parquet export * fix tests * fix test * Update src/datasets/utils/metadata.py Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --------- Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- src/datasets/utils/metadata.py | 2 +- tests/test_load.py | 4 ++-- tests/test_metadata_util.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/datasets/utils/metadata.py b/src/datasets/utils/metadata.py index 9d2e6d6389e..d2e763e9cb6 100644 --- a/src/datasets/utils/metadata.py +++ b/src/datasets/utils/metadata.py @@ -253,7 +253,7 @@ def to_dataset_card_data(self, dataset_card_data: DatasetCardData) -> None: def get_default_config_name(self) -> Optional[str]: default_config_name = None for config_name, metadata_config in self.items(): - if config_name == "default" or metadata_config.get("default"): + if len(self) == 1 or config_name == "default" or metadata_config.get("default"): if default_config_name is None: default_config_name = config_name else: diff --git a/tests/test_load.py b/tests/test_load.py index 269d7991e62..d6dab4f4707 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -523,7 +523,7 @@ def test_LocalDatasetModuleFactoryWithoutScript_with_single_config_in_metadata(s assert module_builder_configs[0].drop_labels is True # parameter is passed from metadata # config named "default" is automatically considered to be a default config - assert module_factory_result.builder_configs_parameters.default_config_name is None + assert module_factory_result.builder_configs_parameters.default_config_name == "custom" # we don't pass config params to builder in builder_kwargs, they are stored in builder_configs directly assert "drop_labels" not in module_factory_result.builder_kwargs @@ -695,7 +695,7 @@ def test_HubDatasetModuleFactoryWithoutScript_with_one_default_config_in_metadat assert module_builder_configs[0].drop_labels is True # parameter is passed from metadata # config named "default" is automatically considered to be a default config - assert module_factory_result.builder_configs_parameters.default_config_name is None + assert module_factory_result.builder_configs_parameters.default_config_name == "custom" # we don't pass config params to builder in builder_kwargs, they are stored in builder_configs directly assert "drop_labels" not in module_factory_result.builder_kwargs diff --git a/tests/test_metadata_util.py b/tests/test_metadata_util.py index d2d82903bed..3aeaea5f5b1 100644 --- a/tests/test_metadata_util.py +++ b/tests/test_metadata_util.py @@ -224,7 +224,7 @@ def test_from_yaml_string(self): @pytest.mark.parametrize( "readme_content, expected_metadata_configs_dict, expected_default_config_name", [ - (README_METADATA_SINGLE_CONFIG, EXPECTED_METADATA_SINGLE_CONFIG, None), + (README_METADATA_SINGLE_CONFIG, EXPECTED_METADATA_SINGLE_CONFIG, "custom"), (README_METADATA_TWO_CONFIGS_WITH_DEFAULT_FLAG, EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_FLAG, "v2"), (README_METADATA_TWO_CONFIGS_WITH_DEFAULT_NAME, EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_NAME, "default"), ],