diff --git a/datachecks/core/configuration/configuration_parser.py b/datachecks/core/configuration/configuration_parser.py index a859a1f0..4b669b4b 100644 --- a/datachecks/core/configuration/configuration_parser.py +++ b/datachecks/core/configuration/configuration_parser.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import glob from abc import ABC +from pathlib import Path from typing import Dict, List, TypeVar, Union from pyparsing import Combine, Group, Literal @@ -292,13 +294,8 @@ def parse(self, config_list: List[Dict]) -> Dict[str, MetricConfiguration]: return metric_configurations -def load_configuration_from_yaml_str(yaml_string: str) -> Configuration: - """ - Load configuration from a yaml string - """ +def _parse_configuration_from_dict(config_dict: Dict) -> Configuration: try: - config_dict: Dict = parse_config(data=yaml_string) - data_source_configurations = DataSourceConfigParser().parse( config_list=config_dict["data_sources"] ) @@ -310,7 +307,7 @@ def load_configuration_from_yaml_str(yaml_string: str) -> Configuration: data_sources=data_source_configurations, metrics=metric_configurations ) - if "storage" in config_dict: + if "storage" in config_dict and config_dict["storage"] is not None: configuration.storage = StorageConfigParser().parse( config=config_dict["storage"] ) @@ -321,13 +318,58 @@ def load_configuration_from_yaml_str(yaml_string: str) -> Configuration: ) -def load_configuration(file_path: str) -> Configuration: +def load_configuration_from_yaml_str(yaml_string: str) -> Configuration: + """ + Load configuration from a yaml string + """ + try: + config_dict: Dict = parse_config(data=yaml_string) + except Exception as ex: + raise DataChecksConfigurationError( + message=f"Failed to parse configuration: {str(ex)}" + ) + return _parse_configuration_from_dict(config_dict=config_dict) + + +def load_configuration(configuration_path: str) -> Configuration: """ Load configuration from a yaml file - :param file_path: + :param configuration_path: :return: """ - with open(file_path) as config_yaml_file: - yaml_string = config_yaml_file.read() - return load_configuration_from_yaml_str(yaml_string) + path = Path(configuration_path) + if not path.exists(): + raise DataChecksConfigurationError( + message=f"Configuration file {configuration_path} does not exist" + ) + if path.is_file(): + with open(configuration_path) as config_yaml_file: + yaml_string = config_yaml_file.read() + return load_configuration_from_yaml_str(yaml_string) + else: + config_files = glob.glob(f"{configuration_path}/*.yaml") + if len(config_files) == 0: + raise DataChecksConfigurationError( + message=f"No configuration files found in {configuration_path}" + ) + else: + config_dict_list: List[Dict] = [] + for config_file in config_files: + with open(config_file) as config_yaml_file: + yaml_string = config_yaml_file.read() + config_dict: Dict = parse_config(data=yaml_string) + config_dict_list.append(config_dict) + + final_config_dict = { + "data_sources": [], + "metrics": [], + "storage": None, + } + for config_dict in config_dict_list: + final_config_dict["data_sources"].extend(config_dict["data_sources"]) + final_config_dict["metrics"].extend(config_dict["metrics"]) + if "storage" in config_dict: + final_config_dict["storage"] = config_dict["storage"] + + return _parse_configuration_from_dict(final_config_dict) diff --git a/tests/integration/configuration/__init__.py b/tests/integration/configuration/__init__.py new file mode 100644 index 00000000..95dc1b8e --- /dev/null +++ b/tests/integration/configuration/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-present, the Waterdip Labs Pvt. Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/example_config.yaml b/tests/integration/configuration/test_config.yaml similarity index 100% rename from example_config.yaml rename to tests/integration/configuration/test_config.yaml diff --git a/tests/integration/configuration/test_configurations.py b/tests/integration/configuration/test_configurations.py new file mode 100644 index 00000000..c1b02fe0 --- /dev/null +++ b/tests/integration/configuration/test_configurations.py @@ -0,0 +1,37 @@ +# Copyright 2022-present, the Waterdip Labs Pvt. Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + +from datachecks.core import Configuration, load_configuration + +current_path = pathlib.Path(__file__).parent.resolve() + + +def test_should_parse_single_config_file(): + configuration: Configuration = load_configuration( + f"{current_path}/test_config.yaml" + ) + assert configuration is not None + assert len(configuration.data_sources) == 3 + assert len(configuration.metrics) == 5 + + +def test_should_parse_multiple_config_files(): + configuration: Configuration = load_configuration( + f"{current_path}/test_configurations" + ) + assert configuration is not None + assert len(configuration.data_sources) == 2 + assert len(configuration.metrics) == 2 diff --git a/tests/integration/configuration/test_configurations/test_config_1.yaml b/tests/integration/configuration/test_configurations/test_config_1.yaml new file mode 100644 index 00000000..85117e06 --- /dev/null +++ b/tests/integration/configuration/test_configurations/test_config_1.yaml @@ -0,0 +1,17 @@ +# Data sources to query +data_sources: + - name: search_datastore # Data source name + type: opensearch # Data source type is OpenSearch + connection: + host: 127.0.0.1 + port: 9205 + username: !ENV ${OS_USER} # Username to use for authentication ENV variables + password: !ENV ${OS_PASS} # Password to use for authentication ENV variables + +# Metrics to generate +metrics: + - name: count_us_parts + metric_type: document_count + resource: search_datastore.product_data_us + filters: + where: '{"match_all" : {}}' diff --git a/tests/integration/configuration/test_configurations/test_config_2.yaml b/tests/integration/configuration/test_configurations/test_config_2.yaml new file mode 100644 index 00000000..28cd9be9 --- /dev/null +++ b/tests/integration/configuration/test_configurations/test_config_2.yaml @@ -0,0 +1,18 @@ +# Data sources to query +data_sources: + - name: search_staging_db # Data source name + type: postgres # Data source type is Postgres + connection: + host: 127.0.0.1 + port: 5422 + username: !ENV ${DB2_USER} # Username to use for authentication ENV variables + password: !ENV ${DB2_PASS} # Password to use for authentication ENV variables + database: dc_db_2 + +# Metrics to generate +metrics: + - name: count_us_parts_not_valid + metric_type: row_count + resource: search_staging_db.products + filters: + where: "is_valid is False and country_code = 'US'"