Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 54 additions & 12 deletions datachecks/core/configuration/configuration_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
from abc import ABC
from pathlib import Path
from typing import Dict, List, TypeVar, Union

from pyparsing import Combine, Group, Literal
Expand Down Expand Up @@ -292,13 +294,8 @@ def parse(self, config_list: List[Dict]) -> Dict[str, MetricConfiguration]:
return metric_configurations


def load_configuration_from_yaml_str(yaml_string: str) -> Configuration:
"""
Load configuration from a yaml string
"""
def _parse_configuration_from_dict(config_dict: Dict) -> Configuration:
try:
config_dict: Dict = parse_config(data=yaml_string)

data_source_configurations = DataSourceConfigParser().parse(
config_list=config_dict["data_sources"]
)
Expand All @@ -310,7 +307,7 @@ def load_configuration_from_yaml_str(yaml_string: str) -> Configuration:
data_sources=data_source_configurations, metrics=metric_configurations
)

if "storage" in config_dict:
if "storage" in config_dict and config_dict["storage"] is not None:
configuration.storage = StorageConfigParser().parse(
config=config_dict["storage"]
)
Expand All @@ -321,13 +318,58 @@ def load_configuration_from_yaml_str(yaml_string: str) -> Configuration:
)


def load_configuration(file_path: str) -> Configuration:
def load_configuration_from_yaml_str(yaml_string: str) -> Configuration:
    """
    Load configuration from a yaml string

    :param yaml_string: yaml document holding the configuration
    :return: the parsed Configuration
    :raises DataChecksConfigurationError: if the yaml string cannot be parsed
    """
    try:
        config_dict: Dict = parse_config(data=yaml_string)
    except Exception as ex:
        # Chain the original error explicitly so the traceback shows the
        # parser failure as the direct cause of the configuration error.
        raise DataChecksConfigurationError(
            message=f"Failed to parse configuration: {str(ex)}"
        ) from ex
    return _parse_configuration_from_dict(config_dict=config_dict)


def load_configuration(configuration_path: str) -> Configuration:
    """
    Load configuration from a yaml file or from a directory of yaml files

    When ``configuration_path`` is a file, it is read and parsed on its own.
    When it is a directory, every ``*.yaml`` file directly inside it is parsed
    and the ``data_sources`` and ``metrics`` lists of all files are
    concatenated into a single configuration. Files are processed in sorted
    path order so the result does not depend on directory listing order; when
    several files define ``storage``, the last non-null definition in that
    order is used.

    :param configuration_path: path of a yaml file or of a directory of yaml files
    :return: the parsed Configuration
    :raises DataChecksConfigurationError: if the path does not exist, if the
        directory contains no ``*.yaml`` file, or if a file cannot be parsed
    """
    path = Path(configuration_path)
    if not path.exists():
        raise DataChecksConfigurationError(
            message=f"Configuration file {configuration_path} does not exist"
        )

    if path.is_file():
        with open(configuration_path) as config_yaml_file:
            yaml_string = config_yaml_file.read()
        return load_configuration_from_yaml_str(yaml_string)

    # Escape the directory part so characters such as "[" in the path are not
    # read as glob patterns, and sort because glob returns matches in
    # arbitrary order (the merge below is order-sensitive for "storage").
    config_files = sorted(glob.glob(f"{glob.escape(str(path))}/*.yaml"))
    if not config_files:
        raise DataChecksConfigurationError(
            message=f"No configuration files found in {configuration_path}"
        )

    final_config_dict: Dict = {
        "data_sources": [],
        "metrics": [],
        "storage": None,
    }
    for config_file in config_files:
        with open(config_file) as config_yaml_file:
            yaml_string = config_yaml_file.read()
        try:
            # NOTE(review): assumes parse_config returns a falsy value (e.g.
            # None) for an empty document - confirm against parse_config.
            config_dict: Dict = parse_config(data=yaml_string) or {}
        except Exception as ex:
            # Same error type as the single-file path, plus the file name.
            raise DataChecksConfigurationError(
                message=f"Failed to parse configuration file {config_file}: {str(ex)}"
            ) from ex

        # A file may hold only some of the sections (e.g. only metrics), so a
        # missing or empty section contributes nothing instead of failing.
        final_config_dict["data_sources"].extend(config_dict.get("data_sources") or [])
        final_config_dict["metrics"].extend(config_dict.get("metrics") or [])
        if config_dict.get("storage") is not None:
            final_config_dict["storage"] = config_dict["storage"]

    return _parse_configuration_from_dict(final_config_dict)
13 changes: 13 additions & 0 deletions tests/integration/configuration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
File renamed without changes.
37 changes: 37 additions & 0 deletions tests/integration/configuration/test_configurations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pathlib

from datachecks.core import Configuration, load_configuration

# Directory containing this test module; the fixture paths used by the tests
# in this file are built relative to it.
current_path = pathlib.Path(__file__).parent.resolve()


def test_should_parse_single_config_file():
    """Loading a single yaml file yields its data sources and metrics."""
    single_file = f"{current_path}/test_config.yaml"
    configuration: Configuration = load_configuration(single_file)

    assert configuration is not None
    source_count = len(configuration.data_sources)
    metric_count = len(configuration.metrics)
    assert source_count == 3
    assert metric_count == 5


def test_should_parse_multiple_config_files():
    """Loading a directory merges the yaml files found inside it."""
    config_directory = f"{current_path}/test_configurations"
    configuration: Configuration = load_configuration(config_directory)

    assert configuration is not None
    source_count = len(configuration.data_sources)
    metric_count = len(configuration.metrics)
    assert source_count == 2
    assert metric_count == 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Data sources to query
data_sources:
- name: search_datastore # Data source name
type: opensearch # Data source type is OpenSearch
connection:
host: 127.0.0.1
port: 9205
username: !ENV ${OS_USER} # Username for authentication, read from the OS_USER environment variable
password: !ENV ${OS_PASS} # Password for authentication, read from the OS_PASS environment variable

# Metrics to generate
metrics:
- name: count_us_parts
metric_type: document_count
resource: search_datastore.product_data_us
filters:
where: '{"match_all" : {}}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Data sources to query
data_sources:
- name: search_staging_db # Data source name
type: postgres # Data source type is Postgres
connection:
host: 127.0.0.1
port: 5422
username: !ENV ${DB2_USER} # Username for authentication, read from the DB2_USER environment variable
password: !ENV ${DB2_PASS} # Password for authentication, read from the DB2_PASS environment variable
database: dc_db_2

# Metrics to generate
metrics:
- name: count_us_parts_not_valid
metric_type: row_count
resource: search_staging_db.products
filters:
where: "is_valid is False and country_code = 'US'"