Skip to content

Commit

Permalink
Merge branch 'develop' into f/DX-101/spark-id-pk
Browse files: browse the repository at this point in the history
* develop:
  [MAINTENANCE] Enrich `RendererConfiguration` primitive types (#6629)
  Skip postgres tests in spark. (#6643)
  [MAINTENANCE] Batch ID must incorporate batch_spec_passthrough.  Instantiate Validator with DataContext in test modules.  Query metrics/expectations types cleanup. (#6636)
  [FEATURE] Use docstring linter for public api to catch missing parameters (#6642)
  [FEATURE] ID/PK Rendering in DataDocs (#6637)
  [FEATURE]  ZEP - Load/dump new style config from DataContext (#6631)
  [MAINTENANCE] Add docstring linter for public api to CI (#6641)
  • Loading branch information
Will Shin committed Dec 26, 2022
2 parents 7f9770a + a0ddfe7 commit 741d652
Show file tree
Hide file tree
Showing 79 changed files with 2,401 additions and 928 deletions.
7 changes: 7 additions & 0 deletions azure-pipelines-dev.yml
Expand Up @@ -123,6 +123,13 @@ stages:
- bash: python scripts/check_docstring_coverage.py
name: DocstringChecker

- job: docstring_linter
steps:
- script: |
pip install --requirement reqs/requirements-dev-test.txt
invoke docstrings
name: DocstringLinter
- job: unused_import_checker
steps:
- script: |
Expand Down
10 changes: 8 additions & 2 deletions azure-pipelines.yml
Expand Up @@ -106,6 +106,14 @@ stages:
- bash: python scripts/check_docstring_coverage.py
name: DocstringChecker

- job: docstring_linter
condition: or(eq(variables.isScheduled, true), eq(variables.isReleasePrep, true), eq(variables.isRelease, true), eq(variables.isManual, true))
steps:
- script: |
pip install --requirement reqs/requirements-dev-test.txt
invoke docstrings
name: DocstringLinter
- job: unused_import_checker
condition: or(eq(variables.isScheduled, true), eq(variables.isReleasePrep, true), eq(variables.isRelease, true), eq(variables.isManual, true))
steps:
Expand Down Expand Up @@ -260,8 +268,6 @@ stages:
python.version: '3.8'
Python39:
python.version: '3.9'
services:
postgres: postgres
variables:
IMAGE_SUFFIX: $[ dependencies.make_suffix.outputs['suffix.IMAGE_SUFFIX'] ]
steps:
Expand Down
@@ -1,6 +1,7 @@
from typing import Optional, Union

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.expectations.expectation import (
ExpectationValidationResult,
Expand Down Expand Up @@ -56,8 +57,6 @@ def validate_configuration(
"""

super().validate_configuration(configuration)
if configuration is None:
configuration = self.configuration

def _validate(
self,
Expand All @@ -66,9 +65,8 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

query_result = metrics.get("query.template_values")
num_of_duplicates = query_result[0][0]
metrics = convert_to_json_serializable(data=metrics)
num_of_duplicates = list(metrics.get("query.template_values")[0].values())[0]

if not num_of_duplicates:
return {
Expand Down
Expand Up @@ -15,10 +15,6 @@
ExpectationValidationResult,
QueryExpectation,
)
from great_expectations.expectations.metrics.import_manager import (
pyspark_sql_Row,
sqlalchemy_engine_Row,
)


class ExpectQueriedColumnPairValuesToHaveDiff(QueryExpectation):
Expand Down Expand Up @@ -73,9 +69,8 @@ def _validate(
) -> Union[ExpectationValidationResult, dict]:
diff: Union[float, int] = configuration["kwargs"].get("diff")
mostly: str = configuration["kwargs"].get("mostly")
query_result: Union[sqlalchemy_engine_Row, pyspark_sql_Row] = metrics.get(
"query.column_pair"
)
query_result = metrics.get("query.column_pair")
query_result = dict([element.values() for element in query_result])

success = (
sum([(abs(x[0]) == diff) for x in query_result]) / len(query_result)
Expand Down
Expand Up @@ -60,8 +60,6 @@ def validate_configuration(
"""

super().validate_configuration(configuration)
if configuration is None:
configuration = self.configuration

def _validate(
self,
Expand All @@ -70,9 +68,8 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

query_result = metrics.get("query.template_values")
query_result = dict(query_result)

if not query_result:
return {
"info": "The column values are unique, under the condition",
Expand Down
Expand Up @@ -70,7 +70,6 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

template_dict = self.validate_template_dict(configuration)
query_result = metrics.get("query.template_values")
actual_num_of_distinct_values = len(query_result)
Expand Down
Expand Up @@ -78,11 +78,10 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

value = configuration["kwargs"].get("value")
threshold = configuration["kwargs"].get("threshold")
query_result = metrics.get("query.column")
query_result = dict(query_result)
query_result = dict([element.values() for element in query_result])

if isinstance(value, list):
success = all(
Expand Down
@@ -1,6 +1,7 @@
from typing import Optional, Union

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.expectations.expectation import (
ExpectationValidationResult,
Expand Down Expand Up @@ -61,16 +62,11 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:
success = False

query_result = metrics.get("query.template_values")
num_of_missing_rows = query_result[0][0]

if num_of_missing_rows == 0:
success = True
metrics = convert_to_json_serializable(data=metrics)
num_of_missing_rows = list(metrics.get("query.template_values")[0].values())[0]

return {
"success": success,
"success": num_of_missing_rows == 0,
"result": {
"Rows with IDs in first table missing in second table": num_of_missing_rows
},
Expand Down
Expand Up @@ -2,6 +2,7 @@
from typing import Optional, Union

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.exceptions.exceptions import (
InvalidExpectationConfigurationError,
)
Expand All @@ -27,10 +28,10 @@ class ExpectQueriedSlowlyChangingTableToHaveNoGaps(QueryExpectation):
metric_dependencies = ("query.template_values",)

query = """
SELECT SUM(CASE WHEN {close_date_column} != COALESCE(next_start_date, {close_date_column}) THEN 1 ELSE 0 END),
SELECT SUM(CASE WHEN {close_date_column} != COALESCE(next_start_date, {close_date_column}) THEN 1 ELSE 0 END),
COUNT(1)
FROM(SELECT {primary_key}, {close_date_column}, LEAD({open_date_column}) OVER(PARTITION BY {primary_key} ORDER BY
{open_date_column}) AS next_start_date
FROM(SELECT {primary_key}, {close_date_column}, LEAD({open_date_column}) OVER(PARTITION BY {primary_key} ORDER BY
{open_date_column}) AS next_start_date
FROM {active_batch})
"""

Expand Down Expand Up @@ -66,20 +67,18 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

success = False
threshold = configuration["kwargs"].get("threshold")
if not threshold:
threshold = self.default_kwarg_values["threshold"]
query_result = metrics.get("query.template_values")
holes_count, total_count = query_result[0]
error_rate = holes_count / total_count

if error_rate <= threshold:
success = True
metrics = convert_to_json_serializable(data=metrics)
holes_count, total_count = list(
metrics.get("query.template_values")[0].values()
)[0]
error_rate = holes_count / total_count

return {
"success": success,
"success": error_rate <= threshold,
"result": {
"threshold": threshold,
"holes_count": holes_count,
Expand Down
Expand Up @@ -7,6 +7,7 @@
from typing import Optional, Union

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.exceptions.exceptions import (
InvalidExpectationConfigurationError,
)
Expand Down Expand Up @@ -64,9 +65,9 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

metrics = convert_to_json_serializable(data=metrics)
query_result = list(metrics.get("query.table")[0].values())[0]
value = configuration["kwargs"].get("value")
query_result = metrics.get("query.table")[0][0]

success = query_result == value

Expand Down
Expand Up @@ -7,6 +7,7 @@
from typing import Optional, Union

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.exceptions.exceptions import (
InvalidExpectationConfigurationError,
)
Expand All @@ -15,10 +16,6 @@
ExpectationValidationResult,
QueryExpectation,
)
from great_expectations.expectations.metrics.import_manager import (
pyspark_sql_Row,
sqlalchemy_engine_Row,
)


class ExpectQueryCountWithFilterToMeetThreshold(QueryExpectation):
Expand Down Expand Up @@ -68,16 +65,12 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:

query_result: Union[sqlalchemy_engine_Row, pyspark_sql_Row] = metrics.get(
"query.template_dict"
)
metrics = convert_to_json_serializable(data=metrics)
count: int = list(metrics.get("query.template_values")[0].values())[0]
threshold: Union[float, int] = configuration["kwargs"].get("threshold")
count: int = query_result[0][0]
success: bool = count >= threshold

return {
"success": success,
"success": count >= threshold,
"result": {"observed_value": count},
}

Expand Down
Expand Up @@ -13,10 +13,6 @@
ExpectationValidationResult,
QueryExpectation,
)
from great_expectations.expectations.metrics.import_manager import (
pyspark_sql_Row,
sqlalchemy_engine_Row,
)


class ExpectQueryToHaveNoDuplicateValueCombinations(QueryExpectation):
Expand Down Expand Up @@ -56,18 +52,16 @@ def _validate(
runtime_configuration: dict = None,
execution_engine: ExecutionEngine = None,
) -> Union[ExpectationValidationResult, dict]:
query_result = metrics.get("query.multiple_columns")
query_result = dict([element.values() for element in query_result])

query_result: Union[sqlalchemy_engine_Row, pyspark_sql_Row] = metrics.get(
"query.multiple_columns"
)
success: bool = query_result == []
columns = configuration["kwargs"].get("columns")
duplicates = [
dict(zip(columns + ["no_occurrences"], row)) for row in query_result
]

return {
"success": success,
"success": not query_result,
"result": {"observed_value": duplicates},
}

Expand Down
Expand Up @@ -15,11 +15,7 @@
#!!! This giant block of imports should be something simpler, such as:
# from great_expectations.helpers.expectation_creation import *
from great_expectations.exceptions import InvalidExpectationConfigurationError
from great_expectations.execution_engine import (
ExecutionEngine,
PandasExecutionEngine,
SparkDFExecutionEngine,
)
from great_expectations.execution_engine import ExecutionEngine
from great_expectations.execution_engine.execution_engine import MetricDomainTypes
from great_expectations.execution_engine.sqlalchemy_execution_engine import (
SqlAlchemyExecutionEngine,
Expand All @@ -28,16 +24,10 @@
TableExpectation,
render_evaluation_parameter_string,
)
from great_expectations.expectations.metrics.import_manager import F, sa
from great_expectations.expectations.metrics.metric_provider import metric_value
from great_expectations.expectations.metrics.table_metric_provider import (
TableMetricProvider,
)
from great_expectations.expectations.registry import (
_registered_expectations,
_registered_metrics,
_registered_renderers,
)
from great_expectations.render import RenderedStringTemplateContent
from great_expectations.render.renderer.renderer import renderer
from great_expectations.render.util import substitute_none_for_missing
Expand Down

0 comments on commit 741d652

Please sign in to comment.