Skip to content

Commit

Permalink
unit tests for inferring semantic domain from Domain object
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Sherstinsky committed Apr 29, 2022
1 parent e68b640 commit 59387ad
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 12 deletions.
7 changes: 3 additions & 4 deletions great_expectations/rule_based_profiler/helpers/util.py
Expand Up @@ -433,9 +433,9 @@ def integer_semantic_domain_type(domain: Domain) -> bool:
"""
This method examines "INFERRED_SEMANTIC_TYPE_KEY" attribute of "Domain" argument to check whether or not underlying
"SemanticDomainTypes" enum value is an "integer". Because explicitly designated "SemanticDomainTypes.INTEGER" type
is unavailable, "SemanticDomainTypes.LOGIC", "SemanticDomainTypes.BINARY", and "SemanticDomainTypes.IDENTIFIER",
are interpreted as taking on "integer" values. Note: In certain settings, this method should be used as a pre-filter
to "NumericMetricRangeMultiBatchParameterBuilder._get_round_decimals_using_heuristics()".
is unavailable, "SemanticDomainTypes.LOGIC" and "SemanticDomainTypes.IDENTIFIER" are interpreted as "integer" values.
This method can be used as a pre-filter to "NumericMetricRangeMultiBatchParameterBuilder._get_round_decimals_using_heuristics()".
Note: Inability to assess underlying "SemanticDomainTypes" details of "Domain" object produces "False" return value.
Expand All @@ -457,7 +457,6 @@ def integer_semantic_domain_type(domain: Domain) -> bool:
semantic_domain_type
in [
SemanticDomainTypes.LOGIC,
SemanticDomainTypes.BINARY,
SemanticDomainTypes.IDENTIFIER,
]
for semantic_domain_type in (inferred_semantic_domain_type.values())
Expand Down
Expand Up @@ -12,6 +12,7 @@
compute_bootstrap_quantiles_point_estimate,
compute_quantiles,
get_parameter_value_and_validate_return_type,
integer_semantic_domain_type,
)
from great_expectations.rule_based_profiler.parameter_builder import (
AttributedResolvedMetrics,
Expand Down Expand Up @@ -351,12 +352,17 @@ def _estimate_metric_value_range(
lower_bound: Optional[float] = truncate_values.get("lower_bound")
upper_bound: Optional[float] = truncate_values.get("upper_bound")

round_decimals: int = self._get_round_decimals_using_heuristics(
metric_values=metric_values,
domain=domain,
variables=variables,
parameters=parameters,
)
round_decimals: int

if integer_semantic_domain_type(domain=domain):
round_decimals = 0
else:
round_decimals = self._get_round_decimals_using_heuristics(
metric_values=metric_values,
domain=domain,
variables=variables,
parameters=parameters,
)

min_value: Number
max_value: Number
Expand Down
110 changes: 108 additions & 2 deletions tests/rule_based_profiler/domain_builder/test_domain.py
Expand Up @@ -212,7 +212,9 @@ def test_integer_semantic_domain_type():
domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={"column": "passenger_count"},
domain_kwargs={
"column": "passenger_count",
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"VendorID": SemanticDomainTypes.NUMERIC,
Expand All @@ -224,10 +226,114 @@ def test_integer_semantic_domain_type():
domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={"column": "VendorID"},
domain_kwargs={
"column": "VendorID",
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"VendorID": SemanticDomainTypes.IDENTIFIER,
},
},
)
assert integer_semantic_domain_type(domain=domain)

domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={
"column": "is_night_time",
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"is_night_time": SemanticDomainTypes.LOGIC,
},
},
)
assert integer_semantic_domain_type(domain=domain)

domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={
"column_A": "passenger_count",
"column_B": "fare_amount",
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"passenger_count": SemanticDomainTypes.NUMERIC,
"fare_amount": SemanticDomainTypes.NUMERIC,
},
},
)
assert not integer_semantic_domain_type(domain=domain)

domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={
"column_A": "passenger_count",
"column_B": "VendorID",
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"passenger_count": SemanticDomainTypes.NUMERIC,
"VendorID": SemanticDomainTypes.IDENTIFIER,
},
},
)
assert not integer_semantic_domain_type(domain=domain)

domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={
"column_A": "is_night_time",
"column_B": "VendorID",
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"is_night_time": SemanticDomainTypes.LOGIC,
"VendorID": SemanticDomainTypes.IDENTIFIER,
},
},
)
assert integer_semantic_domain_type(domain=domain)

domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={
"column_list": [
"passenger_count",
"fare_amount",
"is_night_time",
],
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"passenger_count": SemanticDomainTypes.NUMERIC,
"fare_amount": SemanticDomainTypes.NUMERIC,
"is_night_time": SemanticDomainTypes.LOGIC,
},
},
)
assert not integer_semantic_domain_type(domain=domain)

domain = Domain(
rule_name="my_rule",
domain_type="column",
domain_kwargs={
"column_list": [
"VendorID",
"fare_amount",
"is_night_time",
],
},
details={
INFERRED_SEMANTIC_TYPE_KEY: {
"VendorID": SemanticDomainTypes.IDENTIFIER,
"RatecodeID": SemanticDomainTypes.IDENTIFIER,
"is_night_time": SemanticDomainTypes.LOGIC,
},
},
)
Expand Down

0 comments on commit 59387ad

Please sign in to comment.