diff --git a/great_expectations/rule_based_profiler/helpers/util.py b/great_expectations/rule_based_profiler/helpers/util.py index cf3d8ee7006e..9b64d5a37bfc 100644 --- a/great_expectations/rule_based_profiler/helpers/util.py +++ b/great_expectations/rule_based_profiler/helpers/util.py @@ -433,9 +433,9 @@ def integer_semantic_domain_type(domain: Domain) -> bool: """ This method examines "INFERRED_SEMANTIC_TYPE_KEY" attribute of "Domain" argument to check whether or not underlying "SemanticDomainTypes" enum value is an "integer". Because explicitly designated "SemanticDomainTypes.INTEGER" type - is unavaiable, "SemanticDomainTypes.LOGIC", "SemanticDomainTypes.BINARY", and "SemanticDomainTypes.IDENTIFIER", - are intepreted as taking on "integer" values. Note: In certain settings, this method should be used as pre-filter - to "NumericMetricRangeMultiBatchParameterBuilder._get_round_decimals_using_heuristics()". + is unavailable, "SemanticDomainTypes.LOGIC" and "SemanticDomainTypes.IDENTIFIER" are interpreted as "integer" values. + + This method can be used as a pre-filter to "NumericMetricRangeMultiBatchParameterBuilder._get_round_decimals_using_heuristics()". Note: Inability to assess underlying "SemanticDomainTypes" details of "Domain" object produces "False" return value. 
@@ -457,7 +457,6 @@ def integer_semantic_domain_type(domain: Domain) -> bool: semantic_domain_type in [ SemanticDomainTypes.LOGIC, - SemanticDomainTypes.BINARY, SemanticDomainTypes.IDENTIFIER, ] for semantic_domain_type in (inferred_semantic_domain_type.values()) diff --git a/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py index 934a404a899e..b6e241fe419c 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py @@ -12,6 +12,7 @@ compute_bootstrap_quantiles_point_estimate, compute_quantiles, get_parameter_value_and_validate_return_type, + integer_semantic_domain_type, ) from great_expectations.rule_based_profiler.parameter_builder import ( AttributedResolvedMetrics, @@ -351,12 +352,17 @@ def _estimate_metric_value_range( lower_bound: Optional[float] = truncate_values.get("lower_bound") upper_bound: Optional[float] = truncate_values.get("upper_bound") - round_decimals: int = self._get_round_decimals_using_heuristics( - metric_values=metric_values, - domain=domain, - variables=variables, - parameters=parameters, - ) + round_decimals: int + + if integer_semantic_domain_type(domain=domain): + round_decimals = 0 + else: + round_decimals = self._get_round_decimals_using_heuristics( + metric_values=metric_values, + domain=domain, + variables=variables, + parameters=parameters, + ) min_value: Number max_value: Number diff --git a/tests/rule_based_profiler/domain_builder/test_domain.py b/tests/rule_based_profiler/domain_builder/test_domain.py index 0d5fc8e15d62..655d5fa80658 100644 --- a/tests/rule_based_profiler/domain_builder/test_domain.py +++ b/tests/rule_based_profiler/domain_builder/test_domain.py @@ -212,7 +212,9 
@@ def test_integer_semantic_domain_type(): domain = Domain( rule_name="my_rule", domain_type="column", - domain_kwargs={"column": "passenger_count"}, + domain_kwargs={ + "column": "passenger_count", + }, details={ INFERRED_SEMANTIC_TYPE_KEY: { "VendorID": SemanticDomainTypes.NUMERIC, @@ -224,10 +226,114 @@ def test_integer_semantic_domain_type(): domain = Domain( rule_name="my_rule", domain_type="column", - domain_kwargs={"column": "VendorID"}, + domain_kwargs={ + "column": "VendorID", + }, + details={ + INFERRED_SEMANTIC_TYPE_KEY: { + "VendorID": SemanticDomainTypes.IDENTIFIER, + }, + }, + ) + assert integer_semantic_domain_type(domain=domain) + + domain = Domain( + rule_name="my_rule", + domain_type="column", + domain_kwargs={ + "column": "is_night_time", + }, + details={ + INFERRED_SEMANTIC_TYPE_KEY: { + "is_night_time": SemanticDomainTypes.LOGIC, + }, + }, + ) + assert integer_semantic_domain_type(domain=domain) + + domain = Domain( + rule_name="my_rule", + domain_type="column", + domain_kwargs={ + "column_A": "passenger_count", + "column_B": "fare_amount", + }, + details={ + INFERRED_SEMANTIC_TYPE_KEY: { + "passenger_count": SemanticDomainTypes.NUMERIC, + "fare_amount": SemanticDomainTypes.NUMERIC, + }, + }, + ) + assert not integer_semantic_domain_type(domain=domain) + + domain = Domain( + rule_name="my_rule", + domain_type="column", + domain_kwargs={ + "column_A": "passenger_count", + "column_B": "VendorID", + }, + details={ + INFERRED_SEMANTIC_TYPE_KEY: { + "passenger_count": SemanticDomainTypes.NUMERIC, + "VendorID": SemanticDomainTypes.IDENTIFIER, + }, + }, + ) + assert not integer_semantic_domain_type(domain=domain) + + domain = Domain( + rule_name="my_rule", + domain_type="column", + domain_kwargs={ + "column_A": "is_night_time", + "column_B": "VendorID", + }, + details={ + INFERRED_SEMANTIC_TYPE_KEY: { + "is_night_time": SemanticDomainTypes.LOGIC, + "VendorID": SemanticDomainTypes.IDENTIFIER, + }, + }, + ) + assert 
integer_semantic_domain_type(domain=domain) + + domain = Domain( + rule_name="my_rule", + domain_type="column", + domain_kwargs={ + "column_list": [ + "passenger_count", + "fare_amount", + "is_night_time", + ], + }, + details={ + INFERRED_SEMANTIC_TYPE_KEY: { + "passenger_count": SemanticDomainTypes.NUMERIC, + "fare_amount": SemanticDomainTypes.NUMERIC, + "is_night_time": SemanticDomainTypes.LOGIC, + }, + }, + ) + assert not integer_semantic_domain_type(domain=domain) + + domain = Domain( + rule_name="my_rule", + domain_type="column", + domain_kwargs={ + "column_list": [ + "VendorID", + "fare_amount", + "is_night_time", + ], + }, details={ INFERRED_SEMANTIC_TYPE_KEY: { "VendorID": SemanticDomainTypes.IDENTIFIER, + "RatecodeID": SemanticDomainTypes.IDENTIFIER, + "is_night_time": SemanticDomainTypes.LOGIC, }, }, )