diff --git a/great_expectations/data_context/data_context.py b/great_expectations/data_context/data_context.py
index d6ec23a0b1e9..cd255072129f 100644
--- a/great_expectations/data_context/data_context.py
+++ b/great_expectations/data_context/data_context.py
@@ -78,6 +78,7 @@
     DataContextConfigDefaults,
     DatasourceConfig,
     GeCloudConfig,
+    ProgressBarsConfig,
     anonymizedUsageStatisticsSchema,
     dataContextConfigSchema,
     datasourceConfigSchema,
@@ -824,6 +825,10 @@ def anonymous_usage_statistics(self):
     def concurrency(self) -> ConcurrencyConfig:
         return self.project_config_with_variables_substituted.concurrency

+    @property
+    def progress_bars(self) -> Optional[ProgressBarsConfig]:
+        return self.project_config_with_variables_substituted.progress_bars
+
     @property
     def notebooks(self):
         return self.project_config_with_variables_substituted.notebooks
diff --git a/great_expectations/data_context/types/base.py b/great_expectations/data_context/types/base.py
index 57faaced6406..acb2f04731dd 100644
--- a/great_expectations/data_context/types/base.py
+++ b/great_expectations/data_context/types/base.py
@@ -1082,6 +1082,24 @@ def make_notebooks_config(self, data, **kwargs):
         return NotebooksConfig(**data)


+class ProgressBarsConfig(DictDot):
+    def __init__(
+        self,
+        globally: bool = True,
+        profilers: bool = True,
+        metric_calculations: bool = True,
+    ):
+        self.globally = globally
+        self.profilers = profilers
+        self.metric_calculations = metric_calculations
+
+
+class ProgressBarsConfigSchema(Schema):
+    globally = fields.Boolean(default=True)
+    profilers = fields.Boolean(default=True)
+    metric_calculations = fields.Boolean(default=True)
+
+
 class ConcurrencyConfig(DictDot):
     """WARNING: This class is experimental."""

@@ -1173,8 +1191,25 @@ class DataContextConfigSchema(Schema):
     )
     config_variables_file_path = fields.Str(allow_none=True)
     anonymous_usage_statistics = fields.Nested(AnonymizedUsageStatisticsConfigSchema)
+    progress_bars = fields.Nested(
+        ProgressBarsConfigSchema, required=False, allow_none=True
+    )
     concurrency = fields.Nested(ConcurrencyConfigSchema)

+    # To ensure backwards compatibility, new options must be "opt-in":
+    # if a user has not explicitly configured the value, it will be None and will be wiped by the post_dump hook
+    REMOVE_KEYS_IF_NONE = [
+        "progress_bars",  # 0.13.46
+    ]
+
+    @post_dump
+    def remove_keys_if_none(self, data: dict, **kwargs) -> dict:
+        data = copy.deepcopy(data)
+        for key in self.REMOVE_KEYS_IF_NONE:
+            if key in data and data[key] is None:
+                data.pop(key)
+        return data
+
     # noinspection PyMethodMayBeStatic
     # noinspection PyUnusedLocal
     def handle_error(self, exc, data, **kwargs):
@@ -1783,6 +1818,7 @@ def __init__(
         store_backend_defaults: Optional[BaseStoreBackendDefaults] = None,
         commented_map: Optional[CommentedMap] = None,
         concurrency: Optional[Union[ConcurrencyConfig, Dict]] = None,
+        progress_bars: Optional[ProgressBarsConfig] = None,
     ):
         # Set defaults
         if config_version is None:
@@ -1834,6 +1870,7 @@ def __init__(
         elif isinstance(concurrency, dict):
             concurrency = ConcurrencyConfig(**concurrency)
         self.concurrency: ConcurrencyConfig = concurrency
+        self.progress_bars = progress_bars

         super().__init__(commented_map=commented_map)

@@ -2429,3 +2466,4 @@ class CheckpointValidationConfigSchema(Schema):
 notebookConfigSchema = NotebookConfigSchema()
 checkpointConfigSchema = CheckpointConfigSchema()
 concurrencyConfigSchema = ConcurrencyConfigSchema()
+progressBarsConfigSchema = ProgressBarsConfigSchema()
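
Reviewer note: a minimal usage sketch of the new option (not part of this diff). The YAML keys mirror ProgressBarsConfigSchema above, and every flag defaults to True when omitted:

    # In great_expectations.yml, a user could opt out selectively:
    #
    #   progress_bars:
    #     globally: true
    #     profilers: false
    #     metric_calculations: false

    from great_expectations.data_context.types.base import ProgressBarsConfig

    # Programmatic equivalent; the omitted "globally" flag keeps its default.
    config = ProgressBarsConfig(profilers=False, metric_calculations=False)
    assert config.globally is True
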
diff --git a/great_expectations/profile/user_configurable_profiler.py b/great_expectations/profile/user_configurable_profiler.py
index db11dcb1ee2a..51559236e698 100644
--- a/great_expectations/profile/user_configurable_profiler.py
+++ b/great_expectations/profile/user_configurable_profiler.py
@@ -110,7 +110,9 @@ def __init__(
         self.profile_dataset = profile_dataset
         assert isinstance(self.profile_dataset, (Batch, Dataset, Validator))

+        context: Optional["DataContext"] = None
         if isinstance(self.profile_dataset, Batch):
+            context = self.profile_dataset.data_context
             self.profile_dataset = Validator(
                 execution_engine=self.profile_dataset.data.execution_engine,
                 batches=[self.profile_dataset],
@@ -119,12 +121,23 @@
                 MetricConfiguration("table.columns", {})
             )
         elif isinstance(self.profile_dataset, Validator):
+            context = self.profile_dataset.data_context
             self.all_table_columns = self.profile_dataset.get_metric(
                 MetricConfiguration("table.columns", {})
             )
         else:
             self.all_table_columns = self.profile_dataset.get_table_columns()

+        # Check to see if the user has disabled progress bars
+        self._enable_progress_bars = True
+        if context:
+            progress_bars = context.progress_bars
+            if progress_bars:
+                if "globally" in progress_bars:
+                    self._enable_progress_bars = progress_bars["globally"]
+                if "profilers" in progress_bars:
+                    self._enable_progress_bars = progress_bars["profilers"]
+
         self.semantic_types_dict = semantic_types_dict
         assert isinstance(self.semantic_types_dict, (dict, type(None)))

@@ -297,7 +310,10 @@ def _build_expectation_suite_from_semantic_types_dict(self):
         )

         with tqdm(
-            desc="Profiling Columns", total=len(self.column_info), delay=5
+            desc="Profiling Columns",
+            total=len(self.column_info),
+            delay=5,
+            disable=not self._enable_progress_bars,
         ) as pbar:
             for column_name, column_info in self.column_info.items():
                 pbar.set_postfix_str(f"Column={column_name}")
@@ -342,7 +358,12 @@ def _profile_and_build_expectation_suite(self):

         self._build_expectations_table(profile_dataset=self.profile_dataset)

-        with tqdm(desc="Profiling", total=len(self.column_info), delay=5) as pbar:
+        with tqdm(
+            desc="Profiling",
+            total=len(self.column_info),
+            delay=5,
+            disable=not self._enable_progress_bars,
+        ) as pbar:
             for column_name, column_info in self.column_info.items():
                 pbar.set_postfix_str(f"Column={column_name}")
                 data_type = column_info.get("type")
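
The profiler changes lean on tqdm's own disable flag rather than branching around the bar, so the with-block stays structurally identical in both modes. A standalone sketch of that behavior (plain tqdm, no Great Expectations dependencies):

    from tqdm import tqdm

    # With disable=True, iteration proceeds normally but nothing is rendered;
    # delay=5 additionally suppresses bars for loops that finish within 5 seconds.
    for _ in tqdm(range(3), desc="Profiling", delay=5, disable=True):
        pass
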
desc="Calculating Metrics", - disable=len(graph.edges) < 3, + disable=disable, ) pbar.update(0) diff --git a/tests/profile/test_user_configurable_profiler_v3_batch_request.py b/tests/profile/test_user_configurable_profiler_v3_batch_request.py index 88d2d679de9d..0b5996884fea 100644 --- a/tests/profile/test_user_configurable_profiler_v3_batch_request.py +++ b/tests/profile/test_user_configurable_profiler_v3_batch_request.py @@ -10,6 +10,8 @@ import great_expectations as ge from great_expectations.core.batch import Batch, RuntimeBatchRequest from great_expectations.core.util import get_or_create_spark_application +from great_expectations.data_context.data_context import DataContext +from great_expectations.data_context.types.base import ProgressBarsConfig from great_expectations.data_context.util import file_relative_path from great_expectations.execution_engine import SqlAlchemyExecutionEngine from great_expectations.execution_engine.sqlalchemy_batch_data import ( @@ -1187,3 +1189,48 @@ def test_expect_compound_columns_to_be_unique( } assert expected_expectations == expectations_from_suite + + +@mock.patch("great_expectations.profile.user_configurable_profiler.tqdm") +def test_user_configurable_profiler_progress_bar_config_enabled( + mock_tqdm, cardinality_validator +): + semantic_types = { + "numeric": ["col_few", "col_many", "col_very_many"], + "value_set": ["col_two", "col_very_few"], + } + + profiler = UserConfigurableProfiler( + cardinality_validator, + semantic_types_dict=semantic_types, + ) + + profiler.build_suite() + + assert mock_tqdm.called + assert mock_tqdm.call_count == 1 + + +@mock.patch("great_expectations.data_context.data_context.DataContext") +def test_user_configurable_profiler_progress_bar_config_disabled( + mock_tqdm, cardinality_validator +): + data_context = cardinality_validator.data_context + data_context.project_config_with_variables_substituted.progress_bars = ( + ProgressBarsConfig(profilers=False) + ) + + semantic_types = { + "numeric": ["col_few", "col_many", "col_very_many"], + "value_set": ["col_two", "col_very_few"], + } + + profiler = UserConfigurableProfiler( + cardinality_validator, + semantic_types_dict=semantic_types, + ) + + profiler.build_suite() + + assert not mock_tqdm.called + assert mock_tqdm.call_count == 0 diff --git a/tests/validator/test_validator.py b/tests/validator/test_validator.py index e43f4569c455..19ab708963ac 100644 --- a/tests/validator/test_validator.py +++ b/tests/validator/test_validator.py @@ -20,6 +20,7 @@ from great_expectations.core.expectation_validation_result import ( ExpectationValidationResult, ) +from great_expectations.data_context.types.base import ProgressBarsConfig from great_expectations.datasource.data_connector.batch_filter import ( BatchFilter, build_batch_filter, @@ -31,7 +32,7 @@ from great_expectations.expectations.registry import get_expectation_impl from great_expectations.validator.exception_info import ExceptionInfo from great_expectations.validator.metric_configuration import MetricConfiguration -from great_expectations.validator.validation_graph import ValidationGraph +from great_expectations.validator.validation_graph import MetricEdge, ValidationGraph from great_expectations.validator.validator import ( MAX_METRIC_COMPUTATION_RETRIES, Validator, @@ -1016,3 +1017,41 @@ def test_instantiate_validator_with_a_list_of_batch_requests( assert ve.value.args == ( "Only one of batch_request or batch_request_list may be specified", ) + + +@mock.patch("great_expectations.data_context.data_context.DataContext") 
+@mock.patch("great_expectations.validator.validation_graph.ValidationGraph") +@mock.patch("great_expectations.validator.validator.tqdm") +def test_validator_progress_bar_config_enabled( + mock_tqdm, mock_validation_graph, mock_data_context +): + data_context = mock_data_context() + engine = PandasExecutionEngine() + validator = Validator(engine, data_context=data_context) + + # ValidationGraph is a complex object that requires len > 3 to not trigger tqdm + mock_validation_graph.edges.__len__ = lambda _: 3 + validator.resolve_validation_graph(mock_validation_graph, {}) + + # Still invoked but doesn't actually do anything due to `disabled` + assert mock_tqdm.called is True + assert mock_tqdm.call_args[1]["disable"] is False + + +@mock.patch("great_expectations.data_context.data_context.DataContext") +@mock.patch("great_expectations.validator.validation_graph.ValidationGraph") +@mock.patch("great_expectations.validator.validator.tqdm") +def test_validator_progress_bar_config_disabled( + mock_tqdm, mock_validation_graph, mock_data_context +): + data_context = mock_data_context() + data_context.progress_bars = ProgressBarsConfig(metric_calculations=False) + engine = PandasExecutionEngine() + validator = Validator(engine, data_context=data_context) + + # ValidationGraph is a complex object that requires len > 3 to not trigger tqdm + mock_validation_graph.edges.__len__ = lambda _: 3 + validator.resolve_validation_graph(mock_validation_graph, {}) + + assert mock_tqdm.called is True + assert mock_tqdm.call_args[1]["disable"] is True