Skip to content

Commit

Permalink
[FEATURE] Add YAML config option to disable progress bars (#3794)
Browse files Browse the repository at this point in the history
* feat: Add YAML config option to disable progress bars

* push before adding test descriptions

* updated tests

* Update test_data_context.py

* Update test_data_context.py

* feat: Add granular fields

* chore: Remove util method

* chore: Revert Validator

* feat: Add conditional logic to validator

* feat: Add logic to get progress bar config in UCP

* feat: Add util func

* feat: use DataContext progress_bars attr

* chore: remove unnecessary imports

* feat: move functionality to ProgressBarsConfig

* fix: ensure proper name of key in Validator

* refactor: make methods public to work with method

* feat: use is_enabled instead of disabled

* fix: fix typo of new class

* fix: remove method on new config class

* test: add new field to tests

* feat: update conditional per Tal's comments

* chore: make misc changes after discussion with Tal

* feat: add post_dump hook

* chore: type hint

* docs: add comments to post_dump hook

* test: write tests for user configurable profiler

* test: write tests for validator

* docs: add comment to test

* test: misc cleanup in prep for review

* chore: remove progress bars from test output

* test: fix breaking test

Co-authored-by: Shinnnyshinshin <will@superconductive.com>
Co-authored-by: Don Heppner <donald.heppner@gmail.com>
  • Loading branch information
3 people committed Jan 4, 2022
1 parent 24befd2 commit 5168f73
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 4 deletions.
5 changes: 5 additions & 0 deletions great_expectations/data_context/data_context.py
Expand Up @@ -78,6 +78,7 @@
DataContextConfigDefaults,
DatasourceConfig,
GeCloudConfig,
ProgressBarsConfig,
anonymizedUsageStatisticsSchema,
dataContextConfigSchema,
datasourceConfigSchema,
Expand Down Expand Up @@ -824,6 +825,10 @@ def anonymous_usage_statistics(self):
def concurrency(self) -> ConcurrencyConfig:
    """Concurrency settings resolved from the variable-substituted project config."""
    substituted_config = self.project_config_with_variables_substituted
    return substituted_config.concurrency

@property
def progress_bars(self) -> Optional[ProgressBarsConfig]:
    """Progress-bar settings from the variable-substituted project config, if any were configured."""
    substituted_config = self.project_config_with_variables_substituted
    return substituted_config.progress_bars

@property
def notebooks(self):
    """Notebooks configuration from the variable-substituted project config."""
    substituted_config = self.project_config_with_variables_substituted
    return substituted_config.notebooks
Expand Down
38 changes: 38 additions & 0 deletions great_expectations/data_context/types/base.py
Expand Up @@ -1082,6 +1082,24 @@ def make_notebooks_config(self, data, **kwargs):
return NotebooksConfig(**data)


class ProgressBarsConfig(DictDot):
    """Flags controlling whether tqdm progress bars are displayed.

    Every flag defaults to True (bars shown). ``globally`` is the master
    switch; ``profilers`` and ``metric_calculations`` toggle individual
    features.
    """

    def __init__(
        self,
        globally: bool = True,
        profilers: bool = True,
        metric_calculations: bool = True,
    ) -> None:
        # Assign each flag in declaration order.
        for flag_name, flag_value in (
            ("globally", globally),
            ("profilers", profilers),
            ("metric_calculations", metric_calculations),
        ):
            setattr(self, flag_name, flag_value)


class ProgressBarsConfigSchema(Schema):
    """Marshmallow schema for ``ProgressBarsConfig``; every flag defaults to True (bars enabled)."""

    globally = fields.Boolean(default=True)
    profilers = fields.Boolean(default=True)
    metric_calculations = fields.Boolean(default=True)


class ConcurrencyConfig(DictDot):
"""WARNING: This class is experimental."""

Expand Down Expand Up @@ -1173,8 +1191,25 @@ class DataContextConfigSchema(Schema):
)
config_variables_file_path = fields.Str(allow_none=True)
anonymous_usage_statistics = fields.Nested(AnonymizedUsageStatisticsConfigSchema)
progress_bars = fields.Nested(
ProgressBarsConfigSchema, required=False, allow_none=True
)
concurrency = fields.Nested(ConcurrencyConfigSchema)

# To ensure backwards compatibility, we need to ensure that new options are "opt-in"
# If a user has not explicitly configured the value, it will be None and will be wiped by the post_dump hook
REMOVE_KEYS_IF_NONE = [
"progress_bars", # 0.13.46
]

@post_dump
def remove_keys_if_none(self, data: dict, **kwargs) -> dict:
    """Drop opt-in keys whose value is None so they never appear in dumped config.

    Keeps serialized configs backward compatible: a key the user never
    explicitly configured (still None) is removed rather than written out.
    """
    cleaned = copy.deepcopy(data)
    for key in self.REMOVE_KEYS_IF_NONE:
        # `.get` with a non-None sentinel distinguishes "absent" from "None".
        if cleaned.get(key, False) is None:
            del cleaned[key]
    return cleaned

# noinspection PyMethodMayBeStatic
# noinspection PyUnusedLocal
def handle_error(self, exc, data, **kwargs):
Expand Down Expand Up @@ -1783,6 +1818,7 @@ def __init__(
store_backend_defaults: Optional[BaseStoreBackendDefaults] = None,
commented_map: Optional[CommentedMap] = None,
concurrency: Optional[Union[ConcurrencyConfig, Dict]] = None,
progress_bars: Optional[ProgressBarsConfig] = None,
):
# Set defaults
if config_version is None:
Expand Down Expand Up @@ -1834,6 +1870,7 @@ def __init__(
elif isinstance(concurrency, dict):
concurrency = ConcurrencyConfig(**concurrency)
self.concurrency: ConcurrencyConfig = concurrency
self.progress_bars = progress_bars

super().__init__(commented_map=commented_map)

Expand Down Expand Up @@ -2429,3 +2466,4 @@ class CheckpointValidationConfigSchema(Schema):
# Module-level singleton instances of each config schema, shared across the package.
notebookConfigSchema = NotebookConfigSchema()
checkpointConfigSchema = CheckpointConfigSchema()
concurrencyConfigSchema = ConcurrencyConfigSchema()
progressBarsConfigSchema = ProgressBarsConfigSchema()
25 changes: 23 additions & 2 deletions great_expectations/profile/user_configurable_profiler.py
Expand Up @@ -110,7 +110,9 @@ def __init__(
self.profile_dataset = profile_dataset
assert isinstance(self.profile_dataset, (Batch, Dataset, Validator))

context: Optional["DataContext"] = None
if isinstance(self.profile_dataset, Batch):
context = self.profile_dataset.data_context
self.profile_dataset = Validator(
execution_engine=self.profile_dataset.data.execution_engine,
batches=[self.profile_dataset],
Expand All @@ -119,12 +121,23 @@ def __init__(
MetricConfiguration("table.columns", {})
)
elif isinstance(self.profile_dataset, Validator):
context = self.profile_dataset.data_context
self.all_table_columns = self.profile_dataset.get_metric(
MetricConfiguration("table.columns", {})
)
else:
self.all_table_columns = self.profile_dataset.get_table_columns()

# Check to see if the user has disabled progress bars
self._enable_progress_bars = True
if context:
progress_bars = context.progress_bars
if progress_bars:
if "globally" in progress_bars:
self._enable_progress_bars = progress_bars["globally"]
if "profilers" in progress_bars:
self._enable_progress_bars = progress_bars["profilers"]

self.semantic_types_dict = semantic_types_dict
assert isinstance(self.semantic_types_dict, (dict, type(None)))

Expand Down Expand Up @@ -297,7 +310,10 @@ def _build_expectation_suite_from_semantic_types_dict(self):
)

with tqdm(
desc="Profiling Columns", total=len(self.column_info), delay=5
desc="Profiling Columns",
total=len(self.column_info),
delay=5,
disable=not self._enable_progress_bars,
) as pbar:
for column_name, column_info in self.column_info.items():
pbar.set_postfix_str(f"Column={column_name}")
Expand Down Expand Up @@ -342,7 +358,12 @@ def _profile_and_build_expectation_suite(self):

self._build_expectations_table(profile_dataset=self.profile_dataset)

with tqdm(desc="Profiling", total=len(self.column_info), delay=5) as pbar:
with tqdm(
desc="Profiling",
total=len(self.column_info),
delay=5,
disable=not self._enable_progress_bars,
) as pbar:
for column_name, column_info in self.column_info.items():
pbar.set_postfix_str(f"Column={column_name}")
data_type = column_info.get("type")
Expand Down
20 changes: 19 additions & 1 deletion great_expectations/validator/validator.py
Expand Up @@ -212,6 +212,10 @@ def __dir__(self):

return list(combined_dir)

@property
def data_context(self) -> Optional["DataContext"]:
    """The DataContext this Validator was constructed with, or None when standalone."""
    context = self._data_context
    return context

@property
def expose_dataframe_methods(self) -> bool:
return self._expose_dataframe_methods
Expand Down Expand Up @@ -690,12 +694,26 @@ def resolve_validation_graph(
validation_graph=graph, metrics=metrics
)

# Check to see if the user has disabled progress bars
disable = False
if self._data_context:
progress_bars = self._data_context.progress_bars
# If progress_bars are not present, assume we want them enabled
if progress_bars is not None:
if "globally" in progress_bars:
disable = not progress_bars["globally"]
if "metric_calculations" in progress_bars:
disable = not progress_bars["metric_calculations"]

if len(graph.edges) < 3:
disable = True

if pbar is None:
# noinspection PyProtectedMember,SpellCheckingInspection
pbar = tqdm(
total=len(ready_metrics) + len(needed_metrics),
desc="Calculating Metrics",
disable=len(graph.edges) < 3,
disable=disable,
)
pbar.update(0)

Expand Down
47 changes: 47 additions & 0 deletions tests/profile/test_user_configurable_profiler_v3_batch_request.py
Expand Up @@ -10,6 +10,8 @@
import great_expectations as ge
from great_expectations.core.batch import Batch, RuntimeBatchRequest
from great_expectations.core.util import get_or_create_spark_application
from great_expectations.data_context.data_context import DataContext
from great_expectations.data_context.types.base import ProgressBarsConfig
from great_expectations.data_context.util import file_relative_path
from great_expectations.execution_engine import SqlAlchemyExecutionEngine
from great_expectations.execution_engine.sqlalchemy_batch_data import (
Expand Down Expand Up @@ -1187,3 +1189,48 @@ def test_expect_compound_columns_to_be_unique(
}

assert expected_expectations == expectations_from_suite


@mock.patch("great_expectations.profile.user_configurable_profiler.tqdm")
def test_user_configurable_profiler_progress_bar_config_enabled(
    mock_tqdm, cardinality_validator
):
    """With no progress-bars config present, the profiler invokes tqdm exactly once."""
    profiler = UserConfigurableProfiler(
        cardinality_validator,
        semantic_types_dict={
            "numeric": ["col_few", "col_many", "col_very_many"],
            "value_set": ["col_two", "col_very_few"],
        },
    )

    profiler.build_suite()

    # Equivalent to asserting `called` and `call_count == 1`.
    mock_tqdm.assert_called_once()


@mock.patch("great_expectations.profile.user_configurable_profiler.tqdm")
def test_user_configurable_profiler_progress_bar_config_disabled(
    mock_tqdm, cardinality_validator
):
    """When progress bars are disabled for profilers, tqdm must never be invoked.

    Bug fix: the decorator previously patched ``DataContext`` (copy/paste
    error) while the assertions inspected ``mock_tqdm`` — so the test passed
    vacuously without tqdm ever being patched. Patch tqdm itself, mirroring
    the "enabled" test.
    """
    data_context = cardinality_validator.data_context
    data_context.project_config_with_variables_substituted.progress_bars = (
        ProgressBarsConfig(profilers=False)
    )

    semantic_types = {
        "numeric": ["col_few", "col_many", "col_very_many"],
        "value_set": ["col_two", "col_very_few"],
    }

    profiler = UserConfigurableProfiler(
        cardinality_validator,
        semantic_types_dict=semantic_types,
    )

    profiler.build_suite()

    assert not mock_tqdm.called
    assert mock_tqdm.call_count == 0
41 changes: 40 additions & 1 deletion tests/validator/test_validator.py
Expand Up @@ -20,6 +20,7 @@
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_context.types.base import ProgressBarsConfig
from great_expectations.datasource.data_connector.batch_filter import (
BatchFilter,
build_batch_filter,
Expand All @@ -31,7 +32,7 @@
from great_expectations.expectations.registry import get_expectation_impl
from great_expectations.validator.exception_info import ExceptionInfo
from great_expectations.validator.metric_configuration import MetricConfiguration
from great_expectations.validator.validation_graph import ValidationGraph
from great_expectations.validator.validation_graph import MetricEdge, ValidationGraph
from great_expectations.validator.validator import (
MAX_METRIC_COMPUTATION_RETRIES,
Validator,
Expand Down Expand Up @@ -1016,3 +1017,41 @@ def test_instantiate_validator_with_a_list_of_batch_requests(
assert ve.value.args == (
"Only one of batch_request or batch_request_list may be specified",
)


@mock.patch("great_expectations.data_context.data_context.DataContext")
@mock.patch("great_expectations.validator.validation_graph.ValidationGraph")
@mock.patch("great_expectations.validator.validator.tqdm")
def test_validator_progress_bar_config_enabled(
    mock_tqdm, mock_validation_graph, mock_data_context
):
    """Default config leaves metric-calculation progress bars enabled."""
    validator = Validator(PandasExecutionEngine(), data_context=mock_data_context())

    # Graphs with fewer than 3 edges force-disable the bar, so report exactly 3.
    mock_validation_graph.edges.__len__ = lambda _: 3
    validator.resolve_validation_graph(mock_validation_graph, {})

    # tqdm is still constructed; disable=False means it will actually render.
    assert mock_tqdm.called is True
    _, called_kwargs = mock_tqdm.call_args
    assert called_kwargs["disable"] is False


@mock.patch("great_expectations.data_context.data_context.DataContext")
@mock.patch("great_expectations.validator.validation_graph.ValidationGraph")
@mock.patch("great_expectations.validator.validator.tqdm")
def test_validator_progress_bar_config_disabled(
    mock_tqdm, mock_validation_graph, mock_data_context
):
    """Setting metric_calculations=False makes the validator pass disable=True to tqdm."""
    data_context = mock_data_context()
    data_context.progress_bars = ProgressBarsConfig(metric_calculations=False)
    validator = Validator(PandasExecutionEngine(), data_context=data_context)

    # Graphs with fewer than 3 edges force-disable the bar, so report exactly 3.
    mock_validation_graph.edges.__len__ = lambda _: 3
    validator.resolve_validation_graph(mock_validation_graph, {})

    assert mock_tqdm.called is True
    _, called_kwargs = mock_tqdm.call_args
    assert called_kwargs["disable"] is True

0 comments on commit 5168f73

Please sign in to comment.