Skip to content

Commit

Permalink
[FEATURE] Add YAML config option to disable progress bars (#3794)
Browse files Browse the repository at this point in the history
* feat: Add YAML config option to disable progress bars

* push before adding test descriptions

* updated tests

* Update test_data_context.py

* Update test_data_context.py

* feat: Add granular fields

* chore: Remove util method

* chore: Revert Validator

* feat: Add conditional logic to validator

* feat: Add logic to get progress bar config in UCP

* feat: Add util func

* feat: use DataContext progress_bars attr

* chore: remove unnecessary imports

* feat: move functionality to ProgressBarsConfig

* fix: ensure proper name of key in Validator

* refactor: make methods public to work with method

* feat: use is_enabled instead of disabled

* fix: fix typo of new class

* fix: remove method on new config class

* test: add new field to tests

* feat: update conditional per Tal's comments

* chore: make misc changes after discussion with Tal

* feat: add post_dump hook

* chore: type hint

* docs: add comments to post_dump hook

* test: write tests for user configurable profiler

* test: write tests for validator

* docs: add comment to test

* test: misc cleanup in prep for review

* chore: remove progress bars from test output

* test: fix breaking test

Co-authored-by: Shinnnyshinshin <will@superconductive.com>
Co-authored-by: Don Heppner <donald.heppner@gmail.com>
  • Loading branch information
3 people committed Jan 4, 2022
1 parent 24befd2 commit 5168f73
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 4 deletions.
5 changes: 5 additions & 0 deletions great_expectations/data_context/data_context.py
Expand Up @@ -78,6 +78,7 @@
DataContextConfigDefaults,
DatasourceConfig,
GeCloudConfig,
ProgressBarsConfig,
anonymizedUsageStatisticsSchema,
dataContextConfigSchema,
datasourceConfigSchema,
Expand Down Expand Up @@ -824,6 +825,10 @@ def anonymous_usage_statistics(self):
def concurrency(self) -> ConcurrencyConfig:
    """Concurrency settings resolved from the variable-substituted project config."""
    substituted_config = self.project_config_with_variables_substituted
    return substituted_config.concurrency

@property
def progress_bars(self) -> Optional[ProgressBarsConfig]:
    """Progress-bar settings from the variable-substituted project config, if any were configured."""
    substituted_config = self.project_config_with_variables_substituted
    return substituted_config.progress_bars

@property
def notebooks(self):
    """Notebooks configuration from the variable-substituted project config."""
    substituted_config = self.project_config_with_variables_substituted
    return substituted_config.notebooks
Expand Down
38 changes: 38 additions & 0 deletions great_expectations/data_context/types/base.py
Expand Up @@ -1082,6 +1082,24 @@ def make_notebooks_config(self, data, **kwargs):
return NotebooksConfig(**data)


class ProgressBarsConfig(DictDot):
    """Flags controlling whether tqdm progress bars are displayed.

    Every flag defaults to True (bars shown). ``globally`` is the master
    switch; ``profilers`` and ``metric_calculations`` toggle individual
    features.
    """

    def __init__(
        self,
        globally: bool = True,
        profilers: bool = True,
        metric_calculations: bool = True,
    ) -> None:
        # Assign each flag in declaration order.
        for flag_name, flag_value in (
            ("globally", globally),
            ("profilers", profilers),
            ("metric_calculations", metric_calculations),
        ):
            setattr(self, flag_name, flag_value)


class ProgressBarsConfigSchema(Schema):
    """Marshmallow schema for ``ProgressBarsConfig``; every flag defaults to True (bars enabled)."""

    globally = fields.Boolean(default=True)
    profilers = fields.Boolean(default=True)
    metric_calculations = fields.Boolean(default=True)


class ConcurrencyConfig(DictDot):
"""WARNING: This class is experimental."""

Expand Down Expand Up @@ -1173,8 +1191,25 @@ class DataContextConfigSchema(Schema):
)
config_variables_file_path = fields.Str(allow_none=True)
anonymous_usage_statistics = fields.Nested(AnonymizedUsageStatisticsConfigSchema)
progress_bars = fields.Nested(
ProgressBarsConfigSchema, required=False, allow_none=True
)
concurrency = fields.Nested(ConcurrencyConfigSchema)

# To ensure backwards compatibility, we need to ensure that new options are "opt-in"
# If a user has not explicitly configured the value, it will be None and will be wiped by the post_dump hook
REMOVE_KEYS_IF_NONE = [
"progress_bars", # 0.13.46
]

@post_dump
def remove_keys_if_none(self, data: dict, **kwargs) -> dict:
    """Drop opt-in keys whose value is None so they never appear in dumped config.

    Keeps serialized configs backward compatible: a key the user never
    explicitly configured (still None) is removed rather than written out.
    """
    cleaned = copy.deepcopy(data)
    for key in self.REMOVE_KEYS_IF_NONE:
        # `.get` with a non-None sentinel distinguishes "absent" from "None".
        if cleaned.get(key, False) is None:
            del cleaned[key]
    return cleaned

# noinspection PyMethodMayBeStatic
# noinspection PyUnusedLocal
def handle_error(self, exc, data, **kwargs):
Expand Down Expand Up @@ -1783,6 +1818,7 @@ def __init__(
store_backend_defaults: Optional[BaseStoreBackendDefaults] = None,
commented_map: Optional[CommentedMap] = None,
concurrency: Optional[Union[ConcurrencyConfig, Dict]] = None,
progress_bars: Optional[ProgressBarsConfig] = None,
):
# Set defaults
if config_version is None:
Expand Down Expand Up @@ -1834,6 +1870,7 @@ def __init__(
elif isinstance(concurrency, dict):
concurrency = ConcurrencyConfig(**concurrency)
self.concurrency: ConcurrencyConfig = concurrency
self.progress_bars = progress_bars

super().__init__(commented_map=commented_map)

Expand Down Expand Up @@ -2429,3 +2466,4 @@ class CheckpointValidationConfigSchema(Schema):
# Module-level singleton instances of each config schema, shared across the package.
notebookConfigSchema = NotebookConfigSchema()
checkpointConfigSchema = CheckpointConfigSchema()
concurrencyConfigSchema = ConcurrencyConfigSchema()
progressBarsConfigSchema = ProgressBarsConfigSchema()
25 changes: 23 additions & 2 deletions great_expectations/profile/user_configurable_profiler.py
Expand Up @@ -110,7 +110,9 @@ def __init__(
self.profile_dataset = profile_dataset
assert isinstance(self.profile_dataset, (Batch, Dataset, Validator))

context: Optional["DataContext"] = None
if isinstance(self.profile_dataset, Batch):
context = self.profile_dataset.data_context
self.profile_dataset = Validator(
execution_engine=self.profile_dataset.data.execution_engine,
batches=[self.profile_dataset],
Expand All @@ -119,12 +121,23 @@ def __init__(
MetricConfiguration("table.columns", {})
)
elif isinstance(self.profile_dataset, Validator):
context = self.profile_dataset.data_context
self.all_table_columns = self.profile_dataset.get_metric(
MetricConfiguration("table.columns", {})
)
else:
self.all_table_columns = self.profile_dataset.get_table_columns()

# Check to see if the user has disabled progress bars
self._enable_progress_bars = True
if context:
progress_bars = context.progress_bars
if progress_bars:
if "globally" in progress_bars:
self._enable_progress_bars = progress_bars["globally"]
if "profilers" in progress_bars:
self._enable_progress_bars = progress_bars["profilers"]

self.semantic_types_dict = semantic_types_dict
assert isinstance(self.semantic_types_dict, (dict, type(None)))

Expand Down Expand Up @@ -297,7 +310,10 @@ def _build_expectation_suite_from_semantic_types_dict(self):
)

with tqdm(
desc="Profiling Columns", total=len(self.column_info), delay=5
desc="Profiling Columns",
total=len(self.column_info),
delay=5,
disable=not self._enable_progress_bars,
) as pbar:
for column_name, column_info in self.column_info.items():
pbar.set_postfix_str(f"Column={column_name}")
Expand Down Expand Up @@ -342,7 +358,12 @@ def _profile_and_build_expectation_suite(self):

self._build_expectations_table(profile_dataset=self.profile_dataset)

with tqdm(desc="Profiling", total=len(self.column_info), delay=5) as pbar:
with tqdm(
desc="Profiling",
total=len(self.column_info),
delay=5,
disable=not self._enable_progress_bars,
) as pbar:
for column_name, column_info in self.column_info.items():
pbar.set_postfix_str(f"Column={column_name}")
data_type = column_info.get("type")
Expand Down
20 changes: 19 additions & 1 deletion great_expectations/validator/validator.py
Expand Up @@ -212,6 +212,10 @@ def __dir__(self):

return list(combined_dir)

@property
def data_context(self) -> Optional["DataContext"]:
    """The DataContext this Validator was constructed with, or None when standalone."""
    context = self._data_context
    return context

@property
def expose_dataframe_methods(self) -> bool:
return self._expose_dataframe_methods
Expand Down Expand Up @@ -690,12 +694,26 @@ def resolve_validation_graph(
validation_graph=graph, metrics=metrics
)

# Check to see if the user has disabled progress bars
disable = False
if self._data_context:
progress_bars = self._data_context.progress_bars
# If progress_bars are not present, assume we want them enabled
if progress_bars is not None:
if "globally" in progress_bars:
disable = not progress_bars["globally"]
if "metric_calculations" in progress_bars:
disable = not progress_bars["metric_calculations"]

if len(graph.edges) < 3:
disable = True

if pbar is None:
# noinspection PyProtectedMember,SpellCheckingInspection
pbar = tqdm(
total=len(ready_metrics) + len(needed_metrics),
desc="Calculating Metrics",
disable=len(graph.edges) < 3,
disable=disable,
)
pbar.update(0)

Expand Down
47 changes: 47 additions & 0 deletions tests/profile/test_user_configurable_profiler_v3_batch_request.py
Expand Up @@ -10,6 +10,8 @@
import great_expectations as ge
from great_expectations.core.batch import Batch, RuntimeBatchRequest
from great_expectations.core.util import get_or_create_spark_application
from great_expectations.data_context.data_context import DataContext
from great_expectations.data_context.types.base import ProgressBarsConfig
from great_expectations.data_context.util import file_relative_path
from great_expectations.execution_engine import SqlAlchemyExecutionEngine
from great_expectations.execution_engine.sqlalchemy_batch_data import (
Expand Down Expand Up @@ -1187,3 +1189,48 @@ def test_expect_compound_columns_to_be_unique(
}

assert expected_expectations == expectations_from_suite


@mock.patch("great_expectations.profile.user_configurable_profiler.tqdm")
def test_user_configurable_profiler_progress_bar_config_enabled(
    mock_tqdm, cardinality_validator
):
    """With no progress-bars config present, the profiler invokes tqdm exactly once."""
    profiler = UserConfigurableProfiler(
        cardinality_validator,
        semantic_types_dict={
            "numeric": ["col_few", "col_many", "col_very_many"],
            "value_set": ["col_two", "col_very_few"],
        },
    )

    profiler.build_suite()

    # Equivalent to asserting `called` and `call_count == 1`.
    mock_tqdm.assert_called_once()


@mock.patch("great_expectations.profile.user_configurable_profiler.tqdm")
def test_user_configurable_profiler_progress_bar_config_disabled(
    mock_tqdm, cardinality_validator
):
    """When progress bars are disabled for profilers, tqdm must never be invoked.

    Bug fix: the decorator previously patched ``DataContext`` (copy/paste
    error) while the assertions inspected ``mock_tqdm`` — so the test passed
    vacuously without tqdm ever being patched. Patch tqdm itself, mirroring
    the "enabled" test.
    """
    data_context = cardinality_validator.data_context
    data_context.project_config_with_variables_substituted.progress_bars = (
        ProgressBarsConfig(profilers=False)
    )

    semantic_types = {
        "numeric": ["col_few", "col_many", "col_very_many"],
        "value_set": ["col_two", "col_very_few"],
    }

    profiler = UserConfigurableProfiler(
        cardinality_validator,
        semantic_types_dict=semantic_types,
    )

    profiler.build_suite()

    assert not mock_tqdm.called
    assert mock_tqdm.call_count == 0
41 changes: 40 additions & 1 deletion tests/validator/test_validator.py
Expand Up @@ -20,6 +20,7 @@
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_context.types.base import ProgressBarsConfig
from great_expectations.datasource.data_connector.batch_filter import (
BatchFilter,
build_batch_filter,
Expand All @@ -31,7 +32,7 @@
from great_expectations.expectations.registry import get_expectation_impl
from great_expectations.validator.exception_info import ExceptionInfo
from great_expectations.validator.metric_configuration import MetricConfiguration
from great_expectations.validator.validation_graph import ValidationGraph
from great_expectations.validator.validation_graph import MetricEdge, ValidationGraph
from great_expectations.validator.validator import (
MAX_METRIC_COMPUTATION_RETRIES,
Validator,
Expand Down Expand Up @@ -1016,3 +1017,41 @@ def test_instantiate_validator_with_a_list_of_batch_requests(
assert ve.value.args == (
"Only one of batch_request or batch_request_list may be specified",
)


@mock.patch("great_expectations.data_context.data_context.DataContext")
@mock.patch("great_expectations.validator.validation_graph.ValidationGraph")
@mock.patch("great_expectations.validator.validator.tqdm")
def test_validator_progress_bar_config_enabled(
    mock_tqdm, mock_validation_graph, mock_data_context
):
    """Default config leaves metric-calculation progress bars enabled."""
    validator = Validator(PandasExecutionEngine(), data_context=mock_data_context())

    # Graphs with fewer than 3 edges force-disable the bar, so report exactly 3.
    mock_validation_graph.edges.__len__ = lambda _: 3
    validator.resolve_validation_graph(mock_validation_graph, {})

    # tqdm is still constructed; disable=False means it will actually render.
    assert mock_tqdm.called is True
    _, called_kwargs = mock_tqdm.call_args
    assert called_kwargs["disable"] is False


@mock.patch("great_expectations.data_context.data_context.DataContext")
@mock.patch("great_expectations.validator.validation_graph.ValidationGraph")
@mock.patch("great_expectations.validator.validator.tqdm")
def test_validator_progress_bar_config_disabled(
    mock_tqdm, mock_validation_graph, mock_data_context
):
    """Setting metric_calculations=False makes the validator pass disable=True to tqdm."""
    data_context = mock_data_context()
    data_context.progress_bars = ProgressBarsConfig(metric_calculations=False)
    validator = Validator(PandasExecutionEngine(), data_context=data_context)

    # Graphs with fewer than 3 edges force-disable the bar, so report exactly 3.
    mock_validation_graph.edges.__len__ = lambda _: 3
    validator.resolve_validation_graph(mock_validation_graph, {})

    assert mock_tqdm.called is True
    _, called_kwargs = mock_tqdm.call_args
    assert called_kwargs["disable"] is True

0 comments on commit 5168f73

Please sign in to comment.