From 1a6b8d5a35a1432c1414b08906ab747c85776068 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Tue, 23 Jul 2019 21:46:48 -0400 Subject: [PATCH 1/5] Update warning message to use logging and logging setup. --- great_expectations/data_asset/data_asset.py | 163 +++++++++--------- great_expectations/jupyter_ux/__init__.py | 38 ++-- .../profile/basic_dataset_profiler.py | 1 + tests/test_jupyter_ux.py | 39 +++++ 4 files changed, 147 insertions(+), 94 deletions(-) diff --git a/great_expectations/data_asset/data_asset.py b/great_expectations/data_asset/data_asset.py index 3940f982659c..98c55dc37fa3 100644 --- a/great_expectations/data_asset/data_asset.py +++ b/great_expectations/data_asset/data_asset.py @@ -23,7 +23,8 @@ get_empty_expectation_suite ) -logger = logging.getLogger("DataAsset") +logger = logging.getLogger(__name__) +logging.captureWarnings(True) class DataAsset(object): @@ -50,9 +51,12 @@ def __init__(self, *args, **kwargs): data_context = kwargs.pop("data_context", None) batch_kwargs = kwargs.pop("batch_kwargs", None) if "autoinspect_func" in kwargs: - warnings.warn("Autoinspect_func is no longer supported; use a profiler instead (migration is easy!).") + warnings.warn("Autoinspect_func is no longer supported; use a profiler instead (migration is easy!).", + category=DeprecationWarning) super(DataAsset, self).__init__(*args, **kwargs) - self._interactive_evaluation = interactive_evaluation + self._config = { + "interactive_evaluation": interactive_evaluation + } self._initialize_expectations( expectation_suite=expectation_suite, data_asset_name=data_asset_name, @@ -60,6 +64,7 @@ def __init__(self, *args, **kwargs): ) self._data_context = data_context self._batch_kwargs = batch_kwargs + if profiler is not None: profiler.profile(self) @@ -202,7 +207,7 @@ def wrapper(self, *args, **kwargs): exception_message = None # Finally, execute the expectation method itself - if self._interactive_evaluation: + if self._config.get("interactive_evaluation", True): try: return_obj = func(self, **evaluation_args) @@ -365,7 +370,7 @@ def _append_expectation(self, expectation_config): def _copy_and_clean_up_expectation(self, expectation, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, ): """Returns copy of `expectation` without `success_on_last_run` and other specified key-value pairs removed @@ -378,8 +383,8 @@ def _copy_and_clean_up_expectation(self, The expectation to copy and clean. discard_result_format_kwargs (boolean): \ if True, will remove the kwarg `output_format` key-value pair from the copied expectation. - discard_include_configs_kwargs (boolean): - if True, will remove the kwarg `include_configs` key-value pair from the copied expectation. + discard_include_config_kwargs (boolean): + if True, will remove the kwarg `include_config` key-value pair from the copied expectation. discard_catch_exceptions_kwargs (boolean): if True, will remove the kwarg `catch_exceptions` key-value pair from the copied expectation. @@ -396,10 +401,10 @@ def _copy_and_clean_up_expectation(self, del new_expectation["kwargs"]["result_format"] # discards["result_format"] += 1 - if discard_include_configs_kwargs: - if "include_configs" in new_expectation["kwargs"]: - del new_expectation["kwargs"]["include_configs"] - # discards["include_configs"] += 1 + if discard_include_config_kwargs: + if "include_config" in new_expectation["kwargs"]: + del new_expectation["kwargs"]["include_config"] + # discards["include_config"] += 1 if discard_catch_exceptions_kwargs: if "catch_exceptions" in new_expectation["kwargs"]: @@ -412,7 +417,7 @@ def _copy_and_clean_up_expectations_from_indexes( self, match_indexes, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, ): """Copies and cleans all expectations provided by their index in DataAsset._expectation_suite.expectations. @@ -425,8 +430,8 @@ def _copy_and_clean_up_expectations_from_indexes( Index numbers of the expectations from `expectation_config.expectations` to be copied and cleaned. discard_result_format_kwargs (boolean): \ if True, will remove the kwarg `output_format` key-value pair from the copied expectation. - discard_include_configs_kwargs (boolean): - if True, will remove the kwarg `include_configs` key-value pair from the copied expectation. + discard_include_config_kwargs (boolean): + if True, will remove the kwarg `include_config` key-value pair from the copied expectation. discard_catch_exceptions_kwargs (boolean): if True, will remove the kwarg `catch_exceptions` key-value pair from the copied expectation. @@ -443,7 +448,7 @@ def _copy_and_clean_up_expectations_from_indexes( self._copy_and_clean_up_expectation( self._expectation_suite.expectations[i], discard_result_format_kwargs, - discard_include_configs_kwargs, + discard_include_config_kwargs, discard_catch_exceptions_kwargs, ) ) @@ -498,7 +503,7 @@ def find_expectations(self, column=None, expectation_kwargs=None, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, ): """Find matching expectations within _expectation_config. @@ -508,8 +513,8 @@ def find_expectations(self, expectation_kwargs=None : A dictionary of kwargs to match against. discard_result_format_kwargs=True : In returned expectation object(s), \ suppress the `result_format` parameter. - discard_include_configs_kwargs=True : In returned expectation object(s), \ - suppress the `include_configs` parameter. + discard_include_config_kwargs=True : In returned expectation object(s), \ + suppress the `include_config` parameter. discard_catch_exceptions_kwargs=True : In returned expectation object(s), \ suppress the `catch_exceptions` parameter. @@ -527,7 +532,7 @@ def find_expectations(self, return self._copy_and_clean_up_expectations_from_indexes( match_indexes, discard_result_format_kwargs, - discard_include_configs_kwargs, + discard_include_config_kwargs, discard_catch_exceptions_kwargs, ) @@ -555,7 +560,7 @@ def remove_expectation(self, If remove_expectation doesn't find any matches, it raises a ValueError. If remove_expectation finds more than one matches and remove_multiple_matches!=True, it raises a ValueError. If dry_run=True, then `remove_expectation` acts as a thin layer to find_expectations, with the default \ - values for discard_result_format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs + values for discard_result_format_kwargs, discard_include_config_kwargs, and discard_catch_exceptions_kwargs """ match_indexes = self.find_expectation_indexes( @@ -602,7 +607,6 @@ def discard_failing_expectations(self): for item in res: self.remove_expectation(expectation_type=item['expectation_config']['expectation_type'], expectation_kwargs=item['expectation_config']['kwargs']) -# print("WARNING: Removed %s expectations that were 'False'" % len(res)) warnings.warn( "Removed %s expectations that were 'False'" % len(res)) @@ -645,7 +649,7 @@ def set_default_expectation_argument(self, argument, value): def get_expectations_config(self, discard_failed_expectations=True, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, suppress_warnings=False ): @@ -654,14 +658,15 @@ def get_expectations_config(self, return self.get_expectation_suite( discard_failed_expectations, discard_result_format_kwargs, - discard_include_configs_kwargs, + discard_include_config_kwargs, discard_catch_exceptions_kwargs, - suppress_warnings) + suppress_warnings + ) def get_expectation_suite(self, discard_failed_expectations=True, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, suppress_warnings=False ): @@ -672,23 +677,21 @@ def get_expectation_suite(self, Only include expectations with success_on_last_run=True in the exported config. Defaults to `True`. discard_result_format_kwargs (boolean): \ In returned expectation objects, suppress the `result_format` parameter. Defaults to `True`. - discard_include_configs_kwargs (boolean): \ - In returned expectation objects, suppress the `include_configs` parameter. Defaults to `True`. + discard_include_config_kwargs (boolean): \ + In returned expectation objects, suppress the `include_config` parameter. Defaults to `True`. discard_catch_exceptions_kwargs (boolean): \ In returned expectation objects, suppress the `catch_exceptions` parameter. Defaults to `True`. - suppress_warnings (boolean): \ - If true, do not print warning message about information discarded before return Returns: - An expectation config. + An expectation suite. Note: - get_expectation_suite does not affect the underlying config at all. The returned config is a copy of \ - _expectation_suite, not the original object. + get_expectation_suite does not affect the underlying expectation suite at all. The returned suite is a \ + copy of _expectation_suite, not the original object. """ - config = dict(self._expectation_suite) - config = copy.deepcopy(config) - expectations = config["expectations"] + + expectation_suite = copy.deepcopy(dict(self._expectation_suite)) + expectations = expectation_suite["expectations"] discards = defaultdict(int) @@ -707,6 +710,13 @@ def get_expectation_suite(self, expectations = new_expectations + message = "%d expectation(s) included in expectation_suite." % len(expectations) + + if discards["failed_expectations"] > 0 and not suppress_warnings: + message += " Omitting %d expectations that failed on their last run; set " \ + "discard_failed_expectations=False to include them." \ + % discards["failed_expectations"] + for expectation in expectations: # FIXME: Factor this out into a new function. The logic is duplicated in remove_expectation, # which calls _copy_and_clean_up_expectation @@ -718,47 +728,39 @@ def get_expectation_suite(self, del expectation["kwargs"]["result_format"] discards["result_format"] += 1 - if discard_include_configs_kwargs: - if "include_configs" in expectation["kwargs"]: - del expectation["kwargs"]["include_configs"] - discards["include_configs"] += 1 + if discard_include_config_kwargs: + if "include_config" in expectation["kwargs"]: + del expectation["kwargs"]["include_config"] + discards["include_config"] += 1 if discard_catch_exceptions_kwargs: if "catch_exceptions" in expectation["kwargs"]: del expectation["kwargs"]["catch_exceptions"] discards["catch_exceptions"] += 1 - if not suppress_warnings: - if any([discard_failed_expectations, - discard_result_format_kwargs, - discard_include_configs_kwargs, - discard_catch_exceptions_kwargs]): - print("WARNING: get_expectation_suite discarded") - if discard_failed_expectations: - print("\t%d failing expectations" % - discards["failed_expectations"]) - if discard_result_format_kwargs: - print("\t%d result_format kwargs" % - discards["result_format"]) - if discard_include_configs_kwargs: - print("\t%d include_configs kwargs" % - discards["include_configs"]) - if discard_catch_exceptions_kwargs: - print("\t%d catch_exceptions kwargs" % - discards["catch_exceptions"]) - print( - "If you wish to change this behavior, please set discard_failed_expectations, discard_result " - "format_kwargs, discard_include_configs_kwargs, and discard_catch_exceptions_kwargs appropriately.") - - config["expectations"] = expectations - return config + settings_message = "" + + if discards["result_format"] > 0 and not suppress_warnings: + settings_message += " result_format" + + if discards["include_config"] > 0 and not suppress_warnings: + settings_message += " include_config" + + if discards["catch_exceptions"] > 0 and not suppress_warnings: + settings_message += " catch_exceptions" + + settings_message += " settings filtered." + + expectation_suite["expectations"] = expectations + logger.info(message + settings_message) + return expectation_suite def save_expectations_config( self, filepath=None, discard_failed_expectations=True, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, suppress_warnings=False ): @@ -766,14 +768,14 @@ def save_expectations_config( "Please use save_expectation_suite instead.", DeprecationWarning) self.save_expectation_suite( filepath, discard_failed_expectations, discard_result_format_kwargs, - discard_include_configs_kwargs, discard_catch_exceptions_kwargs, suppress_warnings) + discard_include_config_kwargs, discard_catch_exceptions_kwargs, suppress_warnings) def save_expectation_suite( self, filepath=None, discard_failed_expectations=True, discard_result_format_kwargs=True, - discard_include_configs_kwargs=True, + discard_include_config_kwargs=True, discard_catch_exceptions_kwargs=True, suppress_warnings=False ): @@ -793,7 +795,7 @@ def save_expectation_suite( discard_result_format_kwargs (boolean): \ If True, the :ref:`result_format` attribute for each expectation is not written to the JSON config \ file. - discard_include_configs_kwargs (boolean): \ + discard_include_config_kwargs (boolean): \ If True, the :ref:`include_config` attribute for each expectation is not written to the JSON config \ file. discard_catch_exceptions_kwargs (boolean): \ @@ -807,7 +809,7 @@ def save_expectation_suite( expectation_suite = self.get_expectation_suite( discard_failed_expectations, discard_result_format_kwargs, - discard_include_configs_kwargs, + discard_include_config_kwargs, discard_catch_exceptions_kwargs, suppress_warnings ) @@ -892,10 +894,10 @@ def validate(self, Raises: AttributeError - if 'catch_exceptions'=None and an expectation throws an AttributeError """ - validate__interactive_evaluation = self._interactive_evaluation - if not self._interactive_evaluation: + validate__interactive_evaluation = self._config.get("interactive_evaluation") + if not validate__interactive_evaluation: # Turn this off for an explicit call to validate - self._interactive_evaluation = True + self._config["interactive_evaluation"] = True # If a different validation data context was provided, override validate__data_context = self._data_context @@ -911,7 +913,7 @@ def validate(self, expectation_suite = self.get_expectation_suite( discard_failed_expectations=False, discard_result_format_kwargs=False, - discard_include_configs_kwargs=False, + discard_include_config_kwargs=False, discard_catch_exceptions_kwargs=False, ) elif isinstance(expectation_suite, string_types): @@ -1037,7 +1039,7 @@ def validate(self, result = data_context.register_validation_results(run_id, result, self) self._data_context = validate__data_context - self._interactive_evaluation = validate__interactive_evaluation + self._config["interactive_evaluation"] = validate__interactive_evaluation return result @@ -1109,7 +1111,7 @@ def _build_evaluation_parameters(self, expectation_args, evaluation_parameters): value["$PARAMETER"]] elif evaluation_parameters is not None and value["$PARAMETER"] in evaluation_parameters: evaluation_args[key] = evaluation_parameters[value['$PARAMETER']] - elif not self._interactive_evaluation: + elif not self._config.get("interactive_evaluation", True): pass else: raise KeyError( @@ -1188,11 +1190,11 @@ def _format_map_output( if 0 < result_format.get('partial_unexpected_count'): try: partial_unexpected_counts = [ - {'value': key, 'count': value} - for key, value - in sorted( - Counter(unexpected_list).most_common(result_format['partial_unexpected_count']), - key=lambda x: (-x[1], x[0])) + {'value': key, 'count': value} + for key, value + in sorted( + Counter(unexpected_list).most_common(result_format['partial_unexpected_count']), + key=lambda x: (-x[1], x[0])) ] except TypeError: partial_unexpected_counts = [ @@ -1283,6 +1285,7 @@ def test_expectation_function(self, function, *args, **kwargs): if PY3: argspec = inspect.getfullargspec(function)[0][1:] else: + # noinspection PyDeprecation argspec = inspect.getargspec(function)[0][1:] new_function = self.expectation(argspec)(function) @@ -1319,5 +1322,5 @@ def _calc_validation_statistics(validation_results): evaluated_expectations=evaluated_expectations, unsuccessful_expectations=unsuccessful_expectations, success=success, - success_percent=success_percent, + success_percent=success_percent ) diff --git a/great_expectations/jupyter_ux/__init__.py b/great_expectations/jupyter_ux/__init__.py index 72ac69312f15..6ce9664ade46 100755 --- a/great_expectations/jupyter_ux/__init__.py +++ b/great_expectations/jupyter_ux/__init__.py @@ -1,16 +1,16 @@ """Utility functions for working with great_expectations within jupyter notebooks or jupyter lab. """ -import json -import os import logging -import great_expectations as ge +import sys + import great_expectations.render as render from datetime import datetime import tzlocal from IPython.core.display import display, HTML + def set_data_source(context, data_source_type=None): """ TODO: Needs a docstring and tests. @@ -74,9 +74,12 @@ def set_data_source(context, data_source_type=None): return data_source_name + def setup_notebook_logging(logger=None): - """ - TODO: Needs a docstring and tests. + """Set up the provided logger for the GE default logging configuration. + + Args: + logger - the logger to configure """ def posix2local(timestamp, tz=tzlocal.get_localzone()): @@ -97,18 +100,18 @@ def formatTime(self, record, datefmt=None): return s if not logger: - logger = logging.getLogger() - chandler = logging.StreamHandler() + logger = logging.getLogger(__name__) + chandler = logging.StreamHandler(stream=sys.stdout) chandler.setLevel(logging.DEBUG) - chandler.setFormatter(Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s", "%Y-%m-%dT%H:%M:%S%z")) + # chandler.setFormatter(Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s", "%Y-%m-%dT%H:%M:%S%z")) + chandler.setFormatter(Formatter("%(asctime)s - %(levelname)s - %(message)s", "%Y-%m-%dT%H:%M:%S%z")) logger.addHandler(chandler) - logger.setLevel(logging.ERROR) - # logger.setLevel(logging.INFO) - logging.debug("test") + logger.setLevel(logging.INFO) + # + # # Filter warnings + # import warnings + # warnings.filterwarnings('ignore') - # Filter warnings - import warnings - warnings.filterwarnings('ignore') def list_available_data_asset_names(context, data_source_name=None): """ @@ -196,6 +199,7 @@ def list_available_data_asset_names(context, data_source_name=None): """ + def display_column_expectations_as_section( expectation_suite, column, @@ -235,6 +239,7 @@ def display_column_expectations_as_section( else: display(HTML(html_to_display)) + def display_column_evrs_as_section( evrs, column, @@ -272,3 +277,8 @@ def display_column_evrs_as_section( return html_to_display else: display(HTML(html_to_display)) + + +# When importing the jupyter_ux module, we set up a preferred logging configuration +logger = logging.getLogger("great_expectations") +setup_notebook_logging(logger) \ No newline at end of file diff --git a/great_expectations/profile/basic_dataset_profiler.py b/great_expectations/profile/basic_dataset_profiler.py index 751fd54a7d31..62a830334084 100644 --- a/great_expectations/profile/basic_dataset_profiler.py +++ b/great_expectations/profile/basic_dataset_profiler.py @@ -5,6 +5,7 @@ logger = logging.getLogger(__name__) + class BasicDatasetProfiler(DatasetProfiler): """BasicDatasetProfiler is inspired by the beloved pandas_profiling project. diff --git a/tests/test_jupyter_ux.py b/tests/test_jupyter_ux.py index 662de98c032f..f7b9373e5b30 100644 --- a/tests/test_jupyter_ux.py +++ b/tests/test_jupyter_ux.py @@ -1,7 +1,11 @@ +import logging +import sys + import great_expectations as ge import great_expectations.jupyter_ux as jux from great_expectations.profile.basic_dataset_profiler import BasicDatasetProfiler + def test_styling_elements_exist(): assert "' assert ".cooltip" in jux.cooltip_style_element + def test_display_column_expectations_as_section(basic_expectation_suite): html_to_display = jux.display_column_expectations_as_section( basic_expectation_suite, @@ -115,6 +120,7 @@ def test_display_column_expectations_as_section(basic_expectation_suite): """ + def test_display_column_evrs_as_section(): #TODO: We should add a fixture that contains EVRs df = ge.read_csv("./tests/test_sets/Titanic.csv") @@ -133,3 +139,36 @@ def test_display_column_evrs_as_section(): assert '
' in html_to_display assert 'Carlsson, Mr Frans Olof' in html_to_display assert '
  • expect_column_values_to_be_in_type_list True
  • ' in html_to_display + + +def test_configure_logging(caplog): + # First, ensure we set the root logger to close-to-jupyter settings (only show warnings) + caplog.set_level(logging.WARNING) + caplog.set_level(logging.WARNING, logger="great_expectations") + + root = logging.getLogger() # root logger + root.info("do_not_show") + + # This df is used only for logging; we don't want to test against different backends + df = ge.dataset.PandasDataset({"a": [1, 2, 3]}) + df.expect_column_to_exist("a") + df.get_expectation_suite() + + res = caplog.text + assert "do_not_show" not in res + + assert "expectation_suite" not in res + caplog.clear() + + # Now use the logging setup from the notebook + logger = logging.getLogger("great_expectations") + jux.setup_notebook_logging(logger) + df = ge.dataset.PandasDataset({"a": [1, 2, 3]}) + df.expect_column_to_exist("a") + df.get_expectation_suite() + + root.info("do_not_show") + res = caplog.text + assert "do_not_show" not in res + + assert "expectation_suite" in res From 6c1503367c01c803cdf9dad3ebdf52776f459569 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Wed, 24 Jul 2019 09:06:40 -0400 Subject: [PATCH 2/5] Add note about log configuration when importing jupyter_ux module --- great_expectations/data_asset/data_asset.py | 2 +- great_expectations/jupyter_ux/__init__.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/great_expectations/data_asset/data_asset.py b/great_expectations/data_asset/data_asset.py index 98c55dc37fa3..95af67b133b0 100644 --- a/great_expectations/data_asset/data_asset.py +++ b/great_expectations/data_asset/data_asset.py @@ -713,7 +713,7 @@ def get_expectation_suite(self, message = "%d expectation(s) included in expectation_suite." % len(expectations) if discards["failed_expectations"] > 0 and not suppress_warnings: - message += " Omitting %d expectations that failed on their last run; set " \ + message += " Omitting %d expectation(s) that failed when last run; set " \ "discard_failed_expectations=False to include them." \ % discards["failed_expectations"] diff --git a/great_expectations/jupyter_ux/__init__.py b/great_expectations/jupyter_ux/__init__.py index 6ce9664ade46..fbf4723bb2c3 100755 --- a/great_expectations/jupyter_ux/__init__.py +++ b/great_expectations/jupyter_ux/__init__.py @@ -100,13 +100,14 @@ def formatTime(self, record, datefmt=None): return s if not logger: - logger = logging.getLogger(__name__) + logger = logging.getLogger("great_expectations") chandler = logging.StreamHandler(stream=sys.stdout) chandler.setLevel(logging.DEBUG) # chandler.setFormatter(Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s", "%Y-%m-%dT%H:%M:%S%z")) chandler.setFormatter(Formatter("%(asctime)s - %(levelname)s - %(message)s", "%Y-%m-%dT%H:%M:%S%z")) logger.addHandler(chandler) logger.setLevel(logging.INFO) + logger.info("Great Expectations logging enabled at INFO level by JupyterUX module.") # # # Filter warnings # import warnings @@ -281,4 +282,4 @@ def display_column_evrs_as_section( # When importing the jupyter_ux module, we set up a preferred logging configuration logger = logging.getLogger("great_expectations") -setup_notebook_logging(logger) \ No newline at end of file +setup_notebook_logging(logger) From 53113b3712ba14918c73dc5cc14ec6523e9510e5 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Wed, 24 Jul 2019 09:40:43 -0400 Subject: [PATCH 3/5] Update tests to reflect new name for include_config --- great_expectations/dataset/sparkdf_dataset.py | 2 +- great_expectations/dataset/sqlalchemy_dataset.py | 2 +- tests/test_data_asset.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/great_expectations/dataset/sparkdf_dataset.py b/great_expectations/dataset/sparkdf_dataset.py index 262e07744754..3bc55bee6b17 100644 --- a/great_expectations/dataset/sparkdf_dataset.py +++ b/great_expectations/dataset/sparkdf_dataset.py @@ -193,7 +193,7 @@ def head(self, n=5): discard_failed_expectations=False, discard_result_format_kwargs=False, discard_catch_exceptions_kwargs=False, - discard_include_configs_kwargs=False + discard_include_config_kwargs=False ) ) diff --git a/great_expectations/dataset/sqlalchemy_dataset.py b/great_expectations/dataset/sqlalchemy_dataset.py index f38345cff55a..0616c613e8a9 100644 --- a/great_expectations/dataset/sqlalchemy_dataset.py +++ b/great_expectations/dataset/sqlalchemy_dataset.py @@ -241,7 +241,7 @@ def head(self, n=5): discard_failed_expectations=False, discard_result_format_kwargs=False, discard_catch_exceptions_kwargs=False, - discard_include_configs_kwargs=False + discard_include_config_kwargs=False ) ) diff --git a/tests/test_data_asset.py b/tests/test_data_asset.py index 386b97ad3ca8..cb62afab87fc 100644 --- a/tests/test_data_asset.py +++ b/tests/test_data_asset.py @@ -324,7 +324,7 @@ def test_get_and_save_expectation_config(self): self.assertEqual( df.get_expectation_suite( discard_result_format_kwargs=False, - discard_include_configs_kwargs=False, + discard_include_config_kwargs=False, discard_catch_exceptions_kwargs=False, ), output_config, @@ -334,7 +334,7 @@ def test_get_and_save_expectation_config(self): df.save_expectation_suite( directory_name+'/temp3.json', discard_result_format_kwargs=False, - discard_include_configs_kwargs=False, + discard_include_config_kwargs=False, discard_catch_exceptions_kwargs=False, ) temp_file = open(directory_name+'/temp3.json') From 0a690a5c2d44bd9ac1666243ae1045db4e94f35e Mon Sep 17 00:00:00 2001 From: James Campbell Date: Wed, 24 Jul 2019 09:42:37 -0400 Subject: [PATCH 4/5] Match travis config in postgres config for test_sqlalchemy_datasource - remove redundant method --- great_expectations/datasource/sqlalchemy_source.py | 12 ++++-------- tests/datasource/test_datasources.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/great_expectations/datasource/sqlalchemy_source.py b/great_expectations/datasource/sqlalchemy_source.py index 6a1a8aefd2db..18036b17ce55 100644 --- a/great_expectations/datasource/sqlalchemy_source.py +++ b/great_expectations/datasource/sqlalchemy_source.py @@ -11,11 +11,10 @@ try: import sqlalchemy - from sqlalchemy import create_engine, MetaData + from sqlalchemy import create_engine except ImportError: sqlalchemy = None create_engine = None - MetaData = None logger.debug("Unable to import sqlalchemy.") @@ -63,7 +62,9 @@ def __init__(self, name="default", data_context=None, profile=None, generators=N # Otherwise, connect using remaining kwargs else: - self._connect(self._get_sqlalchemy_connection_options(**kwargs)) + self.engine = create_engine(self._get_sqlalchemy_connection_options(**kwargs)) + self.engine.connect() + except sqlalchemy.exc.OperationalError as sqlalchemy_error: raise DatasourceInitializationError(self._name, str(sqlalchemy_error)) @@ -82,11 +83,6 @@ def _get_sqlalchemy_connection_options(self, **kwargs): options = sqlalchemy.engine.url.URL(drivername, **credentials) return options - def _connect(self, options): - self.engine = create_engine(options) - self.engine.connect() - self.meta = MetaData() - def _get_generator_class(self, type_): if type_ == "queries": return QueryGenerator diff --git a/tests/datasource/test_datasources.py b/tests/datasource/test_datasources.py index ce9e2fb60e9e..9f88c09598e3 100644 --- a/tests/datasource/test_datasources.py +++ b/tests/datasource/test_datasources.py @@ -107,7 +107,7 @@ def test_create_sqlalchemy_datasource(data_context): type_ = "sqlalchemy" connection_kwargs = { "drivername": "postgresql", - "username": "", + "username": "postgres", "password": "", "host": "localhost", "port": 5432, From 64ab69e1b76b7d58a4916da2cf4bfc79ceacdea6 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Wed, 24 Jul 2019 09:43:07 -0400 Subject: [PATCH 5/5] Add pandas support for _config dict on data_asset --- great_expectations/dataset/pandas_dataset.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/great_expectations/dataset/pandas_dataset.py b/great_expectations/dataset/pandas_dataset.py index 0ea1402ffeab..25f25fce13cb 100644 --- a/great_expectations/dataset/pandas_dataset.py +++ b/great_expectations/dataset/pandas_dataset.py @@ -2,20 +2,16 @@ import inspect import json -import re from datetime import datetime #, timedelta # Add for case of testing timedelta types import logging -import io from datetime import datetime from functools import wraps import jsonschema -import sys import numpy as np import pandas as pd from dateutil.parser import parse from scipy import stats from six import PY3, integer_types, string_types -from numbers import Number from .dataset import Dataset from great_expectations.data_asset.util import DocInherit, parse_result_format @@ -25,6 +21,7 @@ logger = logging.getLogger(__name__) + class MetaPandasDataset(Dataset): """MetaPandasDataset is a thin layer between Dataset and PandasDataset. @@ -292,6 +289,7 @@ class PandasDataset(MetaPandasDataset, pd.DataFrame): _internal_names = pd.DataFrame._internal_names + [ '_batch_kwargs', '_expectation_suite', + '_config', 'caching', 'default_expectation_args', 'discard_subset_failing_expectations' @@ -310,7 +308,7 @@ def __finalize__(self, other, method=None, **kwargs): self._initialize_expectations(other.get_expectation_suite( discard_failed_expectations=False, discard_result_format_kwargs=False, - discard_include_configs_kwargs=False, + discard_include_config_kwargs=False, discard_catch_exceptions_kwargs=False)) # If other was coerced to be a PandasDataset (e.g. via _constructor call during self.copy() operation) # then it may not have discard_subset_failing_expectations set. Default to self value