Merge branch 'develop' into bugfixes/tests-rob

great-expectations · Aug 19, 2019 · 99344d4 · 99344d4
2 parents b64944f + 0486990
commit 99344d4
Show file tree

Hide file tree

Showing 39 changed files with 1,207 additions and 448 deletions.
diff --git a/docs/roadmap_and_changelog/changelog.rst b/docs/roadmap_and_changelog/changelog.rst
@@ -2,7 +2,20 @@
 
 v.0.7.7
 -----------------
-
+* Fix databricks generator (thanks @sspitz3!)
+* Add support for reader_method = "delta" to SparkDFDatasource
+* Standardize the way that plugin module loading works. DataContext will begin to use the new-style class and plugin
+  identification moving forward; yml configs should specify class_name and module_name (with module_name optional for
+  GE types). For now, it is possible to use the "type" parameter in configuration (as before).
+* Add support for custom data_asset_type to all datasources
+* Fix several memory and performance issues in SparkDFDataset.
+ - Use only distinct value count instead of bringing values to driver
+ - Migrate away from UDF for set membership, nullity, and regex expectations
+* Fix several UI issues in the data_documentation
+ - Broken link on Home
+ - Scroll follows navigation properly
+* Add support for strict_min and strict_max to inequality-based expectations to allow strict inequality checks
+  (thanks @RoyalTS!)
 
 v.0.7.6
 -----------------

diff --git a/great_expectations/data_asset/data_asset.py b/great_expectations/data_asset/data_asset.py
@@ -959,7 +959,31 @@ def validate(self,
             warnings.warn(
                 "WARNING: No great_expectations version found in configuration object.")
 
-        for expectation in expectation_suite['expectations']:
+
+
+        ###
+        # This is an early example of logic that will become part of the ValidationOperator,
+        # which would be aware of dataset semantics.
+        # Added now simply so that expectations on the same column are evaluated together.
+        ###
+
+        # Group expectations by column
+        columns = {}
+
+        for expectation in expectation_suite["expectations"]:
+            if "column" in expectation["kwargs"]:
+                column = expectation["kwargs"]["column"]
+            else:
+                column = "_nocolumn"
+            if column not in columns:
+                columns[column] = []
+            columns[column].append(expectation)
+
+        expectations_to_evaluate = []
+        for col in columns:
+            expectations_to_evaluate.extend(columns[col])
+
+        for expectation in expectations_to_evaluate:
 
             try:
                 expectation_method = getattr(

diff --git a/great_expectations/data_context/types/__init__.py b/great_expectations/data_context/types/__init__.py
@@ -0,0 +1 @@
+from .configurations import ClassConfig
diff --git a/great_expectations/data_context/types/configurations.py b/great_expectations/data_context/types/configurations.py
@@ -0,0 +1,18 @@
+from ruamel.yaml import YAML, yaml_object
+from great_expectations.types import LooselyTypedDotDict
+yaml = YAML()
+
+
+@yaml_object(yaml)  # register this class with the module-level ruamel YAML instance
+class ClassConfig(LooselyTypedDotDict):  # config entry naming a class and, optionally, the module it lives in
+    _allowed_keys = {  # the only keys declared here; presumably enforced by LooselyTypedDotDict -- TODO confirm
+        "module_name",
+        "class_name"
+    }
+    _required_keys = {  # class_name is listed as required; module_name is not
+        "class_name"
+    }
+    _key_types = {  # both declared keys map to str
+        "module_name": str,
+        "class_name": str
+    }
diff --git a/great_expectations/data_context/util.py b/great_expectations/data_context/util.py
@@ -6,6 +6,7 @@
 import json
 import errno
 from collections import namedtuple
+import six
 
 logger = logging.getLogger(__name__)
 
@@ -121,6 +122,11 @@ def send_slack_notification(validation_json=None):
 
 def safe_mmkdir(directory, exist_ok=True):
     """Simple wrapper since exist_ok is not available in python 2"""
+    if not isinstance(directory, six.string_types):
+        raise TypeError("directory must be of type str, not {0}".format({
+            "directory_type": str(type(directory))
+        }))
+
     if not exist_ok:
         raise ValueError(
             "This wrapper should only be used for exist_ok=True; it is designed to make porting easier later")