Skip to content

Commit

Permalink
Merge branch 'develop' into bugfixes/tests-rob
Browse files Browse the repository at this point in the history
  • Loading branch information
roblim committed Aug 19, 2019
2 parents b64944f + 0486990 commit 99344d4
Show file tree
Hide file tree
Showing 39 changed files with 1,207 additions and 448 deletions.
15 changes: 14 additions & 1 deletion docs/roadmap_and_changelog/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,20 @@

v.0.7.7
-----------------

* Fix databricks generator (thanks @sspitz3!)
* Add support for reader_method = "delta" to SparkDFDatasource
* Standardize the way that plugin module loading works. DataContext will begin to use the new-style class and plugin
identification moving forward; yml configs should specify class_name and module_name (with module_name optional for
GE types). For now, it is possible to use the "type" parameter in configuration (as before).
* Add support for custom data_asset_type to all datasources
* Fix several memory and performance issues in SparkDFDataset.
- Use only distinct value count instead of bringing values to driver
- Migrate away from UDF for set membership, nullity, and regex expectations
* Fix several UI issues in the data_documentation
- Broken link on Home
- Scroll follows navigation properly
* Add support for strict_min and strict_max to inequality-based expectations to allow strict inequality checks
(thanks @RoyalTS!)

v.0.7.6
-----------------
Expand Down
26 changes: 25 additions & 1 deletion great_expectations/data_asset/data_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,31 @@ def validate(self,
warnings.warn(
"WARNING: No great_expectations version found in configuration object.")

for expectation in expectation_suite['expectations']:


###
# This is an early example of what will become part of the ValidationOperator
# This operator would be dataset-semantic aware
# Adding now to simply ensure we can be slightly better at ordering our expectation evaluation
###

# Group expectations by column
columns = {}

for expectation in expectation_suite["expectations"]:
if "column" in expectation["kwargs"]:
column = expectation["kwargs"]["column"]
else:
column = "_nocolumn"
if column not in columns:
columns[column] = []
columns[column].append(expectation)

expectations_to_evaluate = []
for col in columns:
expectations_to_evaluate.extend(columns[col])

for expectation in expectations_to_evaluate:

try:
expectation_method = getattr(
Expand Down
1 change: 1 addition & 0 deletions great_expectations/data_context/types/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .configurations import ClassConfig
18 changes: 18 additions & 0 deletions great_expectations/data_context/types/configurations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from ruamel.yaml import YAML, yaml_object
from great_expectations.types import LooselyTypedDotDict
# Module-level YAML handler; ClassConfig below is registered with it via @yaml_object.
yaml = YAML()


@yaml_object(yaml)
class ClassConfig(LooselyTypedDotDict):
    """Typed config entry holding a ``class_name`` and an optional ``module_name``.

    ``class_name`` is the only required key; ``module_name`` is allowed but
    may be omitted. Both keys are declared with type ``str``. The class is
    registered with the module-level ``yaml`` instance through the
    ``yaml_object`` decorator.
    """

    # Keys that may appear in the config.
    _allowed_keys = {"module_name", "class_name"}

    # Keys that must be present.
    _required_keys = {"class_name"}

    # Declared type for each key.
    _key_types = {"module_name": str, "class_name": str}
6 changes: 6 additions & 0 deletions great_expectations/data_context/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import errno
from collections import namedtuple
import six

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -121,6 +122,11 @@ def send_slack_notification(validation_json=None):

def safe_mmkdir(directory, exist_ok=True):
"""Simple wrapper since exist_ok is not available in python 2"""
if not isinstance(directory, six.string_types):
raise TypeError("directory must be of type str, not {0}".format({
"directory_type": str(type(directory))
}))

if not exist_ok:
raise ValueError(
"This wrapper should only be used for exist_ok=True; it is designed to make porting easier later")
Expand Down

0 comments on commit 99344d4

Please sign in to comment.