Skip to content

Commit

Permalink
Merge fac8b34 into cb2060a
Browse files Browse the repository at this point in the history
  • Loading branch information
jcampbell committed Apr 23, 2019
2 parents cb2060a + fac8b34 commit 92be4a2
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 50 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ For quick, hands-on introductions to Great Expectations' key features, check out
What's the best way to get in touch with the Great Expectations team?
--------------------------------------------------------------------------------

[Issues on GitHub](https://github.com/great-expectations/great_expectations/issues). If you have questions, comments, feature requests, etc., [opening an issue](https://github.com/great-expectations/great_expectations/issues/new) is definitely the best path forward.
If you have questions, comments, feature requests, etc., [opening an issue](https://github.com/great-expectations/great_expectations/issues/new) is definitely the best path forward. We also have a Slack channel: if you email us at <team@greatexpectations.io> with the subject line "SLACK" we'll get you an invite.


Great Expectations doesn't do X. Is it right for my use case?
Expand Down
32 changes: 27 additions & 5 deletions great_expectations/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import sys
import os
import argparse
import logging

from great_expectations import read_csv
from great_expectations import __version__
from great_expectations.dataset import PandasDataset
from great_expectations.dataset import Dataset, PandasDataset
from great_expectations.data_asset import FileDataAsset

logger = logging.getLogger(__name__)

def dispatch(args):
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -69,6 +72,7 @@ def validate(parsed_args):
else:
evaluation_parameters = None

# Use a custom dataasset module and class if provided. Otherwise infer from the config.
if parsed_args["custom_dataset_module"]:
sys.path.insert(0, os.path.dirname(
parsed_args["custom_dataset_module"]))
Expand All @@ -77,14 +81,27 @@ def validate(parsed_args):
custom_module = __import__(module_name)
dataset_class = getattr(
custom_module, parsed_args["custom_dataset_class"])

elif "data_asset_type" in expectations_config:
if expectations_config["data_asset_type"] == "Dataset" or expectations_config["data_asset_type"] == "PandasDataset":
dataset_class = PandasDataset
elif expectations_config["data_asset_type"].endswith("Dataset"):
logger.info("Using PandasDataset to validate dataset of type %s." % expectations_config["data_asset_type"])
dataset_class = PandasDataset
elif expectations_config["data_asset_type"] == "FileDataAsset":
dataset_class = FileDataAsset
else:
logger.critical("Unrecognized data_asset_type %s. You may need to specifcy custom_dataset_module and custom_dataset_class." % expectations_config["data_asset_type"])
return -1
else:
dataset_class = PandasDataset

df = read_csv(data_set, expectations_config=expectations_config,
dataset_class=dataset_class)
if issubclass(dataset_class, Dataset):
da = read_csv(data_set, expectations_config=expectations_config,
dataset_class=dataset_class)
else:
da = dataset_class(data_set, config=expectations_config)

result = df.validate(
result = da.validate(
evaluation_parameters=evaluation_parameters,
result_format=parsed_args["result_format"],
catch_exceptions=parsed_args["catch_exceptions"],
Expand All @@ -103,6 +120,11 @@ def version(parsed_args):


def main():
    """CLI entry point: configure logging, run the dispatcher, and exit with its status."""
    # Emit timestamped INFO-and-above records from this module's logger to stderr.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    logger.addHandler(stream_handler)
    logger.setLevel(logging.INFO)
    # dispatch() returns a process exit code; propagate it via sys.exit.
    sys.exit(dispatch(sys.argv[1:]))

Expand Down
31 changes: 25 additions & 6 deletions great_expectations/data_asset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from functools import wraps
import traceback
import warnings
import logging
from six import PY3, string_types
from collections import namedtuple

Expand All @@ -17,6 +18,7 @@
from great_expectations.data_asset.util import DotDict, recursively_convert_to_json_serializable, parse_result_format
from great_expectations.dataset.autoinspect import columns_exist

logger = logging.getLogger("DataAsset")

class DataAsset(object):

Expand All @@ -35,9 +37,11 @@ def __init__(self, *args, **kwargs):
"""
autoinspect_func = kwargs.pop("autoinspect_func", None)
initial_config = kwargs.pop("config", None)
data_asset_name = kwargs.pop("data_asset_name", None)

super(DataAsset, self).__init__(*args, **kwargs)
self._initialize_expectations()
self._initialize_expectations(config=initial_config, data_asset_name=data_asset_name)
if autoinspect_func is not None:
autoinspect_func(self)

Expand Down Expand Up @@ -198,20 +202,24 @@ def wrapper(self, *args, **kwargs):

return outer_wrapper

def _initialize_expectations(self, config=None, name=None):
def _initialize_expectations(self, config=None, data_asset_name=None):
"""Instantiates `_expectations_config` as empty by default or with a specified expectation `config`.
In addition, this always sets the `default_expectation_args` to:
`include_config`: False,
`catch_exceptions`: False,
`output_format`: 'BASIC'
By default, initializes data_asset_type to the name of the implementing class, but subclasses
that have interoperable semantics (e.g. Dataset) may override that parameter to clarify their
interoperability.
Args:
config (json): \
A json-serializable expectation config. \
If None, creates default `_expectations_config` with an empty list of expectations and \
key value `data_asset_name` as `name`.
key value `data_asset_name` as `data_asset_name`.
name (string): \
data_asset_name (string): \
The name to assign to `_expectations_config.data_asset_name` if `config` is not provided.
"""
Expand All @@ -224,14 +232,17 @@ def _initialize_expectations(self, config=None, name=None):
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
self._expectations_config = DotDict(copy.deepcopy(config))
if data_asset_name is not None:
self._expectations_config["data_asset_name"] = data_asset_name

else:
# Pandas incorrectly interprets this as an attempt to create a column and throws up a warning. Suppress it
# since we are subclassing.
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
self._expectations_config = DotDict({
"data_asset_name": name,
"data_asset_name": data_asset_name,
"data_asset_type": self.__class__.__name__,
"meta": {
"great_expectations.__version__": __version__
},
Expand Down Expand Up @@ -730,7 +741,7 @@ def validate(self, expectations_config=None, evaluation_parameters=None, catch_e
If True, the returned results include the config information associated with each expectation, if \
it exists.
only_return_failures (boolean): \
If True, expectation results are only returned when ``success = False``\.
If True, expectation results are only returned when ``success = False`` \
Returns:
A JSON-formatted dictionary containing a list of the validation results. \
Expand Down Expand Up @@ -912,6 +923,14 @@ def set_evaluation_parameter(self, parameter_name, parameter_value):
self._expectations_config['evaluation_parameters'].update(
{parameter_name: parameter_value})

def set_data_asset_name(self, data_asset_name):
    """Record *data_asset_name* in this data_asset's expectations configuration."""
    self._expectations_config["data_asset_name"] = data_asset_name

def get_data_asset_name(self):
    """Return the name currently recorded for this data_asset in its expectations configuration."""
    return self._expectations_config["data_asset_name"]

def _build_evaluation_parameters(self, expectation_args, evaluation_parameters):
"""Build a dictionary of parameters to evaluate, using the provided evaluation_paramters,
AND mutate expectation_args by removing any parameter values passed in as temporary values during
Expand Down
33 changes: 20 additions & 13 deletions great_expectations/data_asset/file_data_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def file_lines_map_expectation(cls, func):
@wraps(func)
def inner_wrapper(self, skip=None, mostly=None, null_lines_regex=r"^\s*$", result_format=None, *args, **kwargs):
try:
f = open(self.path, "r")
f = open(self._path, "r")
except:
raise

Expand Down Expand Up @@ -139,10 +139,9 @@ class FileDataAsset(MetaFileDataAsset):
"""


def __init__(self, file_path, *args, **kwargs):
def __init__(self, file_path=None, *args, **kwargs):
super(FileDataAsset, self).__init__(*args, **kwargs)
self.path = file_path

self._path = file_path

@MetaFileDataAsset.file_lines_map_expectation
def expect_file_line_regex_match_count_to_be_between(self,
Expand Down Expand Up @@ -375,7 +374,7 @@ def expect_file_hash_to_equal(self, value, hash_alg='md5', result_format=None,
# Limit file reads to 64 KB chunks at a time
BLOCKSIZE = 65536
try:
with open(self.path, 'rb') as file:
with open(self._path, 'rb') as file:
file_buffer = file.read(BLOCKSIZE)
while file_buffer:
hash.update(file_buffer)
Expand Down Expand Up @@ -424,7 +423,7 @@ def expect_file_size_to_be_between(self, minsize, maxsize, result_format=None,

success = False
try:
size = os.path.getsize(self.path)
size = os.path.getsize(self._path)
except OSError:
raise

Expand All @@ -448,13 +447,18 @@ def expect_file_size_to_be_between(self, minsize, maxsize, result_format=None,

return {"success":success}

@DataAsset.expectation([])
def expect_file_to_exist(self, result_format=None, include_config=False,
@DataAsset.expectation(["filepath"])
def expect_file_to_exist(self, filepath=None, result_format=None, include_config=False,
catch_exceptions=None, meta=None):

"""
Checks to see if a file specified by the user actually exists
Args:
filepath (str or None): \
The filepath to evaluate. If None, will check the currently-configured path object
of this FileDataAsset.
Keyword Args:
result_format (str or None): \
Expand All @@ -481,9 +485,12 @@ def expect_file_to_exist(self, result_format=None, include_config=False,
:ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.
"""

success = False
if os.path.isfile(self.path):
if filepath is not None and os.path.isfile(filepath):
success = True
elif self._path is not None and os.path.isfile(self._path):
success = True
else:
success = False

return {"success":success}

Expand Down Expand Up @@ -537,7 +544,7 @@ def expect_file_to_have_valid_table_header(self, regex, skip=None,
success = False

try:
with open(self.path, 'r') as f:
with open(self._path, 'r') as f:
lines = f.readlines() #Read in file lines

except IOError:
Expand Down Expand Up @@ -596,7 +603,7 @@ def expect_file_to_be_valid_json(self, schema=None, result_format=None,
success = False
if schema is None:
try:
with open(self.path, 'r') as f:
with open(self._path, 'r') as f:
json.load(f)
success = True
except ValueError:
Expand All @@ -606,7 +613,7 @@ def expect_file_to_be_valid_json(self, schema=None, result_format=None,
with open(schema, 'r') as s:
schema_data = s.read()
sdata = json.loads(schema_data)
with open(self.path, 'r') as f:
with open(self._path, 'r') as f:
json_data = f.read()
jdata = json.loads(json_data)
jsonschema.validate(jdata, sdata)
Expand Down
6 changes: 6 additions & 0 deletions great_expectations/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ class Dataset(DataAsset):
def __init__(self, *args, **kwargs):
    # Pure pass-through: config / data_asset_name / autoinspect handling all
    # happens in DataAsset.__init__.
    super(Dataset, self).__init__(*args, **kwargs)

def _initialize_expectations(self, config=None, data_asset_name=None):
    """Delegate to DataAsset, then normalize data_asset_type to "Dataset".

    All Dataset subclasses share interoperable expectation semantics, so the
    stored type is forced to "Dataset" regardless of the concrete subclass.
    """
    super(Dataset, self)._initialize_expectations(
        config=config, data_asset_name=data_asset_name)
    self._expectations_config["data_asset_type"] = "Dataset"

@classmethod
def column_map_expectation(cls, func):
"""Constructs an expectation using column-map semantics.
Expand Down
31 changes: 6 additions & 25 deletions tests/test_data_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,11 @@ def test_data_asset(self):
D._expectations_config,
{
"data_asset_name": None,
"data_asset_type": "Dataset",
"meta": {
"great_expectations.__version__": ge.__version__
},
"expectations": []
# No longer expect autoinspection 20180920
# {
# "expectation_type" : "expect_column_to_exist",
# "kwargs" : { "column" : "x", 'result_format': 'BASIC'},
# 'success_on_last_run': True
# },{
# "expectation_type" : "expect_column_to_exist",
# "kwargs" : { "column" : "y", 'result_format': 'BASIC'},
# 'success_on_last_run': True
# },{
# "expectation_type" : "expect_column_to_exist",
# "kwargs" : { "column" : "z", 'result_format': 'BASIC'},
# 'success_on_last_run': True
# }]
}
)

Expand All @@ -58,21 +45,11 @@ def test_data_asset(self):
D.get_expectations_config(),
{
"data_asset_name": None,
"data_asset_type": "Dataset",
"meta": {
"great_expectations.__version__": ge.__version__
},
"expectations": []
# No longer expect autoinspection 20180920
# {
# "expectation_type" : "expect_column_to_exist",
# "kwargs" : { "column" : "x"}
# },{
# "expectation_type" : "expect_column_to_exist",
# "kwargs" : { "column" : "y"}
# },{
# "expectation_type" : "expect_column_to_exist",
# "kwargs" : { "column" : "z"}
# }]
}
)

Expand Down Expand Up @@ -189,6 +166,7 @@ def test_get_and_save_expectation_config(self):
}
],
"data_asset_name": None,
"data_asset_type": "Dataset",
"meta": {
"great_expectations.__version__": ge.__version__
}
Expand Down Expand Up @@ -261,6 +239,7 @@ def test_get_and_save_expectation_config(self):
}
],
"data_asset_name": None,
"data_asset_type": "Dataset",
"meta": {
"great_expectations.__version__": ge.__version__
}
Expand Down Expand Up @@ -332,6 +311,7 @@ def test_get_and_save_expectation_config(self):
}
],
"data_asset_name": None,
"data_asset_type": "Dataset",
"meta": {
"great_expectations.__version__": ge.__version__
}
Expand Down Expand Up @@ -996,6 +976,7 @@ def test_remove_expectation(self):
}
],
'data_asset_name': None,
"data_asset_type": "Dataset",
"meta": {
"great_expectations.__version__": ge.__version__
}
Expand Down

0 comments on commit 92be4a2

Please sign in to comment.