Skip to content

Commit

Permalink
Merge 64597c7 into 74b98fe
Browse files Browse the repository at this point in the history
  • Loading branch information
jcampbell committed Apr 23, 2019
2 parents 74b98fe + 64597c7 commit 2cff9f4
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 130 deletions.
32 changes: 27 additions & 5 deletions great_expectations/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import sys
import os
import argparse
import logging

from great_expectations import read_csv
from great_expectations import __version__
from great_expectations.dataset import PandasDataset
from great_expectations.dataset import Dataset, PandasDataset
from great_expectations.data_asset import FileDataAsset

logger = logging.getLogger(__name__)

def dispatch(args):
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -69,6 +72,7 @@ def validate(parsed_args):
else:
evaluation_parameters = None

# Use a custom data asset module and class if provided. Otherwise, infer the class from the config.
if parsed_args["custom_dataset_module"]:
sys.path.insert(0, os.path.dirname(
parsed_args["custom_dataset_module"]))
Expand All @@ -77,14 +81,27 @@ def validate(parsed_args):
custom_module = __import__(module_name)
dataset_class = getattr(
custom_module, parsed_args["custom_dataset_class"])

elif "data_asset_type" in expectations_config:
if expectations_config["data_asset_type"]== "PandasDataset":
dataset_class = PandasDataset
elif expectations_config["data_asset_type"].endswith("Dataset"):
logger.info("Using PandasDataset to validate dataset of type %s." % expectations_config["data_asset_type"])
dataset_class = PandasDataset
elif expectations_config["data_asset_type"] == "FileDataAsset":
dataset_class = FileDataAsset
else:
logger.critical("Unrecognized data_asset_type %s" % expectations_config["data_asset_type"])
return -1
else:
dataset_class = PandasDataset

df = read_csv(data_set, expectations_config=expectations_config,
dataset_class=dataset_class)
if issubclass(dataset_class, Dataset):
da = read_csv(data_set, expectations_config=expectations_config,
dataset_class=dataset_class)
else:
da = dataset_class(data_set, config=expectations_config)

result = df.validate(
result = da.validate(
evaluation_parameters=evaluation_parameters,
result_format=parsed_args["result_format"],
catch_exceptions=parsed_args["catch_exceptions"],
Expand All @@ -103,6 +120,11 @@ def version(parsed_args):


def main():
    """Command-line entry point.

    Attaches a stream handler with a timestamp/name/level/message format to
    this module's logger, sets that logger to INFO, then passes the
    command-line arguments (without the program name) to ``dispatch`` and
    exits the process with whatever ``dispatch`` returns.
    """
    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter(log_format))

    logger.setLevel(logging.INFO)
    logger.addHandler(stream_handler)

    # The value returned by dispatch becomes the process exit status.
    sys.exit(dispatch(sys.argv[1:]))

Expand Down
30 changes: 24 additions & 6 deletions great_expectations/data_asset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from functools import wraps
import traceback
import warnings
import logging
from six import PY3, string_types
from collections import namedtuple

Expand All @@ -17,6 +18,7 @@
from great_expectations.data_asset.util import DotDict, recursively_convert_to_json_serializable, parse_result_format
from great_expectations.dataset.autoinspect import columns_exist

logger = logging.getLogger("DataAsset")

class DataAsset(object):

Expand All @@ -35,9 +37,11 @@ def __init__(self, *args, **kwargs):
"""
autoinspect_func = kwargs.pop("autoinspect_func", None)
initial_config = kwargs.pop("config", None)
data_asset_name = kwargs.pop("data_asset_name", None)

super(DataAsset, self).__init__(*args, **kwargs)
self._initialize_expectations()
self._initialize_expectations(config=initial_config, data_asset_name=data_asset_name)
if autoinspect_func is not None:
autoinspect_func(self)

Expand Down Expand Up @@ -198,7 +202,7 @@ def wrapper(self, *args, **kwargs):

return outer_wrapper

def _initialize_expectations(self, config=None, name=None):
def _initialize_expectations(self, config=None, data_asset_name=None):
"""Instantiates `_expectations_config` as empty by default or with a specified expectation `config`.
In addition, this always sets the `default_expectation_args` to:
`include_config`: False,
Expand All @@ -209,13 +213,16 @@ def _initialize_expectations(self, config=None, name=None):
config (json): \
A json-serializable expectation config. \
If None, creates default `_expectations_config` with an empty list of expectations and \
key value `data_asset_name` as `name`.
key value `data_asset_name` as `data_asset_name`.
name (string): \
data_asset_name (string): \
The name to assign to `_expectations_config.data_asset_name` if `config` is not provided.
"""
if config != None:
if "data_asset_type" in config and config["data_asset_type"] != self.__class__.__name__:
logger.warning("Building data asset of type %s but config is of type %s" % \
(config["data_asset_type"], self.__class__.__name__))
#!!! Should validate the incoming config with jsonschema here

# Copy the original so that we don't overwrite it by accident
Expand All @@ -224,14 +231,17 @@ def _initialize_expectations(self, config=None, name=None):
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
self._expectations_config = DotDict(copy.deepcopy(config))
if data_asset_name is not None:
self._expectations_config["data_asset_name"] = data_asset_name

else:
# Pandas incorrectly interprets this as an attempt to create a column and throws up a warning. Suppress it
# since we are subclassing.
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
self._expectations_config = DotDict({
"data_asset_name": name,
"data_asset_name": data_asset_name,
"data_asset_type": self.__class__.__name__,
"meta": {
"great_expectations.__version__": __version__
},
Expand Down Expand Up @@ -730,7 +740,7 @@ def validate(self, expectations_config=None, evaluation_parameters=None, catch_e
If True, the returned results include the config information associated with each expectation, if \
it exists.
only_return_failures (boolean): \
If True, expectation results are only returned when ``success = False``\.
If True, expectation results are only returned when ``success = False`` \
Returns:
A JSON-formatted dictionary containing a list of the validation results. \
Expand Down Expand Up @@ -912,6 +922,14 @@ def set_evaluation_parameter(self, parameter_name, parameter_value):
self._expectations_config['evaluation_parameters'].update(
{parameter_name: parameter_value})

def set_data_asset_name(self, data_asset_name):
    """Store ``data_asset_name`` under the ``data_asset_name`` key of the expectations configuration.

    Args:
        data_asset_name: The value to record as this data asset's name.
    """
    expectations_config = self._expectations_config
    expectations_config['data_asset_name'] = data_asset_name

def get_data_asset_name(self):
    """Return the value stored under the ``data_asset_name`` key of the expectations configuration.

    Returns:
        The name currently recorded for this data asset.
    """
    expectations_config = self._expectations_config
    return expectations_config['data_asset_name']

def _build_evaluation_parameters(self, expectation_args, evaluation_parameters):
"""Build a dictionary of parameters to evaluate, using the provided evaluation_paramters,
AND mutate expectation_args by removing any parameter values passed in as temporary values during
Expand Down

0 comments on commit 2cff9f4

Please sign in to comment.