Merge 64597c7 into 74b98fe

great-expectations · Apr 23, 2019 · 2cff9f4 · 2cff9f4
2 parents 74b98fe + 64597c7
commit 2cff9f4
Show file tree

Hide file tree

Showing 8 changed files with 168 additions and 130 deletions.
diff --git a/great_expectations/cli.py b/great_expectations/cli.py
@@ -2,11 +2,14 @@
 import sys
 import os
 import argparse
+import logging
 
 from great_expectations import read_csv
 from great_expectations import __version__
-from great_expectations.dataset import PandasDataset
+from great_expectations.dataset import Dataset, PandasDataset
+from great_expectations.data_asset import FileDataAsset
 
+logger = logging.getLogger(__name__)
 
 def dispatch(args):
     parser = argparse.ArgumentParser(
@@ -69,6 +72,7 @@ def validate(parsed_args):
     else:
         evaluation_parameters = None
 
+    # Use a custom data asset module and class if provided. Otherwise infer from the config.
     if parsed_args["custom_dataset_module"]:
         sys.path.insert(0, os.path.dirname(
             parsed_args["custom_dataset_module"]))
@@ -77,14 +81,27 @@ def validate(parsed_args):
         custom_module = __import__(module_name)
         dataset_class = getattr(
             custom_module, parsed_args["custom_dataset_class"])
-
+    elif "data_asset_type" in expectations_config:
+        if expectations_config["data_asset_type"]== "PandasDataset":
+            dataset_class = PandasDataset
+        elif expectations_config["data_asset_type"].endswith("Dataset"):
+            logger.info("Using PandasDataset to validate dataset of type %s." % expectations_config["data_asset_type"])
+            dataset_class = PandasDataset
+        elif expectations_config["data_asset_type"] == "FileDataAsset":
+            dataset_class = FileDataAsset
+        else:
+            logger.critical("Unrecognized data_asset_type %s" % expectations_config["data_asset_type"])
+            return -1
     else:
         dataset_class = PandasDataset
 
-    df = read_csv(data_set, expectations_config=expectations_config,
-                  dataset_class=dataset_class)
+    if issubclass(dataset_class, Dataset):
+        da = read_csv(data_set, expectations_config=expectations_config,
+                    dataset_class=dataset_class)
+    else:
+        da = dataset_class(data_set, config=expectations_config)
 
-    result = df.validate(
+    result = da.validate(
         evaluation_parameters=evaluation_parameters,
         result_format=parsed_args["result_format"],
         catch_exceptions=parsed_args["catch_exceptions"],
@@ -103,6 +120,11 @@ def version(parsed_args):
 
 
 def main():
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    logger.setLevel(logging.INFO)
     return_value = dispatch(sys.argv[1:])
     sys.exit(return_value)
 

diff --git a/great_expectations/data_asset/base.py b/great_expectations/data_asset/base.py
@@ -5,6 +5,7 @@
 from functools import wraps
 import traceback
 import warnings
+import logging
 from six import PY3, string_types
 from collections import namedtuple
 
@@ -17,6 +18,7 @@
 from great_expectations.data_asset.util import DotDict, recursively_convert_to_json_serializable, parse_result_format
 from great_expectations.dataset.autoinspect import columns_exist
 
+logger = logging.getLogger("DataAsset")
 
 class DataAsset(object):
 
@@ -35,9 +37,11 @@ def __init__(self, *args, **kwargs):
 
         """
         autoinspect_func = kwargs.pop("autoinspect_func", None)
+        initial_config = kwargs.pop("config", None)
+        data_asset_name = kwargs.pop("data_asset_name", None)
 
         super(DataAsset, self).__init__(*args, **kwargs)
-        self._initialize_expectations()
+        self._initialize_expectations(config=initial_config, data_asset_name=data_asset_name)
         if autoinspect_func is not None:
             autoinspect_func(self)
 
@@ -198,7 +202,7 @@ def wrapper(self, *args, **kwargs):
 
         return outer_wrapper
 
-    def _initialize_expectations(self, config=None, name=None):
+    def _initialize_expectations(self, config=None, data_asset_name=None):
         """Instantiates `_expectations_config` as empty by default or with a specified expectation `config`.
         In addition, this always sets the `default_expectation_args` to:
             `include_config`: False,
@@ -209,13 +213,16 @@ def _initialize_expectations(self, config=None, name=None):
             config (json): \
                 A json-serializable expectation config. \
                 If None, creates default `_expectations_config` with an empty list of expectations and \
-                key value `data_asset_name` as `name`.
+                key value `data_asset_name` as `data_asset_name`.
 
-            name (string): \
+            data_asset_name (string): \
                 The name to assign to `_expectations_config.data_asset_name` if `config` is not provided.
 
         """
         if config != None:
+            if "data_asset_type" in config and config["data_asset_type"] != self.__class__.__name__:
+                logger.warning("Building data asset of type %s but config is of type %s" % \
+                    (config["data_asset_type"], self.__class__.__name__))
             #!!! Should validate the incoming config with jsonschema here
 
             # Copy the original so that we don't overwrite it by accident
@@ -224,14 +231,17 @@ def _initialize_expectations(self, config=None, name=None):
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore", category=UserWarning)
                 self._expectations_config = DotDict(copy.deepcopy(config))
+                if data_asset_name is not None:
+                    self._expectations_config["data_asset_name"] = data_asset_name
 
         else:
             # Pandas incorrectly interprets this as an attempt to create a column and throws up a warning. Suppress it
             # since we are subclassing.
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore", category=UserWarning)
                 self._expectations_config = DotDict({
-                    "data_asset_name": name,
+                    "data_asset_name": data_asset_name,
+                    "data_asset_type": self.__class__.__name__,
                     "meta": {
                         "great_expectations.__version__": __version__
                     },
@@ -730,7 +740,7 @@ def validate(self, expectations_config=None, evaluation_parameters=None, catch_e
                     If True, the returned results include the config information associated with each expectation, if \
                     it exists.
                 only_return_failures (boolean): \
-                    If True, expectation results are only returned when ``success = False``\.
+                    If True, expectation results are only returned when ``success = False``. \
 
             Returns:
                 A JSON-formatted dictionary containing a list of the validation results. \
@@ -912,6 +922,14 @@ def set_evaluation_parameter(self, parameter_name, parameter_value):
         self._expectations_config['evaluation_parameters'].update(
             {parameter_name: parameter_value})
 
+    def set_data_asset_name(self, data_asset_name):
+        """Sets the name of this data_asset as stored in the expectations configuration."""
+        self._expectations_config['data_asset_name'] = data_asset_name
+
+    def get_data_asset_name(self):
+        """Gets the current name of this data_asset as stored in the expectations configuration."""
+        return self._expectations_config['data_asset_name']
+
     def _build_evaluation_parameters(self, expectation_args, evaluation_parameters):
         """Build a dictionary of parameters to evaluate, using the provided evaluation_paramters,
         AND mutate expectation_args by removing any parameter values passed in as temporary values during