Merge pull request #487 from great-expectations/uncontroversial-cli-changes

Uncontroversial cli changes
abegong committed Jun 11, 2019
2 parents 2e0dfed + fb7f278 commit 0d710db
Showing 5 changed files with 121 additions and 64 deletions.
22 changes: 8 additions & 14 deletions great_expectations/cli/cli.py
@@ -140,26 +140,20 @@ def init(target_directory):
    appends to a `.gitignore` file.
    """

+    #!!! This injects a version tag into the docs. We should test that those versioned docs exist in RTD.
    greeting_1 = """
-Welcome to Great Expectations! Always know what to expect from your data.
+Always know what to expect from your data.

-When you develop data pipelines, ML models, ETLs and other data products,
-Great Expectations helps you express what you expect your data to look like
-(e.g., "column X should not have more than 5% null values").
-It produces tests and documentation.
+If you're new to Great Expectations, this tutorial is a good place to start:

-When your data product runs in production,
-Great Expectations uses the tests that you created to validate data and protect
-your code against data that it was not written to deal with.
-"""
+    https://great-expectations.readthedocs.io/en/v%s/intro.html#how-do-i-get-started
+""" % __version__

    msg_prompt_lets_begin = """
-Let's add Great Expectations to your project.
-We will add great_expectations directory that will look like that:
+Let's add Great Expectations to your project, by scaffolding a new great_expectations directory:

    great_expectations
    ├── great_expectations.yml
    ├── datasources
    ├── expectations
    ├── fixtures
@@ -272,7 +266,7 @@ def init(target_directory):

_scaffold_directories_and_notebooks(base_dir)
cli_message(
"\nDone. Later you can check out great_expectations/great_expectations.yml config file for useful options.",
"\nDone.",
color="blue")

context = DataContext('.')
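A note on the `#!!! This injects a version tag` comment added above: the new greeting interpolates `__version__` into the Read the Docs link, so each release greets users with a pointer to its own docs build. A standalone sketch, using the version string this commit sets in great_expectations/version.py:

    __version__ = "0.7.0-beta"  # the value this commit sets in great_expectations/version.py

    greeting_1 = """
    Always know what to expect from your data.

    If you're new to Great Expectations, this tutorial is a good place to start:

        https://great-expectations.readthedocs.io/en/v%s/intro.html#how-do-i-get-started
    """ % __version__

    # The link renders as .../en/v0.7.0-beta/intro.html...; the #!!! comment warns
    # that this versioned Read the Docs build must actually exist.
    print(greeting_1)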
10 changes: 9 additions & 1 deletion great_expectations/cli/supporting_methods.py
@@ -4,7 +4,13 @@

from ..util import safe_mmkdir


def _scaffold_directories_and_notebooks(base_dir):
+    #!!! FIXME: Check to see if the directory already exists. If it does, refuse with:
+    # `great_expectations/` already exists.
+    # If you're certain you want to re-initialize Great Expectations within this project,
+    # please delete the whole `great_expectations/` directory and run `great_expectations init` again.

safe_mmkdir(base_dir, exist_ok=True)
notebook_dir_name = "notebooks"

@@ -14,13 +20,15 @@ def _scaffold_directories_and_notebooks(base_dir):
safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)

for uncommitted_directory in ["validations", "credentials", "samples"]:
safe_mmkdir(os.path.join(base_dir, "uncommitted", uncommitted_directory), exist_ok=True)
safe_mmkdir(os.path.join(base_dir, "uncommitted",
uncommitted_directory), exist_ok=True)

for notebook in glob.glob(script_relative_path("../init_notebooks/*.ipynb")):
notebook_name = os.path.basename(notebook)
shutil.copyfile(notebook, os.path.join(
base_dir, notebook_dir_name, notebook_name))


def script_relative_path(file_path):
'''
Useful for testing with local files. Use a path relative to where the
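The FIXME added above describes a guard that this commit does not yet implement. A hypothetical sketch of what it could look like (the helper name `_check_for_existing_project` and the exit behavior are assumptions, not part of this diff):

    import os
    import sys


    def _check_for_existing_project(base_dir):
        # Hypothetical guard for the FIXME: refuse to re-initialize when the
        # great_expectations/ directory is already present.
        if os.path.isdir(base_dir):
            print("`great_expectations/` already exists.")
            print("If you're certain you want to re-initialize Great Expectations "
                  "within this project, please delete the whole "
                  "`great_expectations/` directory and run `great_expectations init` again.")
            sys.exit(1)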
54 changes: 33 additions & 21 deletions great_expectations/util.py
@@ -12,6 +12,7 @@

logger = logging.getLogger(__name__)


def _convert_to_dataset_class(df, dataset_class, expectations_config=None, autoinspect_func=None):
"""
Convert a (pandas) dataframe to a great_expectations dataset, with (optional) expectations_config
@@ -138,11 +139,11 @@ def read_parquet(
return df


def from_pandas(pandas_df,
                dataset_class=dataset.pandas_dataset.PandasDataset,
                expectations_config=None,
                autoinspect_func=None
                ):
"""Read a Pandas data frame and return a great_expectations dataset.
Args:
@@ -163,17 +164,20 @@ class to which to convert resulting Pandas df
autoinspect_func
)


def validate(data_asset, expectations_config=None, data_asset_name=None, data_context=None, data_asset_type=None, *args, **kwargs):
"""Validate the provided data asset using the provided config"""
if expectations_config is None and data_context is None:
-        raise ValueError("Either an expectations config or a DataContext is required for validation.")
+        raise ValueError(
+            "Either an expectations config or a DataContext is required for validation.")

if expectations_config is None:
logger.info("Using expectations config from DataContext.")
expectations_config = data_context.get_expectations(data_asset_name)
else:
if data_asset_name in expectations_config:
logger.info("Using expectations config with name %s" % expectations_config["data_asset_name"])
logger.info("Using expectations config with name %s" %
expectations_config["data_asset_name"])
else:
logger.info("Using expectations config with no data_asset_name")

@@ -190,15 +194,18 @@ def validate(data_asset, expectations_config=None, data_asset_name=None, data_co
# Otherwise, we will convert for the user to a subclass of the
# existing class to enable new expectations, but only for datasets
if not isinstance(data_asset, (dataset.Dataset, pd.DataFrame)):
raise ValueError("The validate util method only supports dataset validations, including custom subclasses. For other data asset types, use the object's own validate method.")
raise ValueError(
"The validate util method only supports dataset validations, including custom subclasses. For other data asset types, use the object's own validate method.")

if not issubclass(type(data_asset), data_asset_type):
if isinstance(data_asset, (pd.DataFrame)) and issubclass(data_asset_type, dataset.PandasDataset):
-            pass # This is a special type of allowed coercion
+            pass  # This is a special type of allowed coercion
else:
raise ValueError("The validate util method only supports validation for subtypes of the provided data_asset_type.")
raise ValueError(
"The validate util method only supports validation for subtypes of the provided data_asset_type.")

-    data_asset_ = _convert_to_dataset_class(data_asset, data_asset_type, expectations_config)
+    data_asset_ = _convert_to_dataset_class(
+        data_asset, data_asset_type, expectations_config)
return data_asset_.validate(*args, data_context=data_context, **kwargs)
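Together, `from_pandas` and `validate` are this module's two entry points for ad-hoc validation of a pandas frame. A minimal usage sketch, assuming both are re-exported at the package level and that the 0.7-era expectations-config shape (a `data_asset_name` plus a list of expectation dicts) applies:

    import pandas as pd
    import great_expectations as ge

    df = pd.DataFrame({"age": [22, 38, None]})

    config = {
        "data_asset_name": "passengers",
        "expectations": [
            {"expectation_type": "expect_column_to_exist",
             "kwargs": {"column": "age"}},
        ],
    }

    ge_df = ge.from_pandas(df)  # wrap the frame as a PandasDataset
    # A raw pd.DataFrame is also accepted directly, via the coercion branch above.
    result = ge.validate(df, expectations_config=config)
    print(result["success"])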


@@ -209,12 +216,12 @@ def build_slack_notification_request(validation_json=None):
run_id = None
data_asset_name = "no_name_provided_" + str(uuid.uuid4())
title_block = {
"type": "section",
"text": {
"type": "mrkdwn",
"text": "No validation occurred. Please ensure you passed a validation_json.",
},
}
"type": "section",
"text": {
"type": "mrkdwn",
"text": "No validation occurred. Please ensure you passed a validation_json.",
},
}

query = {"blocks": [title_block]}

@@ -225,15 +232,18 @@ def build_slack_notification_request(validation_json=None):
n_checks_succeeded = validation_json["statistics"]["successful_expectations"]
n_checks = validation_json["statistics"]["evaluated_expectations"]
run_id = validation_json["meta"].get("run_id", None)
-        check_details_text = "{} of {} expectations were met\n\n".format(n_checks_succeeded, n_checks)
+        check_details_text = "{} of {} expectations were met\n\n".format(
+            n_checks_succeeded, n_checks)

if validation_json["success"]:
status = "Success :tada:"

query["blocks"][0]["text"]["text"] = "*Validated batch from data asset:* `{}`\n*Status: {}*\n{}".format(data_asset_name, status, check_details_text)
query["blocks"][0]["text"]["text"] = "*Validated batch from data asset:* `{}`\n*Status: {}*\n{}".format(
data_asset_name, status, check_details_text)
if "batch_kwargs" in validation_json["meta"]:
query["blocks"][1]["text"]["text"] = "Batch kwargs: {}".format(json.dumps(validation_json["meta"]["batch_kwargs"], indent=2))

query["blocks"][1]["text"]["text"] = "Batch kwargs: {}".format(
json.dumps(validation_json["meta"]["batch_kwargs"], indent=2))

if "result_reference" in validation_json["meta"]:
report_element = {
"type": "section",
@@ -305,10 +315,12 @@ def __getattr__(self, attr):
def __dir__(self):
return self.keys()

-def safe_mmkdir(directory, exist_ok=True): #exist_ok is always true; it's ignored, but left here to make porting later easier

+# exist_ok is always true; it's ignored, but left here to make porting later easier
+def safe_mmkdir(directory, exist_ok=True):
"""Simple wrapper since exist_ok is not available in python 2"""
try:
os.makedirs(directory)
except OSError as e:
if e.errno != errno.EEXIST:
            raise
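`safe_mmkdir` exists because `os.makedirs` only gained its `exist_ok` keyword in Python 3.2, so on Python 2 the EEXIST case must be swallowed by hand. On Python 3 the whole wrapper reduces to:

    import os

    # Python 3.2+ equivalent of safe_mmkdir(directory):
    os.makedirs("great_expectations/uncommitted/samples", exist_ok=True)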
2 changes: 1 addition & 1 deletion great_expectations/version.py
@@ -1 +1 @@
__version__ = "0.6.1__develop__sch_internal"
__version__ = "0.7.0-beta"
97 changes: 70 additions & 27 deletions tests/test_cli.py
@@ -1,5 +1,11 @@
-from __future__ import unicode_literals # Since our cli produces unicode output, but we want tests in python2 as well
+# Since our cli produces unicode output, but we want tests in python2 as well
+from __future__ import unicode_literals

+from click.testing import CliRunner
+import great_expectations.version
+from great_expectations.cli import cli
+import tempfile
+import pytest
import json
import os
import shutil
@@ -11,13 +17,7 @@
from unittest import mock
except ImportError:
import mock
-import pytest
-import tempfile

-from great_expectations.cli import cli
-import great_expectations.version

-from click.testing import CliRunner

def test_cli_command_entrance():
runner = CliRunner()
@@ -38,6 +38,7 @@ def test_cli_command_entrance():
validate Validate a CSV file against an expectations configuration.
"""


def test_cli_command_bad_command():
runner = CliRunner()

@@ -88,17 +89,19 @@ def test_cli_validate_help():
during evaluation.
--help Show this message and exit.
""".replace(" ", "").replace("\t", "").replace("\n", "")
-    output = str(result.output).replace(" ", "").replace("\t", "").replace("\n", "")
+    output = str(result.output).replace(
+        " ", "").replace("\t", "").replace("\n", "")
assert output == expected_help_message


def test_cli_validate_missing_positional_arguments():
runner = CliRunner()

result = runner.invoke(cli, ["validate"])

assert "Error: Missing argument \"DATASET\"." in str(result.output)


def test_cli_version():
runner = CliRunner()

@@ -112,7 +115,7 @@ def test_validate_basic_operation():
runner = CliRunner()
with pytest.warns(UserWarning, match="No great_expectations version found in configuration object."):
result = runner.invoke(cli, ["validate", "./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_expectations.json"])
"./tests/test_sets/titanic_expectations.json"])

assert result.exit_code == 1
json_result = json.loads(str(result.output))
@@ -130,14 +133,14 @@ def test_validate_custom_dataset():
runner = CliRunner()
with pytest.warns(UserWarning, match="No great_expectations version found in configuration object."):
result = runner.invoke(cli, ["validate",
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_custom_expectations.json",
"-f", "True",
"-m", "./tests/test_fixtures/custom_dataset.py",
"-c", "CustomPandasDataset"])
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_custom_expectations.json",
"-f", "True",
"-m", "./tests/test_fixtures/custom_dataset.py",
"-c", "CustomPandasDataset"])

json_result = json.loads(result.output)

del json_result["meta"]["great_expectations.__version__"]
del json_result["results"][0]["result"]['partial_unexpected_counts']
with open('./tests/test_sets/expected_cli_results_custom.json', 'r') as f:
@@ -150,34 +153,74 @@ def test_cli_evaluation_parameters(capsys):
with pytest.warns(UserWarning, match="No great_expectations version found in configuration object."):
runner = CliRunner()
result = runner.invoke(cli, ["validate",
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_parameterized_expectations.json",
"--evaluation_parameters",
"./tests/test_sets/titanic_evaluation_parameters.json",
"-f", "True"])
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_parameterized_expectations.json",
"--evaluation_parameters",
"./tests/test_sets/titanic_evaluation_parameters.json",
"-f", "True"])
json_result = json.loads(result.output)


with open('./tests/test_sets/titanic_evaluation_parameters.json', 'r') as f:
expected_evaluation_parameters = json.load(f)

assert json_result['evaluation_parameters'] == expected_evaluation_parameters


def test_cli_init(tmp_path_factory):
basedir = tmp_path_factory.mktemp("test_cli_init_diff")
basedir = str(basedir)
os.makedirs(os.path.join(basedir, "data"))
curdir = os.path.abspath(os.getcwd())
os.chdir(basedir)
runner = CliRunner()
result = runner.invoke(cli, ["init"], input="Y\n1\n%s\n\n" % str(os.path.join(basedir, "data")))
result = runner.invoke(cli, ["init"], input="Y\n1\n%s\n\n" % str(
os.path.join(basedir, "data")))

assert """Welcome to Great Expectations! Always know what to expect from your data.""" in result.output
print(result.output)

assert """Always know what to expect from your data.""" in result.output

assert os.path.isdir(os.path.join(basedir, "great_expectations"))
-    assert os.path.isfile(os.path.join(basedir, "great_expectations/great_expectations.yml"))
-    config = yaml.load(open(os.path.join(basedir, "great_expectations/great_expectations.yml"), "r"))
+    assert os.path.isfile(os.path.join(
+        basedir, "great_expectations/great_expectations.yml"))
+    config = yaml.load(
+        open(os.path.join(basedir, "great_expectations/great_expectations.yml"), "r"))
assert config["datasources"]["data"]["type"] == "pandas"

    os.chdir(curdir)
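A note on the `input=` string above: `CliRunner.invoke` pipes it to stdin, one line per interactive prompt. A hedged reading of `"Y\n1\n%s\n\n"` (the exact prompt wording comes from `init` and is not shown in this diff):

    import os

    basedir = "/tmp/test_cli_init_diff"  # stand-in for the tmp_path_factory dir

    input_lines = [
        "Y",                            # confirm: add Great Expectations to the project
        "1",                            # datasource choice; the test asserts type == "pandas"
        os.path.join(basedir, "data"),  # path to the data directory
        "",                             # accept the default at the final prompt
    ]
    assert "\n".join(input_lines) + "\n" == "Y\n1\n%s\n\n" % os.path.join(basedir, "data")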

# def test_cli_render(tmp_path_factory):
# runner = CliRunner()
# result = runner.invoke(cli, ["render"])

# print(result)
# print(result.output)
# assert False


# def test_cli_profile(tmp_path_factory):
# runner = CliRunner()
# result = runner.invoke(cli, ["profile"])

# print(result)
# assert False

# # basedir = tmp_path_factory.mktemp("test_cli_init_diff")
# # basedir = str(basedir)
# # os.makedirs(os.path.join(basedir, "data"))
# # curdir = os.path.abspath(os.getcwd())
# # os.chdir(basedir)
# # runner = CliRunner()
# # result = runner.invoke(cli, ["init"], input="Y\n1\n%s\n\n" % str(
# # os.path.join(basedir, "data")))

# # assert """Welcome to Great Expectations! Always know what to expect from your data.""" in result.output

# # assert os.path.isdir(os.path.join(basedir, "great_expectations"))
# # assert os.path.isfile(os.path.join(
# # basedir, "great_expectations/great_expectations.yml"))
# # config = yaml.load(
# # open(os.path.join(basedir, "great_expectations/great_expectations.yml"), "r"))
# # assert config["datasources"]["data"]["type"] == "pandas"

# # os.chdir(curdir)
