Merge pull request #487 from great-expectations/uncontroversial-cli-changes

Uncontroversial cli changes
abegong committed Jun 11, 2019
2 parents 2e0dfed + fb7f278 commit 0d710db
Showing 5 changed files with 121 additions and 64 deletions.
22 changes: 8 additions & 14 deletions great_expectations/cli/cli.py
@@ -140,26 +140,20 @@ def init(target_directory):
    appends to a `.gitignore` file.
    """

+    #!!! This injects a version tag into the docs. We should test that those versioned docs exist in RTD.
    greeting_1 = """
-Welcome to Great Expectations! Always know what to expect from your data.
+Always know what to expect from your data.

-When you develop data pipelines, ML models, ETLs and other data products,
-Great Expectations helps you express what you expect your data to look like
-(e.g., "column X should not have more than 5% null values").
-It produces tests and documentation.
+If you're new to Great Expectations, this tutorial is a good place to start:

-When your data product runs in production,
-Great Expectations uses the tests that you created to validate data and protect
-your code against data that it was not written to deal with.
-"""
+    https://great-expectations.readthedocs.io/en/v%s/intro.html#how-do-i-get-started
+""" % __version__

    msg_prompt_lets_begin = """
-Let's add Great Expectations to your project.
-We will add great_expectations directory that will look like that:
+Let's add Great Expectations to your project, by scaffolding a new great_expectations directory:

    great_expectations
    ├── great_expectations.yml
    ├── datasources
    ├── expectations
    ├── fixtures
@@ -272,7 +266,7 @@ def init(target_directory):

_scaffold_directories_and_notebooks(base_dir)
cli_message(
"\nDone. Later you can check out great_expectations/great_expectations.yml config file for useful options.",
"\nDone.",
color="blue")

context = DataContext('.')
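A note on the `#!!! This injects a version tag` comment added above: the new greeting interpolates `__version__` into the Read the Docs link, so each release greets users with a pointer to its own docs build. A standalone sketch, using the version string this commit sets in great_expectations/version.py:

    __version__ = "0.7.0-beta"  # the value this commit sets in great_expectations/version.py

    greeting_1 = """
    Always know what to expect from your data.

    If you're new to Great Expectations, this tutorial is a good place to start:

        https://great-expectations.readthedocs.io/en/v%s/intro.html#how-do-i-get-started
    """ % __version__

    # The link renders as .../en/v0.7.0-beta/intro.html...; the #!!! comment warns
    # that this versioned Read the Docs build must actually exist.
    print(greeting_1)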
10 changes: 9 additions & 1 deletion great_expectations/cli/supporting_methods.py
@@ -4,7 +4,13 @@

from ..util import safe_mmkdir


def _scaffold_directories_and_notebooks(base_dir):
+    #!!! FIXME: Check to see if the directory already exists. If it does, refuse with:
+    # `great_expectations/` already exists.
+    # If you're certain you want to re-initialize Great Expectations within this project,
+    # please delete the whole `great_expectations/` directory and run `great_expectations init` again.

safe_mmkdir(base_dir, exist_ok=True)
notebook_dir_name = "notebooks"

@@ -14,13 +20,15 @@ def _scaffold_directories_and_notebooks(base_dir):
safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)

for uncommitted_directory in ["validations", "credentials", "samples"]:
safe_mmkdir(os.path.join(base_dir, "uncommitted", uncommitted_directory), exist_ok=True)
safe_mmkdir(os.path.join(base_dir, "uncommitted",
uncommitted_directory), exist_ok=True)

for notebook in glob.glob(script_relative_path("../init_notebooks/*.ipynb")):
notebook_name = os.path.basename(notebook)
shutil.copyfile(notebook, os.path.join(
base_dir, notebook_dir_name, notebook_name))


def script_relative_path(file_path):
'''
Useful for testing with local files. Use a path relative to where the
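The FIXME added above describes a guard that this commit does not yet implement. A hypothetical sketch of what it could look like (the helper name `_check_for_existing_project` and the exit behavior are assumptions, not part of this diff):

    import os
    import sys


    def _check_for_existing_project(base_dir):
        # Hypothetical guard for the FIXME: refuse to re-initialize when the
        # great_expectations/ directory is already present.
        if os.path.isdir(base_dir):
            print("`great_expectations/` already exists.")
            print("If you're certain you want to re-initialize Great Expectations "
                  "within this project, please delete the whole "
                  "`great_expectations/` directory and run `great_expectations init` again.")
            sys.exit(1)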
54 changes: 33 additions & 21 deletions great_expectations/util.py
@@ -12,6 +12,7 @@

logger = logging.getLogger(__name__)


def _convert_to_dataset_class(df, dataset_class, expectations_config=None, autoinspect_func=None):
"""
Convert a (pandas) dataframe to a great_expectations dataset, with (optional) expectations_config
@@ -138,11 +139,11 @@ def read_parquet(
return df


def from_pandas(pandas_df,
                dataset_class=dataset.pandas_dataset.PandasDataset,
                expectations_config=None,
                autoinspect_func=None
                ):
"""Read a Pandas data frame and return a great_expectations dataset.
Args:
@@ -163,17 +164,20 @@ class to which to convert resulting Pandas df
autoinspect_func
)


def validate(data_asset, expectations_config=None, data_asset_name=None, data_context=None, data_asset_type=None, *args, **kwargs):
"""Validate the provided data asset using the provided config"""
if expectations_config is None and data_context is None:
-        raise ValueError("Either an expectations config or a DataContext is required for validation.")
+        raise ValueError(
+            "Either an expectations config or a DataContext is required for validation.")

if expectations_config is None:
logger.info("Using expectations config from DataContext.")
expectations_config = data_context.get_expectations(data_asset_name)
else:
if data_asset_name in expectations_config:
logger.info("Using expectations config with name %s" % expectations_config["data_asset_name"])
logger.info("Using expectations config with name %s" %
expectations_config["data_asset_name"])
else:
logger.info("Using expectations config with no data_asset_name")

@@ -190,15 +194,18 @@ def validate(data_asset, expectations_config=None, data_asset_name=None, data_co
# Otherwise, we will convert for the user to a subclass of the
# existing class to enable new expectations, but only for datasets
if not isinstance(data_asset, (dataset.Dataset, pd.DataFrame)):
raise ValueError("The validate util method only supports dataset validations, including custom subclasses. For other data asset types, use the object's own validate method.")
raise ValueError(
"The validate util method only supports dataset validations, including custom subclasses. For other data asset types, use the object's own validate method.")

if not issubclass(type(data_asset), data_asset_type):
if isinstance(data_asset, (pd.DataFrame)) and issubclass(data_asset_type, dataset.PandasDataset):
-            pass # This is a special type of allowed coercion
+            pass  # This is a special type of allowed coercion
else:
raise ValueError("The validate util method only supports validation for subtypes of the provided data_asset_type.")
raise ValueError(
"The validate util method only supports validation for subtypes of the provided data_asset_type.")

-    data_asset_ = _convert_to_dataset_class(data_asset, data_asset_type, expectations_config)
+    data_asset_ = _convert_to_dataset_class(
+        data_asset, data_asset_type, expectations_config)
return data_asset_.validate(*args, data_context=data_context, **kwargs)
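Together, `from_pandas` and `validate` are this module's two entry points for ad-hoc validation of a pandas frame. A minimal usage sketch, assuming both are re-exported at the package level and that the 0.7-era expectations-config shape (a `data_asset_name` plus a list of expectation dicts) applies:

    import pandas as pd
    import great_expectations as ge

    df = pd.DataFrame({"age": [22, 38, None]})

    config = {
        "data_asset_name": "passengers",
        "expectations": [
            {"expectation_type": "expect_column_to_exist",
             "kwargs": {"column": "age"}},
        ],
    }

    ge_df = ge.from_pandas(df)  # wrap the frame as a PandasDataset
    # A raw pd.DataFrame is also accepted directly, via the coercion branch above.
    result = ge.validate(df, expectations_config=config)
    print(result["success"])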


@@ -209,12 +216,12 @@ def build_slack_notification_request(validation_json=None):
run_id = None
data_asset_name = "no_name_provided_" + str(uuid.uuid4())
title_block = {
"type": "section",
"text": {
"type": "mrkdwn",
"text": "No validation occurred. Please ensure you passed a validation_json.",
},
}
"type": "section",
"text": {
"type": "mrkdwn",
"text": "No validation occurred. Please ensure you passed a validation_json.",
},
}

query = {"blocks": [title_block]}

@@ -225,15 +232,18 @@ def build_slack_notification_request(validation_json=None):
n_checks_succeeded = validation_json["statistics"]["successful_expectations"]
n_checks = validation_json["statistics"]["evaluated_expectations"]
run_id = validation_json["meta"].get("run_id", None)
-        check_details_text = "{} of {} expectations were met\n\n".format(n_checks_succeeded, n_checks)
+        check_details_text = "{} of {} expectations were met\n\n".format(
+            n_checks_succeeded, n_checks)

if validation_json["success"]:
status = "Success :tada:"

query["blocks"][0]["text"]["text"] = "*Validated batch from data asset:* `{}`\n*Status: {}*\n{}".format(data_asset_name, status, check_details_text)
query["blocks"][0]["text"]["text"] = "*Validated batch from data asset:* `{}`\n*Status: {}*\n{}".format(
data_asset_name, status, check_details_text)
if "batch_kwargs" in validation_json["meta"]:
query["blocks"][1]["text"]["text"] = "Batch kwargs: {}".format(json.dumps(validation_json["meta"]["batch_kwargs"], indent=2))

query["blocks"][1]["text"]["text"] = "Batch kwargs: {}".format(
json.dumps(validation_json["meta"]["batch_kwargs"], indent=2))

if "result_reference" in validation_json["meta"]:
report_element = {
"type": "section",
@@ -305,10 +315,12 @@ def __getattr__(self, attr):
def __dir__(self):
return self.keys()

-def safe_mmkdir(directory, exist_ok=True): #exist_ok is always true; it's ignored, but left here to make porting later easier

+# exist_ok is always true; it's ignored, but left here to make porting later easier
+def safe_mmkdir(directory, exist_ok=True):
"""Simple wrapper since exist_ok is not available in python 2"""
try:
os.makedirs(directory)
except OSError as e:
if e.errno != errno.EEXIST:
            raise
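`safe_mmkdir` exists because `os.makedirs` only gained its `exist_ok` keyword in Python 3.2, so on Python 2 the EEXIST case must be swallowed by hand. On Python 3 the whole wrapper reduces to:

    import os

    # Python 3.2+ equivalent of safe_mmkdir(directory):
    os.makedirs("great_expectations/uncommitted/samples", exist_ok=True)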
2 changes: 1 addition & 1 deletion great_expectations/version.py
@@ -1 +1 @@
__version__ = "0.6.1__develop__sch_internal"
__version__ = "0.7.0-beta"
97 changes: 70 additions & 27 deletions tests/test_cli.py
@@ -1,5 +1,11 @@
-from __future__ import unicode_literals # Since our cli produces unicode output, but we want tests in python2 as well
+# Since our cli produces unicode output, but we want tests in python2 as well
+from __future__ import unicode_literals

+from click.testing import CliRunner
+import great_expectations.version
+from great_expectations.cli import cli
+import tempfile
+import pytest
import json
import os
import shutil
@@ -11,13 +17,7 @@
from unittest import mock
except ImportError:
import mock
-import pytest
-import tempfile

-from great_expectations.cli import cli
-import great_expectations.version

-from click.testing import CliRunner

def test_cli_command_entrance():
runner = CliRunner()
@@ -38,6 +38,7 @@ def test_cli_command_entrance():
validate Validate a CSV file against an expectations configuration.
"""


def test_cli_command_bad_command():
runner = CliRunner()

@@ -88,17 +89,19 @@ def test_cli_validate_help():
during evaluation.
--help Show this message and exit.
""".replace(" ", "").replace("\t", "").replace("\n", "")
-    output = str(result.output).replace(" ", "").replace("\t", "").replace("\n", "")
+    output = str(result.output).replace(
+        " ", "").replace("\t", "").replace("\n", "")
assert output == expected_help_message


def test_cli_validate_missing_positional_arguments():
runner = CliRunner()

result = runner.invoke(cli, ["validate"])

assert "Error: Missing argument \"DATASET\"." in str(result.output)


def test_cli_version():
runner = CliRunner()

@@ -112,7 +115,7 @@ def test_validate_basic_operation():
runner = CliRunner()
with pytest.warns(UserWarning, match="No great_expectations version found in configuration object."):
result = runner.invoke(cli, ["validate", "./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_expectations.json"])
"./tests/test_sets/titanic_expectations.json"])

assert result.exit_code == 1
json_result = json.loads(str(result.output))
@@ -130,14 +133,14 @@ def test_validate_custom_dataset():
runner = CliRunner()
with pytest.warns(UserWarning, match="No great_expectations version found in configuration object."):
result = runner.invoke(cli, ["validate",
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_custom_expectations.json",
"-f", "True",
"-m", "./tests/test_fixtures/custom_dataset.py",
"-c", "CustomPandasDataset"])
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_custom_expectations.json",
"-f", "True",
"-m", "./tests/test_fixtures/custom_dataset.py",
"-c", "CustomPandasDataset"])

json_result = json.loads(result.output)

del json_result["meta"]["great_expectations.__version__"]
del json_result["results"][0]["result"]['partial_unexpected_counts']
with open('./tests/test_sets/expected_cli_results_custom.json', 'r') as f:
@@ -150,34 +153,74 @@ def test_cli_evaluation_parameters(capsys):
with pytest.warns(UserWarning, match="No great_expectations version found in configuration object."):
runner = CliRunner()
result = runner.invoke(cli, ["validate",
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_parameterized_expectations.json",
"--evaluation_parameters",
"./tests/test_sets/titanic_evaluation_parameters.json",
"-f", "True"])
"./tests/test_sets/Titanic.csv",
"./tests/test_sets/titanic_parameterized_expectations.json",
"--evaluation_parameters",
"./tests/test_sets/titanic_evaluation_parameters.json",
"-f", "True"])
json_result = json.loads(result.output)


with open('./tests/test_sets/titanic_evaluation_parameters.json', 'r') as f:
expected_evaluation_parameters = json.load(f)

assert json_result['evaluation_parameters'] == expected_evaluation_parameters


def test_cli_init(tmp_path_factory):
basedir = tmp_path_factory.mktemp("test_cli_init_diff")
basedir = str(basedir)
os.makedirs(os.path.join(basedir, "data"))
curdir = os.path.abspath(os.getcwd())
os.chdir(basedir)
runner = CliRunner()
result = runner.invoke(cli, ["init"], input="Y\n1\n%s\n\n" % str(os.path.join(basedir, "data")))
result = runner.invoke(cli, ["init"], input="Y\n1\n%s\n\n" % str(
os.path.join(basedir, "data")))

assert """Welcome to Great Expectations! Always know what to expect from your data.""" in result.output
print(result.output)

assert """Always know what to expect from your data.""" in result.output

assert os.path.isdir(os.path.join(basedir, "great_expectations"))
-    assert os.path.isfile(os.path.join(basedir, "great_expectations/great_expectations.yml"))
-    config = yaml.load(open(os.path.join(basedir, "great_expectations/great_expectations.yml"), "r"))
+    assert os.path.isfile(os.path.join(
+        basedir, "great_expectations/great_expectations.yml"))
+    config = yaml.load(
+        open(os.path.join(basedir, "great_expectations/great_expectations.yml"), "r"))
assert config["datasources"]["data"]["type"] == "pandas"

    os.chdir(curdir)
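A note on the `input=` string above: `CliRunner.invoke` pipes it to stdin, one line per interactive prompt. A hedged reading of `"Y\n1\n%s\n\n"` (the exact prompt wording comes from `init` and is not shown in this diff):

    import os

    basedir = "/tmp/test_cli_init_diff"  # stand-in for the tmp_path_factory dir

    input_lines = [
        "Y",                            # confirm: add Great Expectations to the project
        "1",                            # datasource choice; the test asserts type == "pandas"
        os.path.join(basedir, "data"),  # path to the data directory
        "",                             # accept the default at the final prompt
    ]
    assert "\n".join(input_lines) + "\n" == "Y\n1\n%s\n\n" % os.path.join(basedir, "data")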

# def test_cli_render(tmp_path_factory):
# runner = CliRunner()
# result = runner.invoke(cli, ["render"])

# print(result)
# print(result.output)
# assert False


# def test_cli_profile(tmp_path_factory):
# runner = CliRunner()
# result = runner.invoke(cli, ["profile"])

# print(result)
# assert False

# # basedir = tmp_path_factory.mktemp("test_cli_init_diff")
# # basedir = str(basedir)
# # os.makedirs(os.path.join(basedir, "data"))
# # curdir = os.path.abspath(os.getcwd())
# # os.chdir(basedir)
# # runner = CliRunner()
# # result = runner.invoke(cli, ["init"], input="Y\n1\n%s\n\n" % str(
# # os.path.join(basedir, "data")))

# # assert """Welcome to Great Expectations! Always know what to expect from your data.""" in result.output

# # assert os.path.isdir(os.path.join(basedir, "great_expectations"))
# # assert os.path.isfile(os.path.join(
# # basedir, "great_expectations/great_expectations.yml"))
# # config = yaml.load(
# # open(os.path.join(basedir, "great_expectations/great_expectations.yml"), "r"))
# # assert config["datasources"]["data"]["type"] == "pandas"

# # os.chdir(curdir)
