Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/feature/new_renderer' into featu…
Browse files Browse the repository at this point in the history
…re/new_renderer-rob
  • Loading branch information
jcampbell committed Jul 3, 2019
2 parents b5756a5 + 5432ded commit 5dbe616
Show file tree
Hide file tree
Showing 15 changed files with 244 additions and 492 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Documentation build
docs/source/_build

# Render test output
tests/render/output/*

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
15 changes: 8 additions & 7 deletions great_expectations/cli/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@

from .util import cli_message
from great_expectations.render import DefaultJinjaPageView
from great_expectations.version import __version__ as __version__


def add_datasource(context):
cli_message(
"""
========== Datasources ==========
See <blue>https://great-expectations.readthedocs.io/en/latest/core_concepts/datasource.html?src=cli</blue> for more information about datasources.
"""
See <blue>https://docs.greatexpectations.io/en/latest/core_concepts/datasource.html?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue> for more information about datasources.
""".format(__version__.replace(".", "_"))
)
data_source_selection = click.prompt(
msg_prompt_choose_data_source,
Expand Down Expand Up @@ -122,14 +123,14 @@ def add_datasource(context):
"""
========== Profiling ==========
Would you like to profile '%s' to create candidate expectations and documentation?
Would you like to profile '{0:s}' to create candidate expectations and documentation?
Please note: As of v0.7.0, profiling is still a beta feature in Great Expectations.
This generation of profilers will evaluate the entire data source (without sampling) and may be very time consuming.
As a rule of thumb, we recommend starting with data smaller than 100MB.
To learn more about profiling, visit <blue>https://great-expectations.readthedocs.io/en/latest/guides/profiling.html?src=cli</blue>.
""" % (data_source_name)
To learn more about profiling, visit <blue>https://docs.greatexpectations.io/en/latest/guides/profiling.html?utm_source=cli&utm_medium=init&utm_campaign={1:s}</blue>.
""".format(data_source_name, __version__.replace(".", "_"))
)
if click.confirm("Proceed?",
default=True
Expand Down Expand Up @@ -157,8 +158,8 @@ def add_datasource(context):
Before committing, please make sure that this data does not contain sensitive information!
To learn more: <blue>https://great-expectations.readthedocs.io/en/latest/guides/data_documentation?src=cli</blue>
"""
To learn more: <blue>https://docs.greatexpectations.io/en/latest/guides/data_documentation.html?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue>
""".format(__version__.replace(".", "_"))
)
if click.confirm("Move the profiled data?",
default=True
Expand Down
6 changes: 3 additions & 3 deletions great_expectations/cli/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import shutil

from great_expectations.data_context.util import safe_mmkdir
from great_expectations import __version__
from great_expectations import __version__ as __version__


def script_relative_path(file_path):
Expand Down Expand Up @@ -52,8 +52,8 @@ def scaffold_directories_and_notebooks(base_dir):
If you're new to Great Expectations, this tutorial is a good place to start:
<blue>https://great-expectations.readthedocs.io/en/latest/getting_started.html?src=cli</blue>
"""
<blue>https://docs.greatexpectations.io/en/latest/getting_started.html?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue>
""".format(__version__.replace(".", "_"))

msg_prompt_lets_begin = """
Let's add Great Expectations to your project, by scaffolding a new great_expectations directory:
Expand Down
2 changes: 1 addition & 1 deletion great_expectations/data_context/data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -1482,7 +1482,7 @@ def profile_datasource(self,
Returns:
List of (expectation_suite, EVR) tuples for each of the data_assets found in the datasource
"""
logger.info("\nProfiling '%s' with '%s'" % (datasource_name, profiler.__name__))
logger.info("Profiling '%s' with '%s'" % (datasource_name, profiler.__name__))
profiling_results = []
data_asset_names = self.get_available_data_asset_names(datasource_name)
if generator_name is None:
Expand Down
24 changes: 0 additions & 24 deletions great_expectations/render/base.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import datetime

from .content_block import ContentBlockRenderer
from ...util import ordinal


def substitute_none_for_missing(kwargs, kwarg_list):
Expand Down Expand Up @@ -76,14 +76,13 @@ def expect_column_to_exist(cls, expectation, styling=None, include_column_name=T
["column", "column_index"],
)

if params["column_index"] == None:
if params["column_index"] is None:
if include_column_name:
template_str = "$column is a required field."
else:
template_str = "is a required field."
else:
#!!! FIXME: this works for 4th, 5th, 6th, etc, but is dumb about 1th, 2th, and 3th.
params["column_indexth"] = str(params["column_index"])+"th"
params["column_indexth"] = ordinal(params["column_index"])
if include_column_name:
template_str = "$column must be the $column_indexth field"
else:
Expand Down Expand Up @@ -282,10 +281,9 @@ def expect_column_pair_values_to_be_equal(cls, expectation, styling=None, includ
# NOTE: This renderer doesn't do anything with "ignore_row_if"

if (params["column_A"] is None) or (params["column_B"] is None):
# FIXME: this string is wrong
template_str = " has a bogus $expectation_name expectation."
template_str = " unrecognized kwargs for expect_column_pair_values_to_be_equal: missing column."

if params["mostly"] == None:
if params["mostly"] is None:
template_str = "Values in $column_A and $column_B must always be equal."

else:
Expand All @@ -307,16 +305,17 @@ def expect_table_columns_to_match_ordered_list(cls, expectation, styling=None, i
["column_list"]
)

if params["column_list"] == None:
# NOTE: I don't know how I feel about these underspecified expectation messages.
if params["column_list"] is None:
template_str = "This table should have a list of columns in a specific order, but that order is not specified."

else:
# FIXME: This is slightly wrong, since the whole string (including commas) will get syntax highlighting.
# It would be better to have each element highlighted separately.
# See `expect_column_distinct_values_to_be_in_set`
params["column_list_str"] = ", ".join(params["column_list"])
template_str = "This table should have these columns in this order: $column_list_str"
template_str = "This table should have these columns in this order: "
for idx in range(len(params["column_list"]) - 1):
template_str += "$column_list_" + str(idx) + ", "
params["column_list_" + str(idx)] = params["column_list"][idx]

template_str += "$column_list_" + str(idx+1)
params["column_list_" + str(idx+1)] = params["column_list"][idx+1]

return [{
"template": template_str,
Expand All @@ -331,10 +330,13 @@ def expect_multicolumn_values_to_be_unique(cls, expectation, styling=None, inclu
["column_list", "ignore_row_if"]
)

# FIXME: This is slightly wrong, since the whole string (including commas) will get syntax highlighting.
# It would be better to have each element highlighted separately, but I need to research methods to do this elegantly.
params["column_list_str"] = ", ".join(params["column_list"])
template_str = "Values must always be unique across columns: $column_list_str"
template_str = "Values must always be unique across columns: "
for idx in range(len(params["column_list"]) - 1):
template_str += "$column_list_" + str(idx) + ", "
params["column_list_" + str(idx)] = params["column_list"][idx]

template_str += "$column_list_" + str(idx + 1)
params["column_list_" + str(idx + 1)] = params["column_list"][idx + 1]

return [{
"template": template_str,
Expand Down Expand Up @@ -381,13 +383,12 @@ def expect_table_row_count_to_equal(cls, expectation, styling=None, include_colu

@classmethod
def expect_column_distinct_values_to_be_in_set(cls, expectation, styling=None, include_column_name=True):
# TODO: thoroughly review this method. It was implemented quickly and hackily.
params = substitute_none_for_missing(
expectation["kwargs"],
["column", "value_set"],
)

if params["value_set"] == None:
if params["value_set"] is None:

if include_column_name:
template_str = "$column values must belong to a set, but that set is not specified."
Expand Down Expand Up @@ -415,7 +416,6 @@ def expect_column_distinct_values_to_be_in_set(cls, expectation, styling=None, i

@classmethod
def expect_column_values_to_not_match_regex(cls, expectation, styling=None, include_column_name=True):
# TODO: thoroughly review this method. It was implemented quickly and hackily.
params = substitute_none_for_missing(
expectation["kwargs"],
["column", "regex", "mostly"],
Expand All @@ -434,7 +434,6 @@ def expect_column_values_to_not_match_regex(cls, expectation, styling=None, incl

@classmethod
def expect_column_values_to_not_be_null(cls, expectation, styling=None, include_column_name=True):
# TODO: thoroughly review this method. It was implemented quickly and hackily.
params = substitute_none_for_missing(
expectation["kwargs"],
["column", "mostly"],
Expand All @@ -453,17 +452,23 @@ def expect_column_values_to_not_be_null(cls, expectation, styling=None, include_

@classmethod
def expect_column_proportion_of_unique_values_to_be_between(cls, expectation, styling=None, include_column_name=True):
# TODO: thoroughly review this method. It was implemented quickly and hackily.
params = substitute_none_for_missing(
expectation["kwargs"],
["column", "min_value", "max_value"],
)

if include_column_name:
template_str = "$column must have between $min_value and $max_value% unique values."
if params["min_value"] is None and params["max_value"] is None:
template_str = "may have any percentage of unique values."
elif params["min_value"] is None:
template_str = "must have no more than $max_value% unique values."
elif params["max_value"] is None:
template_str = "must have at least $min_value% unique values."
else:
template_str = "must have between $min_value and $max_value% unique values."

if include_column_name:
template_str = "$column " + template_str

return [{
"template": template_str,
"params": params,
Expand All @@ -472,7 +477,6 @@ def expect_column_proportion_of_unique_values_to_be_between(cls, expectation, st

@classmethod
def expect_column_values_to_be_unique(cls, expectation, styling=None, include_column_name=True):
# TODO: thoroughly review this method. It was implemented quickly and hackily.
params = substitute_none_for_missing(
expectation["kwargs"],
["column", ],
Expand All @@ -491,7 +495,6 @@ def expect_column_values_to_be_unique(cls, expectation, styling=None, include_co

@classmethod
def expect_column_values_to_be_in_type_list(cls, expectation, styling=None, include_column_name=True):
# TODO: thoroughly review this method. It was implemented quickly and hackily.
params = substitute_none_for_missing(
expectation["kwargs"],
["column", "type_list", "mostly"],
Expand Down
Empty file.
20 changes: 11 additions & 9 deletions great_expectations/render/renderer/page_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class PrescriptivePageRenderer(Renderer):
def render(cls, expectations):
# Group expectations by column
columns = {}
ordered_columns = None
for expectation in expectations["expectations"]:
if "column" in expectation["kwargs"]:
column = expectation["kwargs"]["column"]
Expand All @@ -23,8 +24,15 @@ def render(cls, expectations):
columns[column] = []
columns[column].append(expectation)

# TODO: in general, there should be a mechanism for imposing order here.
ordered_columns = list(columns.keys())
# if possible, get the order of columns from expect_table_columns_to_match_ordered_list
if expectation["expectation_type"] == "expect_table_columns_to_match_ordered_list":
exp_column_list = expectation["kwargs"]["column_list"]
if exp_column_list and len(exp_column_list) > 0:
ordered_columns = exp_column_list

# if no order of columns is expected, sort alphabetically
if not ordered_columns:
ordered_columns = sorted(list(columns.keys()))

return {
"renderer_type": "PrescriptivePageRenderer",
Expand All @@ -50,15 +58,9 @@ def render(cls, validation_results):
columns[column] = []
columns[column].append(evr)

# TODO: in general, there should be a mechanism for imposing order here.
ordered_columns = list(columns.keys())

ordered_columns = Renderer._get_column_list_from_evrs(validation_results)
column_types = DescriptiveOverviewSectionRenderer._get_column_types(validation_results)

# FIXME: This is a hack to limit output on one training file
# if "Reporting Area" in ordered_columns:
# ordered_columns = ["Reporting Area"]

if "data_asset_name" in validation_results["meta"] and validation_results["meta"]["data_asset_name"]:
data_asset_name = validation_results["meta"]["data_asset_name"].split(
'/')[-1]
Expand Down
23 changes: 19 additions & 4 deletions great_expectations/render/renderer/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,26 @@ def _find_all_evrs_by_type(cls, evrs, type_, column_=None):

@classmethod
def _get_column_list_from_evrs(cls, evrs):
# Group EVRs by column
columns = list(set([evr["expectation_config"]["kwargs"]["column"] for evr in evrs["results"] if "column" in evr["expectation_config"]["kwargs"]]))
"""
Get list of column names.
# TODO: in general, there should be a mechanism for imposing order here.
ordered_columns = columns
If expect_table_columns_to_match_ordered_list EVR is present, use it as the list, including the order.
Otherwise, get the list of all columns mentioned in the expectations and order it alphabetically.
:param evrs:
:return: list of columns with best effort sorting
"""
evrs_ = evrs["results"] if "results" in evrs else evrs

expect_table_columns_to_match_ordered_list_evr = cls._find_evr_by_type(evrs_, "expect_table_columns_to_match_ordered_list")
if expect_table_columns_to_match_ordered_list_evr:
ordered_columns = expect_table_columns_to_match_ordered_list_evr["result"]["observed_value"]
else:
# Group EVRs by column
columns = list(set([evr["expectation_config"]["kwargs"]["column"] for evr in evrs_ if "column" in evr["expectation_config"]["kwargs"]]))

ordered_columns = sorted(columns)

return ordered_columns

15 changes: 15 additions & 0 deletions great_expectations/render/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Rendering utility"""

# Suffixes for the last digits that do not take the default "th".
SUFFIXES = {1: 'st', 2: 'nd', 3: 'rd'}


def ordinal(num):
    """Convert an integer to its English ordinal string (e.g. 1 -> "1st").

    Args:
        num: The integer to render. Negative values keep their sign and take
            the suffix of their magnitude (e.g. -1 -> "-1st").

    Returns:
        ``str(num)`` followed by "st", "nd", "rd" or "th".
    """
    # Taken from https://codereview.stackexchange.com/questions/41298/producing-ordinal-numbers/41301
    # Consider a library like num2words when internationalization comes

    # Python's % yields a non-negative remainder for negative operands
    # (-1 % 10 == 9), so pick the suffix from the magnitude instead.
    magnitude = abs(num)
    if 10 <= magnitude % 100 <= 20:
        # 11th, 12th, 13th (and the rest of the teens) always take "th".
        suffix = 'th'
    else:
        # the second parameter is a default.
        suffix = SUFFIXES.get(magnitude % 10, 'th')
    return str(num) + suffix

0 comments on commit 5dbe616

Please sign in to comment.