Merge remote-tracking branch 'origin/feature/new_renderer' into feature/new_renderer-rob
great-expectations · Jul 3, 2019 · 5dbe616 · 5dbe616
2 parents b5756a5 + 5432ded
commit 5dbe616
Show file tree

Hide file tree

Showing 15 changed files with 244 additions and 492 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,9 @@
 # Documentation build
 docs/source/_build
 
+# Render test output
+tests/render/output/*
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/great_expectations/cli/datasource.py b/great_expectations/cli/datasource.py
@@ -3,15 +3,16 @@
 
 from .util import cli_message
 from great_expectations.render import DefaultJinjaPageView
+from great_expectations.version import __version__ as __version__
 
 
 def add_datasource(context):
     cli_message(
         """
 ========== Datasources ==========
 
-See <blue>https://great-expectations.readthedocs.io/en/latest/core_concepts/datasource.html?src=cli</blue> for more information about datasources.
-"""
+See <blue>https://docs.greatexpectations.io/en/latest/core_concepts/datasource.html?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue> for more information about datasources.
+""".format(__version__.replace(".", "_"))
     )
     data_source_selection = click.prompt(
         msg_prompt_choose_data_source,
@@ -122,14 +123,14 @@ def add_datasource(context):
             """
 ========== Profiling ==========
 
-Would you like to profile '%s' to create candidate expectations and documentation?
+Would you like to profile '{0:s}' to create candidate expectations and documentation?
 
 Please note: As of v0.7.0, profiling is still a beta feature in Great Expectations.  
 This generation of profilers will evaluate the entire data source (without sampling) and may be very time consuming. 
 As a rule of thumb, we recommend starting with data smaller than 100MB.
 
-To learn more about profiling, visit <blue>https://great-expectations.readthedocs.io/en/latest/guides/profiling.html?src=cli</blue>.
-            """ % (data_source_name)
+To learn more about profiling, visit <blue>https://docs.greatexpectations.io/en/latest/guides/profiling.html?utm_source=cli&utm_medium=init&utm_campaign={1:s}</blue>.
+            """.format(data_source_name, __version__.replace(".", "_"))
         )
         if click.confirm("Proceed?",
                          default=True
@@ -157,8 +158,8 @@ def add_datasource(context):
 
 Before committing, please make sure that this data does not contain sensitive information!
 
-To learn more: <blue>https://great-expectations.readthedocs.io/en/latest/guides/data_documentation?src=cli</blue>
-"""
+To learn more: <blue>https://docs.greatexpectations.io/en/latest/guides/data_documentation.html?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue>
+""".format(__version__.replace(".", "_"))
             )
             if click.confirm("Move the profiled data?",
                              default=True

diff --git a/great_expectations/cli/init.py b/great_expectations/cli/init.py
@@ -5,7 +5,7 @@
 import shutil
 
 from great_expectations.data_context.util import safe_mmkdir
-from great_expectations import __version__
+from great_expectations import __version__ as __version__
 
 
 def script_relative_path(file_path):
@@ -52,8 +52,8 @@ def scaffold_directories_and_notebooks(base_dir):
 
 If you're new to Great Expectations, this tutorial is a good place to start:
 
-    <blue>https://great-expectations.readthedocs.io/en/latest/getting_started.html?src=cli</blue>
-"""
+    <blue>https://docs.greatexpectations.io/en/latest/getting_started.html?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue>
+""".format(__version__.replace(".", "_"))
 
 msg_prompt_lets_begin = """
 Let's add Great Expectations to your project, by scaffolding a new great_expectations directory:

diff --git a/great_expectations/data_context/data_context.py b/great_expectations/data_context/data_context.py
@@ -1482,7 +1482,7 @@ def profile_datasource(self,
         Returns:
             List of (expectation_suite, EVR) tuples for each of the data_assets found in the datasource
         """
-        logger.info("\nProfiling '%s' with '%s'" % (datasource_name, profiler.__name__))
+        logger.info("Profiling '%s' with '%s'" % (datasource_name, profiler.__name__))
         profiling_results = []
         data_asset_names = self.get_available_data_asset_names(datasource_name)
         if generator_name is None:

diff --git a/great_expectations/render/base.py b/great_expectations/render/base.py
diff --git a/great_expectations/render/renderer/content_block/bullet_list_content_block.py b/great_expectations/render/renderer/content_block/bullet_list_content_block.py
@@ -1,7 +1,7 @@
 import copy
-import datetime
 
 from .content_block import ContentBlockRenderer
+from ...util import ordinal
 
 
 def substitute_none_for_missing(kwargs, kwarg_list):
@@ -76,14 +76,13 @@ def expect_column_to_exist(cls, expectation, styling=None, include_column_name=T
             ["column", "column_index"],
         )
 
-        if params["column_index"] == None:
+        if params["column_index"] is None:
             if include_column_name:
                 template_str = "$column is a required field."
             else:
                 template_str = "is a required field."
         else:
-            #!!! FIXME: this works for 4th, 5th, 6th, etc, but is dumb about 1th, 2th, and 3th.
-            params["column_indexth"] = str(params["column_index"])+"th"
+            params["column_indexth"] = ordinal(params["column_index"])
             if include_column_name:
                 template_str = "$column must be the $column_indexth field"
             else:
@@ -282,10 +281,9 @@ def expect_column_pair_values_to_be_equal(cls, expectation, styling=None, includ
         # NOTE: This renderer doesn't do anything with "ignore_row_if"
 
         if (params["column_A"] is None) or (params["column_B"] is None):
-            # FIXME: this string is wrong
-            template_str = " has a bogus $expectation_name expectation."
+            template_str = " unrecognized kwargs for expect_column_pair_values_to_be_equal: missing column."
 
-        if params["mostly"] == None:
+        if params["mostly"] is None:
             template_str = "Values in $column_A and $column_B must always be equal."
 
         else:
@@ -307,16 +305,17 @@ def expect_table_columns_to_match_ordered_list(cls, expectation, styling=None, i
             ["column_list"]
         )
 
-        if params["column_list"] == None:
-            # NOTE: I don't know how I feel about these underspecified expectation messages.
+        if params["column_list"] is None:
             template_str = "This table should have a list of columns in a specific order, but that order is not specified."
 
         else:
-            # FIXME: This is slightly wrong, since the whole string (including commas) will get syntax highlighting.
-            # It would be better to have each element highlighted separately.
-            # See `expect_column_distinct_values_to_be_in_set`
-            params["column_list_str"] = ", ".join(params["column_list"])
-            template_str = "This table should have these columns in this order: $column_list_str"
+            template_str = "This table should have these columns in this order: "
+            for idx in range(len(params["column_list"]) - 1):
+                template_str += "$column_list_" + str(idx) + ", "
+                params["column_list_" + str(idx)] = params["column_list"][idx]
+
+            template_str += "$column_list_" + str(idx+1)
+            params["column_list_" + str(idx+1)] = params["column_list"][idx+1]
 
         return [{
             "template": template_str,
@@ -331,10 +330,13 @@ def expect_multicolumn_values_to_be_unique(cls, expectation, styling=None, inclu
             ["column_list", "ignore_row_if"]
         )
 
-        # FIXME: This is slightly wrong, since the whole string (including commas) will get syntax highlighting.
-        # It would be better to have each element highlighted separately, but I need to research methods to do this elegantly.
-        params["column_list_str"] = ", ".join(params["column_list"])
-        template_str = "Values must always be unique across columns: $column_list_str"
+        template_str = "Values must always be unique across columns: "
+        for idx in range(len(params["column_list"]) - 1):
+            template_str += "$column_list_" + str(idx) + ", "
+            params["column_list_" + str(idx)] = params["column_list"][idx]
+
+        template_str += "$column_list_" + str(idx + 1)
+        params["column_list_" + str(idx + 1)] = params["column_list"][idx + 1]
 
         return [{
             "template": template_str,
@@ -381,13 +383,12 @@ def expect_table_row_count_to_equal(cls, expectation, styling=None, include_colu
 
     @classmethod
     def expect_column_distinct_values_to_be_in_set(cls, expectation, styling=None, include_column_name=True):
-        # TODO: thoroughly review this method. It was implemented quickly and hackily.
         params = substitute_none_for_missing(
             expectation["kwargs"],
             ["column", "value_set"],
         )
 
-        if params["value_set"] == None:
+        if params["value_set"] is None:
 
             if include_column_name:
                 template_str = "$column values must belong to a set, but that set is not specified."
@@ -415,7 +416,6 @@ def expect_column_distinct_values_to_be_in_set(cls, expectation, styling=None, i
 
     @classmethod
     def expect_column_values_to_not_match_regex(cls, expectation, styling=None, include_column_name=True):
-        # TODO: thoroughly review this method. It was implemented quickly and hackily.
         params = substitute_none_for_missing(
             expectation["kwargs"],
             ["column", "regex", "mostly"],
@@ -434,7 +434,6 @@ def expect_column_values_to_not_match_regex(cls, expectation, styling=None, incl
 
     @classmethod
     def expect_column_values_to_not_be_null(cls, expectation, styling=None, include_column_name=True):
-        # TODO: thoroughly review this method. It was implemented quickly and hackily.
         params = substitute_none_for_missing(
             expectation["kwargs"],
             ["column", "mostly"],
@@ -453,17 +452,23 @@ def expect_column_values_to_not_be_null(cls, expectation, styling=None, include_
 
     @classmethod
     def expect_column_proportion_of_unique_values_to_be_between(cls, expectation, styling=None, include_column_name=True):
-        # TODO: thoroughly review this method. It was implemented quickly and hackily.
         params = substitute_none_for_missing(
             expectation["kwargs"],
             ["column", "min_value", "max_value"],
         )
 
-        if include_column_name:
-            template_str = "$column must have between $min_value and $max_value% unique values."
+        if params["min_value"] is None and params["max_value"] is None:
+            template_str = "may have any percentage of unique values."
+        elif params["min_value"] is None:
+            template_str = "must have no more than $max_value% unique values."
+        elif params["max_value"] is None:
+            template_str = "must have at least $min_value% unique values."
         else:
             template_str = "must have between $min_value and $max_value% unique values."
 
+        if include_column_name:
+            template_str = "$column " + template_str
+
         return [{
             "template": template_str,
             "params": params,
@@ -472,7 +477,6 @@ def expect_column_proportion_of_unique_values_to_be_between(cls, expectation, st
 
     @classmethod
     def expect_column_values_to_be_unique(cls, expectation, styling=None, include_column_name=True):
-        # TODO: thoroughly review this method. It was implemented quickly and hackily.
         params = substitute_none_for_missing(
             expectation["kwargs"],
             ["column", ],
@@ -491,7 +495,6 @@ def expect_column_values_to_be_unique(cls, expectation, styling=None, include_co
 
     @classmethod
     def expect_column_values_to_be_in_type_list(cls, expectation, styling=None, include_column_name=True):
-        # TODO: thoroughly review this method. It was implemented quickly and hackily.
         params = substitute_none_for_missing(
             expectation["kwargs"],
             ["column", "type_list", "mostly"],

diff --git a/great_expectations/render/renderer/fancy_column_section_renderer.py b/great_expectations/render/renderer/fancy_column_section_renderer.py
diff --git a/great_expectations/render/renderer/page_renderer.py b/great_expectations/render/renderer/page_renderer.py
@@ -14,6 +14,7 @@ class PrescriptivePageRenderer(Renderer):
     def render(cls, expectations):
         # Group expectations by column
         columns = {}
+        ordered_columns = None
         for expectation in expectations["expectations"]:
             if "column" in expectation["kwargs"]:
                 column = expectation["kwargs"]["column"]
@@ -23,8 +24,15 @@ def render(cls, expectations):
                 columns[column] = []
             columns[column].append(expectation)
 
-        # TODO: in general, there should be a mechanism for imposing order here.
-        ordered_columns = list(columns.keys())
+            # if possible, get the order of columns from expect_table_columns_to_match_ordered_list
+            if expectation["expectation_type"] == "expect_table_columns_to_match_ordered_list":
+                exp_column_list = expectation["kwargs"]["column_list"]
+                if exp_column_list and len(exp_column_list) > 0:
+                    ordered_columns = exp_column_list
+
+        # if no order of columns is expected, sort alphabetically
+        if not ordered_columns:
+            ordered_columns = sorted(list(columns.keys()))
 
         return {
             "renderer_type": "PrescriptivePageRenderer",
@@ -50,15 +58,9 @@ def render(cls, validation_results):
                 columns[column] = []
             columns[column].append(evr)
 
-        # TODO: in general, there should be a mechanism for imposing order here.
-        ordered_columns = list(columns.keys())
-
+        ordered_columns = Renderer._get_column_list_from_evrs(validation_results)
         column_types = DescriptiveOverviewSectionRenderer._get_column_types(validation_results)
 
-        # FIXME: This is a hack to limit output on one training file
-        # if "Reporting Area" in ordered_columns:
-        #     ordered_columns = ["Reporting Area"]
-
         if "data_asset_name" in validation_results["meta"] and validation_results["meta"]["data_asset_name"]:
             data_asset_name = validation_results["meta"]["data_asset_name"].split(
                 '/')[-1]

diff --git a/great_expectations/render/renderer/renderer.py b/great_expectations/render/renderer/renderer.py
@@ -68,11 +68,26 @@ def _find_all_evrs_by_type(cls, evrs, type_, column_=None):
 
     @classmethod
     def _get_column_list_from_evrs(cls, evrs):
-        # Group EVRs by column
-        columns = list(set([evr["expectation_config"]["kwargs"]["column"] for evr in evrs["results"] if "column" in evr["expectation_config"]["kwargs"]]))
+        """
+        Get the list of column names referenced by a collection of EVRs.
 
-        # TODO: in general, there should be a mechanism for imposing order here.
-        ordered_columns = columns
+        If cls._find_evr_by_type returns an expect_table_columns_to_match_ordered_list EVR, that EVR's result["observed_value"] is returned unchanged, including its order.
+
+        Otherwise, the distinct "column" kwargs of the EVRs' expectation configs are collected (EVRs without a "column" kwarg are ignored) and returned in sorted order.
+
+        :param evrs: either a container with a "results" key (its value is used as the EVRs) or the EVRs themselves
+        :return: list of columns with best effort sorting
+        """
+        evrs_ = evrs["results"] if "results" in evrs else evrs
+
+        expect_table_columns_to_match_ordered_list_evr = cls._find_evr_by_type(evrs_, "expect_table_columns_to_match_ordered_list")
+        if expect_table_columns_to_match_ordered_list_evr:
+            ordered_columns = expect_table_columns_to_match_ordered_list_evr["result"]["observed_value"]
+        else:
+            # De-duplicate the column names through a set; the sort below makes the otherwise arbitrary set order deterministic
+            columns = list(set([evr["expectation_config"]["kwargs"]["column"] for evr in evrs_ if "column" in evr["expectation_config"]["kwargs"]]))
+
+            ordered_columns = sorted(columns)
 
         return ordered_columns
 
diff --git a/great_expectations/render/util.py b/great_expectations/render/util.py
@@ -0,0 +1,15 @@
+"""Rendering utility"""
+
+SUFFIXES = {1: 'st', 2: 'nd', 3: 'rd'}
+
+
+def ordinal(num):
+    """Return integer `num` as an English ordinal string, e.g. 1 -> "1st", 12 -> "12th", -2 -> "-2nd"."""
+    # Taken from https://codereview.stackexchange.com/questions/41298/producing-ordinal-numbers/41301
+    # Consider a library like num2word when internationalization comes
+    magnitude = abs(num)  # % by a positive modulus is never negative (-1 % 10 == 9), so drop the sign first
+    if 10 <= magnitude % 100 <= 20:
+        suffix = 'th'  # 11th, 12th, 13th: the teens never take st/nd/rd
+    else:
+        suffix = SUFFIXES.get(magnitude % 10, 'th')  # the second parameter is a default.
+    return str(num) + suffix