Merge pull request #507 from great-expectations/feature/new_renderer
Feature/new renderer
jcampbell committed Jul 4, 2019
2 parents 830217a + 79932ce commit da17f7b
Showing 69 changed files with 3,219 additions and 1,904 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,6 +1,9 @@
# Documentation build
docs/source/_build

# Render test output
tests/render/output/*

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
2 changes: 1 addition & 1 deletion docs/source/core_concepts.rst
@@ -11,4 +11,4 @@ Core Concepts
/core_concepts/data_context
/core_concepts/datasource
/core_concepts/custom_expectations
/core_concepts/glossary
/glossary
15 changes: 2 additions & 13 deletions docs/source/get_in_touch.rst
@@ -9,22 +9,11 @@ Get in Touch
What's the best way to get in touch with the Great Expectations team?
---------------------------------------------------------------------

Join our Slack channel here: https://tinyurl.com/great-expectations-slack

If you have questions, comments, or feature requests, `opening an
issue <https://github.com/great-expectations/great_expectations/issues/new>`__ on
`GitHub <https://github.com/great-expectations/great_expectations/issues>`__
is the best path forward.


Great Expectations doesn't do X. Is it right for my use case?
-------------------------------------------------------------

It depends. If you have needs that the library doesn't meet yet, please
`upvote an existing
issue <https://github.com/great-expectations/great_expectations/issues>`__
or `open a new
issue <https://github.com/great-expectations/great_expectations/issues/new>`__
and we'll see what we can do. Great Expectations is under active
development, so your use case might be supported soon.
6 changes: 4 additions & 2 deletions docs/source/getting_started.rst
@@ -31,10 +31,12 @@ Then go to the root directory of the project you want to use Great Expectations

:ref:`tutorial_create_expectations`

:ref:`tutorial_pipeline_integration`

.. toctree::
:maxdepth: 2
:hidden:

tutorial_init
tutorial_create_expectations
/tutorials/cli_init
/tutorials/create_expectations
/tutorials/pipeline_integration
File renamed without changes.
2 changes: 2 additions & 0 deletions docs/source/roadmap_changelog.rst
@@ -57,6 +57,8 @@ to top-level names.
``expect_column_values_to_be_between`` ``None`` and ``None``. This makes it possible to progressively
tighten expectations while using them as the basis for profiling results and documentation.

* Enabled caching on dataset objects by default.

* Bugfixes and improvements:

* New expectations:
26 changes: 0 additions & 26 deletions docs/source/tutorial_init.rst

This file was deleted.

9 changes: 9 additions & 0 deletions docs/source/tutorials/cli_init.rst
@@ -0,0 +1,9 @@
.. _tutorial_init:

Tutorial - great_expectations init
==================================

.. toctree::
:maxdepth: 2

More content coming soon...
@@ -6,14 +6,53 @@ Tutorial - Create Expectations
.. toctree::
:maxdepth: 2

Creating expectations is an opportunity to blend contextual knowledge from subject-matter experts and insights
from profiling and performing exploratory analysis on your dataset.

Get DataContext object
Video
------

This brief video covers the basics of creating expectations.


Get DataContext Object
-----------------------
The DataContext object manages access to expectation suites, datasources,
notification settings, and data fixtures for your project.


Data Assets
-------------



Get Batch
----------



Reader Options
---------------



Create Expectations
--------------------------------


Now that you have one of the data batches loaded, you can call ``expect_*`` methods on the dataframe to check
whether you can make an assumption about the data.

For example, to check whether values in the column ``order_date`` are never empty, call: ``df.expect_column_values_to_not_be_null('order_date')``

**How do I know which types of expectations I can add?**

* *Tab-complete* this statement, and add an expectation of your own; copy the cell to add more.
* In Jupyter, you can also use *shift-tab* to see the docstring for each expectation, including which parameters it takes and more information about what it checks.
* A glossary of expectations you can add is here:
  https://great-expectations.readthedocs.io/en/latest/glossary.html
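As a rough sketch of what such a check verifies, here is the null-count logic behind ``expect_column_values_to_not_be_null``, written in plain pandas (the ``order_date`` sample data below is hypothetical; a Great Expectations dataset wraps a DataFrame like this and returns a result object with a ``success`` flag instead of bare values):

```python
import pandas as pd

# Hypothetical orders data; one "order_date" value is missing.
frame = pd.DataFrame({"order_date": ["2019-07-01", None, "2019-07-03"]})

# The core check behind expect_column_values_to_not_be_null amounts to
# counting nulls in the column; the expectation succeeds only when none exist.
unexpected_count = int(frame["order_date"].isnull().sum())
success = unexpected_count == 0
```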



Expectations include:

- ``expect_table_row_count_to_equal``
@@ -23,7 +62,7 @@ Expectations include:
- ...and many more

Visit the `glossary of
expectations <https://docs.greatexpectations.io/en/latest/glossary.html>`__
for a complete list of expectations that are currently part of the Great
Expectations vocabulary.

@@ -32,7 +71,7 @@ How do I learn more?
--------------------

For full documentation, visit the `Great Expectations
documentation <https://docs.greatexpectations.io/en/latest/>`__.

`Down with Pipeline
Debt! <https://medium.com/@expectgreatdata/down-with-pipeline-debt-introducing-great-expectations-862ddc46782a>`__
99 changes: 99 additions & 0 deletions docs/source/tutorials/pipeline_integration.rst
@@ -0,0 +1,99 @@
.. _tutorial_pipeline_integration:

Tutorial - Pipeline Integration
=================================

This tutorial covers integrating Great Expectations (GE) into an existing pipeline.

Continuing the example from the previous tutorial,
we created an expectation suite for the data asset ``notable_works_by_charles_dickens``. By doing this
we defined what we expect a valid batch of this data to look like.

Once our pipeline is deployed, it will process new batches of this data asset as they arrive as files.

Just before calling the method that does the computation on a new batch, we call Great Expectations'
validate method to make sure that the file meets our expectations about
what a valid batch of this data asset should look like.
If the file does not pass validation, we can decide what to do; for example, stop the pipeline, since its output on invalid input cannot be guaranteed.

To run validation we need two things:

* A batch to validate; in our case it is a file loaded into a pandas DataFrame (or a Spark DataFrame, if your pipeline is built on Spark)
* Expectations to validate against
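The validate-then-process guard described above can be sketched as follows (``process_if_valid`` and ``process_fn`` are illustrative names, not part of the Great Expectations API; the sketch assumes only that the batch's validation result carries a ``success`` key, as shown later in this tutorial):

```python
# Minimal sketch of the validate-then-process guard (illustrative names).
def process_if_valid(batch, run_id, process_fn):
    """Run process_fn on batch only if the batch passes validation."""
    result = batch.validate(run_id=run_id)
    if not result["success"]:
        # Stop the pipeline: its output on invalid input cannot be guaranteed.
        raise RuntimeError("Batch failed validation; halting the pipeline.")
    return process_fn(batch)
```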




Video
------

This brief video covers the basics of integrating GE into a pipeline.

Get a DataContext object
------------------------

A DataContext represents a Great Expectations project. It organizes storage and access for
expectation suites, datasources, notification settings, and data fixtures.
The DataContext is configured via a YAML file stored in a directory called ``great_expectations``;
the configuration file, as well as managed expectation suites, should be stored in version control.

Obtaining a DataContext object gets us access to these resources after the object reads its
configuration file.

::

    import great_expectations as ge

    context = ge.data_context.DataContext()

To read more about DataContext.... TODO: insert link

Run Id
-------

A run id links together validations across batches.


Choose Data Asset and Expectation Suite
-----------------------------------------

We called our data asset ``notable_works_by_charles_dickens`` and created an expectation suite called ``node_1``.


Obtain a Batch to Validate
-----------------------------

We read the new batch of data from a file that our pipeline is about to process and
convert the resulting Pandas DataFrame into a Great Expectations batch that can be validated.

::

    import pandas as pd

    # Names from the previous tutorial: the data asset and the expectation
    # suite we created for it.
    data_asset_name = "notable_works_by_charles_dickens"
    expectation_suite_name = "node_1"

    df = pd.read_csv(file_path_to_validate)
    batch = context.get_batch(data_asset_name, expectation_suite_name, df)

Validate
---------

::

validation_result = batch.validate(run_id=run_id)

if validation_result["success"]:
print("This file meets all expectations from a valid batch of {0:s}".format(data_asset_name))
else:
print("This file is not a valid batch of {0:s}".format(data_asset_name))


Review Validation Results
----------------------------


Send Notifications
-------------------


Save Validation Results
-------------------------


Save Failed Batches
---------------------

59 changes: 35 additions & 24 deletions great_expectations/cli/cli.py
@@ -1,5 +1,35 @@
# -*- coding: utf-8 -*-

"""
# Style guide for the Great Expectations CLI.
### The CLI never writes to disk without asking first.
### Questions are always phrased as conversational sentences.
### Sections are divided by headers: "========== Profiling =========="
### We use punctuation: Please finish sentences with periods, question marks, or an occasional exclamation point.
### Keep indentation consistent! (We're pythonistas, natch.)
### Include exactly one blank line after every question.
### Within those constraints, shorter is better. When in doubt, shorten.
### Clickable links (usually to documentation) are blue.
### Copyable bash commands are green.
"""
from .datasource import (
add_datasource
)
from .init import (
scaffold_directories_and_notebooks,
greeting_1,
msg_prompt_lets_begin,
)
from .util import cli_message
from great_expectations.render.view import DefaultJinjaPageView
from great_expectations.render.renderer import DescriptivePageRenderer, PrescriptivePageRenderer
from great_expectations.data_context import DataContext
from great_expectations.data_asset import FileDataAsset
from great_expectations.dataset import Dataset, PandasDataset
from great_expectations.exceptions import DataContextError
from great_expectations import __version__, read_csv
from pyfiglet import figlet_format
import click
import six
import os
@@ -9,35 +39,16 @@
import warnings
warnings.filterwarnings('ignore')

from pyfiglet import figlet_format
try:
from termcolor import colored
except ImportError:
colored = None

from great_expectations import __version__, read_csv
from great_expectations.exceptions import DataContextError
from great_expectations.dataset import Dataset, PandasDataset
from great_expectations.data_asset import FileDataAsset
from great_expectations.data_context import DataContext

from great_expectations.render.renderer import DescriptivePageRenderer, PrescriptivePageRenderer
from great_expectations.render.view import DescriptivePageView


from .util import cli_message
from .init import (
scaffold_directories_and_notebooks,
greeting_1,
msg_prompt_lets_begin,
)
from .datasource import (
add_datasource
)

# Take over the entire GE module logging namespace when running CLI
logger = logging.getLogger("great_expectations")


@click.group()
@click.version_option(version=__version__)
def cli():
@@ -152,7 +163,7 @@ def init(target_directory):
logger.critical(err.message)
sys.exit(-1)

base_dir = os.path.join(target_directory, "great_expectations")
base_dir = context.root_directory

six.print_(colored(
figlet_format("Great Expectations", font="big"),
@@ -187,7 +198,7 @@ def render(render_object):

model = DescriptivePageRenderer.render(raw)
# model = PrescriptivePageRenderer.render(raw)
print(DescriptivePageView.render(model))
print(DefaultJinjaPageView.render(model))


@cli.command()
@@ -219,7 +230,7 @@ def main():
handler = logging.StreamHandler()
# Just levelname and message Could re-add other info if we want
formatter = logging.Formatter(
' %(message)s')
'%(message)s')
# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
