Merge e293a51 into 90ff14b

great-expectations · Jun 28, 2019 · a04e50d · a04e50d
2 parents 90ff14b + e293a51
commit a04e50d
Show file tree

Hide file tree

Showing 165 changed files with 16,807 additions and 1,432 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,3 +1,4 @@
+# dist: xenial
 language: python
 os:
   - linux
@@ -27,15 +28,19 @@ matrix:
   # - dist: xenial
   #   python: 3.7
   #   env: PANDAS=latest
+addons:
+  postgresql: "9.4"
 services:
   - postgresql
+  - mysql
 install:
-  # - ./travis-java.sh
+#   - ./travis-java.sh
   - pip install --only-binary=numpy,scipy numpy scipy
   - if [ "$PANDAS" = "latest" ]; then pip install pandas; else pip install pandas==$PANDAS; fi
   - pip install -r requirements-dev.txt
 before_script:
   - psql -c 'create database test_ci;' -U postgres
+  - mysql -u root --password="" -e 'create database test_ci;'
 script:
   - pytest --cov=great_expectations tests/
 after_success:

diff --git a/README.md b/README.md
@@ -46,9 +46,15 @@ To get more done with data, faster. Teams use great_expectations to
 How do I get started?
 --------------------------------------------------------------------------------
 
-It's easy! Just use pip install:
+It's easy! 
+    First use pip install:
 
-    $ pip install great_expectations
+        $ pip install great_expectations
+
+Then run this command in the root directory of the project you want to try Great Expectations on:
+
+        $ great_expectations init
+
 
 You can also clone the repository, which includes examples of using great_expectations.
 

diff --git a/docs/source/autoinspection.rst b/docs/source/autoinspection.rst
diff --git a/docs/source/data_context_module.rst b/docs/source/data_context_module.rst
@@ -25,27 +25,27 @@ great_expectations.data_context.base
 great_expectations.data_context.PandasCSVDataContext
 ----------------------------------------------------
 
-.. automodule:: great_expectations.data_context.pandas_context
+.. automodule:: great_expectations.data_context.pandas_source
     :members:
     :undoc-members:
     :show-inheritance:
     :exclude-members: PandasCSVDataContext
 
-    .. autoclass:: great_expectations.data_context.pandas_context.PandasCSVDataContext
+    .. autoclass:: great_expectations.data_context.pandas_source.PandasCSVDataSource
         :members:
         :undoc-members:
         :show-inheritance:
 
 great_expectations.data_context.SqlAlchemyDataContext
 -----------------------------------------------------
 
-.. automodule:: great_expectations.data_context.sqlalchemy_context
+.. automodule:: great_expectations.data_context.sqlalchemy_source
     :members:
     :undoc-members:
     :show-inheritance:
     :exclude-members: SqlAlchemyDataContext
 
-    .. autoclass:: great_expectations.data_context.sqlalchemy_context.SqlAlchemyDataContext
+    .. autoclass:: great_expectations.data_context.sqlalchemy_source.SqlAlchemyDataSource
         :members:
         :undoc-members:
         :show-inheritance:

diff --git a/docs/source/data_contexts.rst b/docs/source/data_contexts.rst
@@ -1,5 +1,8 @@
 .. _data_contexts:
 
+
+.. warning:: This page has not yet been updated for the GE 0.7 API.
+
 ================================================================================
 Data Contexts
 ================================================================================

diff --git a/docs/source/dataset_module.rst b/docs/source/dataset_module.rst
@@ -79,11 +79,3 @@ great_expectations.dataset.util
     :undoc-members:
     :show-inheritance:
 
-
-great_expectations.dataset.autoinspect
---------------------------------------
-
-.. automodule:: great_expectations.dataset.autoinspect
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/evaluation_parameters.rst b/docs/source/evaluation_parameters.rst
@@ -24,7 +24,7 @@ value that should be used during the initial evaluation of the expectation.
     }
 
 You can also store parameter values in a special dictionary called evaluation_parameters that is stored in the \
-expectations_config to be available to multiple expectations or while declaring additional expectations.
+expectation_suite to be available to multiple expectations or while declaring additional expectations.
 
 .. code-block:: python
 
@@ -42,7 +42,7 @@ When validating expectations, you can provide evaluation parameters based on ups
 
 .. code-block:: python
 
-    >> my_df.validate(expectations_config=my_dag_step_config, evaluation_parameters={"upstream_row_count": upstream_row_count})
+    >> my_df.validate(expectation_suite=my_dag_step_config, evaluation_parameters={"upstream_row_count": upstream_row_count})
 
 Finally, the command-line tool also allows you to provide a JSON file that contains parameters to use during evaluation:
 
@@ -52,4 +52,4 @@ Finally, the command-line tool also allows you to provide a JSON file that conta
     {
         "upstream_row_count": 10
     }
-    >> great_expectations validate --evaluation_paramters=my_parameters_file.json dataset_file.csv expectations_config.json
+    >> great_expectations validate --evaluation_parameters=my_parameters_file.json dataset_file.csv expectation_suite.json
diff --git a/docs/source/expectations.rst b/docs/source/expectations.rst
@@ -125,13 +125,13 @@ You can also add notes or even structured metadata to expectations to describe t
 Saving Expectations
 ------------------------------------------------------------------------------
 
-At the end of your exploration, call `save_expectations` to store all Expectations from your session to your pipeline test files.
+At the end of your exploration, call `save_expectation_suite` to store all Expectations from your session to your pipeline test files.
 
 This is how you always know what to expect from your data.
 
 .. code-block:: bash
 
-    >> my_df.save_expectations_config("my_titanic_expectations.json")
+    >> my_df.save_expectation_suite("my_titanic_expectations.json")
 
 For more detail on how to control expectation output, please see :ref:`standard_arguments` and :ref:`result_format`.
 

diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst
@@ -58,10 +58,12 @@ Datetime and JSON parsing
 Aggregate functions
 --------------------------------------------------------------------------------
 
+* :func:`expect_column_distinct_values_to_be_in_set <great_expectations.dataset.dataset.Dataset.expect_column_distinct_values_to_be_in_set>`
 * :func:`expect_column_distinct_values_to_contain_set <great_expectations.dataset.dataset.Dataset.expect_column_distinct_values_to_contain_set>`
 * :func:`expect_column_distinct_values_to_equal_set <great_expectations.dataset.dataset.Dataset.expect_column_distinct_values_to_equal_set>`
 * :func:`expect_column_mean_to_be_between <great_expectations.dataset.dataset.Dataset.expect_column_mean_to_be_between>`
 * :func:`expect_column_median_to_be_between <great_expectations.dataset.dataset.Dataset.expect_column_median_to_be_between>`
+* :func:`expect_column_quantile_values_to_be_between <great_expectations.dataset.dataset.Dataset.expect_column_quantile_values_to_be_between>`
 * :func:`expect_column_stdev_to_be_between <great_expectations.dataset.dataset.Dataset.expect_column_stdev_to_be_between>`
 * :func:`expect_column_unique_value_count_to_be_between <great_expectations.dataset.dataset.Dataset.expect_column_unique_value_count_to_be_between>`
 * :func:`expect_column_proportion_of_unique_values_to_be_between <great_expectations.dataset.dataset.Dataset.expect_column_proportion_of_unique_values_to_be_between>`

diff --git a/docs/source/implemented_expectations.rst b/docs/source/implemented_expectations.rst
@@ -25,21 +25,21 @@ out the missing implementations!
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_values_to_be_null`                                            | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
-|`expect_column_values_to_be_of_type`                                         | True     | False    | False    |
+|`expect_column_values_to_be_of_type`                                         | True     | True     | False    |
 +-----------------------------------------------------------------------------+----------+----------+----------+
-|`expect_column_values_to_be_in_type_list`                                    | True     | False    | False    |
+|`expect_column_values_to_be_in_type_list`                                    | True     | True     | False    |
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_values_to_be_in_set`                                          | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_values_to_not_be_in_set`                                      | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
-|`expect_column_values_to_be_between`                                         | True     | True     | False    |
+|`expect_column_values_to_be_between`                                         | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_values_to_be_increasing`                                      | True     | False    | False    |
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_values_to_be_decreasing`                                      | True     | False    | False    |
 +-----------------------------------------------------------------------------+----------+----------+----------+
-|`expect_column_value_lengths_to_be_between`                                  | True     | True     | False    |
+|`expect_column_value_lengths_to_be_between`                                  | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_value_lengths_to_equal`                                       | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
@@ -67,7 +67,7 @@ out the missing implementations!
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_mean_to_be_between`                                           | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
-|`expect_column_median_to_be_between`                                         | True     | True     | False    |
+|`expect_column_median_to_be_between`                                         | True     | True     | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+
 |`expect_column_stdev_to_be_between`                                          | True     | False    | True     |
 +-----------------------------------------------------------------------------+----------+----------+----------+

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -25,12 +25,13 @@ Advanced
 
    standard_arguments
    result_format
-   autoinspection
+   profiling
    evaluation_parameters
    custom_expectations
    conventions
    roadmap_changelog
    implemented_expectations
+   migrating
 
 Module Docs
 -------------
@@ -40,6 +41,7 @@ Module Docs
    data_asset_module
    dataset_module
    data_context_module
+   profile
 
 Indices and tables
 ------------------

diff --git a/docs/source/migrating.rst b/docs/source/migrating.rst
@@ -0,0 +1,9 @@
+.. _migrating_versions:
+
+
+Migrating between Versions
+===========================
+
+
+
+TODO: Describe datacontext -> datasource migration
diff --git a/docs/source/profile.rst b/docs/source/profile.rst
@@ -0,0 +1,12 @@
+.. _profile_module:
+
+Profile Module
+==================================
+
+great_expectations.profile
+--------------------------------------
+
+.. automodule:: great_expectations.profile
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/profiling.rst b/docs/source/profiling.rst
@@ -0,0 +1,27 @@
+.. _profiling:
+
+================================================================================
+Profiling
+================================================================================
+
+It can be very convenient to have Great Expectations automatically review a \
+dataset and suggest expectations that may be appropriate. Currently, there's \
+a very basic, but easily extensible, profiling capability available.
+
+Dataset objects have a `profile` method which allows you to provide a \
+profiler class that will evaluate a dataset object and add expectations to it.
+
+.. code-block:: python
+
+    >> import great_expectations as ge
+    >> df = ge.dataset.PandasDataset({"col": [1, 2, 3, 4, 5]})
+    >> df.profile(ge.profile.ColumnsExistProfiler)
+    >> df.get_expectation_suite()
+        {'data_asset_name': None,
+         'expectation_suite_name': None,
+         'meta': {'great_expectations.__version__': '0.7.0'},
+         'expectations': [
+             {'expectation_type': 'expect_column_to_exist',
+              'kwargs': {'column': 'col'}
+             }]
+        }
diff --git a/docs/source/roadmap_changelog.rst b/docs/source/roadmap_changelog.rst
@@ -9,10 +9,16 @@ Planned Features
 * Improved variable typing
 * Support for non-tabular datasources (e.g. JSON, XML, AVRO)
 
-v.0.6.1__develop
+v.0.7.0__develop
 ________________
-
-
+* Name clarifications: we've stopped using the overloaded terms "expectations
+  config" and "config" and instead use "expectation suite" to refer to a
+  collection (or suite!) of expectations that can be used for validating a
+  data asset.
+* Major refactor of autoinspect. Autoinspect is now built around a module
+  called "profile" which provides a class-based structure for building
+  expectation suites. There is no longer a default "autoinspect_func" --
+  calling autoinspect requires explicitly passing the desired profiler.
 
 v.0.6.1
 ________________

diff --git a/docs/source/standard_arguments.rst b/docs/source/standard_arguments.rst
@@ -7,7 +7,7 @@ Standard arguments for expectations
 All Expectations return a json-serializable dictionary when evaluated, and share four standard (optional) arguments:
 
 - :ref:`result_format`: controls what information is returned from the evaluation of the expectation.
- - :ref:`include_config`: If true, then the expectation config itself is returned as part of the result object.
+ - :ref:`include_config`: If true, then the expectation suite itself is returned as part of the result object.
  - :ref:`catch_exceptions`: If true, execution will not fail if the Expectation encounters an error. Instead, it will \
    return success = False and provide an informative error message.
  - :ref:`meta`: allows user-supplied meta-data to be stored with an expectation.
@@ -23,7 +23,7 @@ See :ref:`result_format` for more information.
 `include_config`
 ------------------------------------------------------------------------------
 
-All Expectations accept a boolean `include_config` parameter. If true, then the expectation config itself is returned as part of the result object
+All Expectations accept a boolean `include_config` parameter. If true, then the expectation suite itself is returned as part of the result object.
 
 .. code-block:: bash
 

diff --git a/docs/source/validation.rst b/docs/source/validation.rst
@@ -10,10 +10,10 @@ Once you've constructed and stored Expectations, you can use them to validate ne
 
     >> import json
     >> import great_expectations as ge
-    >> my_expectations_config = json.load(file("my_titanic_expectations.json"))
+    >> my_expectation_suite = json.load(open("my_titanic_expectations.json"))
     >> my_df = ge.read_csv(
         "./tests/examples/titanic.csv",
-        expectations_config=my_expectations_config
+        expectation_suite=my_expectation_suite
     )
     >> my_df.validate()
 

diff --git a/examples/integrations/airflow/hooks/db_hook.py b/examples/integrations/airflow/hooks/db_hook.py
@@ -1,6 +1,11 @@
 import great_expectations as ge
 from airflow.hooks.mysql_hook import MySqlHook
 
+####
+#
+# NOTE: this code has not been updated for the new GE 0.7 naming conventions
+#
+####
 
 class ExpectationMySQLHook(MySqlHook):
 

diff --git a/examples/integrations/airflow/hooks/s3_csv_hook.py b/examples/integrations/airflow/hooks/s3_csv_hook.py
@@ -5,6 +5,11 @@
 from airflow.hooks.S3_hook import S3Hook
 import great_expectations as ge
 
+####
+#
+# NOTE: this code has not been updated for the new GE 0.7 naming conventions
+#
+####
 
 class ExpectationS3CsvHook(S3Hook):