This notebook creates a Great Expectations Expectation Suite (`taxi_expectation_suite`) and saves it to a filesystem Data Context.

In [1]:
# Imports

import os

import great_expectations as gx
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.data_context import FileDataContext

In [2]:
# Initiate a Filesystem Data Context
#
# The project root can be overridden without editing this cell: set the
# GX_PROJECT_ROOT_DIR environment variable before starting the kernel.
# When the variable is unset (or empty) the original author's local path is
# used as a fallback, so anyone else running this notebook must either set the
# variable or replace the fallback below with the folder that should hold the
# Data Context.
DEFAULT_PROJECT_ROOT_DIR = "/Users/fernandoembrioni/Documents/Fer/repos/fer-gx/datacontext"

# `or` (rather than a .get() default) also covers an empty-string variable.
path_to_empty_folder = os.environ.get("GX_PROJECT_ROOT_DIR") or DEFAULT_PROJECT_ROOT_DIR

# Creates (or reuses) the Data Context rooted at the chosen folder.
context = FileDataContext.create(project_root_dir=path_to_empty_folder)

In [5]:
# Creation of an Expectation Suite
#
# `add_or_update_expectation_suite` is used instead of `add_expectation_suite`
# so that this cell is idempotent: the suite is written to disk by the save
# cell at the end of the notebook, and a plain "add" would then fail on the
# next Restart & Run All because the suite already exists.
# NOTE(review): requires a Great Expectations release that ships the
# add_or_update_* methods — confirm against the installed version.
suite = context.add_or_update_expectation_suite(
    expectation_suite_name="taxi_expectation_suite"
)

In [6]:
# Expectation 1: the table's columns must match this list, in this order.

# Expected column layout, listed in the order the columns must appear.
taxi_column_order = [
    "vendor_id",
    "pickup_datetime",
    "dropoff_datetime",
    "passenger_count",
    "trip_distance",
    "rate_code_id",
    "store_and_fwd_flag",
    "pickup_location_id",
    "dropoff_location_id",
    "payment_type",
    "fare_amount",
    "extra",
    "mta_tax",
    "tip_amount",
    "tolls_amount",
    "improvement_surcharge",
    "total_amount",
    "congestion_surcharge",
]

# Optional comment attached to the expectation through `meta`; it is meant to
# be rendered in Data Docs (see the guide `How to add comments to Expectations
# and display them in Data Docs`).
column_order_notes = {
    "format": "markdown",
    "content": "Check all the expected columns are ordered",
}

# `expectation_type` names the expectation being added; `kwargs` carries the
# parameters / keyword arguments accepted by that expectation type.
expectation_configuration_1 = ExpectationConfiguration(
    expectation_type="expect_table_columns_to_match_ordered_list",
    kwargs={"column_list": taxi_column_order},
    meta={"notes": column_order_notes},
)

# Register the expectation on the suite. Kept as the cell's last expression so
# the value returned by the call is displayed as the cell output.
suite.add_expectation(expectation_configuration=expectation_configuration_1)

{"expectation_type": "expect_table_columns_to_match_ordered_list", "kwargs": {"column_list": ["vendor_id", "pickup_datetime", "dropoff_datetime", "passenger_count", "trip_distance", "rate_code_id", "store_and_fwd_flag", "pickup_location_id", "dropoff_location_id", "payment_type", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "improvement_surcharge", "total_amount", "congestion_surcharge"]}, "meta": {"notes": {"format": "markdown", "content": "Check all the expected columns are ordered"}}}

In [7]:
# Expectation 2: every value of `rate_code_id` must belong to the given set.

# Arguments for the expectation: target column and the allowed values.
rate_code_kwargs = {"column": "rate_code_id", "value_set": [1]}

# No `meta` is supplied here, so this expectation carries no optional comment.
expectation_configuration_2 = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_in_set",
    kwargs=rate_code_kwargs,
)

# Last expression of the cell, so the call's return value is displayed.
suite.add_expectation(expectation_configuration=expectation_configuration_2)

{"expectation_type": "expect_column_values_to_be_in_set", "kwargs": {"column": "rate_code_id", "value_set": [1]}, "meta": {}}

In [8]:
# Expectation 3: `vendor_id` must not be null, with `mostly` set to 0.95.

# Arguments for the expectation: target column and the `mostly` threshold.
vendor_id_kwargs = {"column": "vendor_id", "mostly": 0.95}

# Markdown comment attached to the expectation through `meta`.
vendor_id_notes = {
    "format": "markdown",
    "content": "Check that `vendor_id` column is not null",
}

expectation_configuration_3 = ExpectationConfiguration(
    expectation_type="expect_column_values_to_not_be_null",
    kwargs=vendor_id_kwargs,
    meta={"notes": vendor_id_notes},
)

# Last expression of the cell, so the call's return value is displayed.
suite.add_expectation(expectation_configuration=expectation_configuration_3)

{"expectation_type": "expect_column_values_to_not_be_null", "kwargs": {"column": "vendor_id", "mostly": 0.95}, "meta": {"notes": {"format": "markdown", "content": "Check that `vendor_id` column is not null"}}}

In [9]:
# Persist the suite through the Data Context so it can be reused later.

saved_suite_location = context.save_expectation_suite(expectation_suite=suite)

# Echo whatever the save call returned as the cell output.
saved_suite_location

'/Users/fernandoembrioni/Documents/Fer/repos/fer-gx/datacontext/great_expectations/expectations/taxi_expectation_suite.json'