In [None]:
"""Example: How to create an Expectation Suite with the Missingness Data Assistant

--documentation--
    https://docs.greatexpectations.io/docs/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_missingness_data_assistant
"""
import great_expectations as gx

# Obtain the Data Context; every later cell (datasource creation, assistant
# run, checkpoint, Data Docs) goes through this object.
context = gx.get_context()

In [None]:
# Register a pandas filesystem datasource on the context.
#   base_directory -- folder containing the data files; replace with your data directory
#   name           -- custom name for the new datasource, can be used to retrieve it later
datasource = context.sources.add_pandas_filesystem(
    base_directory="../../tests/test_sets/taxi_yellow_tripdata_samples/",
    name="my_custom_datasource_name",
)

In [None]:
# Add a CSV asset to the datasource. The batching regex matches file names of
# the form yellow_tripdata_sample_<4 digits>-<2 digits>.csv and captures the
# two digit runs as the named groups `year` and `month`.
asset = datasource.add_csv_asset(
    "my_custom_asset_name",  # custom name to assign to the asset, can be used to retrieve asset later
    batching_regex=r"yellow_tripdata_sample_(?P<year>\d{4})-(?P<month>\d{2})\.csv",
)

In [None]:
# Build a batch request from the asset with no extra options. The same request
# is reused below by the Data Assistant run and by the Checkpoint.
batch_request = asset.build_batch_request()

In [None]:
# Run the Missingness Data Assistant against the batch request.
#
# The column names below are handed to the assistant through its
# `exclude_column_names` argument; edit the list to suit your own data.
# fmt: off
exclude_column_names = [
    "VendorID", "store_and_fwd_flag",
    "pickup_datetime", "dropoff_datetime",
    "RatecodeID", "PULocationID", "DOLocationID",
    "payment_type", "fare_amount", "extra", "mta_tax",
    "tip_amount", "tolls_amount", "improvement_surcharge",
]
# fmt: on

# The result object is used by the following cells to show expectations,
# plot metrics, and extract the Expectation Suite.
data_assistant_result = context.assistants.missingness.run(
    exclude_column_names=exclude_column_names,
    batch_request=batch_request,
)

In [None]:
# View the expectations generated by the assistant run, using the result
# object's `show_expectations_by_expectation_type` method.

data_assistant_result.show_expectations_by_expectation_type()

In [None]:
# View the metrics that were computed and used to generate the expectations
# with the result object's `plot_metrics` method.

data_assistant_result.plot_metrics()

In [None]:
# Save the Expectation Suite produced by the Data Assistant.
#
# Extract the suite from the assistant result under a custom name; the
# Checkpoint cell refers to the suite by this same name.
expectation_suite = data_assistant_result.get_expectation_suite(
    expectation_suite_name="my_custom_expectation_suite_name"  # Your custom name here
)

# No earlier cell creates `validator`, so it must be built here; otherwise this
# cell fails with NameError on a fresh kernel (Restart & Run All). The validator
# is created directly with the generated suite attached.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite=expectation_suite,
)

# Persist the suite through the validator; discard_failed_expectations=False
# keeps every generated expectation in the saved suite.
validator.save_expectation_suite(discard_failed_expectations=False)

In [None]:
# Verify the new Expectation Suite by running it through a Checkpoint.
# The Checkpoint is created (or updated if it already exists) with the same
# batch request used for the assistant run and the suite name chosen above.
checkpoint = context.add_or_update_checkpoint(
    expectation_suite_name="my_custom_expectation_suite_name",
    batch_request=batch_request,
    name="yellow_tripdata_sample_all_years_checkpoint",
)

# Execute the Checkpoint under an explicit run name.
checkpoint_result = checkpoint.run(run_name="my_run_name")

# The notebook stops here unless the run reports success.
assert checkpoint_result["success"] is True

In [None]:
# Build Data Docs for this context.
context.build_data_docs()

In [None]:
# Open the Data Docs for viewing.
context.open_data_docs()