In [8]:
import great_expectations as gx
from great_expectations.checkpoint.types.checkpoint_result import CheckpointResult
from great_expectations.checkpoint import Checkpoint
from great_expectations.core.batch import BatchRequest
from great_expectations.datasource.fluent.interfaces import DataAsset

In [9]:
# Load the Data Context rooted at ./great_expectations/ (path is relative to
# the current working directory).
context = gx.get_context(context_root_dir="./great_expectations/")

Build the `batch_request` used to retrieve the `event_names` table.

In [10]:
# Look up the fluent data asset that the checkpoint below is run against:
# first the datasource by name, then the asset registered on it.
visits_datasource = context.datasources["visits_datasource"]
visits_asset: DataAsset = visits_datasource.get_asset("visits_asset")

In [11]:
# Batch request for the asset with no extra options (the stored checkpoint
# config shown further down reports `"options": {}`).
# NOTE(review): `visits_asset` is a fluent DataAsset, so this presumably returns
# the fluent BatchRequest type rather than `core.batch.BatchRequest` — confirm
# the annotation.
batch_request: BatchRequest = visits_asset.build_batch_request()

Configure the `checkpoint`.

In [12]:
# Actions attached to the checkpoint, in order: store the validation result,
# then update Data Docs.
checkpoint_actions = [
    {
        "name": "store_validation_result",
        "action": {"class_name": "StoreValidationResultAction"},
    },
    {
        "name": "update_data_docs",
        "action": {"class_name": "UpdateDataDocsAction"},
    },
]

# Checkpoint tying the batch request to the "visitors_exp" expectation suite.
# Run names are generated from the timestamped template.
checkpoint: Checkpoint = Checkpoint(
    name="my_checkpoint",
    data_context=context,
    run_name_template="%Y%m%d-%H%M%S-my-run-name-template",
    expectation_suite_name="visitors_exp",
    batch_request=batch_request,
    action_list=checkpoint_actions,
)

In [13]:
# Register the checkpoint with the Data Context (added if new, updated if it
# already exists). The call's return value is what this cell displays.
context.add_or_update_checkpoint(checkpoint=checkpoint)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {
    "datasource_name": "visits_datasource",
    "data_asset_name": "visits_asset",
    "options": {}
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "expectation_suite_name": "visitors_exp",
  "module_name": "great_expectations.checkpoint",
  "name": "my_checkpoint",
  "profilers": [],
  "run_name_template": "%Y%m%d-%H%M%S-my-run-name-template",
  "runtime_configuration": {},
  "validations": []
}

# Example 1: 
* `unexpected_index_column_names` not configured.

In [14]:
# Run the checkpoint without passing any `result_format` override.
results: CheckpointResult = checkpoint.run()

Calculating Metrics:   0%|          | 0/13 [00:00<?, ?it/s]

In [15]:
# Validation results produced by the run (indexed as a list in the next cell).
evrs = results.list_validation_results()

In [16]:
# Show the `result` payload of the first expectation in the first validation
# result.
evrs[0]["results"][0]["result"]

{'element_count': 6,
 'unexpected_count': 3,
 'unexpected_percent': 50.0,
 'partial_unexpected_list': ['user_signup', 'purchase', 'download'],
 'missing_count': 0,
 'missing_percent': 0.0,
 'unexpected_percent_total': 50.0,
 'unexpected_percent_nonmissing': 50.0,
 'partial_unexpected_counts': [{'value': 'download', 'count': 1},
  {'value': 'purchase', 'count': 1},
  {'value': 'user_signup', 'count': 1}]}

In [17]:
# Open Data Docs to inspect how the results of this run are rendered.
context.open_data_docs()

# Example 2:
* A single column is configured in `unexpected_index_column_names`.

In [18]:
# Request the COMPLETE result format and name the column to report for each
# unexpected row; every entry of the index lists in the result below carries
# this column alongside the unexpected value.
result_format: dict = {"result_format": "COMPLETE"}
result_format["unexpected_index_column_names"] = ["event_id"]
# `unexpected_index_query` is returned by default. To suppress it, also set:
# result_format["return_unexpected_index_query"] = False

In [19]:
# Re-run the same checkpoint, passing the single-column result format at run
# time.
results = checkpoint.run(result_format=result_format)

Calculating Metrics:   0%|          | 0/13 [00:00<?, ?it/s]

In [20]:
# Validation results of the single-index-column run.
evrs = results.list_validation_results()

In [21]:
# Same lookup as in Example 1: first validation result, first expectation,
# `result` payload. The output now includes the index-related keys.
evrs[0]["results"][0]["result"]

{'element_count': 6,
 'unexpected_count': 3,
 'unexpected_percent': 50.0,
 'partial_unexpected_list': ['user_signup', 'purchase', 'download'],
 'unexpected_index_column_names': ['event_id'],
 'missing_count': 0,
 'missing_percent': 0.0,
 'unexpected_percent_total': 50.0,
 'unexpected_percent_nonmissing': 50.0,
 'partial_unexpected_index_list': [{'event_id': 3,
   'event_type': 'user_signup'},
  {'event_id': 4, 'event_type': 'purchase'},
  {'event_id': 5, 'event_type': 'download'}],
 'partial_unexpected_counts': [{'value': 'download', 'count': 1},
  {'value': 'purchase', 'count': 1},
  {'value': 'user_signup', 'count': 1}],
 'unexpected_list': ['user_signup', 'purchase', 'download'],
 'unexpected_index_list': [{'event_id': 3, 'event_type': 'user_signup'},
  {'event_id': 4, 'event_type': 'purchase'},
  {'event_id': 5, 'event_type': 'download'}],
 'unexpected_index_query': "SELECT event_id, event_type \nFROM event_names \nWHERE event_type IS NOT NULL AND (event_type NOT IN ('page_load', '

In [22]:
# Open Data Docs to inspect how the single-index-column results are rendered.
context.open_data_docs()

# Example 3:
* Multiple columns are configured in `unexpected_index_column_names`.

In [23]:
# Request the COMPLETE result format and name two columns to report for each
# unexpected row; every entry of the index lists in the result below carries
# both columns alongside the unexpected value.
result_format: dict = {"result_format": "COMPLETE"}
result_format["unexpected_index_column_names"] = ["event_id", "visit_id"]
# `unexpected_index_query` is returned by default. To suppress it, also set:
# result_format["return_unexpected_index_query"] = False

In [24]:
# Re-run the same checkpoint, passing the two-column result format at run time.
results = checkpoint.run(result_format=result_format)

Calculating Metrics:   0%|          | 0/13 [00:00<?, ?it/s]

In [25]:
# Validation results of the multi-index-column run.
evrs = results.list_validation_results()

In [26]:
# Same lookup again: first validation result, first expectation, `result`
# payload. Each index entry now carries both `event_id` and `visit_id`.
evrs[0]["results"][0]["result"]

{'element_count': 6,
 'unexpected_count': 3,
 'unexpected_percent': 50.0,
 'partial_unexpected_list': ['user_signup', 'purchase', 'download'],
 'unexpected_index_column_names': ['event_id', 'visit_id'],
 'missing_count': 0,
 'missing_percent': 0.0,
 'unexpected_percent_total': 50.0,
 'unexpected_percent_nonmissing': 50.0,
 'partial_unexpected_index_list': [{'event_id': 3,
   'visit_id': 1470387700,
   'event_type': 'user_signup'},
  {'event_id': 4, 'visit_id': 1470438716, 'event_type': 'purchase'},
  {'event_id': 5, 'visit_id': 1470420524, 'event_type': 'download'}],
 'partial_unexpected_counts': [{'value': 'download', 'count': 1},
  {'value': 'purchase', 'count': 1},
  {'value': 'user_signup', 'count': 1}],
 'unexpected_list': ['user_signup', 'purchase', 'download'],
 'unexpected_index_list': [{'event_id': 3,
   'visit_id': 1470387700,
   'event_type': 'user_signup'},
  {'event_id': 4, 'visit_id': 1470438716, 'event_type': 'purchase'},
  {'event_id': 5, 'visit_id': 1470420524, 'event_

In [27]:
# Open Data Docs to inspect how the multi-index-column results are rendered.
context.open_data_docs()