# Testing data validation with great_expectations

In [1]:
import datetime
import os
import random

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig, FilesystemStoreBackendDefaults

## Defining DataContext in code
[Documentation](https://legacy.docs.greatexpectations.io/en/latest/guides/how_to_guides/configuring_data_contexts/how_to_instantiate_a_data_context_without_a_yml_file.html)

In [5]:
# Datasource definition registered below under the name "local_datasource".
pandas_datasource = {
    "data_asset_type": {
        "class_name": "PandasDataset",
        "module_name": "great_expectations.dataset",
    },
    "class_name": "PandasDatasource",
    "module_name": "great_expectations.datasource",
    "batch_kwargs_generators": {},
}

# Action entries for the validation operator; each pairs an action name
# with the class name of the action to instantiate.
validation_actions = [
    {"name": action_name, "action": {"class_name": action_class}}
    for action_name, action_class in (
        ("store_validation_result", "StoreValidationResultAction"),
        ("store_evaluation_params", "StoreEvaluationParametersAction"),
        ("update_data_docs", "UpdateDataDocsAction"),
    )
]

# Whole project configuration built in code instead of a great_expectations.yml file.
project_config = DataContextConfig(
    datasources={"local_datasource": pandas_datasource},
    store_backend_defaults=FilesystemStoreBackendDefaults(
        root_directory="/tmp/shared_data/great_expectations"
    ),
    validation_operators={
        "action_list_operator": {
            "class_name": "ActionListValidationOperator",
            "action_list": validation_actions,
            "result_format": {"result_format": "COMPLETE"},
        }
    },
)

context = BaseDataContext(project_config=project_config)

## Create expectation suite

In [6]:
# Create the "validate_data" suite, replacing any suite already stored under that name.
suite = context.create_expectation_suite(
    expectation_suite_name="validate_data",
    overwrite_existing=True,
)

In [7]:
# Show the suite names known to the context as the cell output.
context.list_expectation_suite_names()

['validate_data']

## Create Expectation Configurations

In [8]:
# Keyword arguments for the distinct-values expectation on the "doornumber" column.
doornumber_kwargs = {
    "column": "doornumber",
    "value_set": ["two", "four"],
    "result_format": {"result_format": "COMPLETE"},
}

expectation_configuration = ExpectationConfiguration(
    expectation_type="expect_column_distinct_values_to_be_in_set",
    kwargs=doornumber_kwargs,
)

# Last expression: the value returned by add_expectation is shown as the cell output.
suite.add_expectation(expectation_configuration=expectation_configuration)

{"meta": {}, "kwargs": {"column": "doornumber", "value_set": ["two", "four"], "result_format": {"result_format": "COMPLETE"}}, "expectation_context": {"description": null}, "ge_cloud_id": null, "expectation_type": "expect_column_distinct_values_to_be_in_set"}

In [9]:
# Keyword arguments for the table-level column-count expectation (expected value: 26).
column_count_kwargs = {
    "value": 26,
    "result_format": {"result_format": "COMPLETE"},
}

expectation_configuration = ExpectationConfiguration(
    expectation_type="expect_table_column_count_to_equal",
    kwargs=column_count_kwargs,
)

# Last expression: the value returned by add_expectation is shown as the cell output.
suite.add_expectation(expectation_configuration=expectation_configuration)

{"meta": {}, "kwargs": {"value": 26, "result_format": {"result_format": "COMPLETE"}}, "expectation_context": {"description": null}, "ge_cloud_id": null, "expectation_type": "expect_table_column_count_to_equal"}

## Save expectation suite

In [10]:
# Persist the suite under the name "validate_data"; the return value is the cell output.
context.save_expectation_suite(
    expectation_suite=suite,
    expectation_suite_name="validate_data",
)

'/tmp/shared_data/great_expectations/expectations/validate_data.json'

## List expectations from suite

In [11]:
# Fetch the "validate_data" suite back from the context and display it.
context.get_expectation_suite(expectation_suite_name="validate_data")

{
  "meta": {
    "great_expectations_version": "0.13.44"
  },
  "expectations": [
    {
      "meta": {},
      "kwargs": {
        "column": "doornumber",
        "result_format": {
          "result_format": "COMPLETE"
        },
        "value_set": [
          "two",
          "four"
        ]
      },
      "expectation_context": {
        "description": null
      },
      "ge_cloud_id": null,
      "expectation_type": "expect_column_distinct_values_to_be_in_set"
    },
    {
      "meta": {},
      "kwargs": {
        "result_format": {
          "result_format": "COMPLETE"
        },
        "value": 26
      },
      "expectation_context": {
        "description": null
      },
      "ge_cloud_id": null,
      "expectation_type": "expect_table_column_count_to_equal"
    }
  ],
  "data_asset_type": null,
  "expectation_suite_name": "validate_data",
  "ge_cloud_id": null
}

## Create batch
Load the CarPrice CSV as a Batch through the Pandas datasource (`read_csv`), bound to the `validate_data` suite

In [12]:
# Location of the CarPrice CSV blob, without any access token.
CAR_PRICE_BLOB_URL = "https://ucfdataopstest.blob.core.windows.net/rawdata/CarPrice.csv"

# A SAS token is a credential, so it is read from the environment instead of
# being stored in the notebook. Set CAR_PRICE_SAS_TOKEN to the token's query
# string (e.g. "sp=r&st=...&sig=..."); a leading "?" is tolerated.
# When the variable is unset or empty, the plain blob URL is used as-is.
sas_token = os.environ.get("CAR_PRICE_SAS_TOKEN", "").lstrip("?")
if sas_token:
    car_price_url = CAR_PRICE_BLOB_URL + "?" + sas_token
else:
    car_price_url = CAR_PRICE_BLOB_URL

batch_kwargs = {
    "datasource": "local_datasource",
    "path": car_price_url,
    "reader_method": "read_csv",
}

# Bind the loaded data to the "validate_data" suite.
batch = context.get_batch(batch_kwargs, "validate_data")

## Validate data and print results

In [13]:
# Run identifier: a name with a random two-digit suffix plus the current UTC time.
run_id = dict(
    run_name="test_run_" + str(random.randint(10, 99)),
    run_time=datetime.datetime.now(tz=datetime.timezone.utc),
)

# Run the "action_list_operator" validation operator against the batch.
results = context.run_validation_operator(
    "action_list_operator",
    assets_to_validate=[batch],
    run_id=run_id,
    result_format={"result_format": "COMPLETE"},
)

In [14]:
# print() is used deliberately: it renders the str() form of the operator result.
print(results)

{
  "run_id": {
    "run_name": "test_run_97",
    "run_time": "2021-12-02T16:30:22.774298+00:00"
  },
  "success": true,
  "validation_operator_config": {
    "class_name": "ActionListValidationOperator",
    "module_name": "great_expectations.validation_operators",
    "name": "action_list_operator",
    "kwargs": {
      "action_list": [
        {
          "name": "store_validation_result",
          "action": {
            "class_name": "StoreValidationResultAction"
          }
        },
        {
          "name": "store_evaluation_params",
          "action": {
            "class_name": "StoreEvaluationParametersAction"
          }
        },
        {
          "name": "update_data_docs",
          "action": {
            "class_name": "UpdateDataDocsAction"
          }
        }
      ],
      "result_format": {
        "result_format": "COMPLETE",
        "partial_unexpected_count": 20
      }
    }
  },
  "run_results": {
    "ValidationResultIdentifier::validate_data/test_