# Great Expectations

## A database as a data source

In [5]:
import great_expectations as gx
import pandas as pd
# Create a DataContext as the entry point to the GX Python API.
context = gx.get_context()

# Persist the contents of the Ephemeral Data Context as a new Filesystem data
# context. The call is guarded so this cell can be re-run: only a context that
# exposes convert_to_file_context() is converted; any other context returned by
# get_context() is used as-is.
# NOTE(review): presumably a re-run picks up the already-migrated filesystem
# context, which has nothing left to convert - confirm against the GX docs.
if hasattr(context, "convert_to_file_context"):
    context = context.convert_to_file_context()

Successfully migrated to FileDataContext!


In [8]:
# Similar to a folder, you can add a database as a data source. Credentials are
# kept out of the notebook, as described in this guide:
# https://docs.greatexpectations.io/docs/guides/connecting_to_your_data/fluent/database/connect_sql_source_data/#create-a-postgresql-data-source
#
# The password is deliberately NOT written here. "${POSTGRES_PASSWORD}" is a
# literal placeholder (this is a plain string, not an f-string) that GX resolves
# at runtime from the POSTGRES_PASSWORD environment variable or from
# config_variables.yml. Set it before running this cell.
datasource_name = "dunghc-diabetes-new"
my_connection_string = (
    "postgresql+psycopg2://k6:${POSTGRES_PASSWORD}@localhost:5432/k6"
)

# add_or_update_* (rather than add_*) keeps this cell re-runnable when a
# datasource with this name is already registered, consistent with the
# add_or_update_* calls used for the suite and the checkpoint in this notebook.
datasource = context.sources.add_or_update_postgres(
    name=datasource_name, connection_string=my_connection_string
)

# TODO: Try to validate some tables on your own :)

In [9]:
# Register the database table "diabetes_new" on the datasource as a table asset
# named "my_table_asset".
asset_name = "my_table_asset"
source_table = "diabetes_new"
table_asset = datasource.add_table_asset(name=asset_name, table_name=source_table)

In [10]:
# Build a batch request from the table asset. No arguments are passed, so no
# batching options are specified here (presumably this requests the whole
# table - confirm against the GX docs).
batch_request = table_asset.build_batch_request()

In [11]:
# Register the expectation suite "my_expectation_suite" on the context using
# the add-or-update variant. The call is the last expression in the cell, so
# its return value (the suite) is displayed as the cell output.
context.add_or_update_expectation_suite("my_expectation_suite")

{
  "expectation_suite_name": "my_expectation_suite",
  "ge_cloud_id": null,
  "expectations": [],
  "data_asset_type": null,
  "meta": {
    "great_expectations_version": "0.17.21"
  }
}

In [12]:
# Pair the batch request with the expectation suite to obtain a validator.
suite_name = "my_expectation_suite"
validator = context.get_validator(
    expectation_suite_name=suite_name,
    batch_request=batch_request,
)

# Preview the first rows of the batch as a sanity check.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,created,content,pregnancies,glucose,bloodpressure,skinthickness,insulin,bmi,diabetespedigreefunction,age
0,1699891119598,Hi,2,98,29,67,457,57.050919,2.22086,29
1,1699891121619,Hi,7,6,8,11,522,4.979407,1.132764,60
2,1699891123649,Hi,11,177,62,92,214,10.801221,1.194071,78
3,1699891125668,Hi,16,47,102,83,145,55.136469,2.065157,51
4,1699891127687,Hi,4,9,86,99,49,43.974775,1.218999,67


In [14]:
# Columns that must not contain nulls, checked in this order.
required_columns = [
    "glucose",
    "bloodpressure",
    "skinthickness",
    "insulin",
    "bmi",
    "age",
    "pregnancies",
]

# Add one not-null expectation per column (replaces seven copy-pasted calls);
# each call's result is kept so it can be inspected afterwards.
not_null_results = [
    validator.expect_column_values_to_not_be_null(column=column_name)
    for column_name in required_columns
]

# Show the result of the final expectation as the cell output, as before.
not_null_results[-1]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 122,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [15]:
# Save the expectations recorded on the validator to its expectation suite.
# discard_failed_expectations=False is passed explicitly so that, per the
# flag's name, expectations that failed are not discarded when saving.
validator.save_expectation_suite(discard_failed_expectations=False)

In [16]:
# Register (or refresh) a checkpoint built from the validator.
checkpoint_name = "diabetes_asset_checkpoint"
checkpoint = context.add_or_update_checkpoint(name=checkpoint_name, validator=validator)

# Run the checkpoint and keep the validation result.
checkpoint_result = checkpoint.run()

# Take a quick look at the validation result.
context.view_validation_result(checkpoint_result)

Calculating Metrics:   0%|          | 0/46 [00:00<?, ?it/s]