In [1]:
# Import Library
import great_expectations as gx
import pandas as pd
from great_expectations.checkpoint import Checkpoint

In [2]:
# Obtain the Great Expectations data context, then read the cleaned CSV
# into a pandas DataFrame that the later cells validate.
context = gx.get_context()
df = pd.read_csv(
    'P2M3_trisna_data_clean.csv',
)

In [3]:
# Register a pandas datasource on the context.
datasource = context.sources.add_pandas(
    name="pandas_datasource",
)

# Attach a dataframe asset called "sales" to that datasource.
name = "sales"
data_asset = datasource.add_dataframe_asset(
    name=name,
)

# Batch request that binds the in-memory DataFrame `df` to the asset.
my_batch_request = data_asset.build_batch_request(
    dataframe=df,
)

In [4]:
# Create the expectation suite (or update it if it already exists);
# the returned suite is displayed as the cell output.
context.add_or_update_expectation_suite(
    expectation_suite_name="my_expectation_suite",
)

{
  "expectation_suite_name": "my_expectation_suite",
  "ge_cloud_id": null,
  "expectations": [],
  "data_asset_type": null,
  "meta": {
    "great_expectations_version": "0.17.23"
  }
}

In [5]:
# Build a validator that ties the batch request to "my_expectation_suite".
# Every `validator.expect_*` call in the following cells goes through it.
validator = context.get_validator(batch_request=my_batch_request, expectation_suite_name="my_expectation_suite")

In [6]:
# Expectation 1: no value in column `invoice_id` may be repeated

validator.expect_column_values_to_be_unique(
    column="invoice_id",
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [7]:
# Expectation 2: every value in column `rating` lies between 1 and 10

validator.expect_column_values_to_be_between(
    column="rating",
    min_value=1,
    max_value=10,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [8]:
# Expectation 3: column `payment` may only hold one of three values:
# Ewallet, Credit card, or Cash

validator.expect_column_values_to_be_in_set(
    column="payment",
    value_set=["Ewallet", "Credit card", "Cash"],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [9]:
# Expectation 4: column `quantity` must be stored as an integer (int64)

validator.expect_column_values_to_be_in_type_list(
    column="quantity",
    type_list=["int64"],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Expectation 5: Column `customer_type` must contain exactly 2 unique values
# (expected to be Member and Normal).
#
# The bounds are 2..2 so the check fails if the column has fewer OR more than
# two distinct values; a lower bound of 0 would also accept an empty or
# single-valued column, contradicting the rule above.
# NOTE(review): this checks only the number of distinct values, not that they
# are literally "Member" and "Normal" -- add a set-based expectation if the
# labels themselves must be enforced.

validator.expect_column_unique_value_count_to_be_between(
    column='customer_type',
    min_value=2,
    max_value=2,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 2
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [11]:
# Expectation 6: every value in column `invoice_id` must be exactly
# 11 characters long.
#
# Both bounds are set to 11: a lower bound of 0 would let truncated or empty
# IDs pass, which contradicts the "length 11" rule.

validator.expect_column_value_lengths_to_be_between(
    column='invoice_id',
    min_value=11,
    max_value=11,
)


Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [12]:
# Expectation 7: the minimum value of column `quantity` must be exactly 1.
#
# Both bounds are set to 1: a lower bound of 0 would also accept a column
# whose minimum is 0, contradicting the rule above.

validator.expect_column_min_to_be_between(
    column='quantity',
    min_value=1,
    max_value=1,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 1
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [13]:
# Save the expectations defined on `validator` into its expectation suite.
# `discard_failed_expectations=False` is passed so that expectations are kept
# in the suite regardless of whether they passed in the cells above.

validator.save_expectation_suite(discard_failed_expectations=False)

In [14]:
# Create (or update) the checkpoint "my_checkpoint" with a single validation
# that pairs the batch request with the saved expectation suite.

checkpoint = context.add_or_update_checkpoint(
    name="my_checkpoint",
    validations=[
        dict(
            batch_request=my_batch_request,
            expectation_suite_name="my_expectation_suite",
        ),
    ],
)

In [15]:
# Run the checkpoint built in the previous cell and keep the returned result
# object in `checkpoint_result` for later inspection.

checkpoint_result = checkpoint.run()

Calculating Metrics:   0%|          | 0/32 [00:00<?, ?it/s]

In [16]:
# Build the Data Docs site for this context. The call's return value is the
# cell output: a mapping of site name to the generated index.html location.

context.build_data_docs()

{'local_site': 'file://C:\\Users\\user\\AppData\\Local\\Temp\\tmp8ie2t0ad\\index.html'}