In [2]:
pip install great_expectations

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


### Import Libraries

In [4]:
from lib2to3.fixes.fix_input import context

import pandas as pd
import json
import great_expectations as gx
import sys
import re
import os

from dask.graph_manipulation import checkpoint
from great_expectations.checkpoint.actions import EmailAction
from great_expectations.expectations.expectation import ColumnMapExpectation
from great_expectations.execution_engine.pandas_execution_engine import PandasExecutionEngine
from great_expectations.execution_engine.sqlalchemy_execution_engine import SqlAlchemyExecutionEngine
from great_expectations.expectations.metrics import ColumnMapMetricProvider
from great_expectations.expectations.metrics.map_metric_provider import (column_map_metric_provider,
                                                                         column_condition_partial)

  from lib2to3.fixes.fix_input import context


### Creating GX Context

In [5]:
context = gx.get_context(mode="ephemeral")

### Creating Datasource

In [7]:
data_source_name ='temperature_data'
data_source = context.data_sources.add_pandas(name=data_source_name)

### Creating DataAsset

In [8]:
data_asset_name = "emp_asset"
data_asset = data_source.add_dataframe_asset(name=data_asset_name)

### Creating Batch Definition

In [9]:
batch_definition_name = "emp_batch"
batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)

### Creating Metric for PAN Validation

In [10]:
class ColumnValuesToMatchPanFormat(ColumnMapMetricProvider):
    condition_metric_name = "column_values.match_pan_format"

    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, **kwargs):
        pan_regex = r"^[A-Z]{5}[0-9]{4}[A-Z]{1}$"
        return column.apply(lambda x: bool(re.match(pan_regex, str(x))))

    @column_condition_partial(engine=SqlAlchemyExecutionEngine)
    def _sqlalchemy(cls, column, **kwargs):
        pan_regex = r"^[A-Z]{5}[0-9]{4}[A-Z]{1}$"
        return column.like(f'[A-Z]{5}[0-9]{4}[A-Z]{1}')

### Create Custom Expectation

In [11]:
class ExpectColumnValuesToMatchPanFormat(ColumnMapExpectation):
    map_metric = "column_values.match_pan_format"
    success_keys = ("mostly",)
    default_kwarg_values = {
        "mostly": 1.0,
        "result_format": "BASIC",
        "include_config": True
    }

### Creating Expectation

In [12]:
expectation_pan = ExpectColumnValuesToMatchPanFormat(column="pan")

### Creating ExpectationSuite and Adding Expectations

In [13]:
expectation_suite_name = "emp_suite"
expectation_suite_ref = gx.ExpectationSuite(name=expectation_suite_name)
expectation_suite = context.suites.add(expectation_suite_ref)

In [16]:
expectation_suite.add_expectation(expectation_pan)

ExpectColumnValuesToMatchPanFormat(id='fc561e25-2d4c-4137-941f-69884e079482', meta=None, notes=None, result_format=<ResultFormat.BASIC: 'BASIC'>, description=None, catch_exceptions=True, rendered_content=None, windows=None, batch_id=None, column='pan', mostly=1, row_condition=None, condition_parser=None, default_kwarg_values={'mostly': 1.0, 'result_format': 'BASIC', 'include_config': True})

### Creating Validation Definition

In [17]:
validation_def_name = "emp_data_validation"
validation_definition_ref = gx.ValidationDefinition(data=batch_definition,
                                                    suite=expectation_suite,
                                                    name=validation_def_name)

In [20]:
validation_definition = context.validation_definitions.add(validation_definition_ref)

### Creating Send Email Action

In [21]:
with open('../Email Credentials.json') as f:
    data = json.load(f)

mailserveraddr = data['email']['mailServer']
fromaddr = data['email']['from']
password = data['email']['password']
to = data['email']['to']
port = data['email']['port']

In [22]:
email_action = EmailAction(
    name="email_action",
    notify_on="all",  # or "failure" or "success" or "warning"
    smtp_address=mailserveraddr,
    smtp_port=port,
    sender_password=password,
    sender_login=fromaddr,
    sender_emails=[to],
    use_tls=True,
    use_ssl=False
)

ValidationError: 2 validation errors for EmailAction
receiver_emails
  field required (type=value_error.missing)
sender_emails
  extra fields not permitted (type=value_error.extra)

### Creating Checkpoint

In [None]:
checkpoint_name = "emp_run_checkpoint"
checkpoint_to_add = gx.Checkpoint(
    name=checkpoint_name,
    validation_definitions=[validation_definition],
    actions=[email_action],
    result_format="COMPLETE"
)

In [None]:
checkpoint = context.checkpoints.add(checkpoint_to_add)

### Reading Data in Pandas DataFrame

In [None]:
data_df = pd.read_csv('temperature.csv')
data_df.head()

### Creating BatchParameter

In [None]:
batch_parameters = {"dataframe": data_df}

In [None]:
validation_results = checkpoint.run(batch_parameters=batch_parameters)


### Displaying validation results

In [None]:
print(validation_results)