In [None]:
import great_expectations as gx
import pandas as pd
from great_expectations.expectations import (
    ExpectColumnValuesToMatchRegex,
    ExpectColumnValuesToBeUnique,
    ExpectColumnValuesToBeBetween, 
    ExpectColumnValuesToNotBeNull,    
)


Definir Contexto de GX

Se inicializa Great Expectations en la sesión definiendo el contexto.

In [None]:
# Entry point to Great Expectations: later cells reach data sources and suites through this context.
# NOTE(review): no mode is passed, so GX picks the context type itself — presumably an in-memory one for this session; confirm.
context = gx.get_context()

In [None]:
# Small demo frame with deliberate data-quality problems to validate against:
# a repeated id (3), a missing email (None) and a negative amount (-5).
demo_columns = {
    "id": [1, 2, 3, 3],
    "email": ["a@x.com", "b@x.com", None, "d@x.com"],
    "amount": [100, -5, 30, 40],
}
df = pd.DataFrame(data=demo_columns)

In [None]:
# Display the demo frame so the rows being validated are visible in the notebook.
df

GX necesita definir 3 componentes:

- Data Source: Tipo de fuente a la que se conectará.
- Data Asset: Apuntador de la fuente
- Batch Definition: Parametrización que define si puede traer todos los datos o una fracción

In [None]:
# Register a DataFrame-backed asset on the context's default pandas data source.
# NOTE(review): despite its name, `batch_definition` holds the object returned by
# add_dataframe_asset (the data asset); the actual batch definition is `batch_def`.
pandas_source = context.data_sources.pandas_default
batch_definition = pandas_source.add_dataframe_asset(name="demo_df_asset")

# A "whole dataframe" batch definition: the frame is supplied at get_batch time.
batch_def = batch_definition.add_batch_definition_whole_dataframe(name="whole_df")

# Bind the demo frame to the batch definition to obtain the batch to validate.
runtime_parameters = {"dataframe": df}
batch = batch_def.get_batch(batch_parameters=runtime_parameters)


Crear Expectations Suite

In [None]:
# Create the expectation suite and register it with the context.
# NOTE(review): context.suites.add presumably rejects a name that is already
# registered, so re-running this cell in the same kernel may fail — confirm.
suite_name = "demo_suite"
suite = context.suites.add(gx.ExpectationSuite(name=suite_name))

# Data-quality rules for the demo frame, declared once and added in this order.
demo_expectations = [
    # amount has a lower bound of 0; no upper bound is set.
    ExpectColumnValuesToBeBetween(column="amount", min_value=0),
    ExpectColumnValuesToBeUnique(column="id"),
    ExpectColumnValuesToBeUnique(column="email"),
    # email: one or more non-"@" chars, "@", non-"@" chars, a ".", non-"@" chars.
    ExpectColumnValuesToMatchRegex(column="email", regex=r"^[^@]+@[^@]+\.[^@]+$"),
    ExpectColumnValuesToNotBeNull(column="id"),
    ExpectColumnValuesToNotBeNull(column="email"),
]

# A loop (rather than a trailing bare call) also keeps the cell from echoing the
# return value of the last add_expectation as cell output.
for expectation in demo_expectations:
    suite.add_expectation(expectation)


Validar Expectativas

In [None]:

# Run every expectation in the suite against the batch, requesting the
# "COMPLETE" result format.
validation_results = batch.validate(
    suite,
    result_format="COMPLETE",
)

# Dump the raw validation report as text.
print(validation_results)


In [None]:
# Print a short per-expectation summary: target column, expectation type and
# pass/fail, plus the detailed result payload for the ones that failed.
for result in validation_results["results"]:
    config = result["expectation_config"]

    # .get instead of ["column"]: an expectation that is not tied to a single
    # column has no "column" kwarg and would otherwise raise KeyError here.
    print("column: ", config["kwargs"].get("column"))
    print(f"Expectation: {config['type']}")
    print(f"Success: {result['success']}")

    if not result["success"]:
        print(f"Details: {result['result']}")

    print("-" * 80)
