In [1]:
import numpy as np
import pandas as pd
import duckdb
import sqlalchemy
from pandas_gbq import read_gbq

In [2]:
import great_expectations as gx
from great_expectations import expectations as gxe

import pprint
import os

<div class="alert alert-block alert-info">
GX_CUSTOMERS

In [3]:
context = gx.get_context()
# query bigquery
project_id = "projectm2-aiess"
query = "SELECT * FROM olist_brazilian_ecommerce_target.DIM_CUSTOMERS"
df_customers = read_gbq(query, project_id=project_id)


data_source_name = "olist.dim_customers"
data_source = context.data_sources.add_pandas(name=data_source_name)

Downloading: 100%|[32m██████████[0m|


In [4]:
# create asset
data_asset_name = "olist.dim_customers_asset"
data_asset = data_source.add_dataframe_asset(name=data_asset_name)


In [5]:
# create batch
batch_definition_name = "batch_customers_dataframe"
batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)

batch_parameters = {"dataframe": df_customers}

new_batch = batch_definition.get_batch(batch_parameters=batch_parameters)

In [6]:
print(new_batch.head(4))

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 165.23it/s]

                    pk_customer_sid                customer_unique_id  \
0  10ad09201fcc1c82d181ff7234bcdb3b  94742cd1fbac9146be7e2a139b63e13c   
1  8a0108267d9258a0ec9f74381bc9b0de  7a2dc4682890550ebe3b8befcea3d55c   
2  888d2ebe1af2a8c93c75dae5dfc23719  721d1092e1a6460c67e6a0e691d899a3   
3  dad907e170748a35ef4e92238b7308f3  36b1c0516f123351ffa87430416dcae5   

  customer_zip_code_prefix customer_city customer_state  \
0                    69900    rio branco             AC   
1                    69900    rio branco             AC   
2                    69900    rio branco             AC   
3                    69900    rio branco             AC   

                load_date  
0  2025-06-18 03:31:23 AM  
1  2025-06-18 03:31:23 AM  
2  2025-06-18 03:31:23 AM  
3  2025-06-18 03:31:23 AM  





In [7]:
# Create a new suite for all dimension tables schema validation
suite_name = "schema_dim_customers_expectation"
suite = gx.ExpectationSuite(name=suite_name)

schema_dim_customers_expectation = gx.expectations.ExpectColumnToExist(
    column="pk_customer_sid", column_index=0
)

context.suites.add_or_update(suite)
suite.add_expectation(schema_dim_customers_expectation)

definition_name = "schema_dim_customers_definition"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=suite, name=definition_name
)


In [8]:
validation_results = validation_definition.run(batch_parameters=batch_parameters)
print(validation_results)

Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 322.65it/s]

{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist.dim_customers-olist.dim_customers_asset",
          "column": "pk_customer_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "baa5c5c1-9453-4557-9cfd-054f81dd2074"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    }
  ],
  "suite_name": "schema_dim_customers_expectation",
  "suite_parameters": {},
  "statistics": {
    "evaluated_expectations": 1,
    "successful_expectations": 1,
    "unsuccessful_expectations": 0,
    "success_percent": 100.0
  },
  "meta": {
    "great_expectations_version": "1.5.1",
    "batch_spec": {
      "batch_data": "PandasDataFrame"
    },
    "batch_markers": {
      "ge_load_time": "20250618T235450.982011Z"




In [9]:
# Save full results to file
output_folder = "gx_output"
os.makedirs(output_folder, exist_ok=True)
result_path = os.path.join(output_folder, "gx_results_customers.txt")

with open(result_path, "w") as f:
    f.write(pprint.pformat(validation_results))

print(f" Full GX test results saved to {result_path}")


 Full GX test results saved to gx_output/gx_results_customers.txt


<div class="alert alert-block alert-info">
GX_dtype_summary

In [10]:
context = gx.get_context()
# List of GBQ tables and their expected columns with types
gbq_tables_with_columns_and_types = {
    "olist_brazilian_ecommerce_target.FCT_PAYMENTS": {"fk_order_sid": "string"},
    "olist_brazilian_ecommerce_target.FCT_REVIEWS": {"fk_order_sid": "string"},
    "olist_brazilian_ecommerce_target.DIM_GEOLOCATION": {"geolocation_zip_code_prefix": "string"},
    "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS": {"fk_order_sid": "string"},
    "olist_brazilian_ecommerce_target.DIM_DATE": {"pk_date_sid": "integer"},
}

output_folder = "gx_output"
os.makedirs(output_folder, exist_ok=True)
summary_file_path = os.path.join(output_folder, "gx_dtype_summary.txt")

with open(summary_file_path, "w") as f:
    f.write("Great Expectations Full Validation Results\n")
    f.write("=" * 60 + "\n\n")

 # Iterate over the list of tables and process each one   
    for table_name, expected_columns in gbq_tables_with_columns_and_types.items():
        query = f"SELECT * FROM {table_name}"
        df_table = read_gbq(query, project_id="projectm2-aiess")

        # Generate unique names for data source and asset
        data_source_name = f"{table_name}_data_source"
        asset_name = f"{table_name}_asset"

        # Add data source
        data_source = context.data_sources.add_pandas(name=data_source_name)
        # Add DataFrame asset
        data_asset = data_source.add_dataframe_asset(name=asset_name)
        # Add batch definition
        batch_definition = data_asset.add_batch_definition_whole_dataframe(table_name)
        batch_parameters = {"dataframe": df_table}
        batch = batch_definition.get_batch(batch_parameters=batch_parameters)

        # Create Expectation Suite
        suite_name = f"{table_name}_suite"
        suite = gx.ExpectationSuite(name=suite_name)
        suite = context.suites.add(suite)
        # Add ExpectColumnValuesToBeOfType expectations for each expected column
        for column, column_type in expected_columns.items():
            expectation = gx.expectations.ExpectColumnValuesToBeOfType(
                column=column, type_=column_type
            )
            suite.add_expectation(expectation)

        print(f" Running validation for: {table_name}")
        validation_definition = gx.ValidationDefinition(
            data=batch_definition, suite=suite, name=f"{table_name}_validation"
        )
        results = validation_definition.run(batch_parameters=batch_parameters)
        print(f"Validation results for {table_name}:")
        print(results)

        f.write(f"Table: {table_name}\n")
        f.write(pprint.pformat(results))
        print(f" Finished validation for: {table_name}")

print(f"Combined gx dytppe test results saved to {summary_file_path}")


Downloading: 100%|[32m██████████[0m|
 Running validation for: olist_brazilian_ecommerce_target.FCT_PAYMENTS


Calculating Metrics:  50%|█████     | 5/10 [00:00<00:00, 61.76it/s]  


Validation results for olist_brazilian_ecommerce_target.FCT_PAYMENTS:
{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "column": "fk_order_sid",
          "type_": "string",
          "batch_id": "olist_brazilian_ecommerce_target.FCT_PAYMENTS_data_source-olist_brazilian_ecommerce_target.FCT_PAYMENTS_asset"
        },
        "meta": {},
        "id": "30a29ad5-791c-49e9-bc62-204d244490cd"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "MetricConfigurationID(metric_name='column_values.of_type.condition', metric_domain_kwargs_id='5b6378d6c76a9d045392b5e52bdfb713', metric_value_kwargs_id='type_=string')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/chuhao/miniconda3/envs/dagster_meltano/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py\", line 534, 

Calculating Metrics:  50%|█████     | 5/10 [00:00<00:00, 59.36it/s]  


Validation results for olist_brazilian_ecommerce_target.FCT_REVIEWS:
{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "column": "fk_order_sid",
          "type_": "string",
          "batch_id": "olist_brazilian_ecommerce_target.FCT_REVIEWS_data_source-olist_brazilian_ecommerce_target.FCT_REVIEWS_asset"
        },
        "meta": {},
        "id": "fb8d17c0-960e-4f23-9721-19314093f665"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "MetricConfigurationID(metric_name='column_values.of_type.condition', metric_domain_kwargs_id='5b6378d6c76a9d045392b5e52bdfb713', metric_value_kwargs_id='type_=string')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/chuhao/miniconda3/envs/dagster_meltano/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py\", line 534, in 

Calculating Metrics:  50%|█████     | 5/10 [00:00<00:00, 15.06it/s]  


Validation results for olist_brazilian_ecommerce_target.DIM_GEOLOCATION:
{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "column": "geolocation_zip_code_prefix",
          "type_": "string",
          "batch_id": "olist_brazilian_ecommerce_target.DIM_GEOLOCATION_data_source-olist_brazilian_ecommerce_target.DIM_GEOLOCATION_asset"
        },
        "meta": {},
        "id": "5a4a463f-5227-4a16-b60f-ca9c4a120131"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "MetricConfigurationID(metric_name='column_values.of_type.condition', metric_domain_kwargs_id='512b8da6f36e3964b470b5838155831e', metric_value_kwargs_id='type_=string')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/chuhao/miniconda3/envs/dagster_meltano/lib/python3.11/site-packages/great_expectations/execution_engine/execution

Calculating Metrics:  50%|█████     | 5/10 [00:00<00:00, 44.65it/s]  


Validation results for olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:
{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "column": "fk_order_sid",
          "type_": "string",
          "batch_id": "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_asset"
        },
        "meta": {},
        "id": "a14b4990-fbc4-4b89-b58f-a83eadfbe570"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "MetricConfigurationID(metric_name='column_values.of_type.condition', metric_domain_kwargs_id='5b6378d6c76a9d045392b5e52bdfb713', metric_value_kwargs_id='type_=string')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/chuhao/miniconda3/envs/dagster_meltano/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py\", l

Calculating Metrics:  50%|█████     | 5/10 [00:00<00:00, 166.79it/s] 

Validation results for olist_brazilian_ecommerce_target.DIM_DATE:
{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "column": "pk_date_sid",
          "type_": "integer",
          "batch_id": "olist_brazilian_ecommerce_target.DIM_DATE_data_source-olist_brazilian_ecommerce_target.DIM_DATE_asset"
        },
        "meta": {},
        "id": "1bca6766-4907-4840-8725-447d3f88ad92"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "MetricConfigurationID(metric_name='column_values.of_type.condition', metric_domain_kwargs_id='8a34c2aaf764a7bff22df977fd1f456c', metric_value_kwargs_id='type_=integer')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/chuhao/miniconda3/envs/dagster_meltano/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py\", line 534, in _process




<div class="alert alert-block alert-info">
GX_FCT_ORDER_ITEMS

In [12]:
context = gx.get_context()

# Query the fact table from GBQ
project_id = "projectm2-aiess"
fact_table_name = "projectm2-aiess.olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS"
query = f"SELECT * FROM {fact_table_name}"
df_fact_table = read_gbq(query, project_id=project_id)

# Generate unique names for data source and asset
data_source_name = f"{fact_table_name}_data_source"
asset_name = f"{fact_table_name}_asset"

# Add data source
data_source = context.data_sources.add_pandas(name=data_source_name)

# Add DataFrame asset
data_asset = data_source.add_dataframe_asset(name=asset_name)

# Add batch definition
batch_definition_name = fact_table_name
batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)

# Get the batch and print the first few rows
batch_parameters = {"dataframe": df_fact_table}
batch = batch_definition.get_batch(batch_parameters=batch_parameters)
print(f"Batch for {fact_table_name}:")
print(batch.head(4))

# Add column expectations
schema_fct_orders_expectation_1 = gx.expectations.ExpectColumnToExist(
    column="fk_order_sid", column_index=0
)
schema_fct_orders_expectation_2 = gx.expectations.ExpectColumnToExist(
    column="pk_order_id", column_index=1
)
schema_fct_orders_expectation_3 = gx.expectations.ExpectColumnToExist(
    column="pk_order_item_id", column_index=2
)
schema_fct_orders_expectation_4 = gx.expectations.ExpectColumnToExist(
    column="fk_shipping_limit_date_sid", column_index=3
)

# Create a new suite for the fact table schema validation
suite_name = "schema_fct_orders_expectation"
suite = gx.ExpectationSuite(name=suite_name)
suite = context.suites.add(suite)

suite.add_expectation(schema_fct_orders_expectation_1)
suite.add_expectation(schema_fct_orders_expectation_2)
suite.add_expectation(schema_fct_orders_expectation_3)
suite.add_expectation(schema_fct_orders_expectation_4)

# Create validation definition
definition_name = "schema_fct_orders_definition"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=suite, name=definition_name
)

# Run validation
validation_results = validation_definition.run(batch_parameters=batch_parameters)
print(f"Validation results for {fact_table_name}:")
print(validation_results)


Downloading: 100%|[32m██████████[0m|
Batch for projectm2-aiess.olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 273.01it/s]

                       fk_order_sid                       pk_order_id  \
0  7f39ba4c9052be115350065d07583cac  7f39ba4c9052be115350065d07583cac   
1  9dc8d1a6f16f1b89874c29c9d8d30447  9dc8d1a6f16f1b89874c29c9d8d30447   
2  d455a8cb295653b55abda06d434ab492  d455a8cb295653b55abda06d434ab492   
3  5d70582531ed37f284797ba1354e0c50  5d70582531ed37f284797ba1354e0c50   

  pk_order_item_id fk_shipping_limit_date_sid  \
0                1                   20171024   
1                1                   20171018   
2                1                   20171012   
3                1                   20170330   

                         product_id                         seller_id  \
0  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
1  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
2  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
3  08574b074924071f4e201e151b152b4e  001cca7ae9ae17fb1caed9dfb1094831   

        shipping_limit


Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 250.63it/s]

Validation results for projectm2-aiess.olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "projectm2-aiess.olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-projectm2-aiess.olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_asset",
          "column": "fk_order_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "506d4a3a-23f2-4694-965b-3b6c54bfbf29"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "projectm2-aiess.olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-projectm2-aiess.ol




In [13]:
# Save full results to file
output_folder = "gx_output"
os.makedirs(output_folder, exist_ok=True)
result_path = os.path.join(output_folder, "gx_results_FCT_ORDER_ITEMS.txt")

with open(result_path, "w") as f:
    f.write(pprint.pformat(validation_results))

print(f" Full GX test results saved to {result_path}")

 Full GX test results saved to gx_output/gx_results_FCT_ORDER_ITEMS.txt


<div class="alert alert-block alert-info">
GX_DS_orders_delivery

In [None]:
context = gx.get_context()

# Query the fact table from GBQ
project_id = "projectm2-aiess"
fact_table_name = "olist_brazilian_ecommerce_DS.DS_orders_delivery"
query = f"SELECT * FROM {fact_table_name}"
df_fact_table = read_gbq(query, project_id=project_id)

# Generate unique names for data source and asset
data_source_name = f"{fact_table_name}_data_source"
asset_name = f"{fact_table_name}_asset"

# Add data source
data_source = context.data_sources.add_pandas(name=data_source_name)

# Add DataFrame asset
data_asset = data_source.add_dataframe_asset(name=asset_name)

# Add batch definition
batch_definition_name = fact_table_name
batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)

# Get the batch and print the first few rows
batch_parameters = {"dataframe": df_fact_table}
batch = batch_definition.get_batch(batch_parameters=batch_parameters)
print(f"Batch for {fact_table_name}:")
print(batch.head(4))

# Add column expectations
schema_fct_orders_expectation_1 = gx.expectations.ExpectColumnToExist(
    column="pk_order_sid", column_index=0
)
schema_fct_orders_expectation_2 = gx.expectations.ExpectColumnToExist(
    column="fk_customer_sid", column_index=1
)

# Create a new suite for the fact table schema validation
suite_name = "schema_fct_orders_expectation"
suite = gx.ExpectationSuite(name=suite_name)
suite = context.suites.add(suite)

suite.add_expectation(schema_fct_orders_expectation_1)
suite.add_expectation(schema_fct_orders_expectation_2)

# Create validation definition
definition_name = "schema_fct_orders_definition"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=suite, name=definition_name
)

# Run validation
validation_results = validation_definition.run(batch_parameters=batch_parameters)
print(f"Validation results for {fact_table_name}:")
print(validation_results)

Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_DS.DS_orders_delivery:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 249.77it/s]

                       pk_order_sid                   fk_customer_sid  \
0  809a282bbd5dbcabb6f2f724fca862ec  622e13439d6b5a0b486c435618b2679e   
1  bfbd0f9bdef84302105ad712db648a6c  86dc2ffce2dfff336de2f386a786e574   
2  2e7a8482f6fb09756ca50c10d7bfc047  08c5351a6aca1c1589a38f244edeee9d   
3  e5215415bb6f76fe3b7cb68103a0d1c0  b6f6cbfc126f1ae6723fe2f9b3751208   

  fk_order_purchased_date_sid fk_order_approved_at_date_sid  \
0                    20160913                      20161007   
1                    20160915                      20160915   
2                    20160904                      20161007   
3                    20161022                          None   

  fk_order_delivered_carrier_date_sid fk_order_delivered_customer_date_sid  \
0                                None                                 None   
1                            20161107                             20161109   
2                            20161018                                 None   
3     


Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 425.58it/s]

Validation results for olist_brazilian_ecommerce_DS.DS_orders_delivery:
{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_DS.DS_orders_delivery_data_source-olist_brazilian_ecommerce_DS.DS_orders_delivery_asset",
          "column": "pk_order_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "ce0fb3eb-589c-4b93-872c-0f39b36e18ce"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_DS.DS_orders_delivery_data_source-olist_brazilian_ecommerce_DS.DS_orders_delivery_asset",
          "column": "fk_custo




In [17]:
# Save full results to file
output_folder = "gx_output"
os.makedirs(output_folder, exist_ok=True)
result_path = os.path.join(output_folder, "gx_results_DS_fct_orders_delivery.txt")

with open(result_path, "w") as f:
    f.write(pprint.pformat(validation_results))

print(f" Full GX test results saved to {result_path}")

 Full GX test results saved to gx_output/gx_results_DS_fct_orders_delivery.txt


<div class="alert alert-block alert-info">
GX_geo

In [16]:
context = gx.get_context()
# query bigquery
project_id = "projectm2-aiess"
query = "SELECT * FROM olist_brazilian_ecommerce_DS.DS_land_geolocation"
df_geolocation = read_gbq(query, project_id=project_id)

data_source_name = "geolocation_df"
data_source = context.data_sources.add_pandas(name=data_source_name)

# create asset
data_asset_name = "geolocation_asset"
data_asset = data_source.add_dataframe_asset(name=data_asset_name)

# create batch
batch_definition_name = "batch_geolocation_dataframe"
batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)

batch_parameters = {"dataframe": df_geolocation}

new_batch = batch_definition.get_batch(batch_parameters=batch_parameters)

Downloading: 100%|[32m██████████[0m|


In [None]:
suite_name = "br_ecom_expectation"
suite = gx.ExpectationSuite(name=suite_name)

preset_lat_expectation = gx.expectations.ExpectColumnValuesToBeBetween(
    column="geolocation_lat", min_value=-35, max_value=5
)

preset_long_expectation = gx.expectations.ExpectColumnValuesToBeBetween(
    column="geolocation_lng", min_value=-75, max_value=-35
)

context.suites.add_or_update(suite)
suite.add_expectation(preset_lat_expectation)
suite.add_expectation(preset_long_expectation)

definition_name = "br_ecom_validation_definition_V2"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=suite, name=definition_name
)


In [39]:
validation_results = validation_definition.run(batch_parameters=batch_parameters)
print(validation_results)

Calculating Metrics: 100%|██████████| 17/17 [00:00<00:00, 91.52it/s] 

{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_between",
        "kwargs": {
          "batch_id": "geolocation_df-geolocation_asset",
          "column": "geolocation_lat",
          "min_value": -35.0,
          "max_value": 5.0
        },
        "meta": {},
        "id": "41196b5f-ad1f-4bf4-8189-cc19f337b758"
      },
      "result": {
        "element_count": 738299,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "partial_unexpected_list": [],
        "missing_count": 0,
        "missing_percent": 0.0,
        "unexpected_percent_total": 0.0,
        "unexpected_percent_nonmissing": 0.0,
        "partial_unexpected_counts": [],
        "partial_unexpected_index_list": []
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success"




In [40]:
# Save full results to file
output_folder = "gx_output"
os.makedirs(output_folder, exist_ok=True)
result_path = os.path.join(output_folder, "gx_results_geo.txt")

with open(result_path, "w") as f:
    f.write(pprint.pformat(validation_results))

print(f" Full GX test results saved to {result_path}")


 Full GX test results saved to gx_output/gx_results_geo.txt
