In [1]:
import numpy as np
import pandas as pd
import duckdb
import sqlalchemy
from pandas_gbq import read_gbq

In [2]:
import great_expectations as gx
from great_expectations import expectations as gxe

import pprint
import os

<div class="alert alert-block alert-info">
GX_CUSTOMERS
</div>

In [3]:
# Obtain the Great Expectations data context used by the cells that follow.
context = gx.get_context()
# Load the whole DIM_CUSTOMERS table from BigQuery into a dataframe.
project_id = "projectm2-aiess"
query = "SELECT * FROM olist_brazilian_ecommerce_target.DIM_CUSTOMERS"
df_customers = read_gbq(query, project_id=project_id)


# Register a pandas data source with the context; it is named after the table.
data_source_name = "olist_brazilian_ecommerce_target.DIM_CUSTOMERS"
data_source = context.data_sources.add_pandas(name=data_source_name)

Downloading: 100%|[32m██████████[0m|


In [4]:
# Add a dataframe asset for DIM_CUSTOMERS to the pandas data source created above.
data_asset_name = "olist_brazilian_ecommerce_target.DIM_CUSTOMERS_asset"
data_asset = data_source.add_dataframe_asset(name=data_asset_name)


In [5]:
# The dataframe is supplied at run time through the batch parameters; the same
# mapping is reused later when the validation definition is run.
batch_parameters = dict(dataframe=df_customers)

# Whole-dataframe batch definition on the DIM_CUSTOMERS asset.
batch_definition_name = "batch_DIM_CUSTOMERS_dataframe"
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    name=batch_definition_name
)

# Materialise the batch so it can be previewed in the next cell.
new_batch = batch_definition.get_batch(batch_parameters=batch_parameters)

In [6]:
# Preview the first 4 rows of the batch to confirm the dataframe was picked up.
print(new_batch.head(4))

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 226.90it/s]

                    pk_customer_sid                customer_unique_id  \
0  10ad09201fcc1c82d181ff7234bcdb3b  94742cd1fbac9146be7e2a139b63e13c   
1  8a0108267d9258a0ec9f74381bc9b0de  7a2dc4682890550ebe3b8befcea3d55c   
2  888d2ebe1af2a8c93c75dae5dfc23719  721d1092e1a6460c67e6a0e691d899a3   
3  dad907e170748a35ef4e92238b7308f3  36b1c0516f123351ffa87430416dcae5   

  customer_zip_code_prefix customer_city customer_state  \
0                    69900    rio branco             AC   
1                    69900    rio branco             AC   
2                    69900    rio branco             AC   
3                    69900    rio branco             AC   

                load_date  
0  2025-06-18 03:31:23 AM  
1  2025-06-18 03:31:23 AM  
2  2025-06-18 03:31:23 AM  
3  2025-06-18 03:31:23 AM  





In [7]:
# Expectation suite for the DIM_CUSTOMERS schema check (this suite covers the
# customers table only).
suite_name = "schema_DIM_CUSTOMERS_expectation"
suite = gx.ExpectationSuite(name=suite_name)

# Register the suite and keep the object the context returns, so the
# expectation below is added to the suite the context manages. This matches
# how the later table-loop cells in this notebook handle their suites.
suite = context.suites.add_or_update(suite)

# The surrogate key must exist and be the first column of the table.
schema_DIM_CUSTOMERS_expectation = gx.expectations.ExpectColumnToExist(
    column="pk_customer_sid", column_index=0
)
suite.add_expectation(schema_DIM_CUSTOMERS_expectation)

# Tie the suite to the DIM_CUSTOMERS batch definition; it is run in the next cell.
definition_name = "schema_DIM_CUSTOMERS_definition"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=suite, name=definition_name
)

In [8]:
# Run the validation definition with the current batch parameters and print the
# resulting report; `validation_results` is read again by the save cell below.
validation_results = validation_definition.run(batch_parameters=batch_parameters)
print(validation_results)

Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 429.35it/s]


{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_CUSTOMERS-olist_brazilian_ecommerce_target.DIM_CUSTOMERS_asset",
          "column": "pk_customer_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "fdd40240-1684-48aa-81c6-0ed9dbcb4a92"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    }
  ],
  "suite_name": "schema_DIM_CUSTOMERS_expectation",
  "suite_parameters": {},
  "statistics": {
    "evaluated_expectations": 1,
    "successful_expectations": 1,
    "unsuccessful_expectations": 0,
    "success_percent": 100.0
  },
  "meta": {
    "great_expectations_version": "1.4.6",
    "batch_spec": {
      "batch_data": "PandasDataFrame"
    },
    "batch_marke

In [9]:
# Write the pretty-printed contents of `validation_results` to a text file
# under gx_output/, creating the folder if it does not exist yet.
output_folder = "gx_output"
result_path = os.path.join(output_folder, "gx_results_DIM_CUSTOMERS_schema_check.txt")
os.makedirs(output_folder, exist_ok=True)

with open(result_path, mode="w") as f:
    print(pprint.pformat(validation_results), end="", file=f)

print(f" Full GX test results saved to {result_path}")

 Full GX test results saved to gx_output/gx_results_DIM_CUSTOMERS_schema_check.txt


<div class="alert alert-block alert-info">
GX_PK_SCHEMA_CHECK 1/2
</div>

In [10]:
# Great Expectations context for the dimension-table primary-key checks.
context = gx.get_context()

# GBQ dimension tables mapped to their primary-key columns. Each entry gives
# the column name, its expected position in the table, and its expected dtype.
gbq_tables_with_columns_index_and_types = {
    "olist_brazilian_ecommerce_target.DIM_DATE": [
        {"column": "pk_date_sid", "column_index": 0, "type": "object"}
    ],
    "olist_brazilian_ecommerce_target.LKP_STATUS_DESC": [
        {"column": "order_status", "column_index": 0, "type": "object"},
        {"column": "payment_status", "column_index": 1, "type": "object"}
    ],
    "olist_brazilian_ecommerce_target.DIM_ORDERS": [
        {"column": "pk_order_sid", "column_index": 0, "type": "object"}
    ],
    "olist_brazilian_ecommerce_target.DIM_GEOLOCATION": [
        {"column": "geolocation_zip_code_prefix", "column_index": 0, "type": "object"}
    ],
}

# Validation results for every table, keyed by table name. Collecting them
# here (instead of keeping only the most recent run) means no table's outcome
# is lost once the loop moves on to the next table.
all_validation_results = {}

# Validate each table in turn.
for table_name, expected_columns in gbq_tables_with_columns_index_and_types.items():
    # Query the full table from GBQ
    query = f"SELECT * FROM {table_name}"
    df_table = read_gbq(query, project_id="projectm2-aiess")

    # Names for the data source and asset are derived from the table name
    data_source_name = f"{table_name}_data_source"
    asset_name = f"{table_name}_asset"

    # Add data source
    data_source = context.data_sources.add_pandas(name=data_source_name)

    # Add DataFrame asset
    data_asset = data_source.add_dataframe_asset(name=asset_name)

    # Add batch definition
    batch_definition_name = table_name
    batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)

    # Get the batch and print the first few rows
    batch_parameters = {"dataframe": df_table}
    batch = batch_definition.get_batch(batch_parameters=batch_parameters)
    print(f"Batch for {table_name}:")
    print(batch.head(4))

    # Create (or replace) the expectation suite for this table
    suite_name = f"{table_name}_expectation_suite"
    suite = gx.ExpectationSuite(name=suite_name)
    suite = context.suites.add_or_update(suite)

    # For each expected column: it must exist at the given position and have
    # the given dtype.
    for column_info in expected_columns:
        column = column_info["column"]
        column_index = column_info["column_index"]
        column_type = column_info["type"]

        column_expectation = gx.expectations.ExpectColumnToExist(
            column=column, column_index=column_index
        )
        suite.add_expectation(column_expectation)

        dtype_expectation = gx.expectations.ExpectColumnValuesToBeOfType(
            column=column, type_=column_type
        )
        suite.add_expectation(dtype_expectation)

    # Create validation definition
    definition_name = f"{table_name}_validation_definition"
    validation_definition = gx.ValidationDefinition(
        data=batch_definition, suite=suite, name=definition_name
    )

    # Run validation, report it, and remember it under the table's name
    table_validation_results = validation_definition.run(batch_parameters=batch_parameters)
    print(f"Validation results for {table_name}:")
    print(table_validation_results)
    all_validation_results[table_name] = table_validation_results

# The save cell that follows pretty-prints `validation_results` into a file
# named for all dimension tables. Bind the complete per-table mapping to that
# name so the file contains every table's results, not just the last one.
validation_results = all_validation_results

Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.DIM_DATE:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 246.00it/s]


  pk_date_sid   full_date  year  month month_name  day_of_month  day_name  \
0    20150104  2015-01-04  2015      1    January             4    Sunday   
1    20150111  2015-01-11  2015      1    January            11    Sunday   
2    20160107  2016-01-07  2016      1    January             7  Thursday   
3    20190105  2019-01-05  2019      1    January             5  Saturday   

   is_weekday               load_date  
0           0  2025-06-18 03:31:28 AM  
1           0  2025-06-18 03:31:28 AM  
2           1  2025-06-18 03:31:28 AM  
3           0  2025-06-18 03:31:28 AM  


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 596.77it/s] 

Validation results for olist_brazilian_ecommerce_target.DIM_DATE:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_DATE_data_source-olist_brazilian_ecommerce_target.DIM_DATE_asset",
          "column": "pk_date_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "f7933167-42c4-4af9-812f-8cbca389a7e6"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_DATE_data_source-olist_brazilian_ecommerce_target.DIM_DATE_asset",
          "column": "pk_date_sid",
          "typ




Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.LKP_STATUS_DESC:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 250.12it/s]


  order_status payment_status  record_count profit_lost  \
0     Invoiced      Completed           273      Profit   
1   Processing      Completed           269      Profit   
2  Unavailable      Completed             6      Profit   
3     Canceled      Completed           411      Profit   

                                  status_description  
0               Invoice processed, payment received.  
1           Order finalized with successful payment.  
2      Payment received despite item unavailability.  
3  Payment was completed despite order cancellation.  


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 238.74it/s] 

Validation results for olist_brazilian_ecommerce_target.LKP_STATUS_DESC:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.LKP_STATUS_DESC_data_source-olist_brazilian_ecommerce_target.LKP_STATUS_DESC_asset",
          "column": "order_status",
          "column_index": 0
        },
        "meta": {},
        "id": "7e935a1e-214a-4a2c-b0c8-98625da4f8fc"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.LKP_STATUS_DESC_data_source-olist_brazilian_ecommerce_target.LKP_STATUS_DESC_asset",
          "co




Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.DIM_ORDERS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 269.25it/s]


                       pk_order_sid                   fk_customer_sid  \
0  809a282bbd5dbcabb6f2f724fca862ec  622e13439d6b5a0b486c435618b2679e   
1  bfbd0f9bdef84302105ad712db648a6c  86dc2ffce2dfff336de2f386a786e574   
2  2e7a8482f6fb09756ca50c10d7bfc047  08c5351a6aca1c1589a38f244edeee9d   
3  e5215415bb6f76fe3b7cb68103a0d1c0  b6f6cbfc126f1ae6723fe2f9b3751208   

  fk_order_purchased_date_sid fk_order_approved_at_date_sid  \
0                    20160913                      20161007   
1                    20160915                      20160915   
2                    20160904                      20161007   
3                    20161022                          None   

  fk_order_delivered_carrier_date_sid fk_order_delivered_customer_date_sid  \
0                                None                                 None   
1                            20161107                             20161109   
2                            20161018                                 None   
3     

Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 591.36it/s] 


Validation results for olist_brazilian_ecommerce_target.DIM_ORDERS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_ORDERS_data_source-olist_brazilian_ecommerce_target.DIM_ORDERS_asset",
          "column": "pk_order_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "80ea5dff-16fb-47b4-9ec3-23ecfa2df57b"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_ORDERS_data_source-olist_brazilian_ecommerce_target.DIM_ORDERS_asset",
          "column": "pk_order_sid",
  

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 314.37it/s] 

  geolocation_zip_code_prefix  geolocation_lat  geolocation_lng  \
0                       81470       -25.571748       -49.334374   
1                       28930       -22.969370       -42.029834   
2                       87365       -24.271860       -53.069433   
3                       87365       -24.281905       -53.074516   

     geolocation_city geolocation_state               load_date  
0            * cidade                PR  2025-06-18 03:31:31 AM  
1  ...arraial do cabo                RJ  2025-06-18 03:31:31 AM  
2      4o. centenario                PR  2025-06-18 03:31:31 AM  
3       4º centenario                PR  2025-06-18 03:31:31 AM  



Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 449.98it/s] 

Validation results for olist_brazilian_ecommerce_target.DIM_GEOLOCATION:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_GEOLOCATION_data_source-olist_brazilian_ecommerce_target.DIM_GEOLOCATION_asset",
          "column": "geolocation_zip_code_prefix",
          "column_index": 0
        },
        "meta": {},
        "id": "d254f623-be89-49b8-940e-bc447128fb11"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_GEOLOCATION_data_source-olist_brazilian_ecommerce_target.DIM_GEOLOCATION_asset"




In [11]:
# Write the pretty-printed contents of `validation_results` to a text file
# under gx_output/, creating the folder if it does not exist yet.
# NOTE(review): this saves whatever `validation_results` currently holds. If the
# preceding loop rebinds that name on every iteration, only the last table's
# results end up in this file - confirm that is intended.
output_folder = "gx_output"
result_path = os.path.join(output_folder, "gx_results_DIM_Tables_primary_key_check.txt")
os.makedirs(output_folder, exist_ok=True)

with open(result_path, mode="w") as f:
    print(pprint.pformat(validation_results), end="", file=f)

print(f" Full GX test results saved to {result_path}")

 Full GX test results saved to gx_output/gx_results_DIM_Tables_primary_key_check.txt


<div class="alert alert-block alert-info">
GX_PK_SCHEMA_CHECK 2/2
</div>

In [12]:
# Great Expectations context for the fact-table primary-key checks.
context = gx.get_context()

# GBQ fact tables mapped to the key columns to verify. Each entry gives the
# column name, its expected position in the table, and its expected dtype.
gbq_tables_with_columns_index_and_types = {
    "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS": [
        dict(column="pk_order_id", column_index=1, type="object"),
        dict(column="pk_order_item_id", column_index=2, type="object"),
    ],
    "olist_brazilian_ecommerce_target.FCT_PAYMENTS": [
        dict(column="pk_order_id", column_index=1, type="object"),
        dict(column="pk_payment_sequential", column_index=2, type="int64"),
    ],
    "olist_brazilian_ecommerce_target.FCT_REVIEWS": [
        dict(column="pk_order_id", column_index=1, type="object"),
        dict(column="pk_review_id", column_index=2, type="object"),
    ],
}

# One pass per table: load it, wire it into GX, build its suite, validate.
for table_name in gbq_tables_with_columns_index_and_types:
    expected_columns = gbq_tables_with_columns_index_and_types[table_name]

    # Load the full table from GBQ.
    query = f"SELECT * FROM {table_name}"
    df_table = read_gbq(query, project_id="projectm2-aiess")

    # All GX object names are derived from the table name.
    data_source_name = f"{table_name}_data_source"
    asset_name = f"{table_name}_asset"
    batch_definition_name = table_name

    # data source -> dataframe asset -> whole-dataframe batch definition
    data_source = context.data_sources.add_pandas(name=data_source_name)
    data_asset = data_source.add_dataframe_asset(name=asset_name)
    batch_definition = data_asset.add_batch_definition_whole_dataframe(
        name=batch_definition_name
    )

    # Fetch the batch and show its first rows.
    batch_parameters = dict(dataframe=df_table)
    batch = batch_definition.get_batch(batch_parameters=batch_parameters)
    print(f"Batch for {table_name}:")
    print(batch.head(4))

    # Register (or replace) this table's expectation suite.
    suite_name = f"{table_name}_expectation_suite"
    suite = context.suites.add_or_update(gx.ExpectationSuite(name=suite_name))

    # Two expectations per column: present at the given position, and of the
    # given dtype.
    for column_info in expected_columns:
        column, column_index, column_type = (
            column_info[field] for field in ("column", "column_index", "type")
        )

        column_expectation = gxe.ExpectColumnToExist(
            column=column, column_index=column_index
        )
        suite.add_expectation(column_expectation)

        dtype_expectation = gxe.ExpectColumnValuesToBeOfType(
            column=column, type_=column_type
        )
        suite.add_expectation(dtype_expectation)

    # Bind the suite to the batch definition and run it on the loaded dataframe.
    definition_name = f"{table_name}_validation_definition"
    validation_definition = gx.ValidationDefinition(
        name=definition_name, data=batch_definition, suite=suite
    )
    validation_results = validation_definition.run(batch_parameters=batch_parameters)
    print(f"Validation results for {table_name}:")
    print(validation_results)

Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 300.30it/s]

                       fk_order_sid                       pk_order_id  \
0  7f39ba4c9052be115350065d07583cac  7f39ba4c9052be115350065d07583cac   
1  9dc8d1a6f16f1b89874c29c9d8d30447  9dc8d1a6f16f1b89874c29c9d8d30447   
2  d455a8cb295653b55abda06d434ab492  d455a8cb295653b55abda06d434ab492   
3  5d70582531ed37f284797ba1354e0c50  5d70582531ed37f284797ba1354e0c50   

  pk_order_item_id fk_shipping_limit_date_sid  \
0                1                   20171024   
1                1                   20171018   
2                1                   20171012   
3                1                   20170330   

                         product_id                         seller_id  \
0  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
1  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
2  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
3  08574b074924071f4e201e151b152b4e  001cca7ae9ae17fb1caed9dfb1094831   

        shipping_limit


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 550.87it/s] 


Validation results for olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_asset",
          "column": "pk_order_id",
          "column_index": 1
        },
        "meta": {},
        "id": "4a4bbcf1-b1f2-4bf4-8ee8-125860178bb6"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_asset",
          "col

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 283.13it/s]


                       fk_order_sid                       pk_order_id  \
0  e6670c9089973cc68e5b90b7d1f4fec4  e6670c9089973cc68e5b90b7d1f4fec4   
1  d022bdad33a575ca5edb8c4d51e44310  d022bdad33a575ca5edb8c4d51e44310   
2  90ea2b1be9c98c67406de151652daa63  90ea2b1be9c98c67406de151652daa63   
3  033e33708df28579692a3d19492c6553  033e33708df28579692a3d19492c6553   

   pk_payment_sequential payment_type  payment_installments  payment_value  \
0                      1  credit_card                     1          88.49   
1                      1       boleto                     1          70.03   
2                      1       boleto                     1          59.22   
3                      1  credit_card                     1         145.57   

                load_date  
0  2025-06-18 03:31:51 AM  
1  2025-06-18 03:31:51 AM  
2  2025-06-18 03:31:51 AM  
3  2025-06-18 03:31:51 AM  


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 603.96it/s] 

Validation results for olist_brazilian_ecommerce_target.FCT_PAYMENTS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_PAYMENTS_data_source-olist_brazilian_ecommerce_target.FCT_PAYMENTS_asset",
          "column": "pk_order_id",
          "column_index": 1
        },
        "meta": {},
        "id": "db2f537b-5880-4079-aa1a-76a4964a8eae"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_PAYMENTS_data_source-olist_brazilian_ecommerce_target.FCT_PAYMENTS_asset",
          "column": "pk_order




Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.FCT_REVIEWS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 302.29it/s]

                       fk_order_sid                       pk_order_id  \
0  809a282bbd5dbcabb6f2f724fca862ec  809a282bbd5dbcabb6f2f724fca862ec   
1  bfbd0f9bdef84302105ad712db648a6c  bfbd0f9bdef84302105ad712db648a6c   
2  e5215415bb6f76fe3b7cb68103a0d1c0  e5215415bb6f76fe3b7cb68103a0d1c0   
3  8beb59392e21af5eb9547ae1a9938d06  8beb59392e21af5eb9547ae1a9938d06   

                       pk_review_id fk_review_creation_date_sid  review_score  \
0  69ac6a27fde9855ebeaaecac0f78058b                    20161002             1   
1  6916ca4502d6d3bfd39818759d55d536                    20161006             1   
2  49f695dffa457eaba90d388a5c37e942                    20161009             1   
3  b28309e8253951a954123e039d3242f7                    20161020             1   

  review_comment_title                             review_comment_message  \
0                       MEU PEDIDO NÃO FOI ENTREGUE E NÃO FOI DADA NEN...   
1                          nao recebi o produto e nem resposta da empresa 


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 589.50it/s] 

Validation results for olist_brazilian_ecommerce_target.FCT_REVIEWS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_REVIEWS_data_source-olist_brazilian_ecommerce_target.FCT_REVIEWS_asset",
          "column": "pk_order_id",
          "column_index": 1
        },
        "meta": {},
        "id": "c88efc3e-9553-4480-a9c2-ffbd9c41ea51"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_REVIEWS_data_source-olist_brazilian_ecommerce_target.FCT_REVIEWS_asset",
          "column": "pk_order_id",




<div class="alert alert-block alert-info">
GX_FK_Schema_Check_ORDERS
</div>

In [13]:
# Great Expectations context for the DIM_ORDERS foreign-key checks.
context = gx.get_context()

# DIM_ORDERS foreign-key and status columns to verify. Each entry gives the
# column name, its expected position in the table, and its expected dtype.
gbq_tables_with_columns_index_and_types = {
    "olist_brazilian_ecommerce_target.DIM_ORDERS": [
        dict(column="fk_customer_sid", column_index=1, type="object"),
        dict(column="fk_order_purchased_date_sid", column_index=2, type="object"),
        dict(column="fk_order_approved_at_date_sid", column_index=3, type="object"),
        dict(column="fk_order_delivered_carrier_date_sid", column_index=4, type="object"),
        dict(column="fk_order_delivered_customer_date_sid", column_index=5, type="object"),
        dict(column="fk_order_estimated_delivery_date_sid", column_index=6, type="object"),
        dict(column="order_status", column_index=7, type="object"),
        dict(column="payment_status", column_index=8, type="object"),
    ],
}

# One pass per table: load it, wire it into GX, build its suite, validate.
for table_name in gbq_tables_with_columns_index_and_types:
    expected_columns = gbq_tables_with_columns_index_and_types[table_name]

    # Load the full table from GBQ.
    query = f"SELECT * FROM {table_name}"
    df_table = read_gbq(query, project_id="projectm2-aiess")

    # All GX object names are derived from the table name.
    data_source_name = f"{table_name}_data_source"
    asset_name = f"{table_name}_asset"
    batch_definition_name = table_name

    # data source -> dataframe asset -> whole-dataframe batch definition
    data_source = context.data_sources.add_pandas(name=data_source_name)
    data_asset = data_source.add_dataframe_asset(name=asset_name)
    batch_definition = data_asset.add_batch_definition_whole_dataframe(
        name=batch_definition_name
    )

    # Fetch the batch and show its first rows.
    batch_parameters = dict(dataframe=df_table)
    batch = batch_definition.get_batch(batch_parameters=batch_parameters)
    print(f"Batch for {table_name}:")
    print(batch.head(4))

    # Register (or replace) this table's expectation suite.
    suite_name = f"{table_name}_expectation_suite"
    suite = context.suites.add_or_update(gx.ExpectationSuite(name=suite_name))

    # Two expectations per column: present at the given position, and of the
    # given dtype.
    for column_info in expected_columns:
        column, column_index, column_type = (
            column_info[field] for field in ("column", "column_index", "type")
        )

        column_expectation = gxe.ExpectColumnToExist(
            column=column, column_index=column_index
        )
        suite.add_expectation(column_expectation)

        dtype_expectation = gxe.ExpectColumnValuesToBeOfType(
            column=column, type_=column_type
        )
        suite.add_expectation(dtype_expectation)

    # Bind the suite to the batch definition and run it on the loaded dataframe.
    definition_name = f"{table_name}_validation_definition"
    validation_definition = gx.ValidationDefinition(
        name=definition_name, data=batch_definition, suite=suite
    )
    validation_results = validation_definition.run(batch_parameters=batch_parameters)
    print(f"Validation results for {table_name}:")
    print(validation_results)

Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.DIM_ORDERS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 285.48it/s] 

                       pk_order_sid                   fk_customer_sid  \
0  809a282bbd5dbcabb6f2f724fca862ec  622e13439d6b5a0b486c435618b2679e   
1  bfbd0f9bdef84302105ad712db648a6c  86dc2ffce2dfff336de2f386a786e574   
2  2e7a8482f6fb09756ca50c10d7bfc047  08c5351a6aca1c1589a38f244edeee9d   
3  e5215415bb6f76fe3b7cb68103a0d1c0  b6f6cbfc126f1ae6723fe2f9b3751208   

  fk_order_purchased_date_sid fk_order_approved_at_date_sid  \
0                    20160913                      20161007   
1                    20160915                      20160915   
2                    20160904                      20161007   
3                    20161022                          None   

  fk_order_delivered_carrier_date_sid fk_order_delivered_customer_date_sid  \
0                                None                                 None   
1                            20161107                             20161109   
2                            20161018                                 None   
3     


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 671.16it/s] 

Validation results for olist_brazilian_ecommerce_target.DIM_ORDERS:
{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_ORDERS_data_source-olist_brazilian_ecommerce_target.DIM_ORDERS_asset",
          "column": "fk_customer_sid",
          "column_index": 1
        },
        "meta": {},
        "id": "b6f3589d-a0c2-4f67-bea0-75d0faf3f6e5"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.DIM_ORDERS_data_source-olist_brazilian_ecommerce_target.DIM_ORDERS_asset",
          "column": "fk_customer_s




<div class="alert alert-block alert-info">
GX_FK_SCHEMA_CHECK_FACT_TABLES
</div>

In [14]:
# Great Expectations context for the fact-table foreign-key checks.
context = gx.get_context()

# GBQ fact tables mapped to their foreign-key columns. Each entry gives the
# column name, its expected position in the table, and its expected dtype.
gbq_tables_with_columns_index_and_types = {
    "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS": [
        dict(column="fk_order_sid", column_index=0, type="object"),
        dict(column="fk_shipping_limit_date_sid", column_index=3, type="object"),
    ],
    "olist_brazilian_ecommerce_target.FCT_PAYMENTS": [
        dict(column="fk_order_sid", column_index=0, type="object"),
    ],
    "olist_brazilian_ecommerce_target.FCT_REVIEWS": [
        dict(column="fk_order_sid", column_index=0, type="object"),
        dict(column="fk_review_creation_date_sid", column_index=3, type="object"),
    ],
}

# One pass per table: load it, wire it into GX, build its suite, validate.
for table_name in gbq_tables_with_columns_index_and_types:
    expected_columns = gbq_tables_with_columns_index_and_types[table_name]

    # Load the full table from GBQ.
    query = f"SELECT * FROM {table_name}"
    df_table = read_gbq(query, project_id="projectm2-aiess")

    # All GX object names are derived from the table name.
    data_source_name = f"{table_name}_data_source"
    asset_name = f"{table_name}_asset"
    batch_definition_name = table_name

    # data source -> dataframe asset -> whole-dataframe batch definition
    data_source = context.data_sources.add_pandas(name=data_source_name)
    data_asset = data_source.add_dataframe_asset(name=asset_name)
    batch_definition = data_asset.add_batch_definition_whole_dataframe(
        name=batch_definition_name
    )

    # Fetch the batch and show its first rows.
    batch_parameters = dict(dataframe=df_table)
    batch = batch_definition.get_batch(batch_parameters=batch_parameters)
    print(f"Batch for {table_name}:")
    print(batch.head(4))

    # Register (or replace) this table's expectation suite.
    suite_name = f"{table_name}_expectation_suite"
    suite = context.suites.add_or_update(gx.ExpectationSuite(name=suite_name))

    # Two expectations per column: present at the given position, and of the
    # given dtype.
    for column_info in expected_columns:
        column, column_index, column_type = (
            column_info[field] for field in ("column", "column_index", "type")
        )

        column_expectation = gxe.ExpectColumnToExist(
            column=column, column_index=column_index
        )
        suite.add_expectation(column_expectation)

        dtype_expectation = gxe.ExpectColumnValuesToBeOfType(
            column=column, type_=column_type
        )
        suite.add_expectation(dtype_expectation)

    # Bind the suite to the batch definition and run it on the loaded dataframe.
    definition_name = f"{table_name}_validation_definition"
    validation_definition = gx.ValidationDefinition(
        name=definition_name, data=batch_definition, suite=suite
    )
    validation_results = validation_definition.run(batch_parameters=batch_parameters)
    print(f"Validation results for {table_name}:")
    print(validation_results)

Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 280.35it/s] 

                       fk_order_sid                       pk_order_id  \
0  7f39ba4c9052be115350065d07583cac  7f39ba4c9052be115350065d07583cac   
1  9dc8d1a6f16f1b89874c29c9d8d30447  9dc8d1a6f16f1b89874c29c9d8d30447   
2  d455a8cb295653b55abda06d434ab492  d455a8cb295653b55abda06d434ab492   
3  5d70582531ed37f284797ba1354e0c50  5d70582531ed37f284797ba1354e0c50   

  pk_order_item_id fk_shipping_limit_date_sid  \
0                1                   20171024   
1                1                   20171018   
2                1                   20171012   
3                1                   20170330   

                         product_id                         seller_id  \
0  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
1  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
2  a2ff5a97bf95719e38ea2e3b4105bce8  0015a82c2db000af6aaaf3ae2ecb0532   
3  08574b074924071f4e201e151b152b4e  001cca7ae9ae17fb1caed9dfb1094831   

        shipping_limit


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 570.45it/s] 


Validation results for olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_asset",
          "column": "fk_order_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "eace59e2-4518-40e2-970d-321304362e07"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_data_source-olist_brazilian_ecommerce_target.FCT_ORDER_ITEMS_asset",
          "co

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 227.47it/s]


                       fk_order_sid                       pk_order_id  \
0  e6670c9089973cc68e5b90b7d1f4fec4  e6670c9089973cc68e5b90b7d1f4fec4   
1  d022bdad33a575ca5edb8c4d51e44310  d022bdad33a575ca5edb8c4d51e44310   
2  90ea2b1be9c98c67406de151652daa63  90ea2b1be9c98c67406de151652daa63   
3  033e33708df28579692a3d19492c6553  033e33708df28579692a3d19492c6553   

   pk_payment_sequential payment_type  payment_installments  payment_value  \
0                      1  credit_card                     1          88.49   
1                      1       boleto                     1          70.03   
2                      1       boleto                     1          59.22   
3                      1  credit_card                     1         145.57   

                load_date  
0  2025-06-18 03:31:51 AM  
1  2025-06-18 03:31:51 AM  
2  2025-06-18 03:31:51 AM  
3  2025-06-18 03:31:51 AM  


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 547.32it/s] 

Validation results for olist_brazilian_ecommerce_target.FCT_PAYMENTS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_PAYMENTS_data_source-olist_brazilian_ecommerce_target.FCT_PAYMENTS_asset",
          "column": "fk_order_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "2837a2aa-e322-49b8-8f7e-3e0d4613be97"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_PAYMENTS_data_source-olist_brazilian_ecommerce_target.FCT_PAYMENTS_asset",
          "column": "fk_orde




Downloading: 100%|[32m██████████[0m|
Batch for olist_brazilian_ecommerce_target.FCT_REVIEWS:


Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 252.33it/s] 

                       fk_order_sid                       pk_order_id  \
0  809a282bbd5dbcabb6f2f724fca862ec  809a282bbd5dbcabb6f2f724fca862ec   
1  bfbd0f9bdef84302105ad712db648a6c  bfbd0f9bdef84302105ad712db648a6c   
2  e5215415bb6f76fe3b7cb68103a0d1c0  e5215415bb6f76fe3b7cb68103a0d1c0   
3  8beb59392e21af5eb9547ae1a9938d06  8beb59392e21af5eb9547ae1a9938d06   

                       pk_review_id fk_review_creation_date_sid  review_score  \
0  69ac6a27fde9855ebeaaecac0f78058b                    20161002             1   
1  6916ca4502d6d3bfd39818759d55d536                    20161006             1   
2  49f695dffa457eaba90d388a5c37e942                    20161009             1   
3  b28309e8253951a954123e039d3242f7                    20161020             1   

  review_comment_title                             review_comment_message  \
0                       MEU PEDIDO NÃO FOI ENTREGUE E NÃO FOI DADA NEN...   
1                          nao recebi o produto e nem resposta da empresa 


Calculating Metrics: 100%|██████████| 3/3 [00:00<00:00, 671.91it/s] 

Validation results for olist_brazilian_ecommerce_target.FCT_REVIEWS:
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_REVIEWS_data_source-olist_brazilian_ecommerce_target.FCT_REVIEWS_asset",
          "column": "fk_order_sid",
          "column_index": 0
        },
        "meta": {},
        "id": "3a02e39a-bcfb-44a4-ab4e-62bbfc63c5da"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_of_type",
        "kwargs": {
          "batch_id": "olist_brazilian_ecommerce_target.FCT_REVIEWS_data_source-olist_brazilian_ecommerce_target.FCT_REVIEWS_asset",
          "column": "fk_order_sid




In [15]:
# Persist the pretty-printed contents of `validation_results` as a text report.
# The output directory is created on demand; an existing report is overwritten.
output_folder = "gx_output"
result_path = os.path.join(output_folder, "gx_results_schema_check.txt")
os.makedirs(output_folder, exist_ok=True)

with open(result_path, mode="w") as results_file:
    report_text = pprint.pformat(validation_results)
    results_file.write(report_text)

print(f" Full GX test results saved to {result_path}")

 Full GX test results saved to gx_output/gx_results_schema_check.txt
