In [None]:
import great_expectations as gx
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import connectorx as cx

# Show which Great Expectations release this kernel has installed.
print(f"GE version: {gx.__version__}")

# Obtain the Data Context used by the following cells and report its class.
context = gx.get_context()
print("Context type: {}".format(type(context).__name__))

In [None]:
# Register the pandas data source on the context.
# add_or_update_pandas (instead of add_pandas) keeps this cell re-runnable in
# a live kernel: a second run updates the existing source rather than failing
# on the duplicate name. The later setup cells in this notebook do the same.
datasource = context.data_sources.add_or_update_pandas(name="ashrae_pandas_ds")

In [None]:
# Get-or-create the DataFrame asset so that re-running this cell does not
# fail on a duplicate asset name (same pattern as the later setup cells).
try:
    asset = datasource.get_asset("my_runtime_asset")
except LookupError:
    # Not registered yet on this data source: create it.
    asset = datasource.add_dataframe_asset(name="my_runtime_asset")

In [None]:
import pandas as pd
# Toy frame: a single column "a" whose last entry is missing.
df = pd.DataFrame.from_dict({"a": [1, 2, 3, None]})


In [None]:
# Get-or-create the whole-DataFrame batch definition so this cell can be
# re-run without tripping over an already-registered name (same pattern as
# the later setup cells in this notebook).
try:
    batch_definition = asset.get_batch_definition("my_batch_definition")
except LookupError:
    # Not defined yet on this asset: create it.
    batch_definition = asset.add_batch_definition_whole_dataframe("my_batch_definition")
print(f"Batch definition type: {type(batch_definition).__name__}")

In [None]:
# Hand the in-memory DataFrame to the batch definition to obtain a batch.
batch = batch_definition.get_batch(
    batch_parameters=dict(dataframe=df),
)

In [None]:
# Ask the context for a validator bound to `batch`; the following cells call
# expect_* methods on it. No expectation suite is passed here.
validator = context.get_validator(batch=batch)

In [None]:
# Column "a" of `df` contains a None, so this not-null check should report a failure.
validator.expect_column_values_to_not_be_null("a")
# Validate the batch and print the returned result object.
print(validator.validate())

In [None]:
# Build an ExpectationSuite object with the given name; it is registered on
# the context in the next cell.
new_suite = gx.ExpectationSuite(name="ashrae_quality_suite")

In [None]:
# Register the suite on the context. add_or_update (instead of add) keeps the
# cell re-runnable: a second run updates the stored suite rather than raising
# because the name already exists. The later cells in this notebook use the
# same call for that reason.
suite = context.suites.add_or_update(new_suite)

In [None]:
# Point the existing validator at the suite registered on the context.
validator.expectation_suite = suite

In [None]:
# Run the not-null check again now that the validator carries `suite`
# (presumably so the expectation is recorded against that suite; confirm).
validator.expect_column_values_to_not_be_null("a")

In [None]:
import pandas as pd
import numpy as np

# Step 1: a fresh frame whose middle value is missing.
df_with_nulls = pd.DataFrame(dict(a=[1, None, 3]))

# Step 2: the data changed, so request a new batch for the new frame.
new_batch = batch_definition.get_batch(
    batch_parameters=dict(dataframe=df_with_nulls),
)

# Step 3: rebind `validator` to the new batch.
validator = context.get_validator(batch=new_batch)

# Step 4: evaluate the not-null expectation on column "a".
result = validator.expect_column_values_to_not_be_null("a")

# Step 5: summarise the outcome.
print("Validation Success: {}".format(result.success))
print("Unexpected Count: {}".format(result.result["unexpected_count"]))
print("Unexpected Values: {}".format(result.result["partial_unexpected_list"]))

In [None]:
import pandas as pd
import numpy as np
import great_expectations as gx

# --- Names used throughout this cell ---------------------------------------
datasource_name = "production_pandas_datasource"
asset_name = "user_metrics_asset"
batch_definition_name = "whole_dataframe_definition"
suite_name = "user_metrics_suite_2.0"

# --- Context ----------------------------------------------------------------
context = gx.get_context()

# --- Sample data: the third `score` is missing -----------------------------
df = pd.DataFrame(
    dict(
        id=[101, 102, 103, 104],
        score=[85.0, 92.0, np.nan, 88.0],
    )
)

# --- Data source / asset / batch definition (get-or-create) ----------------
datasource = context.data_sources.add_or_update_pandas(name=datasource_name)

try:
    asset = datasource.get_asset(asset_name)
except LookupError:
    # Asset not registered yet: create it.
    asset = datasource.add_dataframe_asset(name=asset_name)

try:
    batch_definition = asset.get_batch_definition(batch_definition_name)
except LookupError:
    # Batch definition not registered yet: create it.
    batch_definition = asset.add_batch_definition_whole_dataframe(batch_definition_name)

# --- Suite: add it, or update the one already stored under this name -------
suite = context.suites.add_or_update(gx.ExpectationSuite(name=suite_name))

# --- Validator bound to this frame and suite -------------------------------
batch = batch_definition.get_batch(batch_parameters=dict(dataframe=df))
validator = context.get_validator(batch=batch, expectation_suite=suite)

# Expectations: both columns non-null, and `score` within [0, 100].
validator.expect_column_values_to_not_be_null(column="id")
validator.expect_column_values_to_not_be_null(column="score")
validator.expect_column_values_to_be_between(column="score", min_value=0, max_value=100)

# --- Run the validation -----------------------------------------------------
validation_result = validator.validate()

# Store the validator's suite through the context's suite factory (used here
# in place of validator.save_expectation_suite()).
context.suites.add_or_update(validator.get_expectation_suite())

# 7. REPORTING
# Print an overall status line followed by one line per expectation result.
print(f"\nValidation Report: {suite_name}")
# The status markers were mis-encoded (UTF-8 emoji read as Mac Roman); restored here.
print(f"Status: {'✅ SUCCESS' if validation_result.success else '❌ FAILED'}")
print("-" * 30)

for result in validation_result.results:
    res_status = "Pass" if result.success else "Fail"
    # Results without a "column" kwarg are labelled "Table" instead of "None",
    # consistent with the detailed report later in this notebook.
    col = result.expectation_config.kwargs.get("column", "Table")
    print(f"[{res_status}] {col}: {result.expectation_config.type}")

In [1]:
import pandas as pd
import numpy as np
import great_expectations as gx
import datetime

# --- 1. Context and sample data --------------------------------------------
context = gx.get_context()

# Example frame: `id` is complete, `score` has one missing value, and the last
# `status` entry is an invalid value.
df = pd.DataFrame(
    dict(
        id=[101, 102, 103, 104, 105],
        score=[85.0, 92.0, np.nan, 88.0, 76.0],
        status=["active", "active", "pending", "inactive", "unknown"],
    )
)

# --- 2. Data source, asset and batch definition (get-or-create) -----------
datasource_name = "production_datasource"
datasource = context.data_sources.add_or_update_pandas(name=datasource_name)

asset_name = "user_metrics_asset"
try:
    asset = datasource.get_asset(asset_name)
except LookupError:
    # Asset not registered yet: create it.
    asset = datasource.add_dataframe_asset(name=asset_name)

batch_def_name = "full_dataframe_batch"
try:
    batch_definition = asset.get_batch_definition(batch_def_name)
except LookupError:
    # Batch definition not registered yet: create it.
    batch_definition = asset.add_batch_definition_whole_dataframe(batch_def_name)

# --- 3. Suite and validator -------------------------------------------------
suite_name = "user_data_quality_suite"
# Add the suite, or update the one already stored under this name.
suite = context.suites.add_or_update(gx.ExpectationSuite(name=suite_name))

# Bind the frame to the batch definition, then build a validator for it.
batch = batch_definition.get_batch(batch_parameters=dict(dataframe=df))
validator = context.get_validator(batch=batch, expectation_suite=suite)

# --- 4. Rules (expectations) ------------------------------------------------

# `id` has no missing entries, so this one should pass.
validator.expect_column_values_to_not_be_null(column="id")

# `score` contains an np.nan, so this one should fail.
validator.expect_column_values_to_not_be_null(column="score")

# The `score` values present all lie within 0-100, so this one should pass.
validator.expect_column_values_to_be_between(column="score", min_value=0, max_value=100)

# "unknown" is outside the allowed statuses, so this one should fail.
validator.expect_column_values_to_be_in_set(
    column="status",
    value_set=["active", "pending", "inactive"],
)

# --- 5. Persist the rules ---------------------------------------------------
# Saved via add_or_update to avoid the "Already Exists" error in GX 1.x.
context.suites.add_or_update(validator.get_expectation_suite())

# --- 6. Run the validation (the report is printed further down) -------------
validation_result = validator.validate()

def print_detailed_report(result, name):
    """Print a console report for a validation result.

    Args:
        result: Object exposing ``success`` (bool), ``statistics`` (a dict with
            ``success_percent``, ``evaluated_expectations``,
            ``successful_expectations`` and ``unsuccessful_expectations``) and
            ``results`` (iterable of per-expectation results, each with
            ``success``, ``expectation_config.kwargs``,
            ``expectation_config.type`` and ``result``).
        name: Label shown in the report header (the suite name in this notebook).

    Returns:
        None. The report is written to stdout.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80
    stats = result.statistics

    print(heavy_rule)
    print(f"📊 DETAILED VALIDATION REPORT: {name}")
    print(f"Executed at: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(heavy_rule)

    # Summary table.
    # success_percent may be None (e.g. when nothing was evaluated; confirm
    # against the GX version in use); formatting None with ':.2f' would raise.
    success_pct = stats.get("success_percent")
    rate_text = "N/A" if success_pct is None else f"{success_pct:.2f}%"
    print(f"{'OVERALL STATUS':<20}: {'✅ PASSED' if result.success else '❌ FAILED'}")
    print(f"{'SUCCESS RATE':<20}: {rate_text}")
    print(
        f"{'EXPECTATIONS':<20}: {stats['evaluated_expectations']} total"
        f" | {stats['successful_expectations']} passed"
        f" | {stats['unsuccessful_expectations']} failed"
    )
    print(light_rule)

    # Breakdown table header.
    print(f"{'COLUMN':<15} | {'EXPECTATION':<35} | {'STATUS'}")
    print(light_rule)

    for res in result.results:
        # Results without a "column" kwarg are labelled "Table".
        col = res.expectation_config.kwargs.get("column", "Table")
        etype = res.expectation_config.type
        status = "✅ PASS" if res.success else "❌ FAIL"

        print(f"{str(col):<15} | {etype:<35} | {status}")

        # Detailed failure analysis.
        if not res.success:
            # Tolerate a missing/empty result payload instead of crashing.
            details = res.result or {}
            count = details.get("unexpected_count", 0)
            total = details.get("element_count", 0)
            pct = details.get("unexpected_percent", 0)
            bad_values = details.get("partial_unexpected_list", [])

            # An explicit None percentage cannot be formatted with ':.1f'.
            pct_text = "N/A" if pct is None else f"{pct:.1f}%"
            print(f"   └─ ⚠️  FAILURE DETAIL: {count}/{total} values failed ({pct_text})")
            if bad_values:
                print(f"   └─ 🔍 SAMPLE BAD DATA: {bad_values}")
            print("")

    print(heavy_rule)

# Render the console report for the validation run above.
print_detailed_report(result=validation_result, name=suite_name)

# --- 7. Visual Data Docs (HTML) ---------------------------------------------
# Build the HTML Data Docs site for this context.
context.build_data_docs()
# To open the generated site in a browser automatically, uncomment:
# context.open_data_docs()

print(
    "\n[INFO] Data Docs generated. Use 'context.open_data_docs()' to view the visual report."
)



Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]



Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]



Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]



Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/20 [00:00<?, ?it/s]

📊 DETAILED VALIDATION REPORT: user_data_quality_suite
Executed at: 2025-12-29 12:29:59
OVERALL STATUS      : ❌ FAILED
SUCCESS RATE        : 50.00%
EXPECTATIONS        : 4 total | 2 passed | 2 failed
--------------------------------------------------------------------------------
COLUMN          | EXPECTATION                         | STATUS
--------------------------------------------------------------------------------
id              | expect_column_values_to_not_be_null | ✅ PASS
score           | expect_column_values_to_not_be_null | ❌ FAIL
   └─ ⚠️  FAILURE DETAIL: 1/5 values failed (20.0%)
   └─ 🔍 SAMPLE BAD DATA: [nan]

score           | expect_column_values_to_be_between  | ✅ PASS
status          | expect_column_values_to_be_in_set   | ❌ FAIL
   └─ ⚠️  FAILURE DETAIL: 1/5 values failed (20.0%)
   └─ 🔍 SAMPLE BAD DATA: ['unknown']


[INFO] Data Docs generated. Use 'context.open_data_docs()' to view the visual report.
