In [20]:
# STEP 1: Install Great Expectations if not already installed
!pip install great_expectations

# STEP 2: Required imports
import os
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint
from great_expectations.core.batch import RuntimeBatchRequest

# STEP 3: Initialize file-based context
context = gx.get_context(mode="file")

# STEP 4: Add a Pandas datasource with runtime data connector
datasource_name = "my_csv_datasource"
if datasource_name not in [ds["name"] for ds in context.list_datasources()]:
    context.add_datasource(
        name=datasource_name,
        class_name="Datasource",
        execution_engine={"class_name": "PandasExecutionEngine"},
        data_connectors={
            "default_runtime_data_connector_name": {
                "class_name": "RuntimeDataConnector",
                "batch_identifiers": ["default_identifier_name"]
            }
        }
    )

# STEP 5: Create Expectation Suite
suite_name = "expect_no_missing_values"
if suite_name not in [s.expectation_suite_name for s in context.list_expectation_suites()]:
    context.add_expectation_suite(expectation_suite_name=suite_name)

# STEP 6: Set path to the CSV
csv_path = "data/score_data.csv"  # Ensure this path exists and file is present

# STEP 7: Create RuntimeBatchRequest
batch_request = RuntimeBatchRequest(
    datasource_name=datasource_name,
    data_connector_name="default_runtime_data_connector_name",
    data_asset_name="score_data",  # this can be any name
    runtime_parameters={"path": csv_path},
    batch_identifiers={"default_identifier_name": "default_id"}
)

# STEP 8: Create Validator and Expectation
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=suite_name
)

# Add expectation to check for missing values
validator.expect_column_values_to_not_be_null("score")

# Save expectation suite
validator.save_expectation_suite()

# STEP 9: Validate data
checkpoint_result = validator.validate()
print(checkpoint_result)


Defaulting to user installation because normal site-packages is not writeable


DataContextError: Datasource is not a FluentDatasource

In [None]:
import pandas as pd

# Sample table: five rows, with two deliberately missing entries in 'score'
# so that a "no missing values" check has something to catch.
data = dict(
    id=[1, 2, 3, 4, 5],
    score=[85, None, 90, 88, None],
)

# Build the frame and write it to the working directory without the index column.
df = pd.DataFrame(data)
df.to_csv(path_or_buf='score_data.csv', index=False)

print("CSV file 'score_data.csv' created successfully.")


CSV file 'score_data.csv' created successfully.
