# Great Expectations Data Quality Validation

This notebook demonstrates how to use Great Expectations to validate data quality for CSV files.

In [4]:
import great_expectations as gx
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.data_context import EphemeralDataContext
from great_expectations.data_context.types.base import (
    DataContextConfig,
    InMemoryStoreBackendDefaults,
)

# Create an in-memory store backend
store_backend_defaults = InMemoryStoreBackendDefaults()

# Create a data context config
data_context_config = DataContextConfig(
    store_backend_defaults=store_backend_defaults,
    checkpoint_store_name=store_backend_defaults.checkpoint_store_name,
    expectations_store_name=store_backend_defaults.expectations_store_name,
    validations_store_name=store_backend_defaults.validations_store_name,
    evaluation_parameter_store_name=store_backend_defaults.evaluation_parameter_store_name,
)

# Create the ephemeral data context
context = EphemeralDataContext(
    project_config=data_context_config,
)

# Create a simple expectation suite
expectation_suite_name = "my_suite"
context.create_expectation_suite(
    expectation_suite_name=expectation_suite_name,
    overwrite_existing=True
)

print("Great Expectations context created successfully!")

AttributeError: 'InMemoryStoreBackendDefaults' object has no attribute 'validations_store_name'

## Initialize Great Expectations Context

In [2]:
# Initialize a project
context = gx.get_context()

## Configure Datasource

We'll create a datasource that points to our CSV files in the dummy_data directory.

In [3]:
# Create a new Datasource using the fluent API
datasource = context.get_datasource("my_pandas_datasource") or context.sources.add_pandas(
    name="my_pandas_datasource"
)

# Add CSV assets using the fluent API
csv_asset = datasource.add_csv_asset(
    name="my_csv_files",
    filepath_or_buffer="dummy_data/*.csv"
)

print("Data source configured successfully!")



ValueError: "Could not find a datasource named 'my_pandas_datasource'"

## List Available Data Assets

In [None]:
# List all available data assets
context.list_datasources()