Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MAINTENANCE] Rename BatchConfig to BatchDefinition #9645

Merged
merged 10 commits into from
Mar 25, 2024
2 changes: 1 addition & 1 deletion docs/sphinx_api_docs_source/public_api_excludes.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@
),
IncludeExcludeDefinition(
reason="Not yet part of the public API, under active development",
name="BatchConfig",
name="BatchDefinition",
filepath=pathlib.Path("great_expectations/core/batch_config.py"),
),
IncludeExcludeDefinition(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from great_expectations.datasource.fluent.interfaces import DataAsset


class BatchConfig(pydantic.BaseModel):
class BatchDefinition(pydantic.BaseModel):
"""Configuration for a batch of data.

References the DataAsset to be used, and any additional parameters needed to fetch the data.
Expand Down Expand Up @@ -47,7 +47,7 @@ def build_batch_request(
)

def save(self) -> None:
self.data_asset._save_batch_config(self)
self.data_asset._save_batch_definition(self)

def identifier_bundle(self) -> _EncodedValidationData:
# Utilized as a custom json_encoder
Expand Down
20 changes: 10 additions & 10 deletions great_expectations/core/validation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
ValidationError,
validator,
)
from great_expectations.core.batch_config import BatchConfig
from great_expectations.core.batch_definition import BatchDefinition
from great_expectations.core.expectation_suite import (
ExpectationSuite,
expectationSuiteSchema,
Expand Down Expand Up @@ -87,11 +87,11 @@ class Config:
""" # noqa: E501
json_encoders = {
ExpectationSuite: lambda e: e.identifier_bundle(),
BatchConfig: lambda b: b.identifier_bundle(),
BatchDefinition: lambda b: b.identifier_bundle(),
}

name: str = Field(..., allow_mutation=False)
data: BatchConfig = Field(..., allow_mutation=False)
data: BatchDefinition = Field(..., allow_mutation=False)
suite: ExpectationSuite = Field(..., allow_mutation=False)
id: Union[str, None] = None
_validation_results_store: ValidationsStore = PrivateAttr()
Expand All @@ -103,7 +103,7 @@ def __init__(self, **data: Any):
self._validation_results_store = project_manager.get_validations_store()

@property
def batch_definition(self) -> BatchConfig:
def batch_definition(self) -> BatchDefinition:
return self.data

@property
Expand All @@ -126,14 +126,14 @@ def _validate_suite(cls, v: dict | ExpectationSuite):
)

@validator("data", pre=True)
def _validate_data(cls, v: dict | BatchConfig):
def _validate_data(cls, v: dict | BatchDefinition):
# Input will be a dict of identifiers if being deserialized or a rich type if being constructed by a user. # noqa: E501
if isinstance(v, dict):
return cls._decode_data(v)
elif isinstance(v, BatchConfig):
elif isinstance(v, BatchDefinition):
return v
raise ValueError(
"Data must be a dictionary (if being deserialized) or a BatchConfig object."
"Data must be a dictionary (if being deserialized) or a BatchDefinition object."
)

@classmethod
Expand All @@ -158,7 +158,7 @@ def _decode_suite(cls, suite_dict: dict) -> ExpectationSuite:
return ExpectationSuite(**expectationSuiteSchema.load(config))

@classmethod
def _decode_data(cls, data_dict: dict) -> BatchConfig:
def _decode_data(cls, data_dict: dict) -> BatchDefinition:
# Take in raw JSON, ensure it contains appropriate identifiers, and use them to retrieve the actual data. # noqa: E501
try:
data_identifiers = _EncodedValidationData.parse_obj(data_dict)
Expand Down Expand Up @@ -187,7 +187,7 @@ def _decode_data(cls, data_dict: dict) -> BatchConfig:
) from e

try:
batch_definition = asset.get_batch_config(batch_definition_name)
batch_definition = asset.get_batch_definition(batch_definition_name)
except KeyError as e:
raise ValueError(
f"Could not find batch definition named '{batch_definition_name}' within '{asset_name}' asset and '{ds_name}' datasource." # noqa: E501
Expand All @@ -204,7 +204,7 @@ def run(
result_format: ResultFormat = ResultFormat.SUMMARY,
) -> ExpectationSuiteValidationResult:
validator = Validator(
batch_config=self.batch_definition,
batch_definition=self.batch_definition,
batch_request_options=batch_parameters,
result_format=result_format,
)
Expand Down
6 changes: 3 additions & 3 deletions great_expectations/datasource/fluent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
)

# Now that DataAsset has been defined, we need to
# provide it to the BatchConfig pydantic model.
from great_expectations.core.batch_config import BatchConfig
# provide it to the BatchDefinition pydantic model.
from great_expectations.core.batch_definition import BatchDefinition

BatchConfig.update_forward_refs(DataAsset=DataAsset)
BatchDefinition.update_forward_refs(DataAsset=DataAsset)


from great_expectations.datasource.fluent.batch_request import (
Expand Down
46 changes: 23 additions & 23 deletions great_expectations/datasource/fluent/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
from great_expectations.compatibility.typing_extensions import override
from great_expectations.datasource.fluent.constants import (
_ASSETS_KEY,
_BATCH_CONFIG_NAME_KEY,
_BATCH_CONFIGS_KEY,
_BATCH_DEFINITION_NAME_KEY,
_BATCH_DEFINITIONS_KEY,
_DATA_ASSET_NAME_KEY,
_DATASOURCE_NAME_KEY,
_FLUENT_DATASOURCES_KEY,
Expand Down Expand Up @@ -95,8 +95,8 @@ class GxConfig(FluentBaseModel):
_DATA_ASSET_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501
}

_EXCLUDE_FROM_BATCH_CONFIG_SERIALIZATION: ClassVar[Set[str]] = {
_BATCH_CONFIG_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501
_EXCLUDE_FROM_BATCH_DEFINITION_SERIALIZATION: ClassVar[Set[str]] = {
_BATCH_DEFINITION_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501
}

class Config:
Expand Down Expand Up @@ -358,15 +358,15 @@ def _exclude_name_fields_from_fluent_datasources(
for data_asset_config in data_assets
}
for data_asset in data_assets_config_as_dict.values():
if _BATCH_CONFIGS_KEY in data_asset:
data_asset[_BATCH_CONFIGS_KEY] = {
batch_config[
_BATCH_CONFIG_NAME_KEY
if _BATCH_DEFINITIONS_KEY in data_asset:
data_asset[_BATCH_DEFINITIONS_KEY] = {
batch_definition[
_BATCH_DEFINITION_NAME_KEY
]: _exclude_fields_from_serialization(
source_dict=batch_config,
exclusions=self._EXCLUDE_FROM_BATCH_CONFIG_SERIALIZATION,
source_dict=batch_definition,
exclusions=self._EXCLUDE_FROM_BATCH_DEFINITION_SERIALIZATION,
)
for batch_config in data_asset[_BATCH_CONFIGS_KEY]
for batch_definition in data_asset[_BATCH_DEFINITIONS_KEY]
}
datasource_config["assets"] = data_assets_config_as_dict

Expand Down Expand Up @@ -406,13 +406,13 @@ def _convert_fluent_datasources_loaded_from_yaml_to_internal_object_representati
data_asset_config: dict
for data_asset_name, data_asset_config in data_assets.items():
data_asset_config[_DATA_ASSET_NAME_KEY] = data_asset_name
if _BATCH_CONFIGS_KEY in data_asset_config:
batch_config_list = (
_convert_batch_configs_from_yaml_to_internal_object_representation(
data_asset_config[_BATCH_CONFIGS_KEY]
if _BATCH_DEFINITIONS_KEY in data_asset_config:
batch_definition_list = (
_convert_batch_definitions_from_yaml_to_internal_object_representation(
data_asset_config[_BATCH_DEFINITIONS_KEY]
)
)
data_asset_config[_BATCH_CONFIGS_KEY] = batch_config_list
data_asset_config[_BATCH_DEFINITIONS_KEY] = batch_definition_list

datasource_config[_ASSETS_KEY] = list(data_assets.values())

Expand All @@ -423,12 +423,12 @@ def _convert_fluent_datasources_loaded_from_yaml_to_internal_object_representati
return config


def _convert_batch_configs_from_yaml_to_internal_object_representation(
batch_configs: Dict[str, Dict],
def _convert_batch_definitions_from_yaml_to_internal_object_representation(
batch_definitions: Dict[str, Dict],
) -> List[Dict]:
for (
batch_config_name,
batch_config,
) in batch_configs.items():
batch_config[_BATCH_CONFIG_NAME_KEY] = batch_config_name
return list(batch_configs.values())
batch_definition_name,
batch_definition,
) in batch_definitions.items():
batch_definition[_BATCH_DEFINITION_NAME_KEY] = batch_definition_name
return list(batch_definitions.values())
4 changes: 2 additions & 2 deletions great_expectations/datasource/fluent/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
_DATASOURCE_NAME_KEY: Final[str] = "name"
_ASSETS_KEY: Final[str] = "assets"
_DATA_ASSET_NAME_KEY: Final[str] = "name"
_BATCH_CONFIGS_KEY: Final[str] = "batch_configs"
_BATCH_CONFIG_NAME_KEY: Final[str] = "name"
_BATCH_DEFINITIONS_KEY: Final[str] = "batch_definitions"
_BATCH_DEFINITION_NAME_KEY: Final[str] = "name"

_DATA_CONNECTOR_NAME: Final[str] = "fluent"

Expand Down