Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] ValidationConfigStore #9523

Merged
merged 39 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
44172f8
start impl
cdkini Feb 23, 2024
2b92cd4
update init
cdkini Feb 23, 2024
008bd50
remove type ignore
cdkini Feb 23, 2024
31aafb3
patch tests
cdkini Feb 23, 2024
be70160
patch test
cdkini Feb 26, 2024
3e9a833
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Feb 27, 2024
295eba0
start tests
cdkini Feb 27, 2024
fe3ebfe
more tests
cdkini Feb 27, 2024
ddddc8d
simplify store
cdkini Feb 27, 2024
9fa6eba
get tests passing
cdkini Feb 27, 2024
0997f8b
plug in store to factory
cdkini Feb 27, 2024
d62cd3b
remove debug call
cdkini Feb 27, 2024
8f088d6
remove extraneous init
cdkini Feb 27, 2024
3771806
misc updates
cdkini Feb 27, 2024
764ad93
from __future__
cdkini Feb 27, 2024
537def9
Update great_expectations/data_context/data_context/abstract_data_con…
cdkini Feb 27, 2024
3eba6a4
remove forward refs
cdkini Feb 27, 2024
61a1c23
mypy
cdkini Feb 27, 2024
76700d8
patch spark test
cdkini Feb 27, 2024
dffbbe1
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Feb 28, 2024
b7d1072
add id
cdkini Feb 28, 2024
a216c92
fix unit tests
cdkini Feb 28, 2024
2c766f9
patch cloud test
cdkini Feb 28, 2024
07acaea
update validation_config_store prop
cdkini Feb 28, 2024
f137989
add property
cdkini Feb 28, 2024
8a948fb
misc updates
cdkini Feb 28, 2024
ec7c5f8
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Feb 29, 2024
947d901
add default stores
cdkini Feb 29, 2024
72bdf51
update store backend defaults and tests
cdkini Feb 29, 2024
6882737
mypy
cdkini Feb 29, 2024
deb1076
ephemeral defaults
cdkini Feb 29, 2024
695cc17
update templates
cdkini Feb 29, 2024
182edab
patch s3 tests
cdkini Feb 29, 2024
a65ce0b
patch cloud tests
cdkini Feb 29, 2024
c533188
patch cloud tests
cdkini Feb 29, 2024
f987bef
fix docs tests
cdkini Feb 29, 2024
22e5d55
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Feb 29, 2024
1c256ca
update validator
cdkini Feb 29, 2024
a82f189
update docs tests
cdkini Feb 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 8 additions & 2 deletions great_expectations/core/factory/validation_factory.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from great_expectations._docs_decorators import public_api
from great_expectations.compatibility.typing_extensions import override
from great_expectations.core.factory.factory import Factory
from great_expectations.core.validation_config import ValidationConfig

if TYPE_CHECKING:
from great_expectations.data_context.store.validation_config_store import (
ValidationConfigStore,
)


# TODO: Add analytics as needed
class ValidationFactory(Factory[ValidationConfig]):
def __init__(self, store) -> None:
# TODO: Update type hints when new ValidationConfigStore is implemented
def __init__(self, store: ValidationConfigStore) -> None:
self._store = store

@public_api
Expand Down
30 changes: 21 additions & 9 deletions great_expectations/core/validation_config.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from great_expectations._docs_decorators import public_api
from great_expectations.compatibility.pydantic import BaseModel

if TYPE_CHECKING:
from great_expectations.core.batch_config import BatchConfig
from great_expectations.core.expectation_suite import ExpectationSuite

# from great_expectations.datasource.fluent.interfaces import DataAsset
from great_expectations.compatibility.pydantic import BaseModel, validator
from great_expectations.core.batch_config import BatchConfig # noqa: TCH001
from great_expectations.core.expectation_suite import (
ExpectationSuite,
expectationSuiteSchema,
)


class ValidationConfig(BaseModel):
Expand All @@ -23,10 +20,25 @@ class ValidationConfig(BaseModel):

"""

class Config:
arbitrary_types_allowed = True
json_encoders = {
ExpectationSuite: lambda v: expectationSuiteSchema.dump(v),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think we should add a comment on the why here in the code.

}
Comment on lines +26 to +29
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Concurrently using Marshmallow and Pydantic results in something like this. We need custom encoders/decoders but this should work as intended


name: str
data: BatchConfig # TODO: Should support a union of Asset | BatchConfig
suite: ExpectationSuite

@validator("suite", pre=True)
def _validate_suite(cls, v):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above - I think it's worth a docstring explaining this is just because the suite isn't pydantic.

if isinstance(v, dict):
return ExpectationSuite(**expectationSuiteSchema.load(v))
return v
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we check if v is an expectationSuiteSchema and raise if it's not in the fall through case? Or does pydantic do this for us since it won't be the right type?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pydantic validation will happen after this return - I've updated this to be a bit more explicit but it should work as-is


@public_api
def run(self):
raise NotImplementedError


ValidationConfig.update_forward_refs()
cdkini marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions great_expectations/data_context/cloud_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ class GXCloudRESTResource(str, Enum):
PROFILER = "profiler"
RENDERED_DATA_DOC = "rendered_data_doc"
VALIDATION_RESULT = "validation_result"
VALIDATION_CONFIG = "validation_config"
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@
DataDocsSiteConfigTypedDict,
StoreConfigTypedDict,
)
from great_expectations.data_context.store.validation_config_store import (
ValidationConfigStore,
)
from great_expectations.data_context.store.validations_store import ValidationsStore
from great_expectations.data_context.types.resource_identifiers import (
GXCloudIdentifier,
Expand Down Expand Up @@ -331,8 +334,13 @@ def _init_factories(self) -> None:
context=self,
)

# TODO: Update to follow existing pattern once new ValidationConfigStore is implemented
self._validations: ValidationFactory | None = None
if validation_config_store := self.stores.get(
cdkini marked this conversation as resolved.
Show resolved Hide resolved
self.validation_config_store_name
):
self._validations = ValidationFactory(
store=validation_config_store,
)

def _init_analytics(self) -> None:
init_analytics(
Expand Down Expand Up @@ -617,6 +625,13 @@ def validations_store_name(self, value: str) -> None:
def validations_store(self) -> ValidationsStore:
return self.stores[self.validations_store_name]

@property
def validation_config_store_name(self) -> str:
return DataContextConfigDefaults.DEFAULT_VALIDATION_CONFIG_STORE_NAME.value

def validaiton_config_store(self) -> ValidationConfigStore:
cdkini marked this conversation as resolved.
Show resolved Hide resolved
return self.stores[self.validation_config_store_name]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't sure about this but deciding to not make validation_config_store_name a context variable. I've wavered on it but I'm now confident that store naming just causes more harm than benefit.

An argument against this is that it is a deviation from the existing pattern and perhaps consistency is more important?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm usually not pro-deviation, but I think in this case it's probably reasonable. It'd require minimal changes if we changed our mind later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know this key will always be present?
What is the harm you see from making this a context variable? By context variable, do you mean a top level property?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By adding it as a context variable, its another field that a user can modify.

These top level store names are dangerous in that they can enable the following:

context.checkpoint_store_name = "my_nonexistent_store"
...

context.checkpoint_store # Raises an error
context.checkpoints.add(...) # Raises an error
...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We allow customization of the names but don't automatically update the stores dict, resulting in the possibility of orphaning a store by losing its name.

Seems relatively unlikely since we don't advertise this much in docs/public API.

We should always have a ValidationConfigStore but the name may vary.


@property
def checkpoint_store_name(self) -> Optional[str]:
from great_expectations.data_context.store.checkpoint_store import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class SerializableDataContext(AbstractDataContext):
DataContextConfigDefaults.EXPECTATIONS_BASE_DIRECTORY.value,
DataContextConfigDefaults.PLUGINS_BASE_DIRECTORY.value,
DataContextConfigDefaults.PROFILERS_BASE_DIRECTORY.value,
DataContextConfigDefaults.VALIDATION_CONFIGS_BASE_DIRECTORY.value,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updates to enable proper filesystem scaffolding

GX_UNCOMMITTED_DIR,
]
GX_DIR: ClassVar[str] = "gx"
Expand Down
1 change: 1 addition & 0 deletions great_expectations/data_context/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
from .profiler_store import ProfilerStore # isort:skip
from .data_context_store import DataContextStore # isort:skip
from .data_asset_store import DataAssetStore # isort:skip
from .validation_config_store import ValidationConfigStore # isort:skip
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ class GXCloudStoreBackend(StoreBackend, metaclass=ABCMeta):
GXCloudRESTResource.PROFILER: "profiler",
GXCloudRESTResource.RENDERED_DATA_DOC: "rendered_data_doc",
GXCloudRESTResource.VALIDATION_RESULT: "result",
GXCloudRESTResource.VALIDATION_CONFIG: "validation_config",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updates to enable proper Cloud integration (purely mocking for now)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like the enum can be modified/restructured if we have to list all these enums and define a new value for them. This makes it appear that our enum/abstraction is wrong.

}

ALLOWED_SET_KWARGS_BY_RESOURCE_TYPE: Dict[GXCloudRESTResource, Set[str]] = {
Expand All @@ -157,6 +158,7 @@ class GXCloudStoreBackend(StoreBackend, metaclass=ABCMeta):
GXCloudRESTResource.PROFILER: "profilers",
GXCloudRESTResource.RENDERED_DATA_DOC: "rendered_data_docs",
GXCloudRESTResource.VALIDATION_RESULT: "validation_results",
GXCloudRESTResource.VALIDATION_CONFIG: "validation_configs",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly here.

}
)

Expand Down Expand Up @@ -295,6 +297,8 @@ def _put(self, id: str, value: Any) -> GXCloudResourceRef | bool:
# resource for others. Since we route all update calls through this single
# method, we need to handle both cases.

print("CHETAN", value, type(value.get("suite")))

resource_type = self.ge_cloud_resource_type
organization_id = self.ge_cloud_credentials["organization_id"]
attributes_key = self.PAYLOAD_ATTRIBUTES_KEYS[resource_type]
Expand Down
51 changes: 51 additions & 0 deletions great_expectations/data_context/store/validation_config_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

from great_expectations.compatibility.typing_extensions import override
from great_expectations.core.data_context_key import StringKey
from great_expectations.core.validation_config import ValidationConfig
from great_expectations.data_context.cloud_constants import GXCloudRESTResource
from great_expectations.data_context.store.store import Store
from great_expectations.data_context.types.resource_identifiers import (
GXCloudIdentifier,
)


class ValidationConfigStore(Store):
_key_class = StringKey

def __init__(
self,
store_name: str,
store_backend: dict | None = None,
runtime_environment: dict | None = None,
) -> None:
super().__init__(
store_name=store_name,
store_backend=store_backend,
runtime_environment=runtime_environment,
)

def get_key(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementing the bare minimum required in the factory. Tests should reflect this as well

self, name: str, id: str | None = None
) -> GXCloudIdentifier | StringKey:
"""Given a name and optional ID, build the correct key for use in the ValidationConfigStore."""
if self.cloud_mode:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR but we need to get away from these cloud mode checks.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. It's poorly encapsulated but I couldn't think of a better way to get both file and cloud working

return GXCloudIdentifier(
resource_type=GXCloudRESTResource.VALIDATION_CONFIG,
id=id,
resource_name=name,
)
return StringKey(key=name)

@override
def serialize(self, value):
if self.cloud_mode:
data = value.dict()
data["suite"] = data["suite"].to_json_dict()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another necessity due to Marshmallow

return data

return value.json()

@override
def deserialize(self, value):
return ValidationConfig.parse_raw(value)
13 changes: 13 additions & 0 deletions great_expectations/data_context/types/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1624,6 +1624,12 @@ class DataContextConfigDefaults(enum.Enum):
DEFAULT_VALIDATIONS_STORE_BASE_DIRECTORY_RELATIVE_NAME = (
f"{UNCOMMITTED}/{VALIDATIONS_BASE_DIRECTORY}/"
)
DEFAULT_VALIDATION_CONFIG_STORE_NAME = "validation_config_store"
VALIDATION_CONFIGS_BASE_DIRECTORY = "validation_configs"
DEFAULT_VALIDATION_CONFIG_STORE_BASE_DIRECTORY_RELATIVE_NAME = (
f"{VALIDATION_CONFIGS_BASE_DIRECTORY}/"
)

DEFAULT_EVALUATION_PARAMETER_STORE_NAME = "evaluation_parameter_store"
DEFAULT_EVALUATION_PARAMETER_STORE_BASE_DIRECTORY_RELATIVE_NAME = (
"evaluation_parameters/"
Expand Down Expand Up @@ -1690,6 +1696,13 @@ class DataContextConfigDefaults(enum.Enum):
"base_directory": DEFAULT_VALIDATIONS_STORE_BASE_DIRECTORY_RELATIVE_NAME,
},
},
DEFAULT_VALIDATION_CONFIG_STORE_NAME: {
"class_name": "ValidationConfigStore",
"store_backend": {
"class_name": "TupleFilesystemStoreBackend",
"base_directory": DEFAULT_VALIDATION_CONFIG_STORE_BASE_DIRECTORY_RELATIVE_NAME,
},
},
DEFAULT_EVALUATION_PARAMETER_STORE_NAME: {
"class_name": "EvaluationParameterStore"
},
Expand Down
1 change: 1 addition & 0 deletions tests/data_context/migrator/test_file_migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def test_migrate_scaffolds_filesystem(
DataContextConfigDefaults.PLUGINS_BASE_DIRECTORY.value,
DataContextConfigDefaults.PROFILERS_BASE_DIRECTORY.value,
DataContextConfigDefaults.UNCOMMITTED.value,
DataContextConfigDefaults.VALIDATION_CONFIGS_BASE_DIRECTORY.value,
]


Expand Down
128 changes: 128 additions & 0 deletions tests/data_context/store/test_validation_config_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from unittest import mock

import pytest

from great_expectations.core.batch_config import BatchConfig
from great_expectations.core.data_context_key import StringKey
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.core.validation_config import ValidationConfig
from great_expectations.data_context.cloud_constants import GXCloudRESTResource
from great_expectations.data_context.store import ValidationConfigStore
from great_expectations.data_context.types.resource_identifiers import GXCloudIdentifier
from tests.datasource.fluent._fake_cloud_api import CloudDetails


@pytest.fixture
def ephemeral_store():
return ValidationConfigStore(store_name="ephemeral_validation_config_store")


@pytest.fixture
def file_backed_store(tmp_path):
base_directory = tmp_path / "base_dir"
return ValidationConfigStore(
store_name="file_backed_validation_config_store",
store_backend={
"class_name": "TupleFilesystemStoreBackend",
"base_directory": base_directory,
},
)


@pytest.fixture
def cloud_backed_store(cloud_details: CloudDetails):
return ValidationConfigStore(
store_name="cloud_backed_validation_config_store",
store_backend={
"class_name": "GXCloudStoreBackend",
"ge_cloud_resource_type": GXCloudRESTResource.VALIDATION_CONFIG,
"ge_cloud_credentials": {
"access_token": cloud_details.access_token,
"organization_id": cloud_details.org_id,
},
},
)


@pytest.fixture
def validation_config() -> ValidationConfig:
return ValidationConfig(
name="my_validation",
data=BatchConfig(name="my_batch_config"),
suite=ExpectationSuite(name="my_suite"),
)


@pytest.mark.parametrize("store_fixture", ["ephemeral_store", "file_backed_store"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why use strings for the stores and then call getfixturevalue? Can't you just reference the fixture directly?

Copy link
Member

@Kilo59 Kilo59 Feb 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the fixture won't be properly instantiated unless you do it this way.
Fixtures only get loaded by pytest if you include them in the test constructor.
Although this is a common complaint because it prevents you from using fixtures as part of parametrize.

There are packages out there like pytest-lazy-fixture that essentially do this work for you... but you still have to reference it by the string.
https://github.com/tvorog/pytest-lazy-fixture

@pytest.mark.unit
def test_add(request, store_fixture: str, validation_config: ValidationConfig):
Copy link
Member

@Kilo59 Kilo59 Feb 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def test_add(request, store_fixture: str, validation_config: ValidationConfig):
def test_add(request: pytest.FixtureRequest, store_fixture: str, validation_config: ValidationConfig):

Annoting request will developers understand what's going on.

https://docs.pytest.org/en/stable/reference/reference.html#request

store: ValidationConfigStore = request.getfixturevalue(store_fixture)

key = StringKey(key="my_validation")
store.add(key=key, value=validation_config)

assert store.get(key) == validation_config


@pytest.mark.cloud
def test_add_cloud(
cloud_backed_store: ValidationConfigStore, validation_config: ValidationConfig
):
store = cloud_backed_store

id = "5a8ada9f-5b71-461b-b1af-f1d93602a156"
name = "my_validation"
key = GXCloudIdentifier(
resource_type=GXCloudRESTResource.VALIDATION_CONFIG,
id=id,
resource_name=name,
)

with mock.patch("requests.Session.put", autospec=True) as mock_put:
store.add(key=key, value=validation_config)

mock_put.assert_called_once_with(
mock.ANY,
f"https://api.greatexpectations.io/organizations/12345678-1234-5678-1234-567812345678/validation-configs/{id}",
json={
"data": {
"type": "validation_config",
"id": id,
"attributes": {
"organization_id": "12345678-1234-5678-1234-567812345678",
"validation_config": {
"name": name,
"data": {
"id": None,
"name": "my_batch_config",
"partitioner": None,
},
"suite": {
"expectation_suite_name": "my_suite",
"id": None,
"expectations": [],
"data_asset_type": None,
"meta": mock.ANY,
"notes": None,
},
},
},
}
},
)


@pytest.mark.parametrize("store_fixture", ["ephemeral_store", "file_backed_store"])
@pytest.mark.unit
def test_get_key(request, store_fixture: str):
store: ValidationConfigStore = request.getfixturevalue(store_fixture)

name = "my_validation"
assert store.get_key(name=name) == StringKey(key=name)


@pytest.mark.cloud
def test_get_key_cloud(cloud_backed_store: ValidationConfigStore):
key = cloud_backed_store.get_key(name="my_validation")
assert key.resource_type == GXCloudRESTResource.VALIDATION_CONFIG
assert key.resource_name == "my_validation"
3 changes: 3 additions & 0 deletions tests/data_context/test_data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ def test_data_context_create_makes_uncommitted_dirs_when_all_are_missing(
data_docs/
validations/
.ge_store_backend_id
validation_configs/
"""
)

Expand Down Expand Up @@ -809,6 +810,7 @@ def test_data_context_create_does_nothing_if_all_uncommitted_dirs_exist(
data_docs/
validations/
.ge_store_backend_id
validation_configs/
"""
project_path = str(tmp_path_factory.mktemp("stuff"))
ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118
Expand Down Expand Up @@ -910,6 +912,7 @@ def test_scaffold_directories(tmp_path_factory):
"expectations",
".gitignore",
"uncommitted",
"validation_configs",
}
assert set(
os.listdir(os.path.join(empty_directory, "uncommitted")) # noqa: PTH118
Expand Down