-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FEATURE] ValidationConfigStore
#9523
Changes from 11 commits
44172f8
2b92cd4
008bd50
31aafb3
be70160
3e9a833
295eba0
fe3ebfe
ddddc8d
9fa6eba
0997f8b
d62cd3b
8f088d6
3771806
764ad93
537def9
3eba6a4
61a1c23
76700d8
dffbbe1
b7d1072
a216c92
2c766f9
07acaea
f137989
8a948fb
ec7c5f8
947d901
72bdf51
6882737
deb1076
695cc17
182edab
a65ce0b
c533188
f987bef
22e5d55
1c256ca
a82f189
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,12 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
from great_expectations._docs_decorators import public_api | ||
from great_expectations.compatibility.pydantic import BaseModel | ||
|
||
if TYPE_CHECKING: | ||
from great_expectations.core.batch_config import BatchConfig | ||
from great_expectations.core.expectation_suite import ExpectationSuite | ||
|
||
# from great_expectations.datasource.fluent.interfaces import DataAsset | ||
from great_expectations.compatibility.pydantic import BaseModel, validator | ||
from great_expectations.core.batch_config import BatchConfig # noqa: TCH001 | ||
from great_expectations.core.expectation_suite import ( | ||
ExpectationSuite, | ||
expectationSuiteSchema, | ||
) | ||
|
||
|
||
class ValidationConfig(BaseModel): | ||
|
@@ -23,10 +20,25 @@ class ValidationConfig(BaseModel): | |
|
||
""" | ||
|
||
class Config: | ||
arbitrary_types_allowed = True | ||
json_encoders = { | ||
ExpectationSuite: lambda v: expectationSuiteSchema.dump(v), | ||
} | ||
Comment on lines
+26
to
+29
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Concurrently using Marshmallow and Pydantic results in something like this. We need custom encoders/decoders but this should work as intended |
||
|
||
name: str | ||
data: BatchConfig # TODO: Should support a union of Asset | BatchConfig | ||
suite: ExpectationSuite | ||
|
||
@validator("suite", pre=True) | ||
def _validate_suite(cls, v): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above - I think it's worth a docstring explaining this is just because the suite isn't pydantic. |
||
if isinstance(v, dict): | ||
return ExpectationSuite(**expectationSuiteSchema.load(v)) | ||
return v | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we check if v is an expectationSuiteSchema and raise if it's not in the fall through case? Or does pydantic do this for us since it won't be the right type? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pydantic validation will happen after this return - I've updated this to be a bit more explicit but it should work as-is |
||
|
||
@public_api | ||
def run(self): | ||
raise NotImplementedError | ||
|
||
|
||
ValidationConfig.update_forward_refs() | ||
cdkini marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -148,6 +148,9 @@ | |
DataDocsSiteConfigTypedDict, | ||
StoreConfigTypedDict, | ||
) | ||
from great_expectations.data_context.store.validation_config_store import ( | ||
ValidationConfigStore, | ||
) | ||
from great_expectations.data_context.store.validations_store import ValidationsStore | ||
from great_expectations.data_context.types.resource_identifiers import ( | ||
GXCloudIdentifier, | ||
|
@@ -331,8 +334,13 @@ def _init_factories(self) -> None: | |
context=self, | ||
) | ||
|
||
# TODO: Update to follow existing pattern once new ValidationConfigStore is implemented | ||
self._validations: ValidationFactory | None = None | ||
if validation_config_store := self.stores.get( | ||
cdkini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.validation_config_store_name | ||
): | ||
self._validations = ValidationFactory( | ||
store=validation_config_store, | ||
) | ||
|
||
def _init_analytics(self) -> None: | ||
init_analytics( | ||
|
@@ -617,6 +625,13 @@ def validations_store_name(self, value: str) -> None: | |
def validations_store(self) -> ValidationsStore: | ||
return self.stores[self.validations_store_name] | ||
|
||
@property | ||
def validation_config_store_name(self) -> str: | ||
return DataContextConfigDefaults.DEFAULT_VALIDATION_CONFIG_STORE_NAME.value | ||
|
||
def validaiton_config_store(self) -> ValidationConfigStore: | ||
cdkini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return self.stores[self.validation_config_store_name] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wasn't sure about this but deciding to not make An argument against this is that it is a deviation from the existing pattern and perhaps consistency is more important? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm usually not pro-deviation, but I think in this case it's probably reasonable. It'd require minimal changes if we changed our mind later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we know this key will always be present? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By adding it as a context variable, its another field that a user can modify. These top level store names are dangerous in that they can enable the following: context.checkpoint_store_name = "my_nonexistent_store"
...
context.checkpoint_store # Raises an error
context.checkpoints.add(...) # Raises an error
... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We allow customization of the names but don't automatically update the Seems relatively unlikely since we don't advertise this much in docs/public API. We should always have a |
||
|
||
@property | ||
def checkpoint_store_name(self) -> Optional[str]: | ||
from great_expectations.data_context.store.checkpoint_store import ( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,6 +48,7 @@ class SerializableDataContext(AbstractDataContext): | |
DataContextConfigDefaults.EXPECTATIONS_BASE_DIRECTORY.value, | ||
DataContextConfigDefaults.PLUGINS_BASE_DIRECTORY.value, | ||
DataContextConfigDefaults.PROFILERS_BASE_DIRECTORY.value, | ||
DataContextConfigDefaults.VALIDATION_CONFIGS_BASE_DIRECTORY.value, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updates to enable proper filesystem scaffolding |
||
GX_UNCOMMITTED_DIR, | ||
] | ||
GX_DIR: ClassVar[str] = "gx" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,6 +133,7 @@ class GXCloudStoreBackend(StoreBackend, metaclass=ABCMeta): | |
GXCloudRESTResource.PROFILER: "profiler", | ||
GXCloudRESTResource.RENDERED_DATA_DOC: "rendered_data_doc", | ||
GXCloudRESTResource.VALIDATION_RESULT: "result", | ||
GXCloudRESTResource.VALIDATION_CONFIG: "validation_config", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updates to enable proper Cloud integration (purely mocking for now) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like the enum can be modified/restructured if we have to list all these enums and define a new value for them. This makes it appear that our enum/abstraction is wrong. |
||
} | ||
|
||
ALLOWED_SET_KWARGS_BY_RESOURCE_TYPE: Dict[GXCloudRESTResource, Set[str]] = { | ||
|
@@ -157,6 +158,7 @@ class GXCloudStoreBackend(StoreBackend, metaclass=ABCMeta): | |
GXCloudRESTResource.PROFILER: "profilers", | ||
GXCloudRESTResource.RENDERED_DATA_DOC: "rendered_data_docs", | ||
GXCloudRESTResource.VALIDATION_RESULT: "validation_results", | ||
GXCloudRESTResource.VALIDATION_CONFIG: "validation_configs", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly here. |
||
} | ||
) | ||
|
||
|
@@ -295,6 +297,8 @@ def _put(self, id: str, value: Any) -> GXCloudResourceRef | bool: | |
# resource for others. Since we route all update calls through this single | ||
# method, we need to handle both cases. | ||
|
||
print("CHETAN", value, type(value.get("suite"))) | ||
|
||
resource_type = self.ge_cloud_resource_type | ||
organization_id = self.ge_cloud_credentials["organization_id"] | ||
attributes_key = self.PAYLOAD_ATTRIBUTES_KEYS[resource_type] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from __future__ import annotations | ||
|
||
from great_expectations.compatibility.typing_extensions import override | ||
from great_expectations.core.data_context_key import StringKey | ||
from great_expectations.core.validation_config import ValidationConfig | ||
from great_expectations.data_context.cloud_constants import GXCloudRESTResource | ||
from great_expectations.data_context.store.store import Store | ||
from great_expectations.data_context.types.resource_identifiers import ( | ||
GXCloudIdentifier, | ||
) | ||
|
||
|
||
class ValidationConfigStore(Store): | ||
_key_class = StringKey | ||
|
||
def __init__( | ||
self, | ||
store_name: str, | ||
store_backend: dict | None = None, | ||
runtime_environment: dict | None = None, | ||
) -> None: | ||
super().__init__( | ||
store_name=store_name, | ||
store_backend=store_backend, | ||
runtime_environment=runtime_environment, | ||
) | ||
|
||
def get_key( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implementing the bare minimum required in the factory. Tests should reflect this as well |
||
self, name: str, id: str | None = None | ||
) -> GXCloudIdentifier | StringKey: | ||
"""Given a name and optional ID, build the correct key for use in the ValidationConfigStore.""" | ||
if self.cloud_mode: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not for this PR but we need to get away from these cloud mode checks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. It's poorly encapsulated but I couldn't think of a better way to get both file and cloud working |
||
return GXCloudIdentifier( | ||
resource_type=GXCloudRESTResource.VALIDATION_CONFIG, | ||
id=id, | ||
resource_name=name, | ||
) | ||
return StringKey(key=name) | ||
|
||
@override | ||
def serialize(self, value): | ||
if self.cloud_mode: | ||
data = value.dict() | ||
data["suite"] = data["suite"].to_json_dict() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another necessity due to Marshmallow |
||
return data | ||
|
||
return value.json() | ||
|
||
@override | ||
def deserialize(self, value): | ||
return ValidationConfig.parse_raw(value) |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,128 @@ | ||||||
from unittest import mock | ||||||
|
||||||
import pytest | ||||||
|
||||||
from great_expectations.core.batch_config import BatchConfig | ||||||
from great_expectations.core.data_context_key import StringKey | ||||||
from great_expectations.core.expectation_suite import ExpectationSuite | ||||||
from great_expectations.core.validation_config import ValidationConfig | ||||||
from great_expectations.data_context.cloud_constants import GXCloudRESTResource | ||||||
from great_expectations.data_context.store import ValidationConfigStore | ||||||
from great_expectations.data_context.types.resource_identifiers import GXCloudIdentifier | ||||||
from tests.datasource.fluent._fake_cloud_api import CloudDetails | ||||||
|
||||||
|
||||||
@pytest.fixture | ||||||
def ephemeral_store(): | ||||||
return ValidationConfigStore(store_name="ephemeral_validation_config_store") | ||||||
|
||||||
|
||||||
@pytest.fixture | ||||||
def file_backed_store(tmp_path): | ||||||
base_directory = tmp_path / "base_dir" | ||||||
return ValidationConfigStore( | ||||||
store_name="file_backed_validation_config_store", | ||||||
store_backend={ | ||||||
"class_name": "TupleFilesystemStoreBackend", | ||||||
"base_directory": base_directory, | ||||||
}, | ||||||
) | ||||||
|
||||||
|
||||||
@pytest.fixture | ||||||
def cloud_backed_store(cloud_details: CloudDetails): | ||||||
return ValidationConfigStore( | ||||||
store_name="cloud_backed_validation_config_store", | ||||||
store_backend={ | ||||||
"class_name": "GXCloudStoreBackend", | ||||||
"ge_cloud_resource_type": GXCloudRESTResource.VALIDATION_CONFIG, | ||||||
"ge_cloud_credentials": { | ||||||
"access_token": cloud_details.access_token, | ||||||
"organization_id": cloud_details.org_id, | ||||||
}, | ||||||
}, | ||||||
) | ||||||
|
||||||
|
||||||
@pytest.fixture | ||||||
def validation_config() -> ValidationConfig: | ||||||
return ValidationConfig( | ||||||
name="my_validation", | ||||||
data=BatchConfig(name="my_batch_config"), | ||||||
suite=ExpectationSuite(name="my_suite"), | ||||||
) | ||||||
|
||||||
|
||||||
@pytest.mark.parametrize("store_fixture", ["ephemeral_store", "file_backed_store"]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why use strings for the stores and then call getfixturevalue? Can't you just reference the fixture directly? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the fixture won't be properly instantiated unless you do it this way. There are packages out there like |
||||||
@pytest.mark.unit | ||||||
def test_add(request, store_fixture: str, validation_config: ValidationConfig): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Annoting https://docs.pytest.org/en/stable/reference/reference.html#request |
||||||
store: ValidationConfigStore = request.getfixturevalue(store_fixture) | ||||||
|
||||||
key = StringKey(key="my_validation") | ||||||
store.add(key=key, value=validation_config) | ||||||
|
||||||
assert store.get(key) == validation_config | ||||||
|
||||||
|
||||||
@pytest.mark.cloud | ||||||
def test_add_cloud( | ||||||
cloud_backed_store: ValidationConfigStore, validation_config: ValidationConfig | ||||||
): | ||||||
store = cloud_backed_store | ||||||
|
||||||
id = "5a8ada9f-5b71-461b-b1af-f1d93602a156" | ||||||
name = "my_validation" | ||||||
key = GXCloudIdentifier( | ||||||
resource_type=GXCloudRESTResource.VALIDATION_CONFIG, | ||||||
id=id, | ||||||
resource_name=name, | ||||||
) | ||||||
|
||||||
with mock.patch("requests.Session.put", autospec=True) as mock_put: | ||||||
store.add(key=key, value=validation_config) | ||||||
|
||||||
mock_put.assert_called_once_with( | ||||||
mock.ANY, | ||||||
f"https://api.greatexpectations.io/organizations/12345678-1234-5678-1234-567812345678/validation-configs/{id}", | ||||||
json={ | ||||||
"data": { | ||||||
"type": "validation_config", | ||||||
"id": id, | ||||||
"attributes": { | ||||||
"organization_id": "12345678-1234-5678-1234-567812345678", | ||||||
"validation_config": { | ||||||
"name": name, | ||||||
"data": { | ||||||
"id": None, | ||||||
"name": "my_batch_config", | ||||||
"partitioner": None, | ||||||
}, | ||||||
"suite": { | ||||||
"expectation_suite_name": "my_suite", | ||||||
"id": None, | ||||||
"expectations": [], | ||||||
"data_asset_type": None, | ||||||
"meta": mock.ANY, | ||||||
"notes": None, | ||||||
}, | ||||||
}, | ||||||
}, | ||||||
} | ||||||
}, | ||||||
) | ||||||
|
||||||
|
||||||
@pytest.mark.parametrize("store_fixture", ["ephemeral_store", "file_backed_store"]) | ||||||
@pytest.mark.unit | ||||||
def test_get_key(request, store_fixture: str): | ||||||
store: ValidationConfigStore = request.getfixturevalue(store_fixture) | ||||||
|
||||||
name = "my_validation" | ||||||
assert store.get_key(name=name) == StringKey(key=name) | ||||||
|
||||||
|
||||||
@pytest.mark.cloud | ||||||
def test_get_key_cloud(cloud_backed_store: ValidationConfigStore): | ||||||
key = cloud_backed_store.get_key(name="my_validation") | ||||||
assert key.resource_type == GXCloudRESTResource.VALIDATION_CONFIG | ||||||
assert key.resource_name == "my_validation" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I think we should add a comment on the why here in the code.