Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] V1 Checkpoint #9590

Merged
merged 30 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
44546e2
add new class
cdkini Mar 7, 2024
d93bd88
start writing tests
cdkini Mar 7, 2024
fe25879
start on tests
cdkini Mar 7, 2024
452953f
move bundle to shared place
cdkini Mar 7, 2024
ecbba25
move bundle to shared place again
cdkini Mar 7, 2024
df5b13f
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Mar 8, 2024
21ef021
misc updates
cdkini Mar 8, 2024
0435290
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Mar 15, 2024
97b2106
remove json_encoder for actions
cdkini Mar 15, 2024
e0b9a30
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Mar 15, 2024
b32079b
write basic serialization tests
cdkini Mar 15, 2024
198427a
add test cases
cdkini Mar 15, 2024
a0023fb
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Mar 19, 2024
29ac1a9
add more test cases
cdkini Mar 19, 2024
bfc2855
more progress
cdkini Mar 19, 2024
ffc8ec0
refactor to serialize methods
cdkini Mar 19, 2024
479f1ff
add id test
cdkini Mar 19, 2024
f4aac67
get tests passing
cdkini Mar 19, 2024
4a2bd89
make renderer func private again
cdkini Mar 19, 2024
aa53728
misc cleanup
cdkini Mar 19, 2024
092c9fe
remove comment
cdkini Mar 19, 2024
9f18740
bolster tests
cdkini Mar 19, 2024
10c12c1
mypy
cdkini Mar 19, 2024
b55d802
mypy
cdkini Mar 19, 2024
12f69b9
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Mar 19, 2024
50da4c2
cleanup based on bill's initial review
cdkini Mar 19, 2024
36d1ea9
mypy
cdkini Mar 19, 2024
d047ac4
Merge branch 'develop' of https://github.com/great-expectations/great…
cdkini Mar 19, 2024
e5c072d
misc updates around serialization
cdkini Mar 19, 2024
571f245
update renderer call
cdkini Mar 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
105 changes: 105 additions & 0 deletions great_expectations/checkpoint/v1_checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict, List, Union

from great_expectations import project_manager
from great_expectations._docs_decorators import public_api
from great_expectations.checkpoint.actions import ValidationAction
from great_expectations.compatibility.pydantic import BaseModel, validator
from great_expectations.core.serdes import _IdentifierBundle
from great_expectations.core.validation_config import ValidationConfig

if TYPE_CHECKING:
from great_expectations.checkpoint.types.checkpoint_result import CheckpointResult


def _encode_validation_config(validation: ValidationConfig) -> _IdentifierBundle:
    """Encode a validation config as a name/id identifier bundle.

    Used as the JSON encoder for ``ValidationConfig`` values so that only a
    reference (name and id) is emitted instead of the full object.

    Args:
        validation: The validation config to encode.

    Returns:
        An ``_IdentifierBundle`` carrying the config's ``name`` and ``id``.
    """
    if not validation.id:
        # A config without an id is added to the validation config store
        # before it is referenced.
        # NOTE(review): presumably ``add`` assigns ``validation.id`` as a side
        # effect; otherwise the bundle below still carries a falsy id - confirm.
        validation_config_store = project_manager.get_validation_config_store()
        key = validation_config_store.get_key(name=validation.name, id=None)
        validation_config_store.add(key=key, value=validation)

    return _IdentifierBundle(name=validation.name, id=validation.id)


def _encode_action(action: ValidationAction) -> dict:
data = action.__dict__
data.pop("data_context")
return data
cdkini marked this conversation as resolved.
Show resolved Hide resolved


class Checkpoint(BaseModel):
    """
    A Checkpoint is the primary means for validating data in a production deployment of Great Expectations.

    Checkpoints provide a convenient abstraction for running a number of validations and triggering a set of actions
    to be taken after the validation step.

    Args:
        name: Name of the checkpoint.
        validations: List of validation configs to be run. Must contain at least one entry.
        actions: List of actions to be taken after the validations are run.
        id: Optional identifier of the checkpoint. Defaults to None.

    """

    name: str
    validations: List[ValidationConfig]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not a list of ValidationDefinitions instead of config objects? That's what this shows: https://greatexpectations.atlassian.net/wiki/spaces/SUP/pages/917471267/Validation+Workflows

Or does the ValidationConfig need to be renamed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think ValidationConfig does need to be renamed. That can be done in a separate PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs to be renamed!

actions: List[ValidationAction]
cdkini marked this conversation as resolved.
Show resolved Hide resolved
id: Union[str, None] = None

class Config:
    # Necessary for compatibility with ValidationAction's Marshmallow dep
    arbitrary_types_allowed = True

    # When serialized, the validations field should be encoded as a set of identifiers.
    # These will be used as foreign keys to retrieve the actual objects from the appropriate stores.
    #
    # Example:
    # {
    #     "name": "my_checkpoint",
    #     "validations": [
    #         {
    #             "name": "my_first_validation",
    #             "id": "a758816-64c8-46cb-8f7e-03c12cea1d67"
    #         },
    #         {
    #             "name": "my_second_validation",
    #             "id": "1339js16-64c8-46cb-8f7e-03c12cea1d67"
    #         }
    #     ],
    #     "actions": [
    #         {
    #             "name": "my_slack_action",
    #             "slack_webhook": "https://hooks.slack.com/services/ABC123/DEF456/XYZ789",
    #             "notify_on": "all",
    #             "notify_with": ["my_data_docs_site"],
    #             "renderer": {
    #                 "class_name": "SlackRenderer"
    #             }
    #         }
    #     ]
    # }
    json_encoders = {
        ValidationConfig: lambda validation: _encode_validation_config(validation),
        ValidationAction: lambda action: _encode_action(action),
    }

@validator("validations")
def _validate_validations(
    cls, validations: list[ValidationConfig]
) -> list[ValidationConfig]:
    """Reject a checkpoint that has no validations.

    Args:
        validations: The value supplied for the ``validations`` field.

    Returns:
        The same list, unchanged, when it is non-empty.

    Raises:
        ValueError: If ``validations`` is empty.
    """
    if not validations:
        raise ValueError("Checkpoint must contain at least one validation")
    return validations

@public_api
def run(
    self,
    batch_params: Dict[str, Any] | None = None,
    suite_params: Dict[str, Any] | None = None,
) -> CheckpointResult:
    """Run the checkpoint. Not implemented yet.

    Args:
        batch_params: Optional dictionary; currently unused.
        suite_params: Optional dictionary; currently unused.

    Raises:
        NotImplementedError: Always, as the method is a placeholder.
    """
    raise NotImplementedError()

@public_api
def save(self) -> None:
    """Save the checkpoint. Not implemented yet.

    Raises:
        NotImplementedError: Always, as the method is a placeholder.
    """
    raise NotImplementedError()
10 changes: 10 additions & 0 deletions great_expectations/core/serdes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import Union

from great_expectations.compatibility.pydantic import (
BaseModel,
)


# Pairs an object's name with its identifier so the object can be referenced
# without embedding it in full.
class _IdentifierBundle(BaseModel):
    # Name of the referenced object.
    name: str
    # Identifier of the referenced object; may be None.
    id: Union[str, None]
6 changes: 1 addition & 5 deletions great_expectations/core/validation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
ExpectationSuite,
expectationSuiteSchema,
)
from great_expectations.core.serdes import _IdentifierBundle
from great_expectations.data_context.store.validation_config_store import (
ValidationConfigStore, # noqa: TCH001
)
Expand All @@ -32,11 +33,6 @@
)


class _IdentifierBundle(BaseModel):
name: str
id: Union[str, None]


class _EncodedValidationData(BaseModel):
datasource: _IdentifierBundle
asset: _IdentifierBundle
Expand Down
128 changes: 128 additions & 0 deletions tests/checkpoint/test_v1_checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

import json
from unittest import mock
from unittest.mock import Mock

import pytest

import great_expectations as gx
from great_expectations.checkpoint.actions import (
MicrosoftTeamsNotificationAction,
SlackNotificationAction,
ValidationAction,
)
from great_expectations.checkpoint.v1_checkpoint import Checkpoint
from great_expectations.core.batch_config import BatchConfig
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.core.validation_config import ValidationConfig
from great_expectations.data_context.data_context.ephemeral_data_context import (
EphemeralDataContext,
)


@pytest.mark.unit
def test_checkpoint_no_validations_raises_error():
    """Constructing a Checkpoint with an empty ``validations`` list must fail."""
    expected_message = "Checkpoint must contain at least one validation"

    with pytest.raises(ValueError) as exc_info:
        Checkpoint(name="my_checkpoint", validations=[], actions=[])

    assert expected_message in str(exc_info.value)


class TestCheckpointSerialization:
    """Serialization and deserialization tests for the V1 ``Checkpoint`` model."""

    @pytest.fixture
    def in_memory_context(self) -> EphemeralDataContext:
        """Provide an ephemeral data context for the fixtures below."""
        return gx.get_context(mode="ephemeral")

    @pytest.fixture
    def validation_config_1(self, in_memory_context: EphemeralDataContext):
        """Yield a validation config while ``ValidationConfig.json`` is patched."""
        vc = ValidationConfig(
            name="my_first_validation",
            data=Mock(spec=BatchConfig),
            suite=Mock(spec=ExpectationSuite),
        )

        with mock.patch.object(ValidationConfig, "json", return_value={}):
            yield vc

    @pytest.fixture
    def validation_config_2(self, in_memory_context: EphemeralDataContext):
        """Yield a second validation config while ``ValidationConfig.json`` is patched."""
        vc = ValidationConfig(
            name="my_second_validation",
            data=Mock(spec=BatchConfig),
            suite=Mock(spec=ExpectationSuite),
        )

        with mock.patch.object(ValidationConfig, "json", return_value={}):
            yield vc

    @pytest.fixture
    def validation_configs(
        self,
        validation_config_1: ValidationConfig,
        validation_config_2: ValidationConfig,
    ) -> list[ValidationConfig]:
        """Bundle both validation config fixtures into a list."""
        # ``self`` is required: without it the first declared parameter is
        # taken as the bound instance and the first fixture is never injected.
        return [validation_config_1, validation_config_2]

    @pytest.fixture
    def slack_action(self, in_memory_context: EphemeralDataContext):
        """Build a Slack notification action bound to the in-memory context."""
        return SlackNotificationAction(
            data_context=in_memory_context,
            renderer={
                "class_name": "SlackRenderer",
                "module_name": "great_expectations.render.renderer",
            },
        )

    @pytest.fixture
    def teams_action(self, in_memory_context: EphemeralDataContext):
        """Build a Microsoft Teams notification action bound to the in-memory context."""
        return MicrosoftTeamsNotificationAction(
            data_context=in_memory_context,
            microsoft_teams_webhook="https://teams.microsoft.com/...",
            renderer={
                "class_name": "MicrosoftTeamsRenderer",
                "module_name": "great_expectations.render.renderer",
            },
        )

    @pytest.fixture
    def actions(
        self,
        slack_action: SlackNotificationAction,
        teams_action: MicrosoftTeamsNotificationAction,
    ) -> list[ValidationAction]:
        """Bundle both action fixtures into a list."""
        # ``self`` is required here for the same reason as in
        # ``validation_configs``.
        return [slack_action, teams_action]

    @pytest.mark.unit
    @pytest.mark.parametrize("actions_fixture_name", ["actions", None])
    def test_checkpoint_serialization_success(
        self,
        validation_configs: list[ValidationConfig],
        actions_fixture_name: str | None,
        request: pytest.FixtureRequest,
    ):
        """Serialize a checkpoint with and without actions and compare the JSON."""
        # The fixture is resolved by name so the parametrization can also cover
        # the "no actions" case.
        actions = (
            request.getfixturevalue(actions_fixture_name)
            if actions_fixture_name
            else []
        )

        cp = Checkpoint(
            name="my_checkpoint", validations=validation_configs, actions=actions
        )

        actual = json.loads(cp.json(models_as_dict=False))
        # NOTE(review): the expected "validations" and "actions" lists are empty
        # even though the checkpoint is built with two validations (and, in one
        # case, two actions). This looks like a placeholder - confirm the
        # intended payload.
        expected = {
            "name": cp.name,
            "validations": [],
            "actions": [],
            "id": cp.id,
        }

        assert actual == expected

    def test_checkpoint_serialization_failure(self):
        # TODO: not implemented yet; currently passes trivially.
        pass

    def test_checkpoint_deserialization_success(self):
        # TODO: not implemented yet; currently passes trivially.
        pass

    def test_checkpoint_deserialization_failure(self):
        # TODO: not implemented yet; currently passes trivially.
        pass