[BUGFIX] Show FDS Asset name in DataDocs #9950

Merged · 15 commits · May 21, 2024
@@ -152,6 +152,9 @@
from great_expectations.core.expectation_configuration import (
ExpectationConfiguration,
)
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_context.data_context_variables import (
DataContextVariables,
)
@@ -3875,9 +3878,13 @@ def _open_url_in_browser(url: str) -> None:

def get_docs_sites_urls(
self,
resource_identifier=None,
resource_identifier: ExpectationSuiteIdentifier
| ValidationResultIdentifier
| GXCloudIdentifier
| str
| None = None,
site_name: Optional[str] = None,
only_if_exists=True,
only_if_exists: bool = True,
site_names: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
"""
@@ -4794,7 +4801,7 @@ def get_validation_result( # noqa: PLR0913
validations_store_name=None,
failed_only=False,
include_rendered_content=None,
):
) -> ExpectationValidationResult:
"""Get validation results from a configured store.

Args:
@@ -4832,7 +4839,7 @@ def get_validation_result( # noqa: PLR0913
# run_id_set = set([key.run_id for key in filtered_key_list])
if len(filtered_key_list) == 0:
logger.warning("No valid run_id values found.")
return {}
return {} # type: ignore[return-value]
Contributor
Maybe make the return type a union with a dict so you don't have to ignore this here? This may cause you to assert at the call sites though.
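As a minimal, self-contained sketch of what the reviewer is suggesting — the names below are simplified stand-ins, not the actual great_expectations signature:

```python
from __future__ import annotations

import logging
from typing import Union

logger = logging.getLogger(__name__)


class ExpectationValidationResult:
    """Stand-in for great_expectations' ExpectationValidationResult."""


def get_validation_result(
    filtered_key_list: list,
) -> Union[ExpectationValidationResult, dict]:
    # With the widened return type, the empty-dict branch no longer needs
    # a "# type: ignore[return-value]" comment.
    if len(filtered_key_list) == 0:
        logger.warning("No valid run_id values found.")
        return {}
    return ExpectationValidationResult()


# The trade-off the reviewer mentions: call sites now have to narrow the type.
result = get_validation_result([])
if isinstance(result, ExpectationValidationResult):
    print("got a validation result")
else:
    print("no valid run_id values found")
```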


filtered_key_list = sorted(filtered_key_list, key=lambda x: x.run_id)

@@ -5272,11 +5279,13 @@ def profile_datasource( # noqa: C901, PLR0912, PLR0913, PLR0915
@public_api
def build_data_docs(
self,
site_names=None,
resource_identifiers=None,
dry_run=False,
site_names: list[str] | None = None,
resource_identifiers: list[ExpectationSuiteIdentifier]
| list[ValidationResultIdentifier]
| None = None,
dry_run: bool = False,
build_index: bool = True,
):
) -> dict[str, str]:
"""Build Data Docs for your project.

--Documentation--
41 changes: 33 additions & 8 deletions great_expectations/render/renderer/site_builder.py
@@ -1,10 +1,12 @@
from __future__ import annotations

import logging
import os
import pathlib
import traceback
import urllib
from collections import OrderedDict
from typing import Any, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional, Tuple

from great_expectations import exceptions
from great_expectations.core import ExpectationSuite
@@ -23,6 +25,12 @@
from great_expectations.data_context.util import instantiate_class_from_config
from great_expectations.render.util import resource_key_passes_run_name_filter

if TYPE_CHECKING:
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_context import AbstractDataContext

logger = logging.getLogger(__name__)

FALSEY_YAML_STRINGS = [
@@ -114,7 +122,7 @@ class SiteBuilder:

def __init__( # noqa: C901, PLR0912, PLR0913
self,
data_context,
data_context: AbstractDataContext,
store_backend,
site_name=None,
site_index_builder=None,
@@ -347,7 +355,7 @@ class DefaultSiteSectionBuilder:
def __init__( # noqa: PLR0913
self,
name,
data_context,
data_context: AbstractDataContext,
target_store,
source_store_name,
custom_styles_directory=None,
@@ -516,7 +524,7 @@ def __init__( # noqa: PLR0913
self,
name,
site_name,
data_context,
data_context: AbstractDataContext,
target_store,
site_section_builders_config,
custom_styles_directory=None,
@@ -904,8 +912,7 @@ def _add_profiling_to_index_links(
run_id=profiling_result_key.run_id,
run_time=profiling_result_key.run_id.run_time,
run_name=profiling_result_key.run_id.run_name,
asset_name=batch_kwargs.get("data_asset_name")
or batch_spec.get("data_asset_name"),
asset_name=_resolve_asset_name(validation),
batch_kwargs=batch_kwargs,
batch_spec=batch_spec,
)
@@ -961,8 +968,7 @@ def _add_validations_to_index_links(
validation_success=validation_success,
run_time=validation_result_key.run_id.run_time,
run_name=validation_result_key.run_id.run_name,
asset_name=batch_kwargs.get("data_asset_name")
or batch_spec.get("data_asset_name"),
asset_name=_resolve_asset_name(validation),
batch_kwargs=batch_kwargs,
batch_spec=batch_spec,
)
@@ -971,6 +977,25 @@ def _add_validations_to_index_links(
logger.warning(error_msg)


def _resolve_asset_name(validation_results: ExpectationValidationResult) -> str | None:
"""
Resolve the asset name from the validation results meta data.
FDS does not store data_asset_name in batch_kwargs or batch_spec and it must be
pulled from the active batch definition.
"""
Comment on lines +980 to +985

Member Author
@morphatic Thanks so much for the detailed issue description.
I ended up using exactly what you suggested.

Would you like me to make you a CoAuthor on this PR?

Glad it helped! I would not complain about being listed as a co-author. Thank you for asking!

batch_kwargs = validation_results.meta.get("batch_kwargs", {})
batch_spec = validation_results.meta.get("batch_spec", {})
Comment on lines +986 to +987

Member Author
One or both of these may be going away in V1, but active_batch_definition will still be present.


asset_name = batch_kwargs.get("data_asset_name") or batch_spec.get(
"data_asset_name"
)
if asset_name:
return asset_name
# FDS does not store data_asset_name in batch_kwargs or batch_spec
active_batch = validation_results.meta.get("active_batch_definition", {})
return active_batch.get("data_asset_name")
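
For illustration, here is a hedged sketch of the same lookup order applied to hypothetical metadata shapes; the dictionaries below are made-up examples, not real library output:

```python
from __future__ import annotations

# Hypothetical metadata shapes, for illustration only.
block_style_meta = {
    "batch_kwargs": {"data_asset_name": "titanic", "path": "/data/titanic.csv"},
    "batch_spec": {},
}
fds_meta = {
    "batch_kwargs": {},
    "batch_spec": {"reader_method": "read_csv"},  # FDS: no data_asset_name here
    "active_batch_definition": {"data_asset_name": "my_asset"},
}


def resolve_asset_name_from_meta(meta: dict) -> str | None:
    """Same lookup order as _resolve_asset_name, applied to a plain meta dict."""
    batch_kwargs = meta.get("batch_kwargs", {})
    batch_spec = meta.get("batch_spec", {})
    asset_name = batch_kwargs.get("data_asset_name") or batch_spec.get("data_asset_name")
    if asset_name:
        return asset_name
    # Fall back to the active batch definition, which fluent datasources populate.
    return meta.get("active_batch_definition", {}).get("data_asset_name")


assert resolve_asset_name_from_meta(block_style_meta) == "titanic"
assert resolve_asset_name_from_meta(fds_meta) == "my_asset"
```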


class CallToActionButton:
def __init__(self, title, link) -> None:
self.title = title
12 changes: 10 additions & 2 deletions tests/render/conftest.py
@@ -1,5 +1,8 @@
from __future__ import annotations

import json
import os
from typing import TYPE_CHECKING

import pytest

@@ -10,9 +13,14 @@
from great_expectations.data_context.util import file_relative_path
from great_expectations.self_check.util import expectationSuiteValidationResultSchema

if TYPE_CHECKING:
from great_expectations.core.expectation_validation_result import (
ExpectationSuiteValidationResult,
)


@pytest.fixture(scope="module")
def empty_data_context_module_scoped(tmp_path_factory):
def empty_data_context_module_scoped(tmp_path_factory) -> FileDataContext:
# Re-enable GE_USAGE_STATS
project_path = str(tmp_path_factory.mktemp("empty_data_context"))
context = gx.data_context.FileDataContext.create(project_path)
@@ -23,7 +31,7 @@ def empty_data_context_module_scoped(tmp_path_factory):


@pytest.fixture
def titanic_profiled_name_column_evrs():
def titanic_profiled_name_column_evrs() -> ExpectationSuiteValidationResult:
# This is a janky way to fetch expectations matching a specific name from an EVR suite.
# TODO: It will no longer be necessary once we implement ValidationResultSuite._group_evrs_by_column
from great_expectations.render.renderer.renderer import Renderer
73 changes: 64 additions & 9 deletions tests/render/test_page_renderer.py
@@ -1,10 +1,13 @@
from __future__ import annotations

import json
import re
from pprint import pformat as pf
from typing import TYPE_CHECKING

import mistune
import pytest

from great_expectations import DataContext
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.data_context.util import file_relative_path
@@ -16,14 +19,22 @@
)
from great_expectations.render.renderer_configuration import MetaNotesFormat

if TYPE_CHECKING:
from pytest_mock import MockerFixture

from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_context import AbstractDataContext

# module level markers
pytestmark = pytest.mark.filesystem


def test_ExpectationSuitePageRenderer_render_expectation_suite_notes(
empty_data_context,
):
context: DataContext = empty_data_context
context: AbstractDataContext = empty_data_context
result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
ExpectationSuite(
expectation_suite_name="test", meta={"notes": "*hi*"}, data_context=context
@@ -139,9 +150,9 @@ def test_ExpectationSuitePageRenderer_render_expectation_suite_notes(


def test_expectation_summary_in_ExpectationSuitePageRenderer_render_expectation_suite_notes(
empty_data_context,
empty_data_context: AbstractDataContext,
):
context: ExpectationSuite = empty_data_context
context: AbstractDataContext = empty_data_context
result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
ExpectationSuite(
expectation_suite_name="test",
@@ -206,14 +217,16 @@ def test_expectation_summary_in_ExpectationSuitePageRenderer_render_expectation_
)


def test_ProfilingResultsPageRenderer(titanic_profiled_evrs_1):
def test_ProfilingResultsPageRenderer(
titanic_profiled_evrs_1: ExpectationValidationResult,
):
document = ProfilingResultsPageRenderer().render(titanic_profiled_evrs_1)
assert isinstance(document, RenderedDocumentContent)
assert len(document.sections) == 8


def test_ValidationResultsPageRenderer_render_validation_header(
titanic_profiled_evrs_1,
titanic_profiled_evrs_1: ExpectationValidationResult,
):
validation_header = ValidationResultsPageRenderer._render_validation_header(
titanic_profiled_evrs_1
@@ -269,7 +282,9 @@ def test_ValidationResultsPageRenderer_render_validation_header(
assert validation_header == expected_validation_header


def test_ValidationResultsPageRenderer_render_validation_info(titanic_profiled_evrs_1):
def test_ValidationResultsPageRenderer_render_validation_info(
titanic_profiled_evrs_1: ExpectationValidationResult,
):
validation_info = ValidationResultsPageRenderer._render_validation_info(
titanic_profiled_evrs_1
).to_json_dict()
@@ -532,7 +547,7 @@ def ValidationResultsPageRenderer_render_with_run_info_at_start():


def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_end(
titanic_profiled_evrs_1,
titanic_profiled_evrs_1: ExpectationValidationResult,
ValidationResultsPageRenderer_render_with_run_info_at_end,
):
validation_results_page_renderer = ValidationResultsPageRenderer(
@@ -556,7 +571,7 @@ def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_end(


def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_start(
titanic_profiled_evrs_1,
titanic_profiled_evrs_1: ExpectationValidationResult,
ValidationResultsPageRenderer_render_with_run_info_at_start,
):
validation_results_page_renderer = ValidationResultsPageRenderer(
@@ -577,3 +592,43 @@ def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_start(
rendered_validation_results
== ValidationResultsPageRenderer_render_with_run_info_at_start
)


def test_asset_name_is_part_of_resource_info_index(mocker: MockerFixture):
"""
DefaultSiteIndexBuilder.add_resource_info_to_index_links_dict is what supplies
the resource metadata to the index page.
This test checks that the asset_name is being provided when checkpoints with fluent datasources are run.
"""
import great_expectations as gx
from great_expectations.render.renderer.site_builder import DefaultSiteIndexBuilder

context = gx.get_context(mode="ephemeral")

add_resource_info_spy = mocker.spy(
DefaultSiteIndexBuilder,
"add_resource_info_to_index_links_dict",
)

asset_name = "my_asset"
validator = context.sources.pandas_default.read_csv(
"https://raw.githubusercontent.com/great-expectations/gx_tutorials/main/data/yellow_tripdata_sample_2019-01.csv",
asset_name=asset_name,
)

validator.expect_column_values_to_not_be_null("pickup_datetime")
validator.expect_column_values_to_be_between(
"passenger_count", min_value=1, max_value=6
)
validator.save_expectation_suite(discard_failed_expectations=False)

checkpoint = context.add_or_update_checkpoint(
name="my_quickstart_checkpoint",
validator=validator,
)

checkpoint.run()

last_call = add_resource_info_spy.call_args_list[-1]
print(f"Last call kwargs:\n{pf(last_call.kwargs, depth=1)}")
assert last_call.kwargs["asset_name"] == asset_name