Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/browse): Re-emit browse path v2 aspects to avoid race condition #9227

Merged
merged 1 commit into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/api/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
auto_materialize_referenced_tags,
auto_status_aspect,
auto_workunit_reporter,
re_emit_browse_path_v2,
)
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
Expand Down Expand Up @@ -278,13 +279,14 @@ def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor
if isinstance(config, PlatformInstanceConfigMixin) and config.platform_instance:
platform_instance = config.platform_instance

return partial(
browse_path_processor = partial(
auto_browse_path_v2,
platform=platform,
platform_instance=platform_instance,
drop_dirs=[s for s in browse_path_drop_dirs if s is not None],
dry_run=dry_run,
)
return lambda stream: re_emit_browse_path_v2(browse_path_processor(stream))


class TestableSource(Source):
Expand Down
15 changes: 15 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,21 @@ def auto_lowercase_urns(
yield wu


def re_emit_browse_path_v2(
    stream: Iterable[MetadataWorkUnit],
) -> Iterable[MetadataWorkUnit]:
    """Pass every workunit through, then replay the browse paths v2 ones.

    Emitting the browse paths v2 aspects a second time, after the rest of the
    stream, avoids a race condition where the server overwrites them with a
    default.
    """
    replay_queue = []

    for workunit in stream:
        # Forward first; the workunit is only inspected once the consumer
        # asks for the next item.
        yield workunit
        if not workunit.is_primary_source:
            continue
        if workunit.get_aspect_of_type(BrowsePathsV2Class):
            replay_queue.append(workunit)

    # Upstream is exhausted: emit the remembered workunits again, in the
    # order they were first seen.
    yield from replay_queue


def auto_browse_path_v2(
stream: Iterable[MetadataWorkUnit],
*,
Expand Down