-
Notifications
You must be signed in to change notification settings - Fork 89
ENG-3593: Eliminate redundant System fetches in /system/upsert #8080
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c053b32
a5adcde
7f46213
ffc786f
618c8d7
f447f65
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| type: Changed | ||
| description: Reduce redundant System fetches per row in /system/upsert from four to one, and add per-axis change-detection logging in the system audit path. | ||
| pr: 8080 | ||
| labels: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,18 @@ | ||
| from unittest.mock import AsyncMock, patch | ||
| from uuid import uuid4 | ||
|
|
||
| import pytest | ||
| from fideslang.models import DataFlow as DataFlowSchema | ||
| from fideslang.models import PrivacyDeclaration as PrivacyDeclarationSchema | ||
| from fideslang.models import System as SystemSchema | ||
| from sqlalchemy import delete | ||
| from sqlalchemy.exc import SQLAlchemyError | ||
|
|
||
| from fides.api.db.system import create_system, update_system | ||
| from fides.api.db.crud import get_resource | ||
| from fides.api.db.system import create_system, update_system, upsert_system | ||
| from fides.api.models.sql_models import System | ||
| from fides.api.models.system_history import SystemHistory | ||
| from fides.api.util import errors | ||
| from fides.config import get_config | ||
|
|
||
| CONFIG = get_config() | ||
|
|
@@ -137,3 +141,187 @@ async def test_automatic_system_update(self, db, async_session, system: System): | |
| ).all() | ||
|
|
||
| assert system_histories[0].edited_by == "automatic_system_update" | ||
|
|
||
|
|
||
| class TestUpsertSystemFetchOptimization: | ||
| """Regression coverage for the ENG-3593 fetch-count reduction in | ||
| /system/upsert. Each updated system used to fan out to 4 get_resource | ||
| calls; after the fix it should be exactly 1.""" | ||
|
|
||
| @pytest.fixture() | ||
| async def system(self, async_session): | ||
| resource = SystemSchema( | ||
| fides_key=str(uuid4()), | ||
| organization_fides_key="default_organization", | ||
| name="upsert_fetch_opt_system", | ||
| system_type="test", | ||
| privacy_declarations=[], | ||
| ) | ||
|
|
||
| system = await create_system( | ||
| resource, async_session, CONFIG.security.oauth_root_client_id | ||
| ) | ||
| yield system | ||
| delete(System).where(System.id == system.id) | ||
|
adamsachs marked this conversation as resolved.
|
||
|
|
||
| async def test_upsert_passes_existing_system_to_update( | ||
| self, async_session, system: System | ||
| ): | ||
| """upsert_system must pass the System loaded by its existence check | ||
| through to update_system as `existing_system`. If this contract is | ||
| broken, the redundant fetch the optimization removed is silently | ||
| re-introduced via update_system's fallback path.""" | ||
| system_schema = SystemSchema.model_validate(system) | ||
|
|
||
| with patch( | ||
| "fides.api.db.system.update_system", | ||
| new=AsyncMock(return_value=(system, False)), | ||
| ) as mock_update: | ||
| await upsert_system( | ||
| resources=[system_schema], | ||
| db=async_session, | ||
| current_user_id=CONFIG.security.oauth_root_client_id, | ||
| ) | ||
|
|
||
| mock_update.assert_called_once() | ||
| call_kwargs = mock_update.call_args.kwargs | ||
| assert call_kwargs.get("existing_system") is not None, ( | ||
| "update_system was called without existing_system - this defeats " | ||
| "the ENG-3593 fetch optimization." | ||
| ) | ||
| assert call_kwargs["existing_system"].fides_key == system.fides_key | ||
|
|
||
| async def test_update_system_with_existing_system_persists_changes( | ||
| self, db, async_session, system: System | ||
| ): | ||
| """When update_system is called with an explicit existing_system | ||
| (the new ENG-3593 code path), the inline UPDATE + db.refresh still | ||
| persist the change to the DB and emit the expected audit entry.""" | ||
| pre_loaded = await get_resource(System, system.fides_key, async_session) | ||
|
|
||
| system_schema = SystemSchema.model_validate(system) | ||
| system_schema.description = "Modified via explicit existing_system path" | ||
|
|
||
| _, updated = await update_system( | ||
| resource=system_schema, | ||
| db=async_session, | ||
| current_user_id=CONFIG.security.oauth_root_client_id, | ||
| existing_system=pre_loaded, | ||
| ) | ||
| assert updated | ||
|
|
||
| # The DB row was actually updated. | ||
| fresh = await get_resource(System, system.fides_key, async_session) | ||
| assert fresh.description == "Modified via explicit existing_system path" | ||
|
|
||
| # An audit entry on the general axis was created. | ||
| system_histories = SystemHistory.filter( | ||
| db=db, conditions=(SystemHistory.system_id == system.id) | ||
| ).all() | ||
| assert len(system_histories) == 1 | ||
|
|
||
| async def test_upsert_system_emits_one_fetch_per_updated_system( | ||
| self, async_session | ||
| ): | ||
| """Regression guard for ENG-3593: each updated system on the UPDATE | ||
| path should issue exactly one `crud.get_resource` call (the existence | ||
| check in `upsert_system`). Before the fix this was four. If a future | ||
| change re-introduces a redundant call, this test fails loudly. | ||
|
|
||
| Uses two systems to also catch any accidental constant-cost bug | ||
| (e.g., loading systems once but counting wrong). | ||
| """ | ||
| # Stable fides_keys across both passes so the second pass hits the | ||
| # rows the first pass created. The schema instances themselves must | ||
| # be rebuilt because `update_system` mutates inputs (it does a | ||
| # `delattr(resource, "privacy_declarations")` after upserting them). | ||
| fides_keys = [f"fetch_count_{uuid4()}" for _ in range(2)] | ||
|
|
||
| def build_payload() -> list[SystemSchema]: | ||
| return [ | ||
| SystemSchema( | ||
| fides_key=fk, | ||
| organization_fides_key="default_organization", | ||
| name=f"Fetch count test {i}", | ||
| system_type="test", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
A more robust approach would be to patch with patch("fides.api.db.system.get_resource", wraps=get_resource) as mock_get:
await upsert_system(resources=build_payload(), db=async_session, ...)
# Each system should only be fetched once (existence check in upsert_system).
assert mock_get.call_count == len(fides_keys)This directly measures what the optimization is meant to guarantee rather than inferring it through log messages.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. adjusted to use the mocking in 618c8d7 |
||
| privacy_declarations=[], | ||
| ) | ||
| for i, fk in enumerate(fides_keys) | ||
| ] | ||
|
|
||
| # Prime: this run creates the rows (INSERT path); not what we measure. | ||
| await upsert_system( | ||
| resources=build_payload(), | ||
| db=async_session, | ||
| current_user_id=CONFIG.security.oauth_root_client_id, | ||
| ) | ||
|
|
||
| # Measure: every row exists, every row hits the UPDATE path. Patch | ||
| # `get_resource` at the call site (system.py) and wrap the original | ||
| # so behavior is unchanged but invocations are counted. | ||
| with patch( | ||
| "fides.api.db.system.get_resource", wraps=get_resource | ||
| ) as mock_get_resource: | ||
| await upsert_system( | ||
| resources=build_payload(), | ||
| db=async_session, | ||
| current_user_id=CONFIG.security.oauth_root_client_id, | ||
| ) | ||
|
|
||
| # One call per system: `upsert_system`'s existence check. The | ||
| # `update_system` call no longer needs to fetch because | ||
| # `existing_system` is threaded through. | ||
| assert mock_get_resource.call_count == len(fides_keys), ( | ||
| f"Expected {len(fides_keys)} get_resource calls on the UPDATE " | ||
| f"path (one existence check per system), got " | ||
| f"{mock_get_resource.call_count}." | ||
| ) | ||
|
|
||
| async def test_update_system_raises_query_error_on_sqlalchemy_error( | ||
| self, async_session | ||
| ): | ||
| """When the inlined UPDATE raises a SQLAlchemyError, update_system | ||
| must wrap it in a domain-level errors.QueryError. This mirrors the | ||
| guard the prior `crud.update_resource` call provided and prevents | ||
| raw SQLAlchemy exceptions from propagating to API callers. | ||
|
|
||
| Builds the System inline rather than using the class `system` | ||
| fixture: ``async with db.begin()`` auto-rolls back when the | ||
| injected error propagates, which expires ORM objects bound to the | ||
| shared session-scoped fixture. The fixture's teardown then tries | ||
| to access ``system.id`` and triggers a lazy reload that fails | ||
| outside an async greenlet context. | ||
| """ | ||
| fides_key = f"qe_test_{uuid4()}" | ||
| resource = SystemSchema( | ||
| fides_key=fides_key, | ||
| organization_fides_key="default_organization", | ||
| name="query_error_test_system", | ||
| system_type="test", | ||
| privacy_declarations=[], | ||
| ) | ||
| await create_system( | ||
| resource, async_session, CONFIG.security.oauth_root_client_id | ||
| ) | ||
| pre_loaded = await get_resource(System, fides_key, async_session) | ||
|
|
||
| system_schema = SystemSchema.model_validate(pre_loaded) | ||
| system_schema.description = "Should fail" | ||
|
|
||
| original_execute = async_session.execute | ||
|
|
||
| async def execute_or_fail(stmt, *args, **kwargs): | ||
| # Fail only on the System UPDATE so we don't break transaction | ||
| # control or other internal session machinery. | ||
| if getattr(stmt, "table", None) is System.__table__: | ||
| raise SQLAlchemyError("simulated DB failure") | ||
| return await original_execute(stmt, *args, **kwargs) | ||
|
|
||
| with patch.object(async_session, "execute", side_effect=execute_or_fail): | ||
| with pytest.raises(errors.QueryError): | ||
| await update_system( | ||
| resource=system_schema, | ||
| db=async_session, | ||
| current_user_id=CONFIG.security.oauth_root_client_id, | ||
| existing_system=pre_loaded, | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these lines are uncovered by tests, can we add a test for the exception case ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Claude]: added in f447f65941 —
test_update_system_raises_query_error_on_sqlalchemy_errorpatchesasync_session.executeto raiseSQLAlchemyErroronly on the System UPDATE and assertserrors.QueryErrorpropagates.