Skip to content

Commit

Permalink
[API] Check background task state on project deletion wait [1.6.x] (m…
Browse files Browse the repository at this point in the history
  • Loading branch information
alonmr committed Feb 12, 2024
1 parent bbf66be commit afecd24
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 16 deletions.
2 changes: 1 addition & 1 deletion server/api/api/endpoints/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ async def delete_project(
return fastapi.Response(status_code=http.HTTPStatus.ACCEPTED.value)

else:
# For iguzio < 3.5.5, the project deletion job is triggered while zebo does not wait for it to complete.
# For iguazio < 3.5.5, the project deletion job is triggered while iguazio does not wait for it to complete.
# We wait for it here to make sure we respond with a proper status code.
await run_in_threadpool(
server.api.api.utils.verify_project_is_deleted, name, auth_info
Expand Down
27 changes: 26 additions & 1 deletion server/api/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import re
import traceback
import typing
import uuid
from hashlib import sha1, sha224
from http import HTTPStatus
from os import environ
Expand Down Expand Up @@ -1125,15 +1126,18 @@ def get_or_create_project_deletion_background_task(
background_task_kind=background_task_kind,
)

background_task_name = str(uuid.uuid4())
return server.api.utils.background_tasks.InternalBackgroundTasksHandler().create_background_task(
background_task_kind,
mlrun.mlconf.background_tasks.default_timeouts.operations.delete_project,
_delete_project,
background_task_name,
db_session=db_session,
project_name=project_name,
deletion_strategy=deletion_strategy,
auth_info=auth_info,
wait_for_project_deletion=wait_for_project_deletion,
background_task_name=background_task_name,
)


Expand All @@ -1143,6 +1147,7 @@ async def _delete_project(
deletion_strategy: mlrun.common.schemas.DeletionStrategy,
auth_info: mlrun.common.schemas.AuthInfo,
wait_for_project_deletion: bool,
background_task_name: str,
):
force_deleted = False
try:
Expand All @@ -1154,6 +1159,7 @@ async def _delete_project(
auth_info.projects_role,
auth_info,
wait_for_completion=True,
background_task_name=background_task_name,
)
except mlrun.errors.MLRunNotFoundError as exc:
if not server.api.utils.helpers.is_request_from_leader(auth_info.projects_role):
Expand Down Expand Up @@ -1188,12 +1194,31 @@ async def _delete_project(
def verify_project_is_deleted(project_name, auth_info):
def _verify_project_is_deleted():
try:
server.api.db.session.run_function_with_new_db_session(
project = server.api.db.session.run_function_with_new_db_session(
get_project_member().get_project, project_name, auth_info.session
)
except mlrun.errors.MLRunNotFoundError:
return
else:
project_status = project.status.dict()
if background_task_name := project_status.get(
"deletion_background_task_name"
):
bg_task = server.api.utils.background_tasks.InternalBackgroundTasksHandler().get_background_task(
name=background_task_name, raise_on_not_found=False
)
if (
bg_task
and bg_task.status.state
== mlrun.common.schemas.BackgroundTaskState.failed
):
# Background task failed, stop retrying
raise mlrun.errors.MLRunFatalFailureError(
original_exception=mlrun.errors.MLRunInternalServerError(
f"Failed to delete project {project_name}: {bg_task.status.error}"
)
)

raise mlrun.errors.MLRunInternalServerError(
f"Project {project_name} was not deleted"
)
Expand Down
19 changes: 19 additions & 0 deletions server/api/crud/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,12 @@ def delete_project(
name: str,
deletion_strategy: mlrun.common.schemas.DeletionStrategy = mlrun.common.schemas.DeletionStrategy.default(),
auth_info: mlrun.common.schemas.AuthInfo = mlrun.common.schemas.AuthInfo(),
background_task_name: str = None,
):
logger.debug("Deleting project", name=name, deletion_strategy=deletion_strategy)
self._enrich_project_with_deletion_background_task_name(
session, name, background_task_name
)
if (
deletion_strategy.is_restricted()
or deletion_strategy == mlrun.common.schemas.DeletionStrategy.check
Expand Down Expand Up @@ -472,3 +476,18 @@ def _verify_no_project_function_pods():
False,
_verify_no_project_function_pods,
)

@staticmethod
def _enrich_project_with_deletion_background_task_name(
session: sqlalchemy.orm.Session, name: str, background_task_name: str
):
if not background_task_name:
return

project_patch = {
"status": {"deletion_background_task_name": background_task_name}
}

server.api.utils.singletons.db.get_db().patch_project(
session, name, project_patch
)
3 changes: 2 additions & 1 deletion server/api/utils/background_tasks/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,11 @@ def create_background_task(
kind: str,
timeout: typing.Optional[int], # in seconds
function,
name: typing.Optional[str] = None,
*args,
**kwargs,
) -> typing.Tuple[typing.Callable, str]:
name = str(uuid.uuid4())
name = name or str(uuid.uuid4())
# sanity
if name in self._internal_background_tasks:
raise RuntimeError("Background task name already exists")
Expand Down
3 changes: 2 additions & 1 deletion server/api/utils/projects/follower.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,13 @@ def delete_project(
projects_role: typing.Optional[mlrun.common.schemas.ProjectsRole] = None,
auth_info: mlrun.common.schemas.AuthInfo = mlrun.common.schemas.AuthInfo(),
wait_for_completion: bool = True,
background_task_name: str = None,
) -> bool:
if server.api.utils.helpers.is_request_from_leader(
projects_role, leader_name=self._leader_name
):
server.api.crud.Projects().delete_project(
db_session, name, deletion_strategy, auth_info
db_session, name, deletion_strategy, auth_info, background_task_name
)
else:
return self._leader_client.delete_project(
Expand Down
1 change: 1 addition & 0 deletions server/api/utils/projects/leader.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def delete_project(
projects_role: typing.Optional[mlrun.common.schemas.ProjectsRole] = None,
auth_info: mlrun.common.schemas.AuthInfo = mlrun.common.schemas.AuthInfo(),
wait_for_completion: bool = True,
background_task_name: str = None,
) -> bool:
self._projects_in_deletion.add(name)
try:
Expand Down
1 change: 1 addition & 0 deletions server/api/utils/projects/member.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def delete_project(
projects_role: typing.Optional[mlrun.common.schemas.ProjectsRole] = None,
auth_info: mlrun.common.schemas.AuthInfo = mlrun.common.schemas.AuthInfo(),
wait_for_completion: bool = True,
background_task_name: str = None,
) -> bool:
pass

Expand Down
82 changes: 74 additions & 8 deletions tests/api/api/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import server.api.crud
import server.api.main
import server.api.utils.auth.verifier
import server.api.utils.background_tasks
import server.api.utils.clients.log_collector
import server.api.utils.singletons.db
import server.api.utils.singletons.project_member
Expand Down Expand Up @@ -958,7 +959,7 @@ def test_project_with_parameters(
)
def test_delete_project_not_found_in_leader(
unversioned_client: TestClient,
mock_project_leader_iguazio_client,
mock_project_follower_iguazio_client,
delete_api_version: str,
) -> None:
project = mlrun.common.schemas.Project(
Expand All @@ -970,15 +971,80 @@ def test_delete_project_not_found_in_leader(
assert response.status_code == HTTPStatus.CREATED.value
_assert_project_response(project, response)

response = unversioned_client.delete(
f"{delete_api_version}/projects/{project.metadata.name}",
)
assert response.status_code == HTTPStatus.ACCEPTED.value
with unittest.mock.patch.object(
mock_project_follower_iguazio_client,
"delete_project",
side_effect=mlrun.errors.MLRunNotFoundError("Project not found"),
):
response = unversioned_client.delete(
f"{delete_api_version}/projects/{project.metadata.name}",
)
assert response.status_code == HTTPStatus.ACCEPTED.value

response = unversioned_client.get(
f"v1/projects/{project.metadata.name}",
response = unversioned_client.get(
f"v1/projects/{project.metadata.name}",
)
assert response.status_code == HTTPStatus.NOT_FOUND.value


# Test should not run more than a few seconds because we test that if the background task fails,
# the wrapper task fails fast
@pytest.mark.timeout(10)
@pytest.mark.parametrize(
"delete_api_version",
[
"v1",
"v2",
],
)
def test_delete_project_fail_fast(
unversioned_client: TestClient,
mock_project_follower_iguazio_client,
delete_api_version: str,
) -> None:
# Set the igz version for the project leader mock
# We only test igz version < 3.5.5 flow because from 3.5.5 iguazio waits for the inner background task to
# finish so the wrapper task does not wait for the inner task
mlrun.mlconf.igz_version = "3.5.4"
project = mlrun.common.schemas.Project(
metadata=mlrun.common.schemas.ProjectMetadata(name="project-name"),
spec=mlrun.common.schemas.ProjectSpec(),
)
assert response.status_code == HTTPStatus.NOT_FOUND.value

response = unversioned_client.post("v1/projects", json=project.dict())
assert response.status_code == HTTPStatus.CREATED.value
_assert_project_response(project, response)

with unittest.mock.patch(
"server.api.crud.projects.Projects.delete_project_resources",
side_effect=Exception("some error"),
):
response = unversioned_client.delete(
f"{delete_api_version}/projects/{project.metadata.name}",
headers={
mlrun.common.schemas.HeaderNames.deletion_strategy: mlrun.common.schemas.DeletionStrategy.cascading,
},
)
if delete_api_version == "v1":
assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR.value
assert (
"Failed to delete project project-name: some error"
in response.json()["detail"]
)
else:
assert response.status_code == HTTPStatus.ACCEPTED.value
background_task = mlrun.common.schemas.BackgroundTask(**response.json())
background_task = server.api.utils.background_tasks.InternalBackgroundTasksHandler().get_background_task(
background_task.metadata.name
)
assert (
background_task.status.state
== mlrun.common.schemas.BackgroundTaskState.failed
)
assert (
"Failed to delete project project-name: some error"
in background_task.status.error
)


def _create_resources_of_all_kinds(
Expand Down
28 changes: 24 additions & 4 deletions tests/api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import httpx
import kfp
import pytest
import semver
import sqlalchemy.orm
from fastapi.testclient import TestClient

Expand Down Expand Up @@ -347,6 +348,7 @@ class MockedProjectFollowerIguazioClient(
):
def __init__(self):
self._db_session = None
self._unversioned_client = None

def create_project(
self,
Expand All @@ -372,7 +374,21 @@ def delete_project(
deletion_strategy: mlrun.common.schemas.DeletionStrategy = mlrun.common.schemas.DeletionStrategy.default(),
wait_for_completion: bool = True,
) -> bool:
raise mlrun.errors.MLRunNotFoundError("Project not found")
api_version = "v2"
igz_version = mlrun.mlconf.get_parsed_igz_version()
if igz_version and igz_version < semver.VersionInfo.parse("3.5.5"):
api_version = "v1"

self._unversioned_client.delete(
f"{api_version}/projects/{name}",
headers={
mlrun.common.schemas.HeaderNames.projects_role: mlrun.mlconf.httpdb.projects.leader,
mlrun.common.schemas.HeaderNames.deletion_strategy: deletion_strategy,
},
)

# Mock waiting for completion in iguazio (return False to indicate 'not running in background')
return False

def list_projects(
self,
Expand Down Expand Up @@ -404,7 +420,9 @@ def get_project_owner(


@pytest.fixture()
def mock_project_leader_iguazio_client(db: sqlalchemy.orm.Session):
def mock_project_follower_iguazio_client(
db: sqlalchemy.orm.Session, unversioned_client: TestClient
):
"""
This fixture mocks the project leader iguazio client.
"""
Expand All @@ -413,8 +431,10 @@ def mock_project_leader_iguazio_client(db: sqlalchemy.orm.Session):
old_iguazio_client = server.api.utils.clients.iguazio.Client
server.api.utils.clients.iguazio.Client = MockedProjectFollowerIguazioClient
server.api.utils.singletons.project_member.initialize_project_member()
MockedProjectFollowerIguazioClient()._db_session = db
iguazio_client = MockedProjectFollowerIguazioClient()
iguazio_client._db_session = db
iguazio_client._unversioned_client = unversioned_client

yield
yield iguazio_client

server.api.utils.clients.iguazio.Client = old_iguazio_client

0 comments on commit afecd24

Please sign in to comment.