Skip to content

Commit

Permalink
fix unhandled lock and taskflow errors (#1495, #1496, #1498, #1500, #…
Browse files Browse the repository at this point in the history
  • Loading branch information
mikkonie committed Oct 31, 2022
1 parent c7d9e9e commit a0619d4
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 28 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,24 @@ Added
- **Landingzones**
- ``LANDINGZONES_TRIGGER_ENABLE`` Django setting (#1508)

Changed
-------

- **Taskflowbackend**
- Improve project lock error messages (#1496, #1500, #1511)

Fixed
-----

- **General**
- Invalid ``REDIS_URL`` default value (#1497)
- Invalid modify API settings in production config (#1503)
- **Samplesheets**
- Uncaught project lock exceptions in iRODS delete request accepting (#1495)
- Missing CSS classes for failed iRODS delete requests (#1513)
- **Taskflowbackend**
- Unhandled project lock exceptions (#1496, #1500, #1511)
- Landing zone status not updated on flow lock/build errors (#1498)


v0.12.0 (2022-10-14)
Expand Down
1 change: 1 addition & 0 deletions docs_manual/source/sodar_release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Maintenance and bug fix release.

- Fix incorrect project modify API settings in production
- Fix Tooz and Redis connection issue handling
- Fix unhandled project locking errors


v0.12.0 (2022-10-14)
Expand Down
16 changes: 16 additions & 0 deletions landingzones/tests/test_views_api_taskflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json

from django.test import override_settings
from django.urls import reverse

# Projectroles dependency
Expand All @@ -25,6 +26,7 @@
LandingZoneTaskflowMixin,
ZONE_TITLE,
ZONE_DESC,
INVALID_REDIS_URL,
)


Expand Down Expand Up @@ -391,3 +393,17 @@ def test_post_move_invalid_status(self):
self.assertEqual(response.status_code, 400)
self.assertEqual(LandingZone.objects.count(), 1)
self.assertEqual(LandingZone.objects.first().status, 'DELETED')

@override_settings(REDIS_URL=INVALID_REDIS_URL)
def test_post_move_lock_failure(self):
"""Test post() for moving with project lock failure"""
url = reverse(
'landingzones:api_submit_move',
kwargs={'landingzone': self.landing_zone.sodar_uuid},
)
response = self.request_knox(url, method='POST')

self.assertEqual(response.status_code, 500)
self.assertEqual(LandingZone.objects.count(), 1)
zone = LandingZone.objects.first()
self.assert_zone_status(zone, 'FAILED')
49 changes: 49 additions & 0 deletions landingzones/tests/test_views_taskflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from django.contrib import auth
from django.core import mail
from django.test import override_settings
from django.urls import reverse

# Projectroles dependency
Expand All @@ -26,6 +27,9 @@
# Taskflowbackend dependency
from taskflowbackend.tests.base import TaskflowbackendTestBase

# Timeline dependency
from timeline.models import ProjectEvent

from landingzones.models import LandingZone
from landingzones.tests.test_models import LandingZoneMixin

Expand All @@ -50,6 +54,7 @@
ASYNC_WAIT_SECONDS = 5
ASYNC_RETRY_COUNT = 3
INVALID_MD5 = '11111111111111111111111111111111'
INVALID_REDIS_URL = 'redis://127.0.0.1:6666/0'


class LandingZoneTaskflowMixin:
Expand Down Expand Up @@ -593,6 +598,50 @@ def test_validate_md5_only(self):
self.assertEqual(len(self.zone_coll.data_objects), 1)
self.assertEqual(len(self.assay_coll.data_objects), 0)

@override_settings(REDIS_URL=INVALID_REDIS_URL)
def test_move_lock_failure(self):
"""Test validating and moving with project lock failure"""
self.irods_obj = self.make_object(self.zone_coll, TEST_OBJ_NAME)
self.md5_obj = self.make_md5_object(self.irods_obj)
zone = LandingZone.objects.first()
self.assertEqual(zone.status, 'ACTIVE')
self.assertEqual(len(self.zone_coll.data_objects), 2)
self.assertEqual(len(self.assay_coll.data_objects), 0)
self.assertEqual(len(mail.outbox), 1)
self.assertEqual(
ProjectEvent.objects.filter(event_name='zone_move').count(), 0
)
self.assertEqual(
AppAlert.objects.filter(alert_name='zone_move').count(), 0
)

with self.login(self.user):
response = self.client.post(
reverse(
'landingzones:move',
kwargs={'landingzone': self.landing_zone.sodar_uuid},
),
)
self.assertRedirects(
response,
reverse(
'landingzones:list',
kwargs={'project': self.project.sodar_uuid},
),
)

self.assert_zone_status(zone, 'FAILED')
self.assertEqual(len(self.zone_coll.data_objects), 2)
self.assertEqual(len(self.assay_coll.data_objects), 0)
self.assertEqual(len(mail.outbox), 1) # TODO: Should this send email?
tl_event = ProjectEvent.objects.filter(event_name='zone_move').first()
self.assertIsInstance(tl_event, ProjectEvent)
self.assertEqual(tl_event.get_status().status_type, 'FAILED')
# TODO: Create app alerts for async failures (see #1499)
self.assertEqual(
AppAlert.objects.filter(alert_name='zone_move').count(), 0
)


class TestLandingZoneDeleteView(
SampleSheetIOMixin,
Expand Down
1 change: 1 addition & 0 deletions samplesheets/templatetags/samplesheets_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
DEFAULT_TAG_COLOR = 'secondary'
REQUEST_STATUS_CLASSES = {
'ACTIVE': 'bg-info text-white',
'FAILED': 'bg-danger text-white',
'ACCEPTED': 'bg-success text-white',
'REJECTED': 'bg-danger text-white',
}
Expand Down
42 changes: 42 additions & 0 deletions samplesheets/tests/test_views_taskflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
PUBLIC_USER_PASS = 'password'
SOURCE_ID = '0815'
SAMPLE_ID = '0815-N1'
INVALID_REDIS_URL = 'redis://127.0.0.1:6666/0'


class SampleSheetTaskflowMixin:
Expand Down Expand Up @@ -1453,6 +1454,47 @@ def test_accept_one_of_multiple(self):
self.assertEqual(self._get_create_alert_count(self.user), 1)
self.assertEqual(self._get_create_alert_count(self.user_delegate), 1)

@override_settings(REDIS_URL=INVALID_REDIS_URL)
def test_accept_lock_failure(self):
"""Test accepting a delete request with project lock failure"""
self.assert_irods_obj(self.path)

with self.login(self.user_contrib):
self.client.post(
reverse(
'samplesheets:irods_request_create',
kwargs={'project': self.project.sodar_uuid},
),
self.post_data,
)

self.assertEqual(IrodsDataRequest.objects.count(), 1)
obj = IrodsDataRequest.objects.first()
self.assertEqual(self._get_create_alert_count(self.user), 1)
self.assertEqual(self._get_create_alert_count(self.user_delegate), 1)

with self.login(self.user):
response = self.client.post(
reverse(
'samplesheets:irods_request_accept',
kwargs={'irodsdatarequest': obj.sodar_uuid},
),
{'confirm': True},
)
self.assertRedirects(
response,
reverse(
'samplesheets:irods_requests',
kwargs={'project': self.project.sodar_uuid},
),
)

obj.refresh_from_db()
self.assertEqual(obj.status, 'FAILED')
self.assertEqual(self._get_create_alert_count(self.user), 1)
self.assertEqual(self._get_create_alert_count(self.user_delegate), 1)
self.assert_irods_obj(self.path, True)


class TestIrodsRequestRejectView(TestIrodsRequestViewsBase):
"""Tests for IrodsRequestRejectView"""
Expand Down
21 changes: 14 additions & 7 deletions samplesheets/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2278,6 +2278,10 @@ def form_valid(self, request, *args, **kwargs):
app_alerts = get_backend_api('appalerts_backend')
project = self.get_project()
tl_event = None
redirect_url = reverse(
'samplesheets:irods_requests',
kwargs={'project': project.sodar_uuid},
)

try:
obj = IrodsDataRequest.objects.get(
Expand Down Expand Up @@ -2330,7 +2334,15 @@ def form_valid(self, request, *args, **kwargs):
tl_event.set_status('FAILED', str(ex))
obj.status = 'FAILED'
obj.save()
raise ex
if settings.DEBUG:
raise ex
messages.error(
self.request,
'Accepting iRODS data request "{}" failed: {}'.format(
obj.get_display_name(), ex
),
)
return redirect(redirect_url)

# Update cache
if settings.SHEETS_ENABLE_CACHE:
Expand Down Expand Up @@ -2377,12 +2389,7 @@ def form_valid(self, request, *args, **kwargs):
self.request,
'iRODS data request "{}" accepted.'.format(obj.get_display_name()),
)
return redirect(
reverse(
'samplesheets:irods_requests',
kwargs={'project': project.sodar_uuid},
)
)
return redirect(redirect_url)


class IrodsRequestRejectView(
Expand Down
60 changes: 46 additions & 14 deletions taskflowbackend/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured

# Landingzones dependency
from landingzones.models import LandingZone

# Projectroles dependency
from projectroles.models import SODAR_CONSTANTS
from projectroles.plugins import get_backend_api
Expand All @@ -27,6 +30,7 @@
# Local constants
DEFAULT_PERMANENT_USERS = ['client_user', 'rods', 'rodsadmin', 'public']
UNKNOWN_RUN_ERROR = 'Running flow failed: unknown error, see server log'
LOCK_FAIL_MSG = 'Unable to acquire project lock'


class TaskflowAPI:
Expand All @@ -35,6 +39,26 @@ class TaskflowAPI:
class FlowSubmitException(Exception):
"""SODAR Taskflow submission exception"""

@classmethod
def _raise_flow_exception(cls, ex_msg, tl_event=None, zone=None):
"""
Handle and raise exception with flow building or execution. Updates an
associated timeline event if present and flow is in async mode. Also
updates landing zone status if zone is provided.
:param ex_msg: Exception message (string)
:param tl_event: Timeline event or None
:zone: LandingZone object or None
:raise: FlowSubmitException
"""
if tl_event:
tl_event.set_status('FAILED', ex_msg)
# Update landing zone
if zone:
zone.set_status('FAILED', ex_msg)
# TODO: Create app alert for failure if async (see #1499)
raise cls.FlowSubmitException(ex_msg)

@classmethod
def get_flow(
cls,
Expand Down Expand Up @@ -90,28 +114,40 @@ def run_flow(
:param project: Project object
:param force_fail: Force failure (boolean, for testing)
:param async_mode: Submit in async mode (boolean, default=False)
:param tl_event: Timeline ProjectEvent object or None
:param tl_event: Timeline ProjectEvent object or None. Event status will
be updated if the flow is run in async mode.
:return: Dict
"""
flow_result = None
ex_msg = None
coordinator = None
lock = None
# Get zone if present in flow
zone_uuid = flow.flow_data.get('zone_uuid')
zone = None
if zone_uuid:
zone = LandingZone.objects.filter(sodar_uuid=zone_uuid).first()
# Provide tl_event for exceptions only if async mode
tl_event_ex = tl_event if async_mode else None

# Acquire lock if needed
if flow.require_lock:
# Acquire lock
coordinator = lock_api.get_coordinator()
if not coordinator:
raise Exception('Unable to retrieve lock coordinator')
cls._raise_flow_exception(
LOCK_FAIL_MSG + ': Failed to retrieve lock coordinator',
tl_event_ex,
zone,
)
else:
lock_id = str(project.sodar_uuid)
lock = coordinator.get_lock(lock_id)
try:
lock_api.acquire(lock)
except Exception as ex:
raise Exception(
'Unable to acquire project lock: {}'.format(ex)
cls._raise_flow_exception(
LOCK_FAIL_MSG + ': {}'.format(ex), tl_event_ex, zone
)
else:
logger.info('Lock not required (flow.require_lock=False)')
Expand All @@ -122,9 +158,6 @@ def run_flow(
flow.build(force_fail)
except Exception as ex:
ex_msg = 'Error building flow: {}'.format(ex)
# Set timeline status
if tl_event:
tl_event.set_status('FAILED', ex_msg)

# Run flow
if not ex_msg:
Expand All @@ -138,11 +171,8 @@ def run_flow(
if flow_result and tl_event and async_mode:
tl_event.set_status('OK', 'Async submit OK')
# Exception/failure
elif not flow_result:
if not ex_msg:
ex_msg = UNKNOWN_RUN_ERROR
if async_mode and tl_event:
tl_event.set_status('FAILED', ex_msg)
elif not flow_result and not ex_msg:
ex_msg = UNKNOWN_RUN_ERROR

# Release lock if acquired
if flow.require_lock and lock:
Expand All @@ -151,8 +181,10 @@ def run_flow(

# Raise exception if failed, otherwise return result
if ex_msg:
logger.error(ex_msg)
raise cls.FlowSubmitException(ex_msg)
logger.error(ex_msg) # TODO: Isn't this redundant?
# NOTE: Not providing zone here since it's handled by flow
# TODO: Replace RevertLandingZoneFailTask with a call here?
cls._raise_flow_exception(ex_msg, tl_event_ex, None)
return flow_result

def submit(
Expand Down
Loading

0 comments on commit a0619d4

Please sign in to comment.