ceph · votdev · May 9, 2018 · Apr 24, 2018 · Apr 24, 2018 · Apr 23, 2018
diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py
@@ -155,7 +155,7 @@ def _assertIsInst(cls, v1, v2):
     @classmethod
     def _task_request(cls, method, url, data, timeout):
         res = cls._request(url, method, data)
-        cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 409])
+        cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400])
 
         if cls._resp.status_code != 202:
             log.info("task finished immediately")
@@ -166,53 +166,41 @@ def _task_request(cls, method, url, data, timeout):
         task_name = res['name']
         task_metadata = res['metadata']
 
-        class Waiter(threading.Thread):
-            def __init__(self, task_name, task_metadata):
-                super(Waiter, self).__init__()
-                self.task_name = task_name
-                self.task_metadata = task_metadata
-                self.ev = threading.Event()
-                self.abort = False
-                self.res_task = None
-
-            def run(self):
-                running = True
-                while running and not self.abort:
-                    log.info("task (%s, %s) is still executing", self.task_name,
-                             self.task_metadata)
-                    time.sleep(1)
-                    res = cls._get('/api/task?name={}'.format(self.task_name))
-                    for task in res['finished_tasks']:
-                        if task['metadata'] == self.task_metadata:
-                            # task finished
-                            running = False
-                            self.res_task = task
-                            self.ev.set()
-
-        thread = Waiter(task_name, task_metadata)
-        thread.start()
-        status = thread.ev.wait(timeout)
-        if not status:
-            # timeout expired
-            thread.abort = True
-            thread.join()
-            raise Exception("Waiting for task ({}, {}) to finish timed out"
-                            .format(task_name, task_metadata))
+        retries = int(timeout)
+        res_task = None
+        while retries > 0 and not res_task:
+            retries -= 1
+            log.info("task (%s, %s) is still executing", task_name,
+                     task_metadata)
+            time.sleep(1)
+            _res = cls._get('/api/task?name={}'.format(task_name))
+            cls._assertEq(cls._resp.status_code, 200)
+            executing_tasks = [task for task in _res['executing_tasks'] if
+                               task['metadata'] == task_metadata]
+            finished_tasks = [task for task in _res['finished_tasks'] if
+                               task['metadata'] == task_metadata]
+            if not executing_tasks and finished_tasks:
+                res_task = finished_tasks[0]
+
+        if retries <= 0:
+            raise Exception("Waiting for task ({}, {}) to finish timed out. {}"
+                            .format(task_name, task_metadata, _res))
+
         log.info("task (%s, %s) finished", task_name, task_metadata)
-        if thread.res_task['success']:
+        if res_task['success']:
             if method == 'POST':
                 cls._resp.status_code = 201
             elif method == 'PUT':
                 cls._resp.status_code = 200
             elif method == 'DELETE':
                 cls._resp.status_code = 204
-            return thread.res_task['ret_value']
+            return res_task['ret_value']
         else:
-            if 'status' in thread.res_task['exception']:
-                cls._resp.status_code = thread.res_task['exception']['status']
+            if 'status' in res_task['exception']:
+                cls._resp.status_code = res_task['exception']['status']
             else:
                 cls._resp.status_code = 500
-            return thread.res_task['exception']
+            return res_task['exception']
 
     @classmethod
     def _task_post(cls, url, data=None, timeout=60):

diff --git a/qa/tasks/mgr/dashboard/test_cephfs.py b/qa/tasks/mgr/dashboard/test_cephfs.py
@@ -37,3 +37,14 @@ def test_cephfs_mds_counters(self):
 
         self.assertIsInstance(data, dict)
         self.assertIsNotNone(data)
+
+    @authenticate
+    def test_cephfs_mds_counters_wrong(self):
+        data = self._get("/api/cephfs/mds_counters/baadbaad")
+        self.assertStatus(400)
+        self.assertJsonBody({
+                "component": 'cephfs',
+                "code": "invalid_cephfs_id",
+                "detail": "Invalid cephfs id baadbaad"
+             })
+
diff --git a/qa/tasks/mgr/dashboard/test_pool.py b/qa/tasks/mgr/dashboard/test_pool.py
@@ -18,9 +18,6 @@ def tearDownClass(cls):
             cls._ceph_cmd(['osd', 'pool', 'delete', name, name, '--yes-i-really-really-mean-it'])
         cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile'])
 
-
-
-
     @authenticate
     def test_pool_list(self):
         data = self._get("/api/pool")
@@ -145,6 +142,17 @@ def test_pool_create(self):
         for data in pools:
             self._pool_create(data)
 
+    @authenticate
+    def test_pool_create_fail(self):
+        data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'}
+        self._post('/api/pool/', data)
+        self.assertStatus(400)
+        self.assertJsonBody({
+            'component': 'pool',
+            'code': "2",
+            'detail': "specified rule dnf doesn't exist"
+        })
+
     @authenticate
     def test_pool_info(self):
         info_data = self._get("/api/pool/_info")

diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py
@@ -260,9 +260,12 @@ def test_create_rbd_twice(self):
         res = self.create_image('rbd', 'test_rbd_twice', 10240)
 
         res = self.create_image('rbd', 'test_rbd_twice', 10240)
-        self.assertStatus(409)
-        self.assertEqual(res, {"errno": 17, "status": 409, "component": "rbd",
-                               "detail": "[errno 17] error creating image"})
+        self.assertStatus(400)
+        self.assertEqual(res, {"code": '17', 'status': 400, "component": "rbd",
+                               "detail": "[errno 17] error creating image",
+                               'task': {'name': 'rbd/create',
+                                        'metadata': {'pool_name': 'rbd',
+                                                     'image_name': 'test_rbd_twice'}}})
         self.remove_image('rbd', 'test_rbd_twice')
         self.assertStatus(204)
 
@@ -316,9 +319,12 @@ def test_disk_usage(self):
 
     def test_delete_non_existent_image(self):
         res = self.remove_image('rbd', 'i_dont_exist')
-        self.assertStatus(409)
-        self.assertEqual(res, {"errno": 2, "status": 409, "component": "rbd",
-                               "detail": "[errno 2] error removing image"})
+        self.assertStatus(400)
+        self.assertEqual(res, {u'code': u'2', "status": 400, "component": "rbd",
+                               "detail": "[errno 2] error removing image",
+                               'task': {'name': 'rbd/delete',
+                                        'metadata': {'pool_name': 'rbd',
+                                                     'image_name': 'i_dont_exist'}}})
 
     def test_image_delete(self):
         self.create_image('rbd', 'delete_me', 2**30)
@@ -472,9 +478,9 @@ def test_clone(self):
                                      'snap_name': 'snap1'})
 
         res = self.remove_image('rbd', 'cimg')
-        self.assertStatus(409)
-        self.assertIn('errno', res)
-        self.assertEqual(res['errno'], 39)
+        self.assertStatus(400)
+        self.assertIn('code', res)
+        self.assertEqual(res['code'], '39')
 
         self.remove_image('rbd', 'cimg-clone')
         self.assertStatus(204)
@@ -525,7 +531,7 @@ def test_flatten(self):
         self.assertIsNotNone(img['parent'])
 
         self.flatten_image('rbd_iscsi', 'img1_snapf_clone')
-        self.assertStatus(200)
+        self.assertStatus([200, 201])
 
         img = self._get('/api/block/image/rbd_iscsi/img1_snapf_clone')
         self.assertStatus(200)

diff --git a/src/pybind/mgr/dashboard/HACKING.rst b/src/pybind/mgr/dashboard/HACKING.rst
@@ -905,3 +905,82 @@ Usage example:
       })
     }
   }
+
+Error Handling in Python
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Good error handling is a key requirement in creating a good user experience
+and providing a good API.
+
+Dashboard code should not duplicate C++ code. Thus, if error handling in C++
+is sufficient to provide good feedback, a new wrapper to catch these errors
+is not necessary. On the other hand, input validation is the best place to
+catch errors and generate the best error messages. If required, generate
+errors as soon as possible.
+
+The backend provides few standard ways of returning errors.
+
+First, there is a generic Internal Server Error::
+
+    Status Code: 500
+    {
+        "version": <cherrypy version, e.g. 13.1.0>,
+        "detail": "The server encountered an unexpected condition which prevented it from fulfilling the request.",
+    }
+
+
+For errors generated by the backend, we provide a standard error
+format::
+
+    Status Code: 400
+    {
+        "detail": str(e),     # E.g. "[errno -42] <some error message>"
+        "component": "rbd",   # this can be null to represent a global error code
+        "code": "3",          # Or a error name, e.g. "code": "some_error_key"
+    }
+
+
+In case, the API Endpoints uses @ViewCache to temporarily cache results,
+the error looks like so::
+
+    Status Code 400
+    {
+        "detail": str(e),     # E.g. "[errno -42] <some error message>"
+        "component": "rbd",   # this can be null to represent a global error code
+        "code": "3",          # Or a error name, e.g. "code": "some_error_key"
+        'status': 3,          # Indicating the @ViewCache error status
+    }
+
+In case, the API Endpoints uses a task the error looks like so::
+
+    Status Code 400
+    {
+        "detail": str(e),     # E.g. "[errno -42] <some error message>"
+        "component": "rbd",   # this can be null to represent a global error code
+        "code": "3",          # Or a error name, e.g. "code": "some_error_key"
+        "task": {             # Information about the task itself
+            "name": "taskname",
+            "metadata": {...}
+        }
+    }
+
+
+Our WebUI should show errors generated by the API to the user. Especially
+field-related errors in wizards and dialogs or show non-intrusive notifications.
+
+Handling exceptions in Python should be an exception. In general, we
+should have few exception handlers in our project. Per default, propagate
+errors to the API, as it will take care of all exceptions anyway. In general,
+log the exception by adding ``logger.exception()`` with a description to the
+handler.
+
+We need to distinguish between user errors from internal errors and
+programming errors. Using different exception types will ease the
+task for the API layer and for the user interface:
+
+Standard Python errors, like ``SystemError``, ``ValueError`` or ``KeyError``
+will end up as internal server errors in the API.
+
+In general, do not ``return`` error responses in the REST API. They will be
+returned by the  error handler. Instead, raise the appropriate exception.
+