From 40eb35efb56ddf4ef06243ec433d57200e9029f5 Mon Sep 17 00:00:00 2001 From: Michael Edwards Date: Wed, 21 Nov 2018 20:31:16 +0000 Subject: [PATCH] ZOOKEEPER-1636: cleanup completion list of a failed multi request (from Thawan Kooburat) --- .../zookeeper-client-c/src/zookeeper.c | 17 ++++- .../zookeeper-client-c/tests/TestMulti.cc | 68 +++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/zookeeper-client/zookeeper-client-c/src/zookeeper.c b/zookeeper-client/zookeeper-client-c/src/zookeeper.c index ab40ae99586..239ffa2f3cd 100644 --- a/zookeeper-client/zookeeper-client-c/src/zookeeper.c +++ b/zookeeper-client/zookeeper-client-c/src/zookeeper.c @@ -2594,6 +2594,17 @@ completion_list_t *dequeue_completion(completion_head_t *list) return cptr; } +// cleanup completion list of a failed multi request +static void cleanup_failed_multi(zhandle_t *zh, int xid, int rc, completion_list_t *cptr) { + completion_list_t *entry; + completion_head_t *clist = &cptr->c.clist; + while ((entry = dequeue_completion(clist)) != NULL) { + // Fake failed response for all sub-requests + deserialize_response(zh, entry->c.type, xid, 1, rc, entry, NULL); + destroy_completion_entry(entry); + } +} + static int deserialize_multi(zhandle_t *zh, int xid, completion_list_t *cptr, struct iarchive *ia) { int rc = 0; @@ -2723,8 +2734,12 @@ static void deserialize_response(zhandle_t *zh, int type, int xid, int failed, i case COMPLETION_MULTI: LOG_DEBUG(LOGCALLBACK(zh), "Calling COMPLETION_MULTI for xid=%#x failed=%d rc=%d", cptr->xid, failed, rc); - rc = deserialize_multi(zh, xid, cptr, ia); assert(cptr->c.void_result); + if (failed) { + cleanup_failed_multi(zh, xid, rc, cptr); + } else { + rc = deserialize_multi(zh, xid, cptr, ia); + } cptr->c.void_result(rc, cptr->data); break; default: diff --git a/zookeeper-client/zookeeper-client-c/tests/TestMulti.cc b/zookeeper-client/zookeeper-client-c/tests/TestMulti.cc index 0ee9566ffc0..ec1096df765 100644 --- a/zookeeper-client/zookeeper-client-c/tests/TestMulti.cc +++ b/zookeeper-client/zookeeper-client-c/tests/TestMulti.cc @@ -178,6 +178,7 @@ class Zookeeper_multi : public CPPUNIT_NS::TestFixture CPPUNIT_TEST(testCheck); CPPUNIT_TEST(testWatch); CPPUNIT_TEST(testSequentialNodeCreateInAsyncMulti); + CPPUNIT_TEST(testBigAsyncMulti); CPPUNIT_TEST_SUITE_END(); static void watcher(zhandle_t *, int type, int state, const char *path,void*v){ @@ -248,6 +249,16 @@ class Zookeeper_multi : public CPPUNIT_NS::TestFixture count++; } + static void multi_completion_fn_rc(int rc, const void *data) { + count++; + *((int*) data) = rc; + } + + static void create_completion_fn_rc(int rc, const char* value, const void *data) { + count++; + *((int*) data) = rc; + } + static void waitForMultiCompletion(int seconds) { time_t expires = time(0) + seconds; while(count == 0 && time(0) < expires) { @@ -654,6 +665,63 @@ class Zookeeper_multi : public CPPUNIT_NS::TestFixture // wait for multi completion in doMultiInWatch waitForMultiCompletion(5); } + + /** + * ZOOKEEPER-1636: If request is too large, the server will cut the + * connection without sending response packet. The client will try to + * process completion on multi request and eventually cause SIGSEGV + */ + void testBigAsyncMulti() { + int rc; + int callback_rc = (int) ZOK; + watchctx_t ctx; + zhandle_t *zk = createClient(&ctx); + + // The request should be more than 1MB which exceeds the default + // jute.maxbuffer and causes the server to drop client connection + const int iteration = 500; + const int type_count = 3; + const int nops = iteration * type_count; + char buff[1024]; + + zoo_op_result_t results[nops]; + zoo_op_t ops[nops]; + struct Stat* s[nops]; + int index = 0; + + // Test that we deliver error to 3 types of sub-request + for (int i = 0; i < iteration; ++i) { + zoo_set_op_init(&ops[index++], "/x", buff, sizeof(buff), -1, s[i]); + zoo_create_op_init(&ops[index++], "/x", buff, sizeof(buff), + &ZOO_OPEN_ACL_UNSAFE, ZOO_SEQUENCE, NULL, 0); + zoo_delete_op_init(&ops[index++], "/x", -1); + } + + rc = zoo_amulti(zk, nops, ops, results, multi_completion_fn_rc, + + &callback_rc); + CPPUNIT_ASSERT_EQUAL((int) ZOK, rc); + + waitForMultiCompletion(10); + // With the bug, we will get SIGSEGV before reaching this point + CPPUNIT_ASSERT_EQUAL((int) ZCONNECTIONLOSS, callback_rc); + + // Make sure that all sub-request completions get processed + for (int i = 0; i < nops; ++i) { + CPPUNIT_ASSERT_EQUAL((int) ZCONNECTIONLOSS, results[i].err); + } + + // The handle should be able to recover itself. + ctx.waitForConnected(zk); + + // Try to submit another async request to see if it get processed + // correctly + rc = zoo_acreate(zk, "/target", "", 0, &ZOO_OPEN_ACL_UNSAFE, 0, + create_completion_fn_rc, &callback_rc); + CPPUNIT_ASSERT_EQUAL((int) ZOK, rc); + + waitForMultiCompletion(10); + CPPUNIT_ASSERT_EQUAL((int) ZOK, callback_rc); + } /** * ZOOKEEPER-1624: PendingChanges of create sequential node request didn't