From f4ce583ea0623c47608a3cf861a722636ddee008 Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Tue, 16 May 2017 16:09:36 +0300 Subject: [PATCH 01/10] Fix for shutting down the client properly if the cluster connection attempts all fail. The Thread join now will detect if it is being called from running thread and avoid deadlock. Removed the unneded test testTcpSocketConnectionTimeout_withIntMax. Added some additional tests for testing thread functionality. --- .../connection/ClusterListenerThread.cpp | 1 + .../hazelcast/client/spi/ClusterService.cpp | 4 +- hazelcast/src/hazelcast/util/Thread.cpp | 12 +++ .../test/src/cluster/ClientConnectionTest.cpp | 7 -- hazelcast/test/src/cluster/ClusterTest.cpp | 87 +++++++++++++++++++ hazelcast/test/src/util/ClientUtilTest.cpp | 35 +++++++- 6 files changed, 137 insertions(+), 9 deletions(-) diff --git a/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp b/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp index 5f0e777306..b65ee7530a 100644 --- a/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp +++ b/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp @@ -85,6 +85,7 @@ namespace hazelcast { std::string("Error while connecting to cluster! =>") + e.what()); isStartedSuccessfully = false; startLatch.countDown(); + clientContext.getLifecycleService().shutdown(); return; } } diff --git a/hazelcast/src/hazelcast/client/spi/ClusterService.cpp b/hazelcast/src/hazelcast/client/spi/ClusterService.cpp index 0769f8d85b..80b455b0a6 100644 --- a/hazelcast/src/hazelcast/client/spi/ClusterService.cpp +++ b/hazelcast/src/hazelcast/client/spi/ClusterService.cpp @@ -69,7 +69,9 @@ namespace hazelcast { } void ClusterService::shutdown() { - active = false; + if (!active.compareAndSet(true, false)) { + return; + } if (NULL != clusterThread.getThread()) { // avoid anyone waiting on the start latch to get stuck clusterThread.startLatch.countDown(); diff --git a/hazelcast/src/hazelcast/util/Thread.cpp b/hazelcast/src/hazelcast/util/Thread.cpp index 56beb5c396..caef24d6e5 100644 --- a/hazelcast/src/hazelcast/util/Thread.cpp +++ b/hazelcast/src/hazelcast/util/Thread.cpp @@ -50,6 +50,7 @@ namespace hazelcast { , isJoined(false) , isInterrupted(false){ init(func, arg0, arg1, arg2, arg3); + } long Thread::getThreadID() { @@ -92,6 +93,11 @@ namespace hazelcast { if (!isJoined.compareAndSet(false, true)) { return true; } + if (id == getThreadID()) { + // called from inside the thread, deadlock possibility + return false; + } + DWORD err = WaitForSingleObject(thread, INFINITE); if (err != WAIT_OBJECT_0) { return false; @@ -197,6 +203,12 @@ namespace hazelcast { if (!isJoined.compareAndSet(false, true)) { return true; } + + if (pthread_equal(thread, pthread_self())) { + // called from inside the thread, deadlock possibility + return false; + } + int err = pthread_join(thread, NULL); if (EINVAL == err || ESRCH == err || EDEADLK == err) { isJoined = false; diff --git a/hazelcast/test/src/cluster/ClientConnectionTest.cpp b/hazelcast/test/src/cluster/ClientConnectionTest.cpp index 40c0252848..6c070d0280 100644 --- a/hazelcast/test/src/cluster/ClientConnectionTest.cpp +++ b/hazelcast/test/src/cluster/ClientConnectionTest.cpp @@ -48,13 +48,6 @@ namespace hazelcast { ASSERT_THROW(HazelcastClient client(config), exception::IllegalStateException); } - TEST_F(ClientConnectionTest, testTcpSocketConnectionTimeout_withIntMax) { - HazelcastServer instance(*g_srvFactory, true); - ClientConfig config; - config.addAddress(Address("8.8.8.8", 5701)); - ASSERT_THROW(HazelcastClient client(config), exception::IllegalStateException); - } - #ifdef HZ_BUILD_WITH_SSL TEST_F(ClientConnectionTest, testSslSocketTimeoutToOutsideNetwork) { HazelcastServer instance(*g_srvFactory, true); diff --git a/hazelcast/test/src/cluster/ClusterTest.cpp b/hazelcast/test/src/cluster/ClusterTest.cpp index e51540b04b..6dbd505eb5 100644 --- a/hazelcast/test/src/cluster/ClusterTest.cpp +++ b/hazelcast/test/src/cluster/ClusterTest.cpp @@ -58,6 +58,64 @@ namespace hazelcast { } }; + class ClientAllStatesListener : public LifecycleListener { + public: + + ClientAllStatesListener(util::CountDownLatch *startingLatch, util::CountDownLatch *startedLatch = NULL, + util::CountDownLatch *connectedLatch = NULL, + util::CountDownLatch *disconnectedLatch = NULL, + util::CountDownLatch *shuttingDownLatch = NULL, + util::CountDownLatch *shutdownLatch = NULL) + : startingLatch(startingLatch), startedLatch(startedLatch), connectedLatch(connectedLatch), + disconnectedLatch(disconnectedLatch), shuttingDownLatch(shuttingDownLatch), + shutdownLatch(shutdownLatch) { } + + virtual void stateChanged(const LifecycleEvent &lifecycleEvent) { + switch (lifecycleEvent.getState()) { + case LifecycleEvent::STARTING: + if (startingLatch) { + startingLatch->countDown(); + } + break; + case LifecycleEvent::STARTED: + if (startedLatch) { + startedLatch->countDown(); + } + break; + case LifecycleEvent::CLIENT_CONNECTED: + if (connectedLatch) { + connectedLatch->countDown(); + } + break; + case LifecycleEvent::CLIENT_DISCONNECTED: + if (disconnectedLatch) { + disconnectedLatch->countDown(); + } + break; + case LifecycleEvent::SHUTTING_DOWN: + if (shuttingDownLatch) { + shuttingDownLatch->countDown(); + } + break; + case LifecycleEvent::SHUTDOWN: + if (shutdownLatch) { + shutdownLatch->countDown(); + } + break; + default: + FAIL() << "No such state expected:" << lifecycleEvent.getState(); + } + } + + private: + util::CountDownLatch *startingLatch; + util::CountDownLatch *startedLatch; + util::CountDownLatch *connectedLatch; + util::CountDownLatch *disconnectedLatch; + util::CountDownLatch *shuttingDownLatch; + util::CountDownLatch *shutdownLatch; + }; + class SampleInitialListener : public InitialMembershipListener { public: SampleInitialListener(util::CountDownLatch &_memberAdded, util::CountDownLatch &_attributeLatch, @@ -253,6 +311,35 @@ namespace hazelcast { ASSERT_THROW(HazelcastClient client(clientConfig), exception::IllegalStateException); } + TEST_P(ClusterTest, testAllClientStates) { + HazelcastServer instance(*g_srvFactory); + + ClientConfig clientConfig; + clientConfig.setAttemptPeriod(1000); + clientConfig.setConnectionAttemptLimit(1); + util::CountDownLatch startingLatch(1); + util::CountDownLatch startedLatch(1); + util::CountDownLatch connectedLatch(1); + util::CountDownLatch disconnectedLatch(1); + util::CountDownLatch shuttingDownLatch(1); + util::CountDownLatch shutdownLatch(1); + ClientAllStatesListener listener(&startingLatch, &startedLatch, &connectedLatch, &disconnectedLatch, + &shuttingDownLatch, &shutdownLatch); + clientConfig.addListener(&listener); + + HazelcastClient client(clientConfig); + + ASSERT_TRUE(startingLatch.await(0)); + ASSERT_TRUE(startedLatch.await(0)); + ASSERT_TRUE(connectedLatch.await(0)); + + instance.shutdown(); + + ASSERT_TRUE(disconnectedLatch.await(3)); + ASSERT_TRUE(shuttingDownLatch.await(5)); + ASSERT_TRUE(shutdownLatch.await(500)); + } + #ifdef HZ_BUILD_WITH_SSL INSTANTIATE_TEST_CASE_P(All, ClusterTest, diff --git a/hazelcast/test/src/util/ClientUtilTest.cpp b/hazelcast/test/src/util/ClientUtilTest.cpp index fda6c5c0b1..6b8a2c1c0b 100644 --- a/hazelcast/test/src/util/ClientUtilTest.cpp +++ b/hazelcast/test/src/util/ClientUtilTest.cpp @@ -20,6 +20,7 @@ #include "hazelcast/util/Util.h" #include "hazelcast/util/Future.h" #include "hazelcast/util/Thread.h" +#include "hazelcast/util/CountDownLatch.h" #include #include @@ -29,7 +30,7 @@ namespace hazelcast { namespace client { namespace test { class ClientUtilTest : public ::testing::Test { - public: + protected: static void wakeTheConditionUp(util::ThreadArgs& args) { util::Mutex *mutex = (util::Mutex *)args.arg0; util::ConditionVariable *cv = (util::ConditionVariable *)args.arg1; @@ -55,6 +56,19 @@ namespace hazelcast { std::auto_ptr exception(new exception::IException("exceptionName", "details")); future->set_exception(exception); } + + static void cancelJoinFromRunningThread(util::ThreadArgs& args) { + util::Thread *currentThread = args.currentThread; + util::CountDownLatch *latch = (util::CountDownLatch *) args.arg0; + currentThread->cancel(); + ASSERT_FALSE(currentThread->join()); + latch->countDown(); + } + + static void notifyExitingThread(util::ThreadArgs& args) { + util::CountDownLatch *latch = (util::CountDownLatch *) args.arg0; + latch->countDown(); + } }; TEST_F(ClientUtilTest, testConditionWaitTimeout) { @@ -152,6 +166,25 @@ namespace hazelcast { ASSERT_EQ(threadName, thread.getThreadName()); } + TEST_F (ClientUtilTest, testThreadJoinAfterThreadExited) { + std::string threadName = "myThreadName"; + util::CountDownLatch latch(1); + util::Thread thread(threadName, notifyExitingThread, &latch); + ASSERT_TRUE(latch.await(2)); + // guarantee that the thread exited + util::sleep(1); + + // call join after thread exit + thread.join(); + } + + TEST_F (ClientUtilTest, testCancelJoinItselfFromTheRunningThread) { + std::string threadName = "myThreadName"; + util::CountDownLatch latch(1); + util::Thread thread(threadName, cancelJoinFromRunningThread, &latch); + ASSERT_TRUE(latch.await(1000)); + } + void sleepyThread(util::ThreadArgs& args) { int sleepTime = *(int *)args.arg0; args.currentThread->interruptibleSleep(sleepTime); From b4527c525ce64a58f5afe1aff78df18f8c1e2acc Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Wed, 31 May 2017 16:24:01 +0300 Subject: [PATCH 02/10] Rebased with master. --- hazelcast/test/src/cluster/ClusterTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hazelcast/test/src/cluster/ClusterTest.cpp b/hazelcast/test/src/cluster/ClusterTest.cpp index 6dbd505eb5..48286b89b4 100644 --- a/hazelcast/test/src/cluster/ClusterTest.cpp +++ b/hazelcast/test/src/cluster/ClusterTest.cpp @@ -307,7 +307,7 @@ namespace hazelcast { } TEST_P(ClusterTest, testBehaviourWhenClusterNotFound) { - ClientConfig clientConfig; + ClientConfig &clientConfig = *const_cast(GetParam()); ASSERT_THROW(HazelcastClient client(clientConfig), exception::IllegalStateException); } From dbd4c6cfaf6a0c8a86756335be52d8ee48b41061 Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Thu, 1 Jun 2017 12:53:50 +0300 Subject: [PATCH 03/10] Corrected the shutdown order of services, changed to be the same as Java client. Corrected what is being tested at IssueTest.issue221. --- hazelcast/src/hazelcast/client/connection/IOSelector.cpp | 5 +++++ hazelcast/src/hazelcast/client/spi/LifecycleService.cpp | 6 +++--- hazelcast/test/src/issues/IssueTest.cpp | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hazelcast/src/hazelcast/client/connection/IOSelector.cpp b/hazelcast/src/hazelcast/client/connection/IOSelector.cpp index 8e3187483f..893daff0fa 100644 --- a/hazelcast/src/hazelcast/client/connection/IOSelector.cpp +++ b/hazelcast/src/hazelcast/client/connection/IOSelector.cpp @@ -100,6 +100,11 @@ namespace hazelcast { void IOSelector::shutdown() { isAlive = false; + try { + wakeUp(); + } catch (exception::IOException &) { + // suppress io exception + } } void IOSelector::addTask(ListenerTask *listenerTask) { diff --git a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp index e827030366..0dcaca8b4e 100644 --- a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp +++ b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp @@ -67,10 +67,10 @@ namespace hazelcast { if (!active.compareAndSet(true, false)) return; fireLifecycleEvent(LifecycleEvent::SHUTTING_DOWN); - clientContext.getInvocationService().shutdown(); - clientContext.getPartitionService().shutdown(); - clientContext.getClusterService().shutdown(); clientContext.getConnectionManager().shutdown(); + clientContext.getClusterService().shutdown(); + clientContext.getPartitionService().shutdown(); + clientContext.getInvocationService().shutdown(); clientContext.getNearCacheManager().destroyAllNearCaches(); fireLifecycleEvent(LifecycleEvent::SHUTDOWN); } diff --git a/hazelcast/test/src/issues/IssueTest.cpp b/hazelcast/test/src/issues/IssueTest.cpp index c613624a93..7d03d40705 100644 --- a/hazelcast/test/src/issues/IssueTest.cpp +++ b/hazelcast/test/src/issues/IssueTest.cpp @@ -138,7 +138,10 @@ namespace hazelcast { } catch (exception::IOException &) { // this is the expected exception, test passes, do nothing } catch (exception::IException &e) { - FAIL() << "IException is received while we expect IOException. Received exception:" << e.what(); + std::string msg = e.what(); + if (msg.find("ConnectionManager is not active") == std::string::npos) { + FAIL() << "Unexpected exception. Received exception:" << msg; + } } } From 468d7e064e463c323286cafa404a0bb6ae968140 Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Thu, 1 Jun 2017 17:38:04 +0300 Subject: [PATCH 04/10] Prevent client destruction before shutdown is completed. Failing to do this causes invalid memory access since the client is destroyed and all its services are destroyed. --- hazelcast/include/hazelcast/client/spi/LifecycleService.h | 2 +- hazelcast/src/hazelcast/client/spi/LifecycleService.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hazelcast/include/hazelcast/client/spi/LifecycleService.h b/hazelcast/include/hazelcast/client/spi/LifecycleService.h index 048fa2a929..541f3b0e0e 100644 --- a/hazelcast/include/hazelcast/client/spi/LifecycleService.h +++ b/hazelcast/include/hazelcast/client/spi/LifecycleService.h @@ -67,7 +67,7 @@ namespace hazelcast { std::set listeners; util::Mutex listenerLock; util::AtomicBoolean active; - + util::Mutex shutdownLock; }; } diff --git a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp index 0dcaca8b4e..def171e4dc 100644 --- a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp +++ b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp @@ -64,6 +64,10 @@ namespace hazelcast { } void LifecycleService::shutdown() { + // Take this lock to prevent client being destructed. If shutdown is called from ClusterListenerThread + // and this thread starts the shutdown, then we need to prevent client from destruction + util::LockGuard guard(shutdownLock); + if (!active.compareAndSet(true, false)) return; fireLifecycleEvent(LifecycleEvent::SHUTTING_DOWN); From 01d12531b41893fd80053bbf8d1fb2d21b10e1ee Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Thu, 1 Jun 2017 18:33:23 +0300 Subject: [PATCH 05/10] Fixes when IOSelector sets isAlive to true and prevents wakeup if the wakeup socket is not initialized yet. --- .../src/hazelcast/client/connection/IOSelector.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hazelcast/src/hazelcast/client/connection/IOSelector.cpp b/hazelcast/src/hazelcast/client/connection/IOSelector.cpp index 893daff0fa..10145e8f2c 100644 --- a/hazelcast/src/hazelcast/client/connection/IOSelector.cpp +++ b/hazelcast/src/hazelcast/client/connection/IOSelector.cpp @@ -42,7 +42,6 @@ namespace hazelcast { :connectionManager(connectionManager) { t.tv_sec = 5; t.tv_usec = 0; - isAlive = true; } void IOSelector::staticListen(util::ThreadArgs &args) { @@ -55,6 +54,10 @@ namespace hazelcast { } void IOSelector::wakeUp() { + if (!wakeUpSocket.get()) { + return; + } + int wakeUpSignal = 9; try { wakeUpSocket->send(&wakeUpSignal, sizeof(int)); @@ -91,6 +94,7 @@ namespace hazelcast { sleepingSocket->setBlocking(false); wakeUpSocketSet.insertSocket(sleepingSocket.get()); wakeUpListenerSocketId = sleepingSocket->getSocketId(); + isAlive = true; return true; } else { util::ILogger::getLogger().severe("IOSelector::initListenSocket " + std::string(strerror(errno))); @@ -99,7 +103,9 @@ namespace hazelcast { } void IOSelector::shutdown() { - isAlive = false; + if (!isAlive.compareAndSet(true, false)) { + return; + } try { wakeUp(); } catch (exception::IOException &) { From 7176bd2709584697b9845b002dc9af78e8344e33 Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Fri, 2 Jun 2017 12:57:19 +0300 Subject: [PATCH 06/10] Added shutdownAndWait api to lifecycle service to be called by the HazelcastClient destructor. --- .../include/hazelcast/client/spi/LifecycleService.h | 5 ++++- hazelcast/src/hazelcast/client/HazelcastClient.cpp | 2 +- .../src/hazelcast/client/spi/LifecycleService.cpp | 13 ++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/hazelcast/include/hazelcast/client/spi/LifecycleService.h b/hazelcast/include/hazelcast/client/spi/LifecycleService.h index 541f3b0e0e..ac6aaf2920 100644 --- a/hazelcast/include/hazelcast/client/spi/LifecycleService.h +++ b/hazelcast/include/hazelcast/client/spi/LifecycleService.h @@ -25,6 +25,7 @@ #include "hazelcast/util/HazelcastDll.h" #include "hazelcast/util/Mutex.h" #include "hazelcast/util/AtomicBoolean.h" +#include "hazelcast/util/CountDownLatch.h" #include #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) @@ -55,6 +56,8 @@ namespace hazelcast { void shutdown(); + void shutdownAndWait(); + void addLifecycleListener(LifecycleListener *lifecycleListener); bool removeLifecycleListener(LifecycleListener *lifecycleListener); @@ -67,7 +70,7 @@ namespace hazelcast { std::set listeners; util::Mutex listenerLock; util::AtomicBoolean active; - util::Mutex shutdownLock; + util::CountDownLatch shutdownLatch; }; } diff --git a/hazelcast/src/hazelcast/client/HazelcastClient.cpp b/hazelcast/src/hazelcast/client/HazelcastClient.cpp index 495992a838..fbe3da7f8e 100644 --- a/hazelcast/src/hazelcast/client/HazelcastClient.cpp +++ b/hazelcast/src/hazelcast/client/HazelcastClient.cpp @@ -60,7 +60,7 @@ namespace hazelcast { } HazelcastClient::~HazelcastClient() { - lifecycleService.shutdown(); + lifecycleService.shutdownAndWait(); } diff --git a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp index def171e4dc..d417b8d20f 100644 --- a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp +++ b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp @@ -33,7 +33,8 @@ namespace hazelcast { LifecycleService::LifecycleService(ClientContext &clientContext, const ClientConfig &clientConfig) :clientContext(clientContext) - , active(false) { + , active(false) + , shutdownLatch(1) { std::set const &lifecycleListeners = clientConfig.getLifecycleListeners(); listeners.insert(lifecycleListeners.begin(), lifecycleListeners.end()); @@ -64,10 +65,6 @@ namespace hazelcast { } void LifecycleService::shutdown() { - // Take this lock to prevent client being destructed. If shutdown is called from ClusterListenerThread - // and this thread starts the shutdown, then we need to prevent client from destruction - util::LockGuard guard(shutdownLock); - if (!active.compareAndSet(true, false)) return; fireLifecycleEvent(LifecycleEvent::SHUTTING_DOWN); @@ -77,6 +74,12 @@ namespace hazelcast { clientContext.getInvocationService().shutdown(); clientContext.getNearCacheManager().destroyAllNearCaches(); fireLifecycleEvent(LifecycleEvent::SHUTDOWN); + shutdownLatch.countDown(); + } + + void LifecycleService::shutdownAndWait() { + shutdown(); + shutdownLatch.await(); } void LifecycleService::addLifecycleListener(LifecycleListener *lifecycleListener) { From d418ddfcbf109d37bc0ccb26f442465f319409d1 Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Fri, 2 Jun 2017 16:25:56 +0300 Subject: [PATCH 07/10] Removed the new introduced shutdownAndWait method on LifeCycleService. --- .../include/hazelcast/client/spi/LifecycleService.h | 2 -- hazelcast/src/hazelcast/client/HazelcastClient.cpp | 2 +- .../client/connection/ClusterListenerThread.cpp | 2 +- hazelcast/src/hazelcast/client/spi/LifecycleService.cpp | 9 +++------ 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/hazelcast/include/hazelcast/client/spi/LifecycleService.h b/hazelcast/include/hazelcast/client/spi/LifecycleService.h index ac6aaf2920..aaf2391a53 100644 --- a/hazelcast/include/hazelcast/client/spi/LifecycleService.h +++ b/hazelcast/include/hazelcast/client/spi/LifecycleService.h @@ -56,8 +56,6 @@ namespace hazelcast { void shutdown(); - void shutdownAndWait(); - void addLifecycleListener(LifecycleListener *lifecycleListener); bool removeLifecycleListener(LifecycleListener *lifecycleListener); diff --git a/hazelcast/src/hazelcast/client/HazelcastClient.cpp b/hazelcast/src/hazelcast/client/HazelcastClient.cpp index fbe3da7f8e..495992a838 100644 --- a/hazelcast/src/hazelcast/client/HazelcastClient.cpp +++ b/hazelcast/src/hazelcast/client/HazelcastClient.cpp @@ -60,7 +60,7 @@ namespace hazelcast { } HazelcastClient::~HazelcastClient() { - lifecycleService.shutdownAndWait(); + lifecycleService.shutdown(); } diff --git a/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp b/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp index b65ee7530a..c94558df40 100644 --- a/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp +++ b/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp @@ -84,8 +84,8 @@ namespace hazelcast { util::ILogger::getLogger().severe( std::string("Error while connecting to cluster! =>") + e.what()); isStartedSuccessfully = false; - startLatch.countDown(); clientContext.getLifecycleService().shutdown(); + startLatch.countDown(); return; } } diff --git a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp index d417b8d20f..0ce8c24209 100644 --- a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp +++ b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp @@ -65,8 +65,10 @@ namespace hazelcast { } void LifecycleService::shutdown() { - if (!active.compareAndSet(true, false)) + if (!active.compareAndSet(true, false)) { + shutdownLatch.await(); return; + } fireLifecycleEvent(LifecycleEvent::SHUTTING_DOWN); clientContext.getConnectionManager().shutdown(); clientContext.getClusterService().shutdown(); @@ -77,11 +79,6 @@ namespace hazelcast { shutdownLatch.countDown(); } - void LifecycleService::shutdownAndWait() { - shutdown(); - shutdownLatch.await(); - } - void LifecycleService::addLifecycleListener(LifecycleListener *lifecycleListener) { util::LockGuard lg(listenerLock); listeners.insert(lifecycleListener); From 99ec2f51914d83cbf94a264f8a3bed7afa5163fc Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Sat, 3 Jun 2017 04:22:27 +0300 Subject: [PATCH 08/10] Added connection closing when ConnectionManager is shutting down. PartitionService was hung indefinitely on shutdown, could not regenerate the problem locally but this may be possible reason. Failing to shutdown the connection means any waiting invocation may not be set and if partition service is waiting on future.get, it may block indefinitely. --- .../include/hazelcast/client/connection/Connection.h | 2 +- hazelcast/include/hazelcast/util/Closeable.h | 2 +- hazelcast/include/hazelcast/util/IOUtil.h | 2 +- .../hazelcast/client/connection/ClusterListenerThread.cpp | 4 ++-- hazelcast/src/hazelcast/client/connection/Connection.cpp | 8 +++++--- .../src/hazelcast/client/connection/ConnectionManager.cpp | 7 ++++++- .../hazelcast/client/connection/OwnerConnectionFuture.cpp | 2 +- hazelcast/src/hazelcast/util/IOUtil.cpp | 4 ++-- 8 files changed, 19 insertions(+), 12 deletions(-) diff --git a/hazelcast/include/hazelcast/client/connection/Connection.h b/hazelcast/include/hazelcast/client/connection/Connection.h index 8dc345a099..d990c32ff5 100644 --- a/hazelcast/include/hazelcast/client/connection/Connection.h +++ b/hazelcast/include/hazelcast/client/connection/Connection.h @@ -68,7 +68,7 @@ namespace hazelcast { void connect(int timeoutInMillis); - void close(); + void close(const char *closeReason = NULL); void write(protocol::ClientMessage *message); diff --git a/hazelcast/include/hazelcast/util/Closeable.h b/hazelcast/include/hazelcast/util/Closeable.h index 0bae25493d..911bede073 100644 --- a/hazelcast/include/hazelcast/util/Closeable.h +++ b/hazelcast/include/hazelcast/util/Closeable.h @@ -27,7 +27,7 @@ namespace hazelcast { public: virtual ~Closeable(); - virtual void close() = 0; + virtual void close(const char *closeReason) = 0; }; } } diff --git a/hazelcast/include/hazelcast/util/IOUtil.h b/hazelcast/include/hazelcast/util/IOUtil.h index f8ede479ec..91c88ecf19 100644 --- a/hazelcast/include/hazelcast/util/IOUtil.h +++ b/hazelcast/include/hazelcast/util/IOUtil.h @@ -46,7 +46,7 @@ namespace hazelcast { return value; } - static void closeResource(Closeable *closable); + static void closeResource(Closeable *closable, const char *closeReason = NULL); }; } diff --git a/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp b/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp index c94558df40..1c031dedd7 100644 --- a/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp +++ b/hazelcast/src/hazelcast/client/connection/ClusterListenerThread.cpp @@ -106,7 +106,7 @@ namespace hazelcast { clientContext.getConnectionManager().onCloseOwnerConnection(); if (deletingConnection.compareAndSet(false, true)) { - util::IOUtil::closeResource(conn.get()); + util::IOUtil::closeResource(conn.get(), "Error while listening cluster events"); conn.reset(); deletingConnection = false; clientContext.getLifecycleService().fireLifecycleEvent(LifecycleEvent::CLIENT_DISCONNECTED); @@ -118,7 +118,7 @@ namespace hazelcast { void ClusterListenerThread::stop() { if (deletingConnection.compareAndSet(false, true)) { - util::IOUtil::closeResource(conn.get()); + util::IOUtil::closeResource(conn.get(), "Cluster listener thread is stopping"); conn.reset(); deletingConnection = false; } diff --git a/hazelcast/src/hazelcast/client/connection/Connection.cpp b/hazelcast/src/hazelcast/client/connection/Connection.cpp index ab6c82f1de..250066b5b0 100644 --- a/hazelcast/src/hazelcast/client/connection/Connection.cpp +++ b/hazelcast/src/hazelcast/client/connection/Connection.cpp @@ -89,7 +89,7 @@ namespace hazelcast { outputSocketStream.write(PROTOCOL); } - void Connection::close() { + void Connection::close(const char *closeReason) { if (!live.compareAndSet(true, false)) { return; } @@ -98,8 +98,10 @@ namespace hazelcast { int socketId = socket->getSocketId(); std::stringstream message; - message << "Closing connection (id:" << connectionId << ") to " << serverAddr << " with socket id " << socketId << - (_isOwnerConnection ? " as the owner connection." : "."); + message << "Closing connection (id:" << connectionId << ") to " << serverAddr << + " with socket id " << socketId << + (_isOwnerConnection ? " as the owner connection." : ". ") << + (NULL != closeReason ? closeReason : ""); util::ILogger::getLogger().warning(message.str()); if (!_isOwnerConnection) { readHandler.deRegisterSocket(); diff --git a/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp b/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp index d776ab2b85..f7f5160d46 100644 --- a/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp +++ b/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp @@ -16,6 +16,7 @@ // // Created by sancar koyunlu on 8/21/13. +#include #include "hazelcast/util/Util.h" #include "hazelcast/client/protocol/AuthenticationStatus.h" #include "hazelcast/client/exception/AuthenticationException.h" @@ -69,6 +70,10 @@ namespace hazelcast { void ConnectionManager::shutdown() { live = false; + // close connections + BOOST_FOREACH(boost::shared_ptr connection , connections.values()) { + connection->close("Hazelcast client is shutting down"); + } heartBeater.shutdown(); if (heartBeatThread.get() != NULL) { heartBeatThread->cancel(); @@ -384,7 +389,7 @@ namespace hazelcast { } else { ownerConnectionFuture.close(); } - util::IOUtil::closeResource(&connection); + util::IOUtil::closeResource(&connection, "Heartbeat failed"); } void ConnectionManager::removeEndpoint(const Address &address) { diff --git a/hazelcast/src/hazelcast/client/connection/OwnerConnectionFuture.cpp b/hazelcast/src/hazelcast/client/connection/OwnerConnectionFuture.cpp index c9c7c4da54..79b30b7a93 100644 --- a/hazelcast/src/hazelcast/client/connection/OwnerConnectionFuture.cpp +++ b/hazelcast/src/hazelcast/client/connection/OwnerConnectionFuture.cpp @@ -91,7 +91,7 @@ namespace hazelcast { std::stringstream message; message << "Closing owner connection to " << currentOwnerConnection->getRemoteEndpoint(); util::ILogger::getLogger().finest(message.str()); - util::IOUtil::closeResource(currentOwnerConnection.get()); + util::IOUtil::closeResource(currentOwnerConnection.get(), message.str().c_str()); markAsClosed(); } } diff --git a/hazelcast/src/hazelcast/util/IOUtil.cpp b/hazelcast/src/hazelcast/util/IOUtil.cpp index 7b84e1a1da..0f73dd3e9a 100644 --- a/hazelcast/src/hazelcast/util/IOUtil.cpp +++ b/hazelcast/src/hazelcast/util/IOUtil.cpp @@ -22,10 +22,10 @@ namespace hazelcast { namespace util { - void IOUtil::closeResource(Closeable *closable) { + void IOUtil::closeResource(Closeable *closable, const char *closeReason) { if (closable != NULL) { try { - closable->close(); + closable->close(closeReason); } catch (client::exception::IException& e) { std::stringstream message; message << "closeResource failed" << e.what(); From 4a0c445a9901082adcbd02e8fef58f112949dd4f Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Sat, 3 Jun 2017 12:10:49 +0300 Subject: [PATCH 09/10] Corrected the error message printing for protocol messages. --- .../hazelcast/client/exception/ProtocolExceptions.h | 9 +++++---- .../hazelcast/client/connection/ConnectionManager.cpp | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hazelcast/include/hazelcast/client/exception/ProtocolExceptions.h b/hazelcast/include/hazelcast/client/exception/ProtocolExceptions.h index 2fcf0127d8..f1ac82705c 100644 --- a/hazelcast/include/hazelcast/client/exception/ProtocolExceptions.h +++ b/hazelcast/include/hazelcast/client/exception/ProtocolExceptions.h @@ -33,8 +33,9 @@ namespace hazelcast { namespace exception { class HAZELCAST_API ProtocolException : public IException { public: - ProtocolException(const std::string& source, const std::string& message, int32_t errorNo, int32_t causeCode) - : IException(source, message), errorCode(errorNo), causeErrorCode(causeCode) { + ProtocolException(const std::string& message, const std::string& details, int32_t errorNo, + int32_t causeCode) + : IException("Cluster", details), errorCode(errorNo), causeErrorCode(causeCode) { } ProtocolException(const std::string& source, const std::string& message) @@ -56,8 +57,8 @@ namespace hazelcast { #define DEFINE_PROTOCOL_EXCEPTION(ClassName) \ class HAZELCAST_API ClassName : public ProtocolException {\ public:\ - ClassName(const std::string& source, const std::string& message, int32_t errorCode, int32_t causeCode) \ - : ProtocolException(source, message, errorCode, causeCode) {\ + ClassName(const std::string& message, const std::string& details, int32_t errorNo, int32_t causeCode) \ + : ProtocolException(message, details, errorNo, causeCode) {\ }\ ClassName(const std::string& source, const std::string& message) \ : ProtocolException(source, message) {\ diff --git a/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp b/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp index f7f5160d46..82c5dddad3 100644 --- a/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp +++ b/hazelcast/src/hazelcast/client/connection/ConnectionManager.cpp @@ -355,7 +355,7 @@ namespace hazelcast { void ConnectionManager::checkLive() { if (!live) { - throw exception::HazelcastException("ConnectionManager is not active!"); + throw exception::HazelcastException("ConnectionManager", "ConnectionManager is not active!"); } } From e6902f0d2e2c11dddd46febd3f5d1f679a130005 Mon Sep 17 00:00:00 2001 From: ihsan demir Date: Sun, 4 Jun 2017 00:36:42 +0300 Subject: [PATCH 10/10] Changed latch with lock. --- hazelcast/include/hazelcast/client/spi/LifecycleService.h | 6 +----- hazelcast/src/hazelcast/client/spi/LifecycleService.cpp | 7 +++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/hazelcast/include/hazelcast/client/spi/LifecycleService.h b/hazelcast/include/hazelcast/client/spi/LifecycleService.h index aaf2391a53..698c4d4259 100644 --- a/hazelcast/include/hazelcast/client/spi/LifecycleService.h +++ b/hazelcast/include/hazelcast/client/spi/LifecycleService.h @@ -16,16 +16,12 @@ // // Created by sancar koyunlu on 6/17/13. - - - #ifndef HAZELCAST_LIFECYCLE_SERVICE #define HAZELCAST_LIFECYCLE_SERVICE #include "hazelcast/util/HazelcastDll.h" #include "hazelcast/util/Mutex.h" #include "hazelcast/util/AtomicBoolean.h" -#include "hazelcast/util/CountDownLatch.h" #include #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) @@ -68,7 +64,7 @@ namespace hazelcast { std::set listeners; util::Mutex listenerLock; util::AtomicBoolean active; - util::CountDownLatch shutdownLatch; + util::Mutex shutdownLock; }; } diff --git a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp index 0ce8c24209..d86ea798a2 100644 --- a/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp +++ b/hazelcast/src/hazelcast/client/spi/LifecycleService.cpp @@ -33,8 +33,7 @@ namespace hazelcast { LifecycleService::LifecycleService(ClientContext &clientContext, const ClientConfig &clientConfig) :clientContext(clientContext) - , active(false) - , shutdownLatch(1) { + , active(false) { std::set const &lifecycleListeners = clientConfig.getLifecycleListeners(); listeners.insert(lifecycleListeners.begin(), lifecycleListeners.end()); @@ -65,8 +64,9 @@ namespace hazelcast { } void LifecycleService::shutdown() { + util::LockGuard guard(shutdownLock); + if (!active.compareAndSet(true, false)) { - shutdownLatch.await(); return; } fireLifecycleEvent(LifecycleEvent::SHUTTING_DOWN); @@ -76,7 +76,6 @@ namespace hazelcast { clientContext.getInvocationService().shutdown(); clientContext.getNearCacheManager().destroyAllNearCaches(); fireLifecycleEvent(LifecycleEvent::SHUTDOWN); - shutdownLatch.countDown(); } void LifecycleService::addLifecycleListener(LifecycleListener *lifecycleListener) {