From 535713586728265cbfb2872b329074ca8439bf3c Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Tue, 2 Jul 2019 16:32:25 +0200 Subject: [PATCH 01/12] Auto switching to read only mode in hermes-management #1052 - temporary version with not working tests. --- .../domain/health/HealthCheckTask.java | 60 +++++++ .../domain/health/HealthChecker.java | 55 +++++++ .../domain/health/NodeDataProvider.java | 31 ++++ .../domain/health/HealthCheckTaskTest.groovy | 150 ++++++++++++++++++ 4 files changed, 296 insertions(+) create mode 100644 hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java create mode 100644 hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java create mode 100644 hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java create mode 100644 hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java new file mode 100644 index 0000000000..7157e0308c --- /dev/null +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -0,0 +1,60 @@ +package pl.allegro.tech.hermes.management.domain.health; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import pl.allegro.tech.hermes.management.domain.mode.ModeService; +import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClient; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; + +class HealthCheckTask implements Runnable { + + private static final Logger LOGGER = 
LoggerFactory.getLogger(HealthCheckTask.class); + + private final Collection zookeeperClients; + private final String healthCheckPath; + private final ObjectMapper objectMapper; + private final ModeService modeService; + + HealthCheckTask(Collection zookeeperClients, String healthCheckPath, ObjectMapper objectMapper, ModeService modeService) { + this.zookeeperClients = zookeeperClients; + this.healthCheckPath = healthCheckPath; + this.objectMapper = objectMapper; + this.modeService = modeService; + } + + @Override + public void run() { + final List healthChecks = zookeeperClients.stream() + .map(this::doHealthCheck) + .collect(Collectors.toList()); + if (healthChecks.contains(HealthCheckResult.UNHEALTHY)) { + modeService.setMode(ModeService.ManagementMode.READ_ONLY); + } else if (modeService.isReadOnlyEnabled() && !healthChecks.contains(HealthCheckResult.UNHEALTHY)) { + modeService.setMode(ModeService.ManagementMode.READ_WRITE); + } + } + + private HealthCheckResult doHealthCheck(ZookeeperClient zookeeperClient) { + try { + final String timestamp = LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + zookeeperClient.getCuratorFramework() + .setData() + .forPath(healthCheckPath, objectMapper.writeValueAsBytes(timestamp)); + LOGGER.info("ZooKeeper {} healthy.", zookeeperClient.getDcName()); + return HealthCheckResult.HEALTHY; + } catch (Exception e) { + LOGGER.error("Cannot connect to ZooKeeper {}.", zookeeperClient.getDcName(), e); + return HealthCheckResult.UNHEALTHY; + } + } + + enum HealthCheckResult { + HEALTHY, UNHEALTHY + } +} diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java new file mode 100644 index 0000000000..078a306c2b --- /dev/null +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java @@ -0,0 +1,55 @@ +package 
pl.allegro.tech.hermes.management.domain.health; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.springframework.stereotype.Component; +import pl.allegro.tech.hermes.management.domain.mode.ModeService; +import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClient; +import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager; + +import javax.annotation.PostConstruct; +import javax.inject.Inject; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +@Component +public class HealthChecker { + + private final ZookeeperClientManager zookeeperClientManager; + private final String healthCheckPath; + private final HealthCheckTask healthCheckTask; + private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor(); + + @Inject + public HealthChecker(ZookeeperClientManager zookeeperClientManager, NodeDataProvider nodeDataProvider, + ObjectMapper objectMapper, ModeService modeService) { + this.zookeeperClientManager = zookeeperClientManager; + this.healthCheckPath = String.format("/hermes/management/health/%s:%s", nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); + this.healthCheckTask = new HealthCheckTask(zookeeperClientManager.getClients(), this.healthCheckPath, objectMapper, modeService); + } + + @PostConstruct + public void scheduleHealthCheck() { + zookeeperClientManager.getClients() + .forEach(this::setupHealthPath); + executorService.scheduleAtFixedRate(healthCheckTask, 0, 2, TimeUnit.SECONDS); + } + + // TODO: Skorzystac z ensurePathExists + private void setupHealthPath(ZookeeperClient zookeeperClient) { + try { + final boolean healthPathExists = zookeeperClient.getCuratorFramework() + .checkExists() + .forPath(healthCheckPath) != null; + + if (!healthPathExists) { + zookeeperClient.getCuratorFramework() + .create() + .creatingParentsIfNeeded() + 
.forPath(healthCheckPath); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java new file mode 100644 index 0000000000..1646214c67 --- /dev/null +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java @@ -0,0 +1,31 @@ +package pl.allegro.tech.hermes.management.domain.health; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import javax.inject.Inject; +import java.net.InetAddress; +import java.net.UnknownHostException; + +@Component +class NodeDataProvider { + + private final String serverPort; + + @Inject + NodeDataProvider(@Value("${server.port}") String serverPort) { + this.serverPort = serverPort; + } + + String getHostname() { + try { + return InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + } + + String getServerPort() { + return serverPort; + } +} diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy new file mode 100644 index 0000000000..e861da9172 --- /dev/null +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy @@ -0,0 +1,150 @@ +package pl.allegro.tech.hermes.management.domain.health + +import com.fasterxml.jackson.databind.ObjectMapper +import pl.allegro.tech.hermes.management.config.storage.StorageClustersProperties +import pl.allegro.tech.hermes.management.config.storage.StorageProperties +import pl.allegro.tech.hermes.management.domain.mode.ModeService +import 
pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClient +import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager +import pl.allegro.tech.hermes.management.utils.MultiZookeeperIntegrationTest +import spock.lang.Ignore + +import java.util.concurrent.TimeUnit + +@Ignore +class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { + + def healthCheckPath = '/hermes/management/health/hostname:8080' + def modeService = new ModeService() + + @Ignore + def "should not change mode on successful health check"() { + given: + def manager = buildZookeeperClientManager() + manager.start() + + and: + assertZookeeperClientsConnected(manager.clients) + + and: + manager.clients.each { client -> setupZookeeperPath(client, healthCheckPath) } + + and: + assert !modeService.readOnlyEnabled + + def healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) + + when: + healthCheckTask.run() + + then: + !modeService.readOnlyEnabled + + cleanup: + manager.stop() + } + + @Ignore + def "should change mode to READ_ONLY on failed health check"() { + given: + def manager = buildZookeeperClientManager() + manager.start() + + and: + assertZookeeperClientsConnected(manager.clients) + + and: + manager.clients.each { client -> setupZookeeperPath(client, healthCheckPath) } + + and: + assert !modeService.readOnlyEnabled + + and: + zookeeper1.stop() + + def healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) + + when: + healthCheckTask.run() + + then: + modeService.readOnlyEnabled + + cleanup: + manager.stop() + } + + @Ignore + def "should change mode to READ_ONLY on failed health check and set READ_WRITE back again on successful next connection"() { + given: + def manager = buildZookeeperClientManager() + manager.start() + + and: + assertZookeeperClientsConnected(manager.clients) + + and: + manager.clients.each { client -> 
setupZookeeperPath(client, healthCheckPath) } + + and: + assert !modeService.readOnlyEnabled + + and: + zookeeper1.stop() + + def healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) + + when: + healthCheckTask.run() + + then: + modeService.readOnlyEnabled + + and: + zookeeper1.start() + manager.clients.each { client -> client.curatorFramework.blockUntilConnected(1, TimeUnit.SECONDS) } + + and: + healthCheckTask.run() + + and: + !modeService.readOnlyEnabled + + cleanup: + manager.stop() + } + + static buildZookeeperClientManager(String dc = "dc1") { + def properties = new StorageClustersProperties(clusters: [ + new StorageProperties(connectionString: "localhost:$DC_1_ZOOKEEPER_PORT", dc: DC_1_NAME), + new StorageProperties(connectionString: "localhost:$DC_2_ZOOKEEPER_PORT", dc: DC_2_NAME) + ]) + new ZookeeperClientManager(properties, new TestDcNameProvider(dc)) + } + + static findClientByDc(List clients, String dcName) { + clients.find { it.dcName == dcName } + } + + static setupZookeeperPath(ZookeeperClient zookeeperClient, String path) { + def healthCheckPathExists = zookeeperClient.curatorFramework + .checkExists() + .forPath(path) != null + if (!healthCheckPathExists) { + zookeeperClient.curatorFramework + .create() + .creatingParentContainersIfNeeded() + .forPath(path) + } + } + + static assertZookeeperClientsConnected(List clients) { + assert clients.size() == 2 + + def dc1Client = findClientByDc(clients, DC_1_NAME) + assert assertClientConnected(dc1Client) + + def dc2Client = findClientByDc(clients, DC_2_NAME) + assert assertClientConnected(dc2Client) + } +} From 9e0a7e415abd2ac5b13300930abcbdd77667dbe4 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Wed, 3 Jul 2019 09:27:34 +0200 Subject: [PATCH 02/12] Auto switching to read only mode in hermes-management #1052 - fixed not working tests. 
--- .../management/domain/health/HealthCheckTask.java | 2 +- .../domain/health/HealthCheckTaskTest.groovy | 9 +-------- .../utils/MultiZookeeperIntegrationTest.groovy | 10 ++++++---- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java index 7157e0308c..192ec3e4d5 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -33,7 +33,7 @@ public void run() { final List healthChecks = zookeeperClients.stream() .map(this::doHealthCheck) .collect(Collectors.toList()); - if (healthChecks.contains(HealthCheckResult.UNHEALTHY)) { + if (!modeService.isReadOnlyEnabled() && healthChecks.contains(HealthCheckResult.UNHEALTHY)) { modeService.setMode(ModeService.ManagementMode.READ_ONLY); } else if (modeService.isReadOnlyEnabled() && !healthChecks.contains(HealthCheckResult.UNHEALTHY)) { modeService.setMode(ModeService.ManagementMode.READ_WRITE); diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy index e861da9172..c627f53fdb 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy @@ -7,17 +7,14 @@ import pl.allegro.tech.hermes.management.domain.mode.ModeService import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClient import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager import 
pl.allegro.tech.hermes.management.utils.MultiZookeeperIntegrationTest -import spock.lang.Ignore import java.util.concurrent.TimeUnit -@Ignore class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { def healthCheckPath = '/hermes/management/health/hostname:8080' def modeService = new ModeService() - @Ignore def "should not change mode on successful health check"() { given: def manager = buildZookeeperClientManager() @@ -44,7 +41,6 @@ class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { manager.stop() } - @Ignore def "should change mode to READ_ONLY on failed health check"() { given: def manager = buildZookeeperClientManager() @@ -74,7 +70,6 @@ class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { manager.stop() } - @Ignore def "should change mode to READ_ONLY on failed health check and set READ_WRITE back again on successful next connection"() { given: def manager = buildZookeeperClientManager() @@ -101,7 +96,7 @@ class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { modeService.readOnlyEnabled and: - zookeeper1.start() + zookeeper1.restart() manager.clients.each { client -> client.curatorFramework.blockUntilConnected(1, TimeUnit.SECONDS) } and: @@ -139,8 +134,6 @@ class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { } static assertZookeeperClientsConnected(List clients) { - assert clients.size() == 2 - def dc1Client = findClientByDc(clients, DC_1_NAME) assert assertClientConnected(dc1Client) diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy index 76ac2e5c4b..3418d177b5 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy @@ -12,15 +12,17 
@@ abstract class MultiZookeeperIntegrationTest extends Specification { static final int DC_2_ZOOKEEPER_PORT = 9501 static final String DC_2_NAME = "dc2" - static zookeeper1 = new TestingServer(DC_1_ZOOKEEPER_PORT, false) - static zookeeper2 = new TestingServer(DC_2_ZOOKEEPER_PORT, false) + TestingServer zookeeper1 + TestingServer zookeeper2 - def setupSpec() { + def setup() { + zookeeper1 = new TestingServer(DC_1_ZOOKEEPER_PORT, false) + zookeeper2 = new TestingServer(DC_2_ZOOKEEPER_PORT, false) zookeeper1.start() zookeeper2.start() } - def cleanupSpec(){ + def cleanup() { zookeeper1.stop() zookeeper2.stop() } From 0ba6ae0643e778a4682e1a134f4063c045df9c22 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Wed, 3 Jul 2019 10:47:26 +0200 Subject: [PATCH 03/12] Auto switching to read only mode in hermes-management #1052 - refactoring --- .../domain/health/HealthCheckScheduler.java | 40 ++++++++++++ .../domain/health/HealthChecker.java | 55 ---------------- .../src/main/resources/application.yaml | 3 + .../domain/health/HealthCheckTaskTest.groovy | 62 +++++-------------- 4 files changed, 57 insertions(+), 103 deletions(-) create mode 100644 hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java delete mode 100644 hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java new file mode 100644 index 0000000000..2ae6943ba7 --- /dev/null +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -0,0 +1,40 @@ +package pl.allegro.tech.hermes.management.domain.health; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.springframework.beans.factory.annotation.Value; +import 
org.springframework.stereotype.Component; +import pl.allegro.tech.hermes.management.domain.mode.ModeService; +import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager; + +import javax.annotation.PostConstruct; +import javax.inject.Inject; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +@Component +public class HealthCheckScheduler { + + private final ZookeeperClientManager zookeeperClientManager; + private final String healthCheckPath; + private final HealthCheckTask healthCheckTask; + private final Long period; + private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor(); + + @Inject + public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, NodeDataProvider nodeDataProvider, + ObjectMapper objectMapper, ModeService modeService, + @Value("${management.health.zk-health-path-prefix}") String healthCheckPathPrefix, + @Value("${management.health.period}") Long period) { + this.zookeeperClientManager = zookeeperClientManager; + this.healthCheckPath = String.format("%s/%s:%s", healthCheckPathPrefix, nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); + this.period = period; + this.healthCheckTask = new HealthCheckTask(zookeeperClientManager.getClients(), this.healthCheckPath, objectMapper, modeService); + } + + @PostConstruct + public void scheduleHealthCheck() { + zookeeperClientManager.getClients().forEach(client -> client.ensurePathExists(healthCheckPath)); + executorService.scheduleAtFixedRate(healthCheckTask, 0, period, TimeUnit.SECONDS); + } +} diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java deleted file mode 100644 index 078a306c2b..0000000000 --- 
a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthChecker.java +++ /dev/null @@ -1,55 +0,0 @@ -package pl.allegro.tech.hermes.management.domain.health; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.springframework.stereotype.Component; -import pl.allegro.tech.hermes.management.domain.mode.ModeService; -import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClient; -import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager; - -import javax.annotation.PostConstruct; -import javax.inject.Inject; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; - -@Component -public class HealthChecker { - - private final ZookeeperClientManager zookeeperClientManager; - private final String healthCheckPath; - private final HealthCheckTask healthCheckTask; - private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor(); - - @Inject - public HealthChecker(ZookeeperClientManager zookeeperClientManager, NodeDataProvider nodeDataProvider, - ObjectMapper objectMapper, ModeService modeService) { - this.zookeeperClientManager = zookeeperClientManager; - this.healthCheckPath = String.format("/hermes/management/health/%s:%s", nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); - this.healthCheckTask = new HealthCheckTask(zookeeperClientManager.getClients(), this.healthCheckPath, objectMapper, modeService); - } - - @PostConstruct - public void scheduleHealthCheck() { - zookeeperClientManager.getClients() - .forEach(this::setupHealthPath); - executorService.scheduleAtFixedRate(healthCheckTask, 0, 2, TimeUnit.SECONDS); - } - - // TODO: Skorzystac z ensurePathExists - private void setupHealthPath(ZookeeperClient zookeeperClient) { - try { - final boolean healthPathExists = zookeeperClient.getCuratorFramework() - .checkExists() - .forPath(healthCheckPath) 
!= null; - - if (!healthPathExists) { - zookeeperClient.getCuratorFramework() - .create() - .creatingParentsIfNeeded() - .forPath(healthCheckPath); - } - } catch (Exception e) { - throw new RuntimeException(e); - } - } -} diff --git a/hermes-management/src/main/resources/application.yaml b/hermes-management/src/main/resources/application.yaml index ae229ce9b7..a152e5e108 100644 --- a/hermes-management/src/main/resources/application.yaml +++ b/hermes-management/src/main/resources/application.yaml @@ -38,6 +38,9 @@ management: server: servlet: context-path: / + health: + zk-health-path-prefix: /hermes/management/health + period: 30 # sec schema.repository.type: schema_registry diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy index c627f53fdb..7580c4a7e9 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy @@ -8,87 +8,57 @@ import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClien import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager import pl.allegro.tech.hermes.management.utils.MultiZookeeperIntegrationTest -import java.util.concurrent.TimeUnit - class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { def healthCheckPath = '/hermes/management/health/hostname:8080' def modeService = new ModeService() + ZookeeperClientManager manager + HealthCheckTask healthCheckTask - def "should not change mode on successful health check"() { - given: - def manager = buildZookeeperClientManager() + def setup() { + manager = buildZookeeperClientManager() manager.start() - - and: assertZookeeperClientsConnected(manager.clients) - - and: manager.clients.each 
{ client -> setupZookeeperPath(client, healthCheckPath) } + healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) + } - and: - assert !modeService.readOnlyEnabled + def cleanup() { + manager.stop() + } - def healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) + def "should not change mode in case of successful health check"() { + given: + assert !modeService.readOnlyEnabled when: healthCheckTask.run() then: !modeService.readOnlyEnabled - - cleanup: - manager.stop() } - def "should change mode to READ_ONLY on failed health check"() { + def "should change mode to READ_ONLY in case of failed health check"() { given: - def manager = buildZookeeperClientManager() - manager.start() - - and: - assertZookeeperClientsConnected(manager.clients) - - and: - manager.clients.each { client -> setupZookeeperPath(client, healthCheckPath) } - - and: assert !modeService.readOnlyEnabled and: zookeeper1.stop() - def healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) - when: healthCheckTask.run() then: modeService.readOnlyEnabled - - cleanup: - manager.stop() } - def "should change mode to READ_ONLY on failed health check and set READ_WRITE back again on successful next connection"() { + def "should change mode to READ_ONLY in case of failed health check and set READ_WRITE back again in case of successful next connection"() { given: - def manager = buildZookeeperClientManager() - manager.start() - - and: - assertZookeeperClientsConnected(manager.clients) - - and: - manager.clients.each { client -> setupZookeeperPath(client, healthCheckPath) } - - and: assert !modeService.readOnlyEnabled and: zookeeper1.stop() - def healthCheckTask = new HealthCheckTask(manager.clients, healthCheckPath, new ObjectMapper(), modeService) - when: healthCheckTask.run() @@ -97,16 +67,12 @@ class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { 
and: zookeeper1.restart() - manager.clients.each { client -> client.curatorFramework.blockUntilConnected(1, TimeUnit.SECONDS) } and: healthCheckTask.run() and: !modeService.readOnlyEnabled - - cleanup: - manager.stop() } static buildZookeeperClientManager(String dc = "dc1") { From 694ac786482c4ff68379460aa802d73e3a311662 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Wed, 3 Jul 2019 13:29:03 +0200 Subject: [PATCH 04/12] Auto switching to read only mode in hermes-management #1052 - refactoring --- .../health/CouldNotResolveHostNameException.java | 7 +++++++ .../domain/health/HealthCheckScheduler.java | 11 ++++++++--- .../management/domain/health/HealthCheckTask.java | 10 +++++----- .../management/domain/health/NodeDataProvider.java | 2 +- .../domain/health/HealthCheckTaskTest.groovy | 8 ++++---- 5 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/CouldNotResolveHostNameException.java diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/CouldNotResolveHostNameException.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/CouldNotResolveHostNameException.java new file mode 100644 index 0000000000..048ee4f27d --- /dev/null +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/CouldNotResolveHostNameException.java @@ -0,0 +1,7 @@ +package pl.allegro.tech.hermes.management.domain.health; + +class CouldNotResolveHostNameException extends RuntimeException { + CouldNotResolveHostNameException(Throwable cause) { + super(cause); + } +} diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java index 2ae6943ba7..0e337731fa 100644 --- 
a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -1,6 +1,7 @@ package pl.allegro.tech.hermes.management.domain.health; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import pl.allegro.tech.hermes.management.domain.mode.ModeService; @@ -19,11 +20,15 @@ public class HealthCheckScheduler { private final String healthCheckPath; private final HealthCheckTask healthCheckTask; private final Long period; - private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor(); + private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder().setNameFormat("health-check-scheduler-%d").build() + ); @Inject - public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, NodeDataProvider nodeDataProvider, - ObjectMapper objectMapper, ModeService modeService, + public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, + NodeDataProvider nodeDataProvider, + ObjectMapper objectMapper, + ModeService modeService, @Value("${management.health.zk-health-path-prefix}") String healthCheckPathPrefix, @Value("${management.health.period}") Long period) { this.zookeeperClientManager = zookeeperClientManager; diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java index 192ec3e4d5..083ca46979 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java +++ 
b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -33,23 +33,23 @@ public void run() { final List healthChecks = zookeeperClients.stream() .map(this::doHealthCheck) .collect(Collectors.toList()); - if (!modeService.isReadOnlyEnabled() && healthChecks.contains(HealthCheckResult.UNHEALTHY)) { + if (healthChecks.contains(HealthCheckResult.UNHEALTHY)) { modeService.setMode(ModeService.ManagementMode.READ_ONLY); - } else if (modeService.isReadOnlyEnabled() && !healthChecks.contains(HealthCheckResult.UNHEALTHY)) { + } else { modeService.setMode(ModeService.ManagementMode.READ_WRITE); } } private HealthCheckResult doHealthCheck(ZookeeperClient zookeeperClient) { + final String timestamp = LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); try { - final String timestamp = LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); zookeeperClient.getCuratorFramework() .setData() .forPath(healthCheckPath, objectMapper.writeValueAsBytes(timestamp)); - LOGGER.info("ZooKeeper {} healthy.", zookeeperClient.getDcName()); + LOGGER.info("ZooKeeper {} healthy.", zookeeperClient.getDatacenterName()); return HealthCheckResult.HEALTHY; } catch (Exception e) { - LOGGER.error("Cannot connect to ZooKeeper {}.", zookeeperClient.getDcName(), e); + LOGGER.error("Cannot connect to ZooKeeper {}.", zookeeperClient.getDatacenterName(), e); return HealthCheckResult.UNHEALTHY; } } diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java index 1646214c67..1c71b44c7e 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java @@ -21,7 +21,7 @@ String getHostname() { try { return 
InetAddress.getLocalHost().getHostName(); } catch (UnknownHostException e) { - throw new RuntimeException(e); + throw new CouldNotResolveHostNameException(e); } } diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy index 7580c4a7e9..8cf6d6eb2d 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy @@ -77,14 +77,14 @@ class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { static buildZookeeperClientManager(String dc = "dc1") { def properties = new StorageClustersProperties(clusters: [ - new StorageProperties(connectionString: "localhost:$DC_1_ZOOKEEPER_PORT", dc: DC_1_NAME), - new StorageProperties(connectionString: "localhost:$DC_2_ZOOKEEPER_PORT", dc: DC_2_NAME) + new StorageProperties(connectionString: "localhost:$DC_1_ZOOKEEPER_PORT", datacenter: DC_1_NAME), + new StorageProperties(connectionString: "localhost:$DC_2_ZOOKEEPER_PORT", datacenter: DC_2_NAME) ]) - new ZookeeperClientManager(properties, new TestDcNameProvider(dc)) + new ZookeeperClientManager(properties, new TestDatacenterNameProvider(dc)) } static findClientByDc(List clients, String dcName) { - clients.find { it.dcName == dcName } + clients.find { it.datacenterName == dcName } } static setupZookeeperPath(ZookeeperClient zookeeperClient, String path) { From 3a0b1ab03609ed05a379ce75633c0b7f3980ca7e Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Wed, 3 Jul 2019 14:30:26 +0200 Subject: [PATCH 05/12] Auto switching to read only mode in hermes-management #1052 - refactoring --- .../hermes/infrastructure/zookeeper/ZookeeperPaths.java | 5 +++++ .../management/domain/health/HealthCheckScheduler.java | 9 +++++---- 
.../management/domain/health/NodeDataProvider.java | 2 -- hermes-management/src/main/resources/application.yaml | 3 +-- .../management/domain/health/HealthCheckTaskTest.groovy | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java b/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java index 4d1c6124f5..938782f40b 100644 --- a/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java +++ b/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java @@ -23,6 +23,7 @@ public class ZookeeperPaths { public static final String BLACKLIST_PATH = "blacklist"; public static final String MAX_RATE_PATH = "max-rate"; public static final String MAX_RATE_HISTORY_PATH = "history"; + public static final String STORAGE_HEALTH_PATH = "storage/health"; private final String basePath; @@ -172,4 +173,8 @@ public String oAuthProvidersPath() { public String oAuthProviderPath(String oAuthProviderName) { return Joiner.on(URL_SEPARATOR).join(oAuthProvidersPath(), oAuthProviderName); } + + public String nodeHealthPathForManagementHost(String host, String port) { + return Joiner.on(URL_SEPARATOR).join(basePath, STORAGE_HEALTH_PATH, String.format("%s_%s", host, port)); + } } diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java index 0e337731fa..d417c02e9f 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -4,6 +4,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import 
org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; +import pl.allegro.tech.hermes.infrastructure.zookeeper.ZookeeperPaths; import pl.allegro.tech.hermes.management.domain.mode.ModeService; import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager; @@ -26,14 +27,14 @@ public class HealthCheckScheduler { @Inject public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, + ZookeeperPaths zookeeperPaths, NodeDataProvider nodeDataProvider, ObjectMapper objectMapper, ModeService modeService, - @Value("${management.health.zk-health-path-prefix}") String healthCheckPathPrefix, - @Value("${management.health.period}") Long period) { + @Value("${management.health.periodSeconds}") Long periodSeconds) { this.zookeeperClientManager = zookeeperClientManager; - this.healthCheckPath = String.format("%s/%s:%s", healthCheckPathPrefix, nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); - this.period = period; + this.healthCheckPath = zookeeperPaths.nodeHealthPathForManagementHost(nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); + this.period = periodSeconds; this.healthCheckTask = new HealthCheckTask(zookeeperClientManager.getClients(), this.healthCheckPath, objectMapper, modeService); } diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java index 1c71b44c7e..5c0517c5d1 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/NodeDataProvider.java @@ -3,7 +3,6 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import javax.inject.Inject; import java.net.InetAddress; import 
java.net.UnknownHostException; @@ -12,7 +11,6 @@ class NodeDataProvider { private final String serverPort; - @Inject NodeDataProvider(@Value("${server.port}") String serverPort) { this.serverPort = serverPort; } diff --git a/hermes-management/src/main/resources/application.yaml b/hermes-management/src/main/resources/application.yaml index a152e5e108..c451856c31 100644 --- a/hermes-management/src/main/resources/application.yaml +++ b/hermes-management/src/main/resources/application.yaml @@ -39,8 +39,7 @@ management: servlet: context-path: / health: - zk-health-path-prefix: /hermes/management/health - period: 30 # sec + periodSeconds: 30 schema.repository.type: schema_registry diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy index 8cf6d6eb2d..06fc3a35cb 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy @@ -10,7 +10,7 @@ import pl.allegro.tech.hermes.management.utils.MultiZookeeperIntegrationTest class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { - def healthCheckPath = '/hermes/management/health/hostname:8080' + def healthCheckPath = '/hermes/storage/health/hostname_8080' def modeService = new ModeService() ZookeeperClientManager manager HealthCheckTask healthCheckTask From 078c6705f285e4db57393b4f794301225ad4f6f5 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Wed, 3 Jul 2019 14:54:08 +0200 Subject: [PATCH 06/12] Auto switching to read only mode in hermes-management #1052 - refactoring --- .../domain/health/HealthCheckScheduler.java | 2 ++ .../domain/health/HealthCheckTask.java | 16 ++++++++++------ .../src/main/resources/application.yaml | 1 + 3 files changed, 13 insertions(+), 6 
deletions(-) diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java index d417c02e9f..dda9e1111e 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.stereotype.Component; import pl.allegro.tech.hermes.infrastructure.zookeeper.ZookeeperPaths; import pl.allegro.tech.hermes.management.domain.mode.ModeService; @@ -15,6 +16,7 @@ import java.util.concurrent.TimeUnit; @Component +@ConditionalOnProperty(name = "management.health.enabled", havingValue = "true") public class HealthCheckScheduler { private final ZookeeperClientManager zookeeperClientManager; diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java index 083ca46979..1f54b7591a 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -30,14 +30,10 @@ class HealthCheckTask implements Runnable { @Override public void run() { - final List healthChecks = zookeeperClients.stream() + final List healthCheckResults = zookeeperClients.stream() .map(this::doHealthCheck) .collect(Collectors.toList()); - if (healthChecks.contains(HealthCheckResult.UNHEALTHY)) { - 
modeService.setMode(ModeService.ManagementMode.READ_ONLY); - } else { - modeService.setMode(ModeService.ManagementMode.READ_WRITE); - } + updateMode(healthCheckResults); } private HealthCheckResult doHealthCheck(ZookeeperClient zookeeperClient) { @@ -54,6 +50,14 @@ private HealthCheckResult doHealthCheck(ZookeeperClient zookeeperClient) { } } + private void updateMode(List healthCheckResults) { + if (healthCheckResults.contains(HealthCheckResult.UNHEALTHY)) { + modeService.setMode(ModeService.ManagementMode.READ_ONLY); + } else { + modeService.setMode(ModeService.ManagementMode.READ_WRITE); + } + } + enum HealthCheckResult { HEALTHY, UNHEALTHY } diff --git a/hermes-management/src/main/resources/application.yaml b/hermes-management/src/main/resources/application.yaml index c451856c31..c64b29ecf5 100644 --- a/hermes-management/src/main/resources/application.yaml +++ b/hermes-management/src/main/resources/application.yaml @@ -40,6 +40,7 @@ management: context-path: / health: periodSeconds: 30 + enabled: true schema.repository.type: schema_registry From 295f9ebc231a8f0e545e78d7c93bc1cf4183dff3 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Wed, 3 Jul 2019 16:50:49 +0200 Subject: [PATCH 07/12] Auto switching to read only mode in hermes-management #1052 - refactoring --- .../hermes/infrastructure/zookeeper/ZookeeperPaths.java | 2 +- .../management/domain/health/HealthCheckScheduler.java | 4 +--- .../hermes/management/domain/health/HealthCheckTask.java | 6 +++--- .../management/domain/health/HealthCheckTaskTest.groovy | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java b/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java index 938782f40b..6b82146ee1 100644 --- a/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java +++ 
b/hermes-common/src/main/java/pl/allegro/tech/hermes/infrastructure/zookeeper/ZookeeperPaths.java @@ -23,7 +23,7 @@ public class ZookeeperPaths { public static final String BLACKLIST_PATH = "blacklist"; public static final String MAX_RATE_PATH = "max-rate"; public static final String MAX_RATE_HISTORY_PATH = "history"; - public static final String STORAGE_HEALTH_PATH = "storage/health"; + public static final String STORAGE_HEALTH_PATH = "storage-health"; private final String basePath; diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java index dda9e1111e..442ff56ba9 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -10,7 +10,6 @@ import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClientManager; import javax.annotation.PostConstruct; -import javax.inject.Inject; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -24,10 +23,9 @@ public class HealthCheckScheduler { private final HealthCheckTask healthCheckTask; private final Long period; private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder().setNameFormat("health-check-scheduler-%d").build() + new ThreadFactoryBuilder().setNameFormat("storage-health-check-scheduler-%d").build() ); - @Inject public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, ZookeeperPaths zookeeperPaths, NodeDataProvider nodeDataProvider, diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java 
b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java index 1f54b7591a..d81cb35dc1 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -14,7 +14,7 @@ class HealthCheckTask implements Runnable { - private static final Logger LOGGER = LoggerFactory.getLogger(HealthCheckTask.class); + private static final Logger logger = LoggerFactory.getLogger(HealthCheckTask.class); private final Collection zookeeperClients; private final String healthCheckPath; @@ -42,10 +42,10 @@ private HealthCheckResult doHealthCheck(ZookeeperClient zookeeperClient) { zookeeperClient.getCuratorFramework() .setData() .forPath(healthCheckPath, objectMapper.writeValueAsBytes(timestamp)); - LOGGER.info("ZooKeeper {} healthy.", zookeeperClient.getDatacenterName()); + logger.info("ZooKeeper {} healthy.", zookeeperClient.getDatacenterName()); return HealthCheckResult.HEALTHY; } catch (Exception e) { - LOGGER.error("Cannot connect to ZooKeeper {}.", zookeeperClient.getDatacenterName(), e); + logger.error("Cannot connect to ZooKeeper {}.", zookeeperClient.getDatacenterName(), e); return HealthCheckResult.UNHEALTHY; } } diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy index 06fc3a35cb..8e1aeabbb6 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/domain/health/HealthCheckTaskTest.groovy @@ -10,7 +10,7 @@ import pl.allegro.tech.hermes.management.utils.MultiZookeeperIntegrationTest class HealthCheckTaskTest extends MultiZookeeperIntegrationTest { - def healthCheckPath 
= '/hermes/storage/health/hostname_8080' + def healthCheckPath = '/hermes/storage-health/hostname_8080' def modeService = new ModeService() ZookeeperClientManager manager HealthCheckTask healthCheckTask From 3a57c43bc76ef5276d9c8bce4516e96278abc12a Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Thu, 4 Jul 2019 10:18:21 +0200 Subject: [PATCH 08/12] Auto switching to read only mode in hermes-management #1052 - refactoring --- .../management/domain/health/HealthCheckScheduler.java | 8 ++------ .../hermes/management/domain/health/HealthCheckTask.java | 5 +++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java index 442ff56ba9..f5ec1df3e5 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -18,8 +18,6 @@ @ConditionalOnProperty(name = "management.health.enabled", havingValue = "true") public class HealthCheckScheduler { - private final ZookeeperClientManager zookeeperClientManager; - private final String healthCheckPath; private final HealthCheckTask healthCheckTask; private final Long period; private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor( @@ -32,15 +30,13 @@ public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, ObjectMapper objectMapper, ModeService modeService, @Value("${management.health.periodSeconds}") Long periodSeconds) { - this.zookeeperClientManager = zookeeperClientManager; - this.healthCheckPath = zookeeperPaths.nodeHealthPathForManagementHost(nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); + String healthCheckPath = 
zookeeperPaths.nodeHealthPathForManagementHost(nodeDataProvider.getHostname(), nodeDataProvider.getServerPort()); this.period = periodSeconds; - this.healthCheckTask = new HealthCheckTask(zookeeperClientManager.getClients(), this.healthCheckPath, objectMapper, modeService); + this.healthCheckTask = new HealthCheckTask(zookeeperClientManager.getClients(), healthCheckPath, objectMapper, modeService); } @PostConstruct public void scheduleHealthCheck() { - zookeeperClientManager.getClients().forEach(client -> client.ensurePathExists(healthCheckPath)); executorService.scheduleAtFixedRate(healthCheckTask, 0, period, TimeUnit.SECONDS); } } diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java index d81cb35dc1..1422798550 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -39,13 +39,14 @@ public void run() { private HealthCheckResult doHealthCheck(ZookeeperClient zookeeperClient) { final String timestamp = LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); try { + zookeeperClient.ensurePathExists(healthCheckPath); zookeeperClient.getCuratorFramework() .setData() .forPath(healthCheckPath, objectMapper.writeValueAsBytes(timestamp)); - logger.info("ZooKeeper {} healthy.", zookeeperClient.getDatacenterName()); + logger.info("Storage healthy for datacenter {}", zookeeperClient.getDatacenterName()); return HealthCheckResult.HEALTHY; } catch (Exception e) { - logger.error("Cannot connect to ZooKeeper {}.", zookeeperClient.getDatacenterName(), e); + logger.error("Storage health check failed for datacenter {}", zookeeperClient.getDatacenterName(), e); return HealthCheckResult.UNHEALTHY; } } From 4967e1f98736b517ae44eec9a64e451251760845 
Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Thu, 4 Jul 2019 10:28:57 +0200 Subject: [PATCH 09/12] Auto switching to read only mode in hermes-management #1052 - added logging in HealthCheckScheduler --- .../management/domain/health/HealthCheckScheduler.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java index f5ec1df3e5..19e452757e 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckScheduler.java @@ -2,6 +2,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.stereotype.Component; @@ -18,6 +20,8 @@ @ConditionalOnProperty(name = "management.health.enabled", havingValue = "true") public class HealthCheckScheduler { + private static final Logger logger = LoggerFactory.getLogger(HealthCheckScheduler.class); + private final HealthCheckTask healthCheckTask; private final Long period; private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor( @@ -37,6 +41,7 @@ public HealthCheckScheduler(ZookeeperClientManager zookeeperClientManager, @PostConstruct public void scheduleHealthCheck() { + logger.info("Starting the storage health check scheduler"); executorService.scheduleAtFixedRate(healthCheckTask, 0, period, TimeUnit.SECONDS); } } From a07676b110cca214ba31cc41545f43666e71d159 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Thu, 4 Jul 2019 11:44:33 
+0200 Subject: [PATCH 10/12] Auto switching to read only mode in hermes-management #1052 - fix port already in use --- .../management/utils/MultiZookeeperIntegrationTest.groovy | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy index 3418d177b5..f190361893 100644 --- a/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy +++ b/hermes-management/src/test/groovy/pl/allegro/tech/hermes/management/utils/MultiZookeeperIntegrationTest.groovy @@ -3,13 +3,14 @@ package pl.allegro.tech.hermes.management.utils import org.apache.curator.test.TestingServer import pl.allegro.tech.hermes.management.infrastructure.dc.DatacenterNameProvider import pl.allegro.tech.hermes.management.infrastructure.zookeeper.ZookeeperClient +import pl.allegro.tech.hermes.test.helper.util.Ports import spock.lang.Specification abstract class MultiZookeeperIntegrationTest extends Specification { - static final int DC_1_ZOOKEEPER_PORT = 9500 + static final int DC_1_ZOOKEEPER_PORT = Ports.nextAvailable() static final String DC_1_NAME = "dc1" - static final int DC_2_ZOOKEEPER_PORT = 9501 + static final int DC_2_ZOOKEEPER_PORT = Ports.nextAvailable() static final String DC_2_NAME = "dc2" TestingServer zookeeper1 From ba9013dc7cdbd2777e422f662adae091999bfca1 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Thu, 4 Jul 2019 12:07:55 +0200 Subject: [PATCH 11/12] Auto switching to read only mode in hermes-management #1052 - make HealthCheckResult private --- .../tech/hermes/management/domain/health/HealthCheckTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java 
b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java index 1422798550..b0da7df969 100644 --- a/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java +++ b/hermes-management/src/main/java/pl/allegro/tech/hermes/management/domain/health/HealthCheckTask.java @@ -59,7 +59,7 @@ private void updateMode(List healthCheckResults) { } } - enum HealthCheckResult { + private enum HealthCheckResult { HEALTHY, UNHEALTHY } } From 8de06ea94c123103637debf2abf54eaaff198610 Mon Sep 17 00:00:00 2001 From: Jacek Ewertowski Date: Fri, 5 Jul 2019 11:17:24 +0200 Subject: [PATCH 12/12] Auto switching to read only mode in hermes-management #1052 - updated CHANGELOG.md --- CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3e183ad4e..efeedac991 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,27 @@ ### ... +## 1.1.1 (05.07.2019) + +### Enhancements + +#### ([1052](https://github.com/allegro/hermes/issues/1052)) Auto switching to read only mode in hermes-management + +Hermes-management verifies whether all ZooKeeper clusters are available. + +It periodically writes a timestamp to each of them. + +When the timestamp write fails on any of the ZooKeeper clusters, hermes-management switches into read-only mode. + +This feature is disabled by default. Enable with: + +```yaml +management: + health: + periodSeconds: 30 + enabled: true +``` + ## 1.1.0 (02.07.2019) ### Enhancements