diff --git a/artemis-commons/src/test/java/org/apache/activemq/artemis/utils/ThreadLeakCheckRule.java b/artemis-commons/src/test/java/org/apache/activemq/artemis/utils/ThreadLeakCheckRule.java index 3d6082bbde0..bd7f80866fa 100644 --- a/artemis-commons/src/test/java/org/apache/activemq/artemis/utils/ThreadLeakCheckRule.java +++ b/artemis-commons/src/test/java/org/apache/activemq/artemis/utils/ThreadLeakCheckRule.java @@ -270,6 +270,18 @@ private boolean isExpectedThread(Thread thread) { } else if (threadName.contains("ObjectCleanerThread")) { // Required since upgrade to Netty 4.1.22 maybe because https://github.com/netty/netty/commit/739e70398ccb6b11ffa97c6b5f8d55e455a2165e return true; + } else if (threadName.contains("RMI TCP")) { + return true; + } else if (threadName.contains("RMI Scheduler")) { + return true; + } else if (threadName.contains("RMI RenewClean")) { + return true; + } else if (threadName.contains("Signal Dispatcher")) { + return true; + } else if (threadName.contains("ForkJoinPool.commonPool")) { + return true; + } else if (threadName.contains("GC Daemon")) { + return true; } else { for (StackTraceElement element : thread.getStackTrace()) { if (element.getClassName().contains("org.jboss.byteman.agent.TransformListener")) { diff --git a/artemis-core-client/src/main/java/org/apache/activemq/artemis/api/config/ActiveMQDefaultConfiguration.java b/artemis-core-client/src/main/java/org/apache/activemq/artemis/api/config/ActiveMQDefaultConfiguration.java index f5771398a33..41351a2ea34 100644 --- a/artemis-core-client/src/main/java/org/apache/activemq/artemis/api/config/ActiveMQDefaultConfiguration.java +++ b/artemis-core-client/src/main/java/org/apache/activemq/artemis/api/config/ActiveMQDefaultConfiguration.java @@ -264,6 +264,9 @@ public static String getDefaultHapolicyBackupStrategy() { // the directory to store the journal files in private static String DEFAULT_JOURNAL_DIR = "data/journal"; + // the directory to store the data files in + private 
static String DEFAULT_DATA_DIR = "data"; + // true means that the journal directory will be created private static boolean DEFAULT_CREATE_JOURNAL_DIR = true; @@ -627,6 +630,8 @@ public static String getDefaultHapolicyBackupStrategy() { public static final String DEFAULT_TEMPORARY_QUEUE_NAMESPACE = ""; + private static final String DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME = "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager"; + // Number of concurrent workers for a core bridge public static int DEFAULT_BRIDGE_CONCURRENCY = 1; @@ -938,6 +943,13 @@ public static String getDefaultJournalDir() { return DEFAULT_JOURNAL_DIR; } + /** + * the directory to store the data files in + */ + public static String getDefaultDataDir() { + return DEFAULT_DATA_DIR; + } + /** * true means that the journal directory will be created */ @@ -1721,6 +1733,10 @@ public static String getDefaultTemporaryQueueNamespace() { return DEFAULT_TEMPORARY_QUEUE_NAMESPACE; } + public static String getDefaultDistributedPrimitiveManagerClassName() { + return DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME; + } + public static int getDefaultBridgeConcurrency() { return DEFAULT_BRIDGE_CONCURRENCY; } diff --git a/artemis-distribution/pom.xml b/artemis-distribution/pom.xml index eae42a6ca4d..443362a98f8 100644 --- a/artemis-distribution/pom.xml +++ b/artemis-distribution/pom.xml @@ -231,6 +231,17 @@ ${project.version} javadoc + + + org.apache.activemq + artemis-quorum-api + ${project.version} + + + org.apache.activemq + artemis-quorum-ri + ${project.version} + io.netty netty-all diff --git a/artemis-distribution/src/main/assembly/dep.xml b/artemis-distribution/src/main/assembly/dep.xml index 018266dc87b..f4d240fe00a 100644 --- a/artemis-distribution/src/main/assembly/dep.xml +++ b/artemis-distribution/src/main/assembly/dep.xml @@ -62,6 +62,9 @@ org.apache.activemq.rest:artemis-rest org.apache.qpid:qpid-jms-client io.micrometer:micrometer-core + + 
org.apache.activemq:artemis-quorum-api + org.apache.activemq:artemis-quorum-ri jakarta.jms:jakarta.jms-api @@ -97,6 +100,12 @@ com.sun.xml.bind:jaxb-impl jakarta.activation:jakarta.activation-api jakarta.security.auth.message:jakarta.security.auth.message-api + + org.apache.curator:curator-recipes + org.apache.curator:curator-client + org.apache.curator:curator-framework + org.apache.zookeeper:zookeeper + org.apache.zookeeper:zookeeper-jute mvn:org.apache.activemq/activemq-artemis-native/${activemq-artemis-native-version} + mvn:org.apache.activemq/artemis-quorum-api/${pom.version} mvn:org.apache.activemq/artemis-server-osgi/${pom.version} diff --git a/artemis-quorum-api/pom.xml b/artemis-quorum-api/pom.xml new file mode 100644 index 00000000000..febbe5e871c --- /dev/null +++ b/artemis-quorum-api/pom.xml @@ -0,0 +1,41 @@ + + + 4.0.0 + + + org.apache.activemq + artemis-pom + 2.18.0-SNAPSHOT + + + artemis-quorum-api + bundle + ActiveMQ Artemis Quorum API + + + ${project.basedir}/.. + + + + + com.google.errorprone + error_prone_core + + + \ No newline at end of file diff --git a/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/DistributedLock.java b/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/DistributedLock.java new file mode 100644 index 00000000000..ac83a2c859f --- /dev/null +++ b/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/DistributedLock.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum; + +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; + +public interface DistributedLock extends AutoCloseable { + + String getLockId(); + + boolean isHeldByCaller() throws UnavailableStateException; + + boolean tryLock() throws UnavailableStateException, InterruptedException; + + default boolean tryLock(long timeout, TimeUnit unit) throws UnavailableStateException, InterruptedException { + // it doesn't make sense to be super fast + final long TARGET_FIRE_PERIOD_NS = TimeUnit.MILLISECONDS.toNanos(250); + if (timeout < 0) { + throw new IllegalArgumentException("timeout cannot be negative"); + } + Objects.requireNonNull(unit); + if (timeout == 0) { + return tryLock(); + } + final Thread currentThread = Thread.currentThread(); + final long timeoutNs = unit.toNanos(timeout); + final long start = System.nanoTime(); + final long deadline = start + timeoutNs; + long expectedNextFireTime = start; + while (!currentThread.isInterrupted()) { + long parkNs = expectedNextFireTime - System.nanoTime(); + while (parkNs > 0) { + LockSupport.parkNanos(parkNs); + if (currentThread.isInterrupted()) { + throw new InterruptedException(); + } + final long now = System.nanoTime(); + parkNs = expectedNextFireTime - now; + } + if (tryLock()) { + return true; + } + final long now = System.nanoTime(); + final long remainingTime = deadline - now; + if (remainingTime <= 0) { + return false; + } + if (remainingTime < TARGET_FIRE_PERIOD_NS) { + expectedNextFireTime = now; 
+ } else { + expectedNextFireTime += TARGET_FIRE_PERIOD_NS; + } + } + throw new InterruptedException(); + } + + void unlock() throws UnavailableStateException; + + void addListener(UnavailableLockListener listener); + + void removeListener(UnavailableLockListener listener); + + @FunctionalInterface + interface UnavailableLockListener { + + void onUnavailableLockEvent(); + } + + @Override + void close(); +} diff --git a/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/DistributedPrimitiveManager.java b/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/DistributedPrimitiveManager.java new file mode 100644 index 00000000000..8906a1c856c --- /dev/null +++ b/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/DistributedPrimitiveManager.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.quorum; + +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +public interface DistributedPrimitiveManager extends AutoCloseable { + + static DistributedPrimitiveManager newInstanceOf(String className, Map properties) throws Exception { + return (DistributedPrimitiveManager) Class.forName(className).getDeclaredConstructor(Map.class).newInstance(properties); + } + + @FunctionalInterface + interface UnavailableManagerListener { + + void onUnavailableManagerEvent(); + } + + void addUnavailableManagerListener(UnavailableManagerListener listener); + + void removeUnavailableManagerListener(UnavailableManagerListener listener); + + boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException; + + void start() throws InterruptedException, ExecutionException; + + boolean isStarted(); + + void stop(); + + DistributedLock getDistributedLock(String lockId) throws InterruptedException, ExecutionException, TimeoutException; + + @Override + default void close() { + stop(); + } +} \ No newline at end of file diff --git a/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/UnavailableStateException.java b/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/UnavailableStateException.java new file mode 100644 index 00000000000..2ae88c657f6 --- /dev/null +++ b/artemis-quorum-api/src/main/java/org/apache/activemq/artemis/quorum/UnavailableStateException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum; + +public final class UnavailableStateException extends Exception { + + public UnavailableStateException() { + super(); + } + + public UnavailableStateException(String message) { + super(message); + } + + public UnavailableStateException(String message, Throwable cause) { + super(message, cause); + } + + public UnavailableStateException(Throwable cause) { + super(cause); + } +} diff --git a/artemis-quorum-ri/pom.xml b/artemis-quorum-ri/pom.xml new file mode 100644 index 00000000000..83c81678af4 --- /dev/null +++ b/artemis-quorum-ri/pom.xml @@ -0,0 +1,124 @@ + + + 4.0.0 + + + org.apache.activemq + artemis-pom + 2.18.0-SNAPSHOT + + + artemis-quorum-ri + jar + ActiveMQ Artemis Quorum RI + + + ${project.basedir}/.. 
+ + + + + org.apache.curator + curator-recipes + + + org.apache.curator + curator-client + + + org.apache.zookeeper + zookeeper + + + org.apache.curator + curator-test + ${curator.version} + + + org.apache.activemq + artemis-quorum-api + ${project.version} + + + org.jboss.logging + jboss-logging + + + org.apache.activemq + artemis-commons + ${project.version} + + + com.google.errorprone + error_prone_core + + + + junit + junit + test + + + org.hamcrest + hamcrest + ${hamcrest.version} + test + + + + org.jboss.logging + jboss-logging-processor + provided + true + + + org.jboss.logmanager + jboss-logmanager + test + + + org.wildfly.common + wildfly-common + test + + + org.apache.activemq + artemis-commons + ${project.version} + test + test-jar + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + test + + test-jar + + + + + + + \ No newline at end of file diff --git a/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/file/FileBasedPrimitiveManager.java b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/file/FileBasedPrimitiveManager.java new file mode 100644 index 00000000000..99eaf160cf0 --- /dev/null +++ b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/file/FileBasedPrimitiveManager.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum.file; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.quorum.DistributedLock; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; + +/** + * This is an implementation suitable to be used just on unit tests and it won't attempt + * to manage nor purge existing stale locks files. It's part of the tests life-cycle to properly + * set-up and tear-down the environment. + */ +public class FileBasedPrimitiveManager implements DistributedPrimitiveManager { + + private final File locksFolder; + private final Map locks; + private boolean started; + + public FileBasedPrimitiveManager(Map args) { + this(new File(args.get("locks-folder"))); + } + + public FileBasedPrimitiveManager(File locksFolder) { + Objects.requireNonNull(locksFolder); + if (!locksFolder.exists()) { + throw new IllegalStateException(locksFolder + " is supposed to already exists"); + } + if (!locksFolder.isDirectory()) { + throw new IllegalStateException(locksFolder + " is supposed to be a directory"); + } + this.locksFolder = locksFolder; + this.locks = new HashMap<>(); + } + + @Override + public boolean isStarted() { + return started; + } + + @Override + public void addUnavailableManagerListener(UnavailableManagerListener listener) { + // noop + } + + @Override + public void removeUnavailableManagerListener(UnavailableManagerListener listener) { + // noop + } + + @Override + public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException { + if (timeout >= 0) { + Objects.requireNonNull(unit); + } + if (started) { + return true; + } + started = true; + return true; + } + + @Override + public void 
start() throws InterruptedException, ExecutionException { + start(-1, null); + } + + @Override + public void stop() { + if (!started) { + return; + } + try { + locks.forEach((lockId, lock) -> { + try { + lock.close(false); + } catch (Throwable t) { + // TODO no op for now: log would be better! + } + }); + locks.clear(); + } finally { + started = false; + } + } + + @Override + public DistributedLock getDistributedLock(String lockId) throws ExecutionException { + Objects.requireNonNull(lockId); + if (!started) { + throw new IllegalStateException("manager should be started first"); + } + final FileDistributedLock lock = locks.get(lockId); + if (lock != null && !lock.isClosed()) { + return lock; + } + try { + final FileDistributedLock newLock = new FileDistributedLock(locks::remove, locksFolder, lockId); + locks.put(lockId, newLock); + return newLock; + } catch (IOException ioEx) { + throw new ExecutionException(ioEx); + } + } +} diff --git a/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/file/FileDistributedLock.java b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/file/FileDistributedLock.java new file mode 100644 index 00000000000..5e749698d63 --- /dev/null +++ b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/file/FileDistributedLock.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum.file; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; +import java.nio.channels.OverlappingFileLockException; +import java.nio.file.StandardOpenOption; +import java.util.function.Consumer; + +import org.apache.activemq.artemis.quorum.DistributedLock; + +final class FileDistributedLock implements DistributedLock { + + private final String lockId; + private final Consumer onClosedLock; + private boolean closed; + private FileLock fileLock; + private final FileChannel channel; + + FileDistributedLock(Consumer onClosedLock, File locksFolder, String lockId) throws IOException { + this.onClosedLock = onClosedLock; + this.lockId = lockId; + this.closed = false; + this.fileLock = null; + this.channel = FileChannel.open(new File(locksFolder, lockId).toPath(), StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE); + } + + private void checkNotClosed() { + if (closed) { + throw new IllegalStateException("This lock is closed"); + } + } + + @Override + public String getLockId() { + checkNotClosed(); + return lockId; + } + + @Override + public boolean isHeldByCaller() { + checkNotClosed(); + final FileLock fileLock = this.fileLock; + if (fileLock == null) { + return false; + } + return fileLock.isValid(); + } + + @Override + public boolean tryLock() { + checkNotClosed(); + final FileLock fileLock = this.fileLock; + if (fileLock != null) { + throw new IllegalStateException("unlock first"); + } + final FileLock lock; 
+ try { + lock = channel.tryLock(); + } catch (OverlappingFileLockException o) { + // this process already hold this lock, but not this manager + return false; + } catch (Throwable t) { + throw new IllegalStateException(t); + } + if (lock == null) { + return false; + } + this.fileLock = lock; + return true; + } + + @Override + public void unlock() { + checkNotClosed(); + final FileLock fileLock = this.fileLock; + if (fileLock != null) { + this.fileLock = null; + try { + fileLock.close(); + } catch (IOException e) { + // noop + } + } + } + + @Override + public void addListener(UnavailableLockListener listener) { + checkNotClosed(); + // noop + } + + @Override + public void removeListener(UnavailableLockListener listener) { + checkNotClosed(); + // noop + } + + public boolean isClosed() { + return closed; + } + + public void close(boolean useCallback) { + if (closed) { + return; + } + try { + if (useCallback) { + onClosedLock.accept(lockId); + } + unlock(); + channel.close(); + } catch (IOException e) { + // ignore it + } finally { + closed = true; + } + } + + @Override + public void close() { + close(true); + } +} diff --git a/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedLock.java b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedLock.java new file mode 100644 index 00000000000..b3126dd68ee --- /dev/null +++ b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedLock.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum.zookeeper; + +import java.util.Arrays; +import java.util.UUID; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +import org.apache.activemq.artemis.quorum.DistributedLock; +import org.apache.activemq.artemis.quorum.UnavailableStateException; +import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2; +import org.apache.curator.framework.recipes.locks.Lease; + +final class CuratorDistributedLock implements DistributedLock { + + // this is used to prevent deadlocks on close + private final CuratorDistributedPrimitiveManager manager; + private final String lockId; + private final InterProcessSemaphoreV2 ipcSem; + private final Consumer onClose; + private final CopyOnWriteArrayList listeners; + private Lease lease; + private boolean unavailable; + private boolean closed; + private byte[] leaseVersion; + + CuratorDistributedLock(CuratorDistributedPrimitiveManager manager, + String lockId, + InterProcessSemaphoreV2 ipcSem, + Consumer onClose) { + this.manager = manager; + this.lockId = lockId; + this.ipcSem = ipcSem; + this.onClose = onClose; + this.listeners = new CopyOnWriteArrayList<>(); + this.closed = false; + this.unavailable = false; + this.leaseVersion = null; + } + + protected void onReconnected() { + synchronized (manager) { + if (closed || unavailable) { + return; + } + if (leaseVersion != null) { + assert lease != null; + try { + if (Arrays.equals(lease.getData(), leaseVersion)) { + return; + } + 
onLost(); + } catch (Exception e) { + onLost(); + } + } + } + } + + protected void onLost() { + synchronized (manager) { + if (closed || unavailable) { + return; + } + lease = null; + leaseVersion = null; + unavailable = true; + for (UnavailableLockListener listener : listeners) { + listener.onUnavailableLockEvent(); + } + } + } + + protected void onSuspended() { + synchronized (manager) { + if (closed || unavailable) { + return; + } + } + } + + @Override + public String getLockId() { + return lockId; + } + + private void checkNotClosed() { + if (closed) { + throw new IllegalStateException("This lock is closed"); + } + } + + @Override + public boolean isHeldByCaller() throws UnavailableStateException { + synchronized (manager) { + manager.checkHandlingEvents(); + checkNotClosed(); + if (unavailable) { + throw new UnavailableStateException(lockId + " lock state isn't available"); + } + if (lease == null) { + return false; + } + assert leaseVersion != null; + try { + return Arrays.equals(lease.getData(), leaseVersion); + } catch (Throwable t) { + throw new UnavailableStateException(t); + } + } + } + + @Override + public boolean tryLock() throws UnavailableStateException, InterruptedException { + synchronized (manager) { + manager.checkHandlingEvents(); + checkNotClosed(); + if (lease != null) { + throw new IllegalStateException("unlock first"); + } + if (unavailable) { + throw new UnavailableStateException(lockId + " lock state isn't available"); + } + try { + final byte[] leaseVersion = UUID.randomUUID().toString().getBytes(); + ipcSem.setNodeData(leaseVersion); + lease = ipcSem.acquire(0, TimeUnit.NANOSECONDS); + if (lease == null) { + ipcSem.setNodeData(null); + return false; + } + this.leaseVersion = leaseVersion; + assert Arrays.equals(lease.getData(), leaseVersion); + return true; + } catch (InterruptedException ie) { + throw ie; + } catch (Throwable e) { + throw new UnavailableStateException(e); + } + } + } + + @Override + public void unlock() throws 
UnavailableStateException { + synchronized (manager) { + manager.checkHandlingEvents(); + checkNotClosed(); + if (unavailable) { + throw new UnavailableStateException(lockId + " lock state isn't available"); + } + final Lease lease = this.lease; + if (lease != null) { + this.lease = null; + this.leaseVersion = null; + try { + ipcSem.returnLease(lease); + } catch (Throwable e) { + throw new UnavailableStateException(e); + } + } + } + } + + @Override + public void addListener(UnavailableLockListener listener) { + synchronized (manager) { + manager.checkHandlingEvents(); + checkNotClosed(); + listeners.add(listener); + if (unavailable) { + manager.startHandlingEvents(); + try { + listener.onUnavailableLockEvent(); + } finally { + manager.completeHandlingEvents(); + } + } + } + } + + @Override + public void removeListener(UnavailableLockListener listener) { + synchronized (manager) { + manager.checkHandlingEvents(); + checkNotClosed(); + listeners.remove(listener); + } + } + + public void close(boolean useCallback) { + synchronized (manager) { + manager.checkHandlingEvents(); + if (closed) { + return; + } + closed = true; + listeners.clear(); + if (useCallback) { + onClose.accept(this); + } + final Lease lease = this.lease; + if (lease == null) { + return; + } + this.lease = null; + if (unavailable) { + return; + } + try { + ipcSem.returnLease(lease); + } catch (Throwable t) { + // TODO silent, but debug ;) + } + } + } + + @Override + public void close() { + close(true); + } +} diff --git a/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedPrimitiveManager.java b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedPrimitiveManager.java new file mode 100644 index 00000000000..2cc3102592f --- /dev/null +++ b/artemis-quorum-ri/src/main/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedPrimitiveManager.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum.zookeeper; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.activemq.artemis.quorum.DistributedLock; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2; +import org.apache.curator.framework.state.ConnectionState; +import org.apache.curator.framework.state.ConnectionStateListener; +import org.apache.curator.retry.RetryForever; +import org.apache.curator.retry.RetryNTimes; + +import static java.util.stream.Collectors.joining; + +public class CuratorDistributedPrimitiveManager implements DistributedPrimitiveManager, ConnectionStateListener { + + private static final String CONNECT_STRING_PARAM = "connect-string"; + private static final String 
NAMESPACE_PARAM = "namespace"; + private static final String SESSION_MS_PARAM = "session-ms"; + private static final String SESSION_PERCENT_PARAM = "session-percent"; + private static final String CONNECTION_MS_PARAM = "connection-ms"; + private static final String RETRIES_PARAM = "retries"; + private static final String RETRIES_MS_PARAM = "retries-ms"; + private static final Set VALID_PARAMS = Stream.of( + CONNECT_STRING_PARAM, + NAMESPACE_PARAM, + SESSION_MS_PARAM, + SESSION_PERCENT_PARAM, + CONNECTION_MS_PARAM, + RETRIES_PARAM, + RETRIES_MS_PARAM).collect(Collectors.toSet()); + private static final String VALID_PARAMS_ON_ERROR = VALID_PARAMS.stream().collect(joining(",")); + // It's 9 times the default ZK tick time ie 2000 ms + private static final String DEFAULT_SESSION_TIMEOUT_MS = Integer.toString(18_000); + private static final String DEFAULT_CONNECTION_TIMEOUT_MS = Integer.toString(8_000); + private static final String DEFAULT_RETRIES = Integer.toString(1); + private static final String DEFAULT_RETRIES_MS = Integer.toString(1000); + // why 1/3 of the session? 
https://cwiki.apache.org/confluence/display/CURATOR/TN14 + private static final String DEFAULT_SESSION_PERCENT = Integer.toString(33); + + private static Map validateParameters(Map config) { + config.forEach((parameterName, ignore) -> validateParameter(parameterName)); + return config; + } + + private static void validateParameter(String parameterName) { + if (!VALID_PARAMS.contains(parameterName)) { + throw new IllegalArgumentException("non existent parameter " + parameterName + ": accepted list is " + VALID_PARAMS_ON_ERROR); + } + } + + private volatile CuratorFramework client; + private final Map locks; + private CopyOnWriteArrayList listeners; + private boolean unavailable; + private boolean handlingEvents; + private final CuratorFrameworkFactory.Builder curatorBuilder; + + public CuratorDistributedPrimitiveManager(Map config) { + this(validateParameters(config), true); + } + + private CuratorDistributedPrimitiveManager(Map config, boolean ignore) { + this(config.get(CONNECT_STRING_PARAM), + config.get(NAMESPACE_PARAM), + Integer.parseInt(config.getOrDefault(SESSION_MS_PARAM, DEFAULT_SESSION_TIMEOUT_MS)), + Integer.parseInt(config.getOrDefault(SESSION_PERCENT_PARAM, DEFAULT_SESSION_PERCENT)), + Integer.parseInt(config.getOrDefault(CONNECTION_MS_PARAM, DEFAULT_CONNECTION_TIMEOUT_MS)), + Integer.parseInt(config.getOrDefault(RETRIES_PARAM, DEFAULT_RETRIES)), + Integer.parseInt(config.getOrDefault(RETRIES_MS_PARAM, DEFAULT_RETRIES_MS))); + } + + private CuratorDistributedPrimitiveManager(String connectString, + String namespace, + int sessionMs, + int sessionPercent, + int connectionMs, + int retries, + int retriesMs) { + curatorBuilder = CuratorFrameworkFactory.builder() + .connectString(connectString) + .namespace(namespace) + .sessionTimeoutMs(sessionMs) + .connectionTimeoutMs(connectionMs) + .retryPolicy(retries >= 0 ? 
new RetryNTimes(retries, retriesMs) : new RetryForever(retriesMs)) /* a negative retries value selects retry-forever */ + .simulatedSessionExpirationPercent(sessionPercent); + this.locks = new HashMap<>(); + this.listeners = null; + this.unavailable = false; + this.handlingEvents = false; + } + + @Override + public synchronized boolean isStarted() { /* started means a connected client is currently installed */ + checkHandlingEvents(); + return client != null; + } + + @Override + public synchronized void addUnavailableManagerListener(UnavailableManagerListener listener) { /* registers the listener; silently ignored while the manager is stopped (listeners == null) */ + checkHandlingEvents(); + if (listeners == null) { + return; + } + listeners.add(listener); + if (unavailable) { /* the manager is already unavailable: notify the late listener immediately */ + handlingEvents = true; /* raise the guard so the callback cannot re-enter the manager */ + try { + listener.onUnavailableManagerEvent(); + } finally { + handlingEvents = false; /* always lower the guard again; leaving it raised makes every later call fail in checkHandlingEvents() */ + } + } + } + + @Override + public synchronized void removeUnavailableManagerListener(UnavailableManagerListener listener) { /* unregisters the listener; no-op while the manager is stopped */ + checkHandlingEvents(); + if (listeners == null) { + return; + } + listeners.remove(listener); + } + + @Override + public synchronized boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException { /* returns true when a connected client is installed, false when the connection attempt timed out */ + checkHandlingEvents(); + if (timeout >= 0) { /* a negative timeout skips validation and is passed through as-is */ + if (timeout > Integer.MAX_VALUE) { + throw new IllegalArgumentException("curator manager won't support too long timeout ie >" + Integer.MAX_VALUE); + } + Objects.requireNonNull(unit); + } + if (client != null) { /* already started */ + return true; + } + final CuratorFramework client = curatorBuilder.build(); + try { + client.start(); + if (!client.blockUntilConnected((int) timeout, unit)) { + client.close(); /* do not keep a client that never connected */ + return false; + } + this.client = client; + this.listeners = new CopyOnWriteArrayList<>(); + client.getConnectionStateListenable().addListener(this); + return true; + } catch (InterruptedException e) { + client.close(); + throw e; + } + } + + @Override + public synchronized void start() throws InterruptedException, ExecutionException { /* delegates with timeout -1 and a null unit; presumably this waits without a deadline - confirm against Curator's blockUntilConnected */ + start(-1, null); + } + + @Override + public synchronized void stop() { + checkHandlingEvents(); + final CuratorFramework client = this.client; + if (client == null) { 
return; + } + this.client = null; /* from here on isStarted() reports false and checkHandlingEvents() becomes a no-op */ + unavailable = false; + listeners.clear(); + this.listeners = null; + client.getConnectionStateListenable().removeListener(this); + locks.forEach((lockId, lock) -> { /* best effort: a failure while closing one lock must not prevent closing the others */ + try { + lock.close(false); + } catch (Throwable t) { + // TODO log? + } + }); + locks.clear(); + client.close(); + } + + @Override() + public synchronized DistributedLock getDistributedLock(String lockId) { /* returns the cached lock for lockId or creates and caches a new one; requires a started manager */ + checkHandlingEvents(); + Objects.requireNonNull(lockId); + if (client == null) { + throw new IllegalStateException("manager isn't started yet!"); + } + final CuratorDistributedLock lock = locks.get(lockId); + if (lock != null) { + return lock; + } + final Consumer onCloseLock = closedLock -> { /* callback given to the lock: removes exactly that lock instance from the cache */ + synchronized (this) { + final boolean alwaysTrue = locks.remove(closedLock.getLockId(), closedLock); + assert alwaysTrue; + } + }; + final CuratorDistributedLock newLock = new CuratorDistributedLock(this, lockId, new InterProcessSemaphoreV2(client, "/locks/" + lockId, 1), onCloseLock); + locks.put(lockId, newLock); + if (unavailable) { /* a lock created while the manager is already unavailable is told so immediately */ + handlingEvents = true; + try { + newLock.onLost(); + } finally { + handlingEvents = false; + } + } + return newLock; + } + + protected void startHandlingEvents() { + handlingEvents = true; + } + + protected void completeHandlingEvents() { + handlingEvents = false; + } + + protected void checkHandlingEvents() { /* guard invoked at the top of the public methods: throws when called while callbacks are being dispatched */ + if (client == null) { /* not started: nothing to guard */ + return; + } + if (handlingEvents) { + throw new IllegalStateException("UnavailableManagerListener isn't supposed to modify the manager or its primitives on event handling!"); + } + } + + @Override + public synchronized void stateChanged(CuratorFramework client, ConnectionState newState) { + if (this.client != client) { /* event from a client that is not the current one: ignore */ + return; + } + if (unavailable) { /* once unavailable, further state changes are ignored */ + return; + } + handlingEvents = true; + try { + switch (newState) { + case LOST: + unavailable = true; + listeners.forEach(listener -> listener.onUnavailableManagerEvent()); + locks.forEach((s, curatorDistributedLock) -> curatorDistributedLock.onLost()); 
+ break; + case RECONNECTED: + locks.forEach((s, curatorDistributedLock) -> curatorDistributedLock.onReconnected()); + break; + case SUSPENDED: + locks.forEach((s, curatorDistributedLock) -> curatorDistributedLock.onSuspended()); + break; + } + } finally { + handlingEvents = false; + } + } +} diff --git a/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/DistributedLockTest.java b/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/DistributedLockTest.java new file mode 100644 index 00000000000..e3cf690a505 --- /dev/null +++ b/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/DistributedLockTest.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.quorum; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.Consumer; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; + +public abstract class DistributedLockTest { + + private final ArrayList closeables = new ArrayList<>(); + + @Before + public void setupEnv() throws Throwable { + } + + protected abstract void configureManager(Map config); + + protected abstract String managerClassName(); + + @After + public void tearDownEnv() throws Throwable { + closeables.forEach(closeables -> { + try { + closeables.close(); + } catch (Throwable t) { + // silent here + } + }); + } + + protected DistributedPrimitiveManager createManagedDistributeManager() { + return createManagedDistributeManager(stringStringMap -> { + }); + } + + protected DistributedPrimitiveManager createManagedDistributeManager(Consumer> defaultConfiguration) { + try { + final HashMap config = new HashMap<>(); + configureManager(config); + defaultConfiguration.accept(config); + final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config); + closeables.add(manager); + return manager; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void managerReturnsSameLockIfNotClosed() throws ExecutionException, InterruptedException, TimeoutException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + Assert.assertSame(manager.getDistributedLock("a"), manager.getDistributedLock("a")); + } + + 
@Test(expected = IllegalStateException.class) + public void managerCannotGetLockIfNotStarted() throws ExecutionException, InterruptedException, TimeoutException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.getDistributedLock("a"); + } + + @Test(expected = NullPointerException.class) + public void managerCannotGetLockWithNullLockId() throws ExecutionException, InterruptedException, TimeoutException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + manager.getDistributedLock(null); + } + + @Test + public void managerStopUnlockLocks() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + Assert.assertTrue(manager.getDistributedLock("a").tryLock()); + Assert.assertTrue(manager.getDistributedLock("b").tryLock()); + manager.stop(); + manager.start(); + Assert.assertFalse(manager.getDistributedLock("a").isHeldByCaller()); + Assert.assertFalse(manager.getDistributedLock("b").isHeldByCaller()); + } + + @Test + public void acquireAndReleaseLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedLock lock = manager.getDistributedLock("a"); + Assert.assertFalse(lock.isHeldByCaller()); + Assert.assertTrue(lock.tryLock()); + Assert.assertTrue(lock.isHeldByCaller()); + lock.unlock(); + Assert.assertFalse(lock.isHeldByCaller()); + } + + @Test(expected = IllegalStateException.class) + public void cannotAcquireSameLockTwice() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedLock lock = manager.getDistributedLock("a"); + Assert.assertTrue(lock.tryLock()); + 
lock.tryLock(); + } + + @Test + public void heldLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager ownerManager = createManagedDistributeManager(); + DistributedPrimitiveManager observerManager = createManagedDistributeManager(); + ownerManager.start(); + observerManager.start(); + Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock()); + Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller()); + Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller()); + } + + @Test + public void unlockedLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager ownerManager = createManagedDistributeManager(); + DistributedPrimitiveManager observerManager = createManagedDistributeManager(); + ownerManager.start(); + observerManager.start(); + Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock()); + ownerManager.getDistributedLock("a").unlock(); + Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller()); + Assert.assertFalse(ownerManager.getDistributedLock("a").isHeldByCaller()); + } + + @Test + public void cannotAcquireSameLockFromDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager ownerManager = createManagedDistributeManager(); + DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager(); + ownerManager.start(); + notOwnerManager.start(); + Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock()); + Assert.assertFalse(notOwnerManager.getDistributedLock("a").tryLock()); + } + + @Test + public void cannotUnlockFromNotOwnerManager() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager ownerManager = 
createManagedDistributeManager(); + DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager(); + ownerManager.start(); + notOwnerManager.start(); + Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock()); + notOwnerManager.getDistributedLock("a").unlock(); + Assert.assertFalse(notOwnerManager.getDistributedLock("a").isHeldByCaller()); + Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller()); + } + + @Test + public void timedTryLockSucceedWithShortTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedLock backgroundLock = manager.getDistributedLock("a"); + Assert.assertTrue(backgroundLock.tryLock(1, TimeUnit.NANOSECONDS)); + } + + @Test + public void timedTryLockFailAfterTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedPrimitiveManager otherManager = createManagedDistributeManager(); + otherManager.start(); + Assert.assertTrue(otherManager.getDistributedLock("a").tryLock()); + final long start = System.nanoTime(); + final long timeoutSec = 1; + Assert.assertFalse(manager.getDistributedLock("a").tryLock(timeoutSec, TimeUnit.SECONDS)); + final long elapsed = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start); + assertThat(elapsed, greaterThanOrEqualTo(timeoutSec)); + } + + @Test + public void timedTryLockSuccess() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedPrimitiveManager otherManager = createManagedDistributeManager(); + otherManager.start(); + Assert.assertTrue(otherManager.getDistributedLock("a").tryLock()); + DistributedLock backgroundLock 
= manager.getDistributedLock("a"); + CompletableFuture acquired = new CompletableFuture<>(); + CountDownLatch startedTry = new CountDownLatch(1); + Thread tryLockThread = new Thread(() -> { + startedTry.countDown(); + try { + if (!backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS)) { + acquired.complete(false); + } else { + acquired.complete(true); + } + } catch (Throwable e) { + acquired.complete(false); + } + }); + tryLockThread.start(); + Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS)); + otherManager.getDistributedLock("a").unlock(); + Assert.assertTrue(acquired.get(4, TimeUnit.SECONDS)); + } + + @Test + public void interruptStopTimedTryLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedPrimitiveManager otherManager = createManagedDistributeManager(); + otherManager.start(); + Assert.assertTrue(otherManager.getDistributedLock("a").tryLock()); + DistributedLock backgroundLock = manager.getDistributedLock("a"); + CompletableFuture interrupted = new CompletableFuture<>(); + CountDownLatch startedTry = new CountDownLatch(1); + Thread tryLockThread = new Thread(() -> { + startedTry.countDown(); + try { + backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS); + interrupted.complete(false); + } catch (UnavailableStateException e) { + interrupted.complete(false); + } catch (InterruptedException e) { + interrupted.complete(true); + } + }); + tryLockThread.start(); + Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS)); + // let background lock to perform some tries + TimeUnit.SECONDS.sleep(1); + tryLockThread.interrupt(); + Assert.assertTrue(interrupted.get(4, TimeUnit.SECONDS)); + } + +} + diff --git a/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/file/FileDistributedLockTest.java 
b/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/file/FileDistributedLockTest.java new file mode 100644 index 00000000000..c565c6b4110 --- /dev/null +++ b/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/file/FileDistributedLockTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.quorum.file; + +import java.io.File; +import java.lang.reflect.InvocationTargetException; +import java.util.Collections; +import java.util.Map; + +import org.apache.activemq.artemis.quorum.DistributedLockTest; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class FileDistributedLockTest extends DistributedLockTest { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private File locksFolder; + + @Before + @Override + public void setupEnv() throws Throwable { + locksFolder = tmpFolder.newFolder("locks-folder"); + super.setupEnv(); + } + + @Override + protected void configureManager(Map config) { + config.put("locks-folder", locksFolder.toString()); + } + + @Override + protected String managerClassName() { + return FileBasedPrimitiveManager.class.getName(); + } + + @Test + public void reflectiveManagerCreation() throws Exception { + DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", locksFolder.toString())); + } + + @Test(expected = InvocationTargetException.class) + public void reflectiveManagerCreationFailWithoutLocksFolder() throws Exception { + DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.emptyMap()); + } + + @Test(expected = InvocationTargetException.class) + public void reflectiveManagerCreationFailIfLocksFolderIsNotFolder() throws Exception { + DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", tmpFolder.newFile().toString())); + } + +} diff --git a/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedLockTest.java b/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedLockTest.java new file mode 100644 index 00000000000..571f27eda0c --- 
/dev/null +++ b/artemis-quorum-ri/src/test/java/org/apache/activemq/artemis/quorum/zookeeper/CuratorDistributedLockTest.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.quorum.zookeeper; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +import com.google.common.base.Predicates; +import org.apache.activemq.artemis.quorum.DistributedLock; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; +import org.apache.activemq.artemis.quorum.UnavailableStateException; +import org.apache.activemq.artemis.utils.Wait; +import org.apache.curator.test.InstanceSpec; +import org.apache.curator.test.TestingCluster; + +import org.apache.activemq.artemis.quorum.DistributedLockTest; +import org.apache.curator.test.TestingZooKeeperServer; +import org.junit.Assert; +import org.junit.Assume; 
+import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static java.lang.Boolean.TRUE; +import static org.hamcrest.Matchers.greaterThan; + +@RunWith(value = Parameterized.class) +public class CuratorDistributedLockTest extends DistributedLockTest { + + private static final int BASE_SERVER_PORT = 6666; + private static final int CONNECTION_MS = 2000; + // Beware: the server tick must be small enough that to let the session to be correctly expired + private static final int SESSION_MS = 6000; + private static final int SERVER_TICK_MS = 2000; + private static final int RETRIES_MS = 100; + private static final int RETRIES = 1; + + @Parameterized.Parameter + public int nodes; + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + private TestingCluster testingServer; + private InstanceSpec[] clusterSpecs; + private String connectString; + + @Parameterized.Parameters(name = "nodes={0}") + public static Iterable getTestParameters() { + return Arrays.asList(new Object[][]{{3}, {5}}); + } + + @Override + public void setupEnv() throws Throwable { + clusterSpecs = new InstanceSpec[nodes]; + for (int i = 0; i < nodes; i++) { + clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1); + } + testingServer = new TestingCluster(clusterSpecs); + testingServer.start(); + connectString = testingServer.getConnectString(); + super.setupEnv(); + } + + @Override + public void tearDownEnv() throws Throwable { + super.tearDownEnv(); + testingServer.close(); + } + + @Override + protected void configureManager(Map config) { + config.put("connect-string", connectString); + config.put("session-ms", Integer.toString(SESSION_MS)); + config.put("connection-ms", Integer.toString(CONNECTION_MS)); + config.put("retries", Integer.toString(RETRIES)); + config.put("retries-ms", Integer.toString(RETRIES_MS)); + } 
+ + @Override + protected String managerClassName() { + return CuratorDistributedPrimitiveManager.class.getName(); + } + + @Test(expected = RuntimeException.class) + public void cannotCreateManagerWithNotValidParameterNames() { + final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("_", "_")); + } + + @Test + public void canAcquireLocksFromDifferentNamespace() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + final DistributedPrimitiveManager manager1 = createManagedDistributeManager(config -> config.put("namespace", "1")); + manager1.start(); + final DistributedPrimitiveManager manager2 = createManagedDistributeManager(config -> config.put("namespace", "2")); + manager2.start(); + Assert.assertTrue(manager1.getDistributedLock("a").tryLock()); + Assert.assertTrue(manager2.getDistributedLock("a").tryLock()); + } + + @Test + public void cannotStartManagerWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException { + final DistributedPrimitiveManager manager = createManagedDistributeManager(); + testingServer.close(); + Assert.assertFalse(manager.start(1, TimeUnit.SECONDS)); + } + + @Test(expected = UnavailableStateException.class) + public void cannotAcquireLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + final DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + final DistributedLock lock = manager.getDistributedLock("a"); + final CountDownLatch notAvailable = new CountDownLatch(1); + final DistributedLock.UnavailableLockListener listener = notAvailable::countDown; + lock.addListener(listener); + testingServer.close(); + Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS)); + lock.tryLock(); + } + + @Test(expected = UnavailableStateException.class) + public void cannotTryLockWithDisconnectedServer() throws 
IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + final DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + final DistributedLock lock = manager.getDistributedLock("a"); + testingServer.close(); + lock.tryLock(); + } + + @Test(expected = UnavailableStateException.class) + public void cannotCheckLockStatusWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException { + final DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + final DistributedLock lock = manager.getDistributedLock("a"); + Assert.assertFalse(lock.isHeldByCaller()); + Assert.assertTrue(lock.tryLock()); + testingServer.close(); + lock.isHeldByCaller(); + } + + @Test(expected = UnavailableStateException.class) + public void looseLockAfterServerStop() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException, IOException { + final DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + final DistributedLock lock = manager.getDistributedLock("a"); + Assert.assertTrue(lock.tryLock()); + Assert.assertTrue(lock.isHeldByCaller()); + final CountDownLatch notAvailable = new CountDownLatch(1); + final DistributedLock.UnavailableLockListener listener = notAvailable::countDown; + lock.addListener(listener); + Assert.assertEquals(1, notAvailable.getCount()); + testingServer.close(); + Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS)); + lock.isHeldByCaller(); + } + + @Test + public void canAcquireLockOnMajorityRestart() throws Exception { + final DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + final DistributedLock lock = manager.getDistributedLock("a"); + Assert.assertTrue(lock.tryLock()); + Assert.assertTrue(lock.isHeldByCaller()); + final CountDownLatch notAvailable = new 
CountDownLatch(1); + final DistributedLock.UnavailableLockListener listener = notAvailable::countDown; + lock.addListener(listener); + Assert.assertEquals(1, notAvailable.getCount()); + testingServer.stop(); + notAvailable.await(); + manager.stop(); + restartMajorityNodes(true); + final DistributedPrimitiveManager otherManager = createManagedDistributeManager(); + otherManager.start(); + // await more then the expected value, that depends by how curator session expiration is configured + TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS); + Assert.assertTrue(otherManager.getDistributedLock("a").tryLock()); + } + + @Test + public void cannotStartManagerWithoutQuorum() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + stopMajorityNotLeaderNodes(true); + Assert.assertFalse(manager.start(2, TimeUnit.SECONDS)); + Assert.assertFalse(manager.isStarted()); + } + + @Test(expected = UnavailableStateException.class) + public void cannotAcquireLockWithoutQuorum() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + stopMajorityNotLeaderNodes(true); + DistributedLock lock = manager.getDistributedLock("a"); + lock.tryLock(); + } + + @Test + public void cannotCheckLockWithoutQuorum() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + stopMajorityNotLeaderNodes(true); + DistributedLock lock = manager.getDistributedLock("a"); + final boolean held; + try { + held = lock.isHeldByCaller(); + } catch (UnavailableStateException expected) { + return; + } + Assert.assertFalse(held); + } + + @Test + public void canGetLockWithoutQuorum() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + 
manager.start(); + stopMajorityNotLeaderNodes(true); + DistributedLock lock = manager.getDistributedLock("a"); + Assert.assertNotNull(lock); + } + + @Test + public void notifiedAsUnavailableWhileLoosingQuorum() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedLock lock = manager.getDistributedLock("a"); + CountDownLatch unavailable = new CountDownLatch(1); + lock.addListener(unavailable::countDown); + stopMajorityNotLeaderNodes(true); + Assert.assertTrue(unavailable.await(SESSION_MS + SERVER_TICK_MS, TimeUnit.MILLISECONDS)); + } + + @Test + public void beNotifiedOnce() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedLock lock = manager.getDistributedLock("a"); + final AtomicInteger unavailableManager = new AtomicInteger(0); + final AtomicInteger unavailableLock = new AtomicInteger(0); + manager.addUnavailableManagerListener(unavailableManager::incrementAndGet); + lock.addListener(unavailableLock::incrementAndGet); + stopMajorityNotLeaderNodes(true); + TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS + CONNECTION_MS); + Assert.assertEquals(1, unavailableLock.get()); + Assert.assertEquals(1, unavailableManager.get()); + } + + @Test + public void beNotifiedOfUnavailabilityWhileBlockedOnTimedLock() throws Exception { + Assume.assumeThat(nodes, greaterThan(1)); + DistributedPrimitiveManager manager = createManagedDistributeManager(); + manager.start(); + DistributedLock lock = manager.getDistributedLock("a"); + final AtomicInteger unavailableManager = new AtomicInteger(0); + final AtomicInteger unavailableLock = new AtomicInteger(0); + manager.addUnavailableManagerListener(unavailableManager::incrementAndGet); + lock.addListener(unavailableLock::incrementAndGet); + final DistributedPrimitiveManager otherManager = 
createManagedDistributeManager(); + otherManager.start(); + Assert.assertTrue(otherManager.getDistributedLock("a").tryLock()); + final CountDownLatch startedTimedLock = new CountDownLatch(1); + final AtomicReference unavailableTimedLock = new AtomicReference<>(null); + Thread timedLock = new Thread(() -> { + startedTimedLock.countDown(); + try { + lock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS); + unavailableTimedLock.set(false); + } catch (UnavailableStateException e) { + unavailableTimedLock.set(true); + } catch (InterruptedException e) { + unavailableTimedLock.set(false); + } + }); + timedLock.start(); + Assert.assertTrue(startedTimedLock.await(10, TimeUnit.SECONDS)); + TimeUnit.SECONDS.sleep(1); + stopMajorityNotLeaderNodes(true); + TimeUnit.MILLISECONDS.sleep(SESSION_MS + CONNECTION_MS); + Wait.waitFor(() -> unavailableLock.get() > 0, SERVER_TICK_MS); + Assert.assertEquals(1, unavailableManager.get()); + Assert.assertEquals(TRUE, unavailableTimedLock.get()); + } + + private static boolean isLeader(TestingZooKeeperServer server) { + long leaderId = server.getQuorumPeer().getLeaderId(); + long id = server.getQuorumPeer().getId(); + return id == leaderId; + } + + private void stopMajorityNotLeaderNodes(boolean fromLast) throws Exception { + List followers = testingServer.getServers().stream().filter(Predicates.not(CuratorDistributedLockTest::isLeader)).collect(Collectors.toList()); + final int quorum = (nodes / 2) + 1; + for (int i = 0; i < quorum; i++) { + final int nodeIndex = fromLast ? (followers.size() - 1) - i : i; + followers.get(nodeIndex).stop(); + } + } + + private void restartMajorityNodes(boolean startFromLast) throws Exception { + final int quorum = (nodes / 2) + 1; + for (int i = 0; i < quorum; i++) { + final int nodeIndex = startFromLast ? 
(nodes - 1) - i : i; + if (!testingServer.restartServer(clusterSpecs[nodeIndex])) { + throw new IllegalStateException("errored while restarting " + clusterSpecs[nodeIndex]); + } + } + } +} diff --git a/artemis-server/pom.xml b/artemis-server/pom.xml index 7360f488b4a..6f2d4d4b5ec 100644 --- a/artemis-server/pom.xml +++ b/artemis-server/pom.xml @@ -85,6 +85,11 @@ artemis-core-client ${project.version} + + org.apache.activemq + artemis-quorum-api + ${project.version} + org.apache.activemq activemq-artemis-native diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ConfigurationUtils.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ConfigurationUtils.java index a3149473a62..473edcdb5bf 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ConfigurationUtils.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ConfigurationUtils.java @@ -22,6 +22,8 @@ import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration; import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException; import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; @@ -31,6 +33,8 @@ import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle; import org.apache.activemq.artemis.core.server.ActiveMQServer; import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy; 
import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.ColocatedPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy; @@ -79,6 +83,11 @@ public static HAPolicy getHAPolicy(HAPolicyConfiguration conf, ReplicaPolicyConfiguration pc = (ReplicaPolicyConfiguration) conf; return new ReplicaPolicy(pc.getClusterName(), pc.getMaxSavedReplicatedJournalsSize(), pc.getGroupName(), pc.isRestartBackup(), pc.isAllowFailBack(), pc.getInitialReplicationSyncTimeout(), getScaleDownPolicy(pc.getScaleDownConfiguration()), server.getNetworkHealthCheck(), pc.getVoteOnReplicationFailure(), pc.getQuorumSize(), pc.getVoteRetries(), pc.getVoteRetryWait(), pc.getQuorumVoteWait(), pc.getRetryReplicationWait()); } + case PRIMARY: + return ReplicationPrimaryPolicy.with((ReplicationPrimaryPolicyConfiguration) conf); + case BACKUP: { + return ReplicationBackupPolicy.with((ReplicationBackupPolicyConfiguration) conf); + } case SHARED_STORE_MASTER: { SharedStoreMasterPolicyConfiguration pc = (SharedStoreMasterPolicyConfiguration) conf; return new SharedStoreMasterPolicy(pc.isFailoverOnServerShutdown(), pc.isWaitForActivation()); diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/HAPolicyConfiguration.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/HAPolicyConfiguration.java index d1b2a260b49..c18f4dc710e 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/HAPolicyConfiguration.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/HAPolicyConfiguration.java @@ -26,7 +26,9 @@ enum TYPE { REPLICA("Replica"), SHARED_STORE_MASTER("Shared Store Master"), SHARED_STORE_SLAVE("Shared Store Slave"), - COLOCATED("Colocated"); + COLOCATED("Colocated"), + PRIMARY("Primary"), + BACKUP("Backup"); private String name; diff --git 
a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/DistributedPrimitiveManagerConfiguration.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/DistributedPrimitiveManagerConfiguration.java new file mode 100644 index 00000000000..0dc7971e92c --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/DistributedPrimitiveManagerConfiguration.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.core.config.ha; + +import java.io.Serializable; +import java.util.Map; + +public class DistributedPrimitiveManagerConfiguration implements Serializable { + + private final String className; + private final Map properties; + + public DistributedPrimitiveManagerConfiguration(String className, Map properties) { + this.className = className; + this.properties = properties; + } + + public Map getProperties() { + return properties; + } + + public String getClassName() { + return className; + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/ReplicationBackupPolicyConfiguration.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/ReplicationBackupPolicyConfiguration.java new file mode 100644 index 00000000000..f1c97eb7ea3 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/ReplicationBackupPolicyConfiguration.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.core.config.ha; + +import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration; +import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; + +public class ReplicationBackupPolicyConfiguration implements HAPolicyConfiguration { + + private String clusterName = null; + + private int maxSavedReplicatedJournalsSize = ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize(); + + private String groupName = null; + + /* + * used in the replicated policy after failover + * */ + private boolean allowFailBack = false; + + private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout(); + + private int voteRetries = ActiveMQDefaultConfiguration.getDefaultVoteRetries(); + + /** + * TODO: move it into {@link ActiveMQDefaultConfiguration} when the configuration is stable. + */ + private long voteRetryWait = 2000; + + private long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait(); + + private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null; + + public static final ReplicationBackupPolicyConfiguration withDefault() { + return new ReplicationBackupPolicyConfiguration(); + } + + private ReplicationBackupPolicyConfiguration() { + } + + @Override + public HAPolicyConfiguration.TYPE getType() { + return TYPE.BACKUP; + } + + public String getClusterName() { + return clusterName; + } + + public ReplicationBackupPolicyConfiguration setClusterName(String clusterName) { + this.clusterName = clusterName; + return this; + } + + public int getMaxSavedReplicatedJournalsSize() { + return maxSavedReplicatedJournalsSize; + } + + public ReplicationBackupPolicyConfiguration setMaxSavedReplicatedJournalsSize(int maxSavedReplicatedJournalsSize) { + this.maxSavedReplicatedJournalsSize = maxSavedReplicatedJournalsSize; + return this; + } + + public String getGroupName() { + return groupName; + } + + 
public ReplicationBackupPolicyConfiguration setGroupName(String groupName) { + this.groupName = groupName; + return this; + } + + public boolean isAllowFailBack() { + return allowFailBack; + } + + public ReplicationBackupPolicyConfiguration setAllowFailBack(boolean allowFailBack) { + this.allowFailBack = allowFailBack; + return this; + } + + public long getInitialReplicationSyncTimeout() { + return initialReplicationSyncTimeout; + } + + public ReplicationBackupPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) { + this.initialReplicationSyncTimeout = initialReplicationSyncTimeout; + return this; + } + + public int getVoteRetries() { + return voteRetries; + } + + public ReplicationBackupPolicyConfiguration setVoteRetries(int voteRetries) { + this.voteRetries = voteRetries; + return this; + } + + public ReplicationBackupPolicyConfiguration setVoteRetryWait(long voteRetryWait) { + this.voteRetryWait = voteRetryWait; + return this; + } + + public long getVoteRetryWait() { + return voteRetryWait; + } + + public long getRetryReplicationWait() { + return retryReplicationWait; + } + + public ReplicationBackupPolicyConfiguration setRetryReplicationWait(long retryReplicationWait) { + this.retryReplicationWait = retryReplicationWait; + return this; + } + + public ReplicationBackupPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) { + this.distributedManagerConfiguration = configuration; + return this; + } + + public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() { + return distributedManagerConfiguration; + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/ReplicationPrimaryPolicyConfiguration.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/ReplicationPrimaryPolicyConfiguration.java new file mode 100644 index 00000000000..7bb53184ddb --- /dev/null +++ 
b/artemis-server/src/main/java/org/apache/activemq/artemis/core/config/ha/ReplicationPrimaryPolicyConfiguration.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.core.config.ha; + +import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration; +import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; + +public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration { + + private boolean checkForLiveServer = ActiveMQDefaultConfiguration.isDefaultCheckForLiveServer(); + + private String groupName = null; + + private String clusterName = null; + + private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout(); + + private int voteRetries = ActiveMQDefaultConfiguration.getDefaultVoteRetries(); + + /** + * TODO: move it into {@link ActiveMQDefaultConfiguration} when the configuration is stable. 
+ */ + private long voteRetryWait = 2000; + + private Long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait(); + + private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null; + + public static ReplicationPrimaryPolicyConfiguration withDefault() { + return new ReplicationPrimaryPolicyConfiguration(); + } + + private ReplicationPrimaryPolicyConfiguration() { + } + + @Override + public TYPE getType() { + return TYPE.PRIMARY; + } + + public boolean isCheckForLiveServer() { + return checkForLiveServer; + } + + public ReplicationPrimaryPolicyConfiguration setCheckForLiveServer(boolean checkForLiveServer) { + this.checkForLiveServer = checkForLiveServer; + return this; + } + + public String getGroupName() { + return groupName; + } + + public ReplicationPrimaryPolicyConfiguration setGroupName(String groupName) { + this.groupName = groupName; + return this; + } + + public String getClusterName() { + return clusterName; + } + + public ReplicationPrimaryPolicyConfiguration setClusterName(String clusterName) { + this.clusterName = clusterName; + return this; + } + + public long getInitialReplicationSyncTimeout() { + return initialReplicationSyncTimeout; + } + + public ReplicationPrimaryPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) { + this.initialReplicationSyncTimeout = initialReplicationSyncTimeout; + return this; + } + + public int getVoteRetries() { + return voteRetries; + } + + public ReplicationPrimaryPolicyConfiguration setVoteRetries(int voteRetries) { + this.voteRetries = voteRetries; + return this; + } + + public ReplicationPrimaryPolicyConfiguration setVoteRetryWait(long voteRetryWait) { + this.voteRetryWait = voteRetryWait; + return this; + } + + public long getVoteRetryWait() { + return voteRetryWait; + } + + public void setRetryReplicationWait(Long retryReplicationWait) { + this.retryReplicationWait = retryReplicationWait; + } + + public Long 
getRetryReplicationWait() { + return retryReplicationWait; + } + + public ReplicationPrimaryPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) { + this.distributedManagerConfiguration = configuration; + return this; + } + + public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() { + return distributedManagerConfiguration; + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/deployers/impl/FileConfigurationParser.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/deployers/impl/FileConfigurationParser.java index d93af438bb1..f7d9d01f0a2 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/deployers/impl/FileConfigurationParser.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/deployers/impl/FileConfigurationParser.java @@ -69,7 +69,10 @@ import org.apache.activemq.artemis.core.config.federation.FederationStreamConfiguration; import org.apache.activemq.artemis.core.config.federation.FederationTransformerConfiguration; import org.apache.activemq.artemis.core.config.federation.FederationUpstreamConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; @@ -1599,6 +1602,16 @@ private void parseHAPolicyConfiguration(final Element e, final Configuration mai Element colocatedNode = (Element) colocatedNodeList.item(0); 
mainConfig.setHAPolicyConfiguration(createColocatedHaPolicy(colocatedNode, true)); } + NodeList primaryNodeList = e.getElementsByTagName("primary"); + if (primaryNodeList.getLength() > 0) { + Element primaryNode = (Element) primaryNodeList.item(0); + mainConfig.setHAPolicyConfiguration(createReplicationPrimaryHaPolicy(primaryNode, mainConfig)); + } + NodeList backupNodeList = e.getElementsByTagName("backup"); + if (backupNodeList.getLength() > 0) { + Element backupNode = (Element) backupNodeList.item(0); + mainConfig.setHAPolicyConfiguration(createReplicationBackupHaPolicy(backupNode, mainConfig)); + } } else if (haNode.getTagName().equals("shared-store")) { NodeList masterNodeList = e.getElementsByTagName("master"); if (masterNodeList.getLength() > 0) { @@ -1691,6 +1704,75 @@ private ReplicaPolicyConfiguration createReplicaHaPolicy(Element policyNode) { return configuration; } + private ReplicationPrimaryPolicyConfiguration createReplicationPrimaryHaPolicy(Element policyNode, Configuration config) { + ReplicationPrimaryPolicyConfiguration configuration = ReplicationPrimaryPolicyConfiguration.withDefault(); + + configuration.setCheckForLiveServer(getBoolean(policyNode, "check-for-live-server", configuration.isCheckForLiveServer())); + + configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK)); + + configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK)); + + configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO)); + + configuration.setVoteRetries(getInteger(policyNode, "vote-retries", configuration.getVoteRetries(), Validators.MINUS_ONE_OR_GE_ZERO)); + + configuration.setVoteRetryWait(getLong(policyNode, "vote-retry-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO)); + + 
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO)); + + configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config)); + + return configuration; + } + + private ReplicationBackupPolicyConfiguration createReplicationBackupHaPolicy(Element policyNode, Configuration config) { + + ReplicationBackupPolicyConfiguration configuration = ReplicationBackupPolicyConfiguration.withDefault(); + + configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK)); + + configuration.setAllowFailBack(getBoolean(policyNode, "allow-failback", configuration.isAllowFailBack())); + + configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO)); + + configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK)); + + configuration.setMaxSavedReplicatedJournalsSize(getInteger(policyNode, "max-saved-replicated-journals-size", configuration.getMaxSavedReplicatedJournalsSize(), Validators.MINUS_ONE_OR_GE_ZERO)); + + configuration.setVoteRetries(getInteger(policyNode, "vote-retries", configuration.getVoteRetries(), Validators.MINUS_ONE_OR_GE_ZERO)); + + configuration.setVoteRetryWait(getLong(policyNode, "vote-retry-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO)); + + configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO)); + + configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config)); + + return configuration; + } + + private DistributedPrimitiveManagerConfiguration createDistributedPrimitiveManagerConfiguration(Element policyNode, Configuration config) { + final Element managerNode = 
(Element) policyNode.getElementsByTagName("manager").item(0); + final String className = getString(managerNode, "class-name", + ActiveMQDefaultConfiguration.getDefaultDistributedPrimitiveManagerClassName(), + Validators.NO_CHECK); + final Map properties; + if (parameterExists(managerNode, "properties")) { + final NodeList propertyNodeList = managerNode.getElementsByTagName("property"); + final int propertiesCount = propertyNodeList.getLength(); + properties = new HashMap<>(propertiesCount); + for (int i = 0; i < propertiesCount; i++) { + final Element propertyNode = (Element) propertyNodeList.item(i); + final String propertyName = propertyNode.getAttributeNode("key").getValue(); + final String propertyValue = propertyNode.getAttributeNode("value").getValue(); + properties.put(propertyName, propertyValue); + } + } else { + properties = new HashMap<>(1); + } + return new DistributedPrimitiveManagerConfiguration(className, properties); + } + private SharedStoreMasterPolicyConfiguration createSharedStoreMasterHaPolicy(Element policyNode) { SharedStoreMasterPolicyConfiguration configuration = new SharedStoreMasterPolicyConfiguration(); diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/replication/ReplicationEndpoint.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/replication/ReplicationEndpoint.java index b173f3afdc9..495b35d2817 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/replication/ReplicationEndpoint.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/replication/ReplicationEndpoint.java @@ -37,7 +37,6 @@ import org.apache.activemq.artemis.api.core.Message; import org.apache.activemq.artemis.api.core.SimpleString; import org.apache.activemq.artemis.core.config.Configuration; -import org.apache.activemq.artemis.core.io.IOCriticalErrorListener; import org.apache.activemq.artemis.core.io.SequentialFile; import org.apache.activemq.artemis.core.journal.EncoderPersister; 
import org.apache.activemq.artemis.core.journal.Journal; @@ -82,9 +81,8 @@ import org.apache.activemq.artemis.core.server.ActiveMQComponent; import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle; import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; -import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum; + import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; -import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation; import org.apache.activemq.artemis.utils.actors.OrderedExecutorFactory; import org.jboss.logging.Logger; @@ -94,12 +92,20 @@ */ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQComponent { + public interface ReplicationEndpointEventListener { + + void onRemoteBackupUpToDate(); + + void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping message) throws ActiveMQException; + + void onLiveNodeId(String nodeId); + } + private static final Logger logger = Logger.getLogger(ReplicationEndpoint.class); - private final IOCriticalErrorListener criticalErrorListener; private final ActiveMQServerImpl server; private final boolean wantedFailBack; - private final SharedNothingBackupActivation activation; + private final ReplicationEndpointEventListener eventListener; private final boolean noSync = false; private Channel channel; private boolean supportResponseBatching; @@ -129,8 +135,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon private boolean deletePages = true; private volatile boolean started; - private SharedNothingBackupQuorum backupQuorum; - private Executor executor; private List outgoingInterceptors = null; @@ -140,13 +144,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon // Constructors -------------------------------------------------- public ReplicationEndpoint(final ActiveMQServerImpl server, - IOCriticalErrorListener 
criticalErrorListener, boolean wantedFailBack, - SharedNothingBackupActivation activation) { + ReplicationEndpointEventListener eventListener) { this.server = server; - this.criticalErrorListener = criticalErrorListener; this.wantedFailBack = wantedFailBack; - this.activation = activation; + this.eventListener = eventListener; this.pendingPackets = new ArrayDeque<>(); this.supportResponseBatching = false; } @@ -287,7 +289,7 @@ private void handleFatalError(BackupReplicationStartFailedMessage packet) { * @throws ActiveMQException */ private void handleLiveStopping(ReplicationLiveIsStoppingMessage packet) throws ActiveMQException { - activation.remoteFailOver(packet.isFinalMessage()); + eventListener.onLiveStopping(packet.isFinalMessage()); } @Override @@ -474,8 +476,8 @@ private synchronized void finishSynchronization(String liveID) throws Exception } journalsHolder = null; - backupQuorum.liveIDSet(liveID); - activation.setRemoteBackupUpToDate(); + eventListener.onLiveNodeId(liveID); + eventListener.onRemoteBackupUpToDate(); if (logger.isTraceEnabled()) { logger.trace("Backup is synchronized / BACKUP-SYNC-DONE"); @@ -597,7 +599,7 @@ private ReplicationResponseMessageV2 handleStartReplicationSynchronization(final if (packet.getNodeID() != null) { // At the start of replication, we still do not know which is the nodeID that the live uses. // This is the point where the backup gets this information. - backupQuorum.liveIDSet(packet.getNodeID()); + eventListener.onLiveNodeId(packet.getNodeID()); } break; @@ -900,16 +902,6 @@ public String toString() { } } - /** - * Sets the quorumManager used by the server in the replicationEndpoint. It is used to inform the - * backup server of the live's nodeID. 
- * - * @param backupQuorum - */ - public void setBackupQuorum(SharedNothingBackupQuorum backupQuorum) { - this.backupQuorum = backupQuorum; - } - /** * @param executor2 */ diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServer.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServer.java index 33d8d1834e4..e3249793e69 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServer.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServer.java @@ -40,7 +40,6 @@ import org.apache.activemq.artemis.core.persistence.StorageManager; import org.apache.activemq.artemis.core.postoffice.PostOffice; import org.apache.activemq.artemis.core.remoting.server.RemotingService; -import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; import org.apache.activemq.artemis.core.replication.ReplicationManager; import org.apache.activemq.artemis.core.security.Role; import org.apache.activemq.artemis.core.security.SecurityAuth; @@ -166,11 +165,6 @@ enum SERVER_STATE { CriticalAnalyzer getCriticalAnalyzer(); - /** - * @return - */ - ReplicationEndpoint getReplicationEndpoint(); - /** * it will release hold a lock for the activation. 
*/ diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/LiveNodeLocator.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/LiveNodeLocator.java index 56fef7c77c3..1515bdc51c0 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/LiveNodeLocator.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/LiveNodeLocator.java @@ -21,7 +21,6 @@ import org.apache.activemq.artemis.api.core.TransportConfiguration; import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener; import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal; -import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum; /** * A class that will locate a particular live server running in a cluster. How this live is chosen @@ -31,16 +30,23 @@ */ public abstract class LiveNodeLocator implements ClusterTopologyListener { - private SharedNothingBackupQuorum backupQuorum; + @FunctionalInterface + public interface BackupRegistrationListener { - public LiveNodeLocator(SharedNothingBackupQuorum backupQuorum) { - this.backupQuorum = backupQuorum; + void onBackupRegistrationFailed(boolean alreadyReplicating); + } + + private final BackupRegistrationListener backupRegistrationListener; + + public LiveNodeLocator(BackupRegistrationListener backupRegistrationListener) { + this.backupRegistrationListener = backupRegistrationListener; } /** * Use this constructor when the LiveNodeLocator is used for scaling down rather than replicating */ public LiveNodeLocator() { + this(null); } /** @@ -67,12 +73,8 @@ public LiveNodeLocator() { * tells the locator the the current connector has failed. 
*/ public void notifyRegistrationFailed(boolean alreadyReplicating) { - if (backupQuorum != null) { - if (alreadyReplicating) { - backupQuorum.notifyAlreadyReplicating(); - } else { - backupQuorum.notifyRegistrationFailed(); - } + if (backupRegistrationListener != null) { + backupRegistrationListener.onBackupRegistrationFailed(alreadyReplicating); } } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterController.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterController.java index 6ef9f262f4d..ec9f153cd94 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterController.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterController.java @@ -80,10 +80,16 @@ public class ClusterController implements ActiveMQComponent { private boolean started; private SimpleString replicatedClusterName; - public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) { + public ClusterController(ActiveMQServer server, + ScheduledExecutorService scheduledExecutor, + boolean useQuorumManager) { this.server = server; executor = server.getExecutorFactory().getExecutor(); - quorumManager = new QuorumManager(scheduledExecutor, this); + quorumManager = useQuorumManager ? 
new QuorumManager(scheduledExecutor, this) : null; + } + + public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) { + this(server, scheduledExecutor, true); } @Override @@ -108,11 +114,11 @@ public void start() throws Exception { //latch so we know once we are connected replicationClusterConnectedLatch = new CountDownLatch(1); //and add the quorum manager as a topology listener - if (defaultLocator != null) { - defaultLocator.addClusterTopologyListener(quorumManager); - } - if (quorumManager != null) { + if (defaultLocator != null) { + defaultLocator.addClusterTopologyListener(quorumManager); + } + //start the quorum manager quorumManager.start(); } @@ -126,6 +132,26 @@ public void start() throws Exception { } } + /** + * It adds {@code clusterTopologyListener} to {@code defaultLocator}. + */ + public void addClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) { + if (!this.started || defaultLocator == null) { + throw new IllegalStateException("the controller must be started and with a locator initialized"); + } + this.defaultLocator.addClusterTopologyListener(clusterTopologyListener); + } + + /** + * It removes {@code clusterTopologyListener} from {@code defaultLocator}. 
+ */ + public void removeClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) { + if (!this.started || defaultLocator == null) { + throw new IllegalStateException("the controller must be started and with a locator initialized"); + } + this.defaultLocator.removeClusterTopologyListener(clusterTopologyListener); + } + @Override public void stop() throws Exception { if (logger.isDebugEnabled()) { @@ -138,7 +164,9 @@ public void stop() throws Exception { serverLocatorInternal.close(); } //stop the quorum manager - quorumManager.stop(); + if (quorumManager != null) { + quorumManager.stop(); + } } @Override @@ -223,6 +251,17 @@ public void addClusterTopologyListenerForReplication(ClusterTopologyListener lis } } + /** + * remove a cluster listener + * + * @param listener + */ + public void removeClusterTopologyListenerForReplication(ClusterTopologyListener listener) { + if (replicationLocator != null) { + replicationLocator.removeClusterTopologyListener(listener); + } + } + /** * add an interceptor * @@ -232,6 +271,15 @@ public void addIncomingInterceptorForReplication(Interceptor interceptor) { replicationLocator.addIncomingInterceptor(interceptor); } + /** + * remove an interceptor + * + * @param interceptor + */ + public void removeIncomingInterceptorForReplication(Interceptor interceptor) { + replicationLocator.removeIncomingInterceptor(interceptor); + } + /** * connect to a specific node in the cluster used for replication * @@ -406,7 +454,11 @@ public void handlePacket(Packet packet) { logger.debug("there is no acceptor used configured at the CoreProtocolManager " + this); } } else if (packet.getType() == PacketImpl.QUORUM_VOTE) { - quorumManager.handleQuorumVote(clusterChannel, packet); + if (quorumManager != null) { + quorumManager.handleQuorumVote(clusterChannel, packet); + } else { + logger.warnf("Received %s on a cluster connection that's using the new quorum vote algorithm.", packet); + } } else if (packet.getType() == 
PacketImpl.SCALEDOWN_ANNOUNCEMENT) { ScaleDownAnnounceMessage message = (ScaleDownAnnounceMessage) packet; //we don't really need to check as it should always be true diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterManager.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterManager.java index 6950120a377..44a81e4b658 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterManager.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ClusterManager.java @@ -157,7 +157,7 @@ public ClusterManager(final ExecutorFactory executorFactory, final ManagementService managementService, final Configuration configuration, final NodeManager nodeManager, - final boolean backup) { + final boolean useQuorumManager) { this.executorFactory = executorFactory; executor = executorFactory.getExecutor(); @@ -174,7 +174,7 @@ public ClusterManager(final ExecutorFactory executorFactory, this.nodeManager = nodeManager; - clusterController = new ClusterController(server, scheduledExecutor); + clusterController = new ClusterController(server, scheduledExecutor, useQuorumManager); haManager = server.getActivation().getHAManager(); } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/HAPolicy.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/HAPolicy.java index c5d62ac3783..34c84127671 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/HAPolicy.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/HAPolicy.java @@ -57,4 +57,8 @@ default boolean isWaitForActivation() { String getScaleDownClustername(); + default boolean useQuorumManager() { + return true; + } + } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/ReplicationBackupPolicy.java 
b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/ReplicationBackupPolicy.java new file mode 100644 index 00000000000..5427360fcf1 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/ReplicationBackupPolicy.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.core.server.cluster.ha; + +import java.util.Map; +import java.util.Objects; + +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.io.IOCriticalErrorListener; +import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; +import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; + +public class ReplicationBackupPolicy implements HAPolicy { + + private final ReplicationPrimaryPolicy livePolicy; + private final String groupName; + private final String clusterName; + private final int maxSavedReplicatedJournalsSize; + private final int voteRetries; + private final long voteRetryWait; + private final long retryReplicationWait; + private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration; + private final boolean tryFailback; + + private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration, + ReplicationPrimaryPolicy livePolicy) { + Objects.requireNonNull(livePolicy); + this.clusterName = configuration.getClusterName(); + this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize(); + this.groupName = configuration.getGroupName(); + this.voteRetries = configuration.getVoteRetries(); + this.voteRetryWait = configuration.getVoteRetryWait(); + this.retryReplicationWait = configuration.getRetryReplicationWait(); + this.distributedManagerConfiguration = configuration.getDistributedManagerConfiguration(); + this.tryFailback = true; + this.livePolicy = livePolicy; + } + + private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration) { + this.clusterName = configuration.getClusterName(); + this.maxSavedReplicatedJournalsSize = 
configuration.getMaxSavedReplicatedJournalsSize(); + this.groupName = configuration.getGroupName(); + this.voteRetries = configuration.getVoteRetries(); + this.voteRetryWait = configuration.getVoteRetryWait(); + this.retryReplicationWait = configuration.getRetryReplicationWait(); + this.distributedManagerConfiguration = configuration.getDistributedManagerConfiguration(); + this.tryFailback = false; + livePolicy = ReplicationPrimaryPolicy.failoverPolicy( + configuration.getInitialReplicationSyncTimeout(), + configuration.getGroupName(), + configuration.getClusterName(), + this, + configuration.isAllowFailBack(), + configuration.getDistributedManagerConfiguration()); + } + + public boolean isTryFailback() { + return tryFailback; + } + + /** + * It creates a policy whose live policy won't cause the broker to try failback. + */ + public static ReplicationBackupPolicy with(ReplicationBackupPolicyConfiguration configuration) { + return new ReplicationBackupPolicy(configuration); + } + + /** + * It creates a companion backup policy for a natural-born primary: it would cause the broker to try failback.
+ */ + static ReplicationBackupPolicy failback(int voteRetries, + long voteRetryWait, + long retryReplicationWait, + String clusterName, + String groupName, + ReplicationPrimaryPolicy livePolicy, + DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) { + return new ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration.withDefault() + .setVoteRetries(voteRetries) + .setVoteRetryWait(voteRetryWait) + .setRetryReplicationWait(retryReplicationWait) + .setClusterName(clusterName) + .setGroupName(groupName) + .setDistributedManagerConfiguration(distributedManagerConfiguration), + livePolicy); + } + + @Override + public ReplicationBackupActivation createActivation(ActiveMQServerImpl server, + boolean wasLive, + Map activationParams, + IOCriticalErrorListener shutdownOnCriticalIO) throws Exception { + return new ReplicationBackupActivation(server, wasLive, DistributedPrimitiveManager.newInstanceOf( + distributedManagerConfiguration.getClassName(), + distributedManagerConfiguration.getProperties()), this); + } + + @Override + public boolean isSharedStore() { + return false; + } + + @Override + public boolean isBackup() { + return true; + } + + @Override + public boolean canScaleDown() { + return false; + } + + @Override + public String getScaleDownGroupName() { + return null; + } + + @Override + public String getScaleDownClustername() { + return null; + } + + public String getClusterName() { + return clusterName; + } + + @Override + public String getBackupGroupName() { + return groupName; + } + + public String getGroupName() { + return groupName; + } + + public ReplicationPrimaryPolicy getLivePolicy() { + return livePolicy; + } + + public int getMaxSavedReplicatedJournalsSize() { + return maxSavedReplicatedJournalsSize; + } + + public int getVoteRetries() { + return voteRetries; + } + + public long getVoteRetryWait() { + return voteRetryWait; + } + + public long getRetryReplicationWait() { + return retryReplicationWait; + } + + @Override + public 
boolean useQuorumManager() { + return false; + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/ReplicationPrimaryPolicy.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/ReplicationPrimaryPolicy.java new file mode 100644 index 00000000000..fe9b99358e1 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/ha/ReplicationPrimaryPolicy.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.core.server.cluster.ha; + +import java.util.Map; +import java.util.Objects; + +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.io.IOCriticalErrorListener; +import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; +import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; + +public class ReplicationPrimaryPolicy implements HAPolicy { + + private final ReplicationBackupPolicy backupPolicy; + private final String clusterName; + private final String groupName; + private final boolean checkForLiveServer; + private final long initialReplicationSyncTimeout; + private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration; + private final boolean allowAutoFailBack; + + private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration configuration, + ReplicationBackupPolicy backupPolicy, + boolean allowAutoFailBack) { + Objects.requireNonNull(backupPolicy); + clusterName = configuration.getClusterName(); + groupName = configuration.getGroupName(); + checkForLiveServer = configuration.isCheckForLiveServer(); + initialReplicationSyncTimeout = configuration.getInitialReplicationSyncTimeout(); + distributedManagerConfiguration = configuration.getDistributedManagerConfiguration(); + this.allowAutoFailBack = allowAutoFailBack; + this.backupPolicy = backupPolicy; + } + + private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration config) { + clusterName = config.getClusterName(); + groupName = config.getGroupName(); + checkForLiveServer = config.isCheckForLiveServer(); + initialReplicationSyncTimeout = config.getInitialReplicationSyncTimeout(); + distributedManagerConfiguration = 
config.getDistributedManagerConfiguration(); + this.allowAutoFailBack = false; + backupPolicy = ReplicationBackupPolicy.failback(config.getVoteRetries(), config.getVoteRetryWait(), + config.getRetryReplicationWait(), config.getClusterName(), + config.getGroupName(), this, + config.getDistributedManagerConfiguration()); + } + + /** + * It creates a companion failing-over primary policy for a natural-born backup: it allows auto fail-back + * only if configured to do so. + */ + static ReplicationPrimaryPolicy failoverPolicy(long initialReplicationSyncTimeout, + String groupName, + String clusterName, + ReplicationBackupPolicy replicaPolicy, + boolean allowAutoFailback, + DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) { + return new ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration.withDefault() + .setCheckForLiveServer(false) + .setInitialReplicationSyncTimeout(initialReplicationSyncTimeout) + .setGroupName(groupName) + .setClusterName(clusterName) + .setDistributedManagerConfiguration(distributedManagerConfiguration), + replicaPolicy, allowAutoFailback); + } + + /** + * It creates a primary policy that never allows auto fail-back.
+ * It's meant to be used for natural-born primary brokers: its backup policy is set to always try to fail-back. + */ + public static ReplicationPrimaryPolicy with(ReplicationPrimaryPolicyConfiguration configuration) { + return new ReplicationPrimaryPolicy(configuration); + } + + public ReplicationBackupPolicy getBackupPolicy() { + return backupPolicy; + } + + @Override + public ReplicationPrimaryActivation createActivation(ActiveMQServerImpl server, + boolean wasLive, + Map activationParams, + IOCriticalErrorListener shutdownOnCriticalIO) throws Exception { + return new ReplicationPrimaryActivation(server, + DistributedPrimitiveManager.newInstanceOf( + distributedManagerConfiguration.getClassName(), + distributedManagerConfiguration.getProperties()), this); + } + + @Override + public boolean isSharedStore() { + return false; + } + + @Override + public boolean isBackup() { + return false; + } + + @Override + public boolean isWaitForActivation() { + return true; + } + + @Override + public boolean canScaleDown() { + return false; + } + + @Override + public String getBackupGroupName() { + return groupName; + } + + @Override + public String getScaleDownGroupName() { + return null; + } + + @Override + public String getScaleDownClustername() { + return null; + } + + public boolean isCheckForLiveServer() { + return checkForLiveServer; + } + + public boolean isAllowAutoFailBack() { + return allowAutoFailBack; + } + + public String getClusterName() { + return clusterName; + } + + public long getInitialReplicationSyncTimeout() { + return initialReplicationSyncTimeout; + } + + public String getGroupName() { + return groupName; + } + + @Override + public boolean useQuorumManager() { + return false; + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/SharedNothingBackupQuorum.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/SharedNothingBackupQuorum.java index 2e4b0f75235..82b0a3f4bae 
100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/SharedNothingBackupQuorum.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/SharedNothingBackupQuorum.java @@ -28,11 +28,12 @@ import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage; import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; +import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener; import org.apache.activemq.artemis.core.server.NetworkHealthCheck; import org.apache.activemq.artemis.core.server.NodeManager; import org.jboss.logging.Logger; -public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener { +public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener, BackupRegistrationListener { private static final Logger LOGGER = Logger.getLogger(SharedNothingBackupQuorum.class); @@ -236,13 +237,9 @@ public synchronized void failOver(ReplicationLiveIsStoppingMessage.LiveStopping } } - public void notifyRegistrationFailed() { - signal = BACKUP_ACTIVATION.FAILURE_REPLICATING; - latch.countDown(); - } - - public void notifyAlreadyReplicating() { - signal = BACKUP_ACTIVATION.ALREADY_REPLICATING; + @Override + public void onBackupRegistrationFailed(boolean alreadyReplicating) { + signal = alreadyReplicating ? 
BACKUP_ACTIVATION.ALREADY_REPLICATING : BACKUP_ACTIVATION.FAILURE_REPLICATING; latch.countDown(); } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/files/FileMoveManager.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/files/FileMoveManager.java index f29e4a17412..5e2c1cb4415 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/files/FileMoveManager.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/files/FileMoveManager.java @@ -36,6 +36,7 @@ public class FileMoveManager { private static final Logger logger = Logger.getLogger(FileMoveManager.class); private final File folder; + private final String[] prefixesToPreserve; private int maxFolders; public static final String PREFIX = "oldreplica."; @@ -70,9 +71,10 @@ public FileMoveManager(File folder) { this(folder, -1); } - public FileMoveManager(File folder, int maxFolders) { + public FileMoveManager(File folder, int maxFolders, String... prefixesToPreserve) { this.folder = folder; this.maxFolders = maxFolders; + this.prefixesToPreserve = prefixesToPreserve != null ? 
Arrays.copyOf(prefixesToPreserve, prefixesToPreserve.length) : null; } public int getMaxFolders() { @@ -99,8 +101,23 @@ public void doMove() throws IOException { ActiveMQServerLogger.LOGGER.backupDeletingData(folder.getPath()); for (String fileMove : files) { File fileFrom = new File(folder, fileMove); - logger.tracef("deleting %s", fileFrom); - deleteTree(fileFrom); + if (prefixesToPreserve != null) { + boolean skip = false; + for (String prefixToPreserve : prefixesToPreserve) { + if (fileMove.startsWith(prefixToPreserve)) { + logger.tracef("skipping %s", fileFrom); + skip = true; + break; + } + } + if (!skip) { + logger.tracef("deleting %s", fileFrom); + deleteTree(fileFrom); + } + } else { + logger.tracef("deleting %s", fileFrom); + deleteTree(fileFrom); + } } } else { // Since we will create one folder, we are already taking that one into consideration @@ -113,8 +130,26 @@ public void doMove() throws IOException { for (String fileMove : files) { File fileFrom = new File(folder, fileMove); File fileTo = new File(folderTo, fileMove); - logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo); - Files.move(fileFrom.toPath(), fileTo.toPath()); + if (prefixesToPreserve != null) { + boolean copy = false; + for (String prefixToPreserve : prefixesToPreserve) { + if (fileMove.startsWith(prefixToPreserve)) { + logger.tracef("skipping %s", fileFrom); + copy = true; + break; + } + } + if (copy) { + logger.tracef("copying %s to %s", fileFrom, fileTo); + Files.copy(fileFrom.toPath(), fileTo.toPath()); + } else { + logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo); + Files.move(fileFrom.toPath(), fileTo.toPath()); + } + } else { + logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo); + Files.move(fileFrom.toPath(), fileTo.toPath()); + } } } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/Activation.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/Activation.java index 
0eab1ba30fd..a055036ef82 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/Activation.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/Activation.java @@ -110,4 +110,8 @@ public JournalLoader createJournalLoader(PostOffice postOffice, public ReplicationManager getReplicationManager() { return null; } + + public boolean isReplicaSync() { + return false; + } } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ActiveMQServerImpl.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ActiveMQServerImpl.java index c73e63e9c20..448866ac038 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ActiveMQServerImpl.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ActiveMQServerImpl.java @@ -109,7 +109,6 @@ import org.apache.activemq.artemis.core.postoffice.impl.PostOfficeImpl; import org.apache.activemq.artemis.core.remoting.server.RemotingService; import org.apache.activemq.artemis.core.remoting.server.impl.RemotingServiceImpl; -import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; import org.apache.activemq.artemis.core.replication.ReplicationManager; import org.apache.activemq.artemis.core.security.CheckType; import org.apache.activemq.artemis.core.security.Role; @@ -797,14 +796,6 @@ public void run() { } } - @Override - public ReplicationEndpoint getReplicationEndpoint() { - if (activation instanceof SharedNothingBackupActivation) { - return ((SharedNothingBackupActivation) activation).getReplicationEndpoint(); - } - return null; - } - @Override public void unlockActivation() { activationLock.release(); @@ -921,7 +912,7 @@ public ExecutorService getThreadPool() { return threadPool; } - public void setActivation(SharedNothingLiveActivation activation) { + public void setActivation(Activation activation) { this.activation = activation; } @@ -1145,19 
+1136,7 @@ public final void stop(boolean failoverOnServerShutdown, boolean isExit) throws @Override public boolean isReplicaSync() { - if (activation instanceof SharedNothingLiveActivation) { - ReplicationManager replicationManager = getReplicationManager(); - - if (replicationManager == null) { - return false; - } else { - return !replicationManager.isSynchronizing(); - } - } else if (activation instanceof SharedNothingBackupActivation) { - return ((SharedNothingBackupActivation) activation).isRemoteBackupUpToDate(); - } else { - return false; - } + return activation.isReplicaSync(); } public void stop(boolean failoverOnServerShutdown, final boolean criticalIOError, boolean restarting) { @@ -3116,7 +3095,7 @@ synchronized boolean initialisePart1(boolean scalingDown) throws Exception { postOffice = new PostOfficeImpl(this, storageManager, pagingManager, queueFactory, managementService, configuration.getMessageExpiryScanPeriod(), configuration.getAddressQueueScanPeriod(), configuration.getWildcardConfiguration(), configuration.getIDCacheSize(), configuration.isPersistIDCache(), addressSettingsRepository); // This can't be created until node id is set - clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.isBackup()); + clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.useQuorumManager()); federationManager = new FederationManager(this); @@ -4178,10 +4157,16 @@ public boolean hasScaledDown(SimpleString scaledDownNodeId) { * move any older data away and log a warning about it. 
*/ void moveServerData(int maxSavedReplicated) throws IOException { + moveServerData(maxSavedReplicated, false); + } + + void moveServerData(int maxSavedReplicated, boolean preserveLockFiles) throws IOException { File[] dataDirs = new File[]{configuration.getBindingsLocation(), configuration.getJournalLocation(), configuration.getPagingLocation(), configuration.getLargeMessagesLocation()}; for (File data : dataDirs) { - FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated); + final boolean isLockFolder = preserveLockFiles ? data.equals(configuration.getNodeManagerLockLocation()) : false; + final String[] lockPrefixes = isLockFolder ? new String[]{FileBasedNodeManager.SERVER_LOCK_NAME, "serverlock"} : null; + FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated, lockPrefixes); moveManager.doMove(); } } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/AnyLiveNodeLocatorForReplication.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/AnyLiveNodeLocatorForReplication.java index 015339aafee..de4b4099420 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/AnyLiveNodeLocatorForReplication.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/AnyLiveNodeLocatorForReplication.java @@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.TransportConfiguration; import org.apache.activemq.artemis.api.core.client.TopologyMember; import org.apache.activemq.artemis.core.server.LiveNodeLocator; -import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum; import org.apache.activemq.artemis.utils.ConcurrentUtil; /** @@ -47,8 +46,9 @@ public class AnyLiveNodeLocatorForReplication extends LiveNodeLocator { private String nodeID; - public AnyLiveNodeLocatorForReplication(SharedNothingBackupQuorum backupQuorum, ActiveMQServerImpl server, long retryReplicationWait) { - 
super(backupQuorum); + public AnyLiveNodeLocatorForReplication(BackupRegistrationListener backupRegistrationListener, + ActiveMQServerImpl server, long retryReplicationWait) { + super(backupRegistrationListener); this.server = server; this.retryReplicationWait = retryReplicationWait; } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ClusterTopologySearch.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ClusterTopologySearch.java new file mode 100644 index 00000000000..a8ceef42b19 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ClusterTopologySearch.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.core.server.impl; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.api.core.ActiveMQException; +import org.apache.activemq.artemis.api.core.ActiveMQExceptionType; +import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration; +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.api.core.client.ActiveMQClient; +import org.apache.activemq.artemis.api.core.client.ClientSession; +import org.apache.activemq.artemis.api.core.client.ClientSessionFactory; +import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener; +import org.apache.activemq.artemis.api.core.client.ServerLocator; +import org.apache.activemq.artemis.api.core.client.TopologyMember; +import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal; +import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal; +import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration; +import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.ConfigurationUtils; +import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle; +import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; +import org.jboss.logging.Logger; + +/** + * This class contains some utils to allow a broker to check presence and role of another broker in the cluster. + */ +final class ClusterTopologySearch { + + private ClusterTopologySearch() { + + } + + /** + * Determines whether there is a live server already running with nodeID.
+ * This search isn't filtering the caller broker transport and is meant to be used + * when the broker acceptors aren't running yet. + */ + public static boolean searchActiveLiveNodeId(String clusterName, + String nodeId, + long timeout, + TimeUnit unit, + Configuration serverConfiguration) throws ActiveMQException { + if (serverConfiguration.getClusterConfigurations().isEmpty()) + return false; + final ClusterConnectionConfiguration clusterConnectionConfiguration = ConfigurationUtils.getReplicationClusterConfiguration(serverConfiguration, clusterName); + + final LiveNodeIdListener liveNodeIdListener = new LiveNodeIdListener(nodeId, serverConfiguration.getClusterUser(), serverConfiguration.getClusterPassword()); + + try (ServerLocatorInternal locator = createLocator(serverConfiguration, clusterConnectionConfiguration)) { + // if would like to filter out a transport configuration: + // locator.setClusterTransportConfiguration(callerBrokerTransportConfiguration) + locator.addClusterTopologyListener(liveNodeIdListener); + locator.setReconnectAttempts(0); + try (ClientSessionFactoryInternal ignored = locator.connectNoWarnings()) { + return liveNodeIdListener.awaitNodePresent(timeout, unit); + } catch (Exception notConnected) { + if (!(notConnected instanceof ActiveMQException) || ActiveMQExceptionType.INTERNAL_ERROR.equals(((ActiveMQException) notConnected).getType())) { + // report all exceptions that aren't ActiveMQException and all INTERNAL_ERRORs + ActiveMQServerLogger.LOGGER.failedConnectingToCluster(notConnected); + } + return false; + } + } + } + + private static final class LiveNodeIdListener implements ClusterTopologyListener { + + private static final Logger logger = Logger.getLogger(LiveNodeIdListener.class); + private final String nodeId; + private final String user; + private final String password; + private final CountDownLatch searchCompleted; + private boolean isNodePresent = false; + + LiveNodeIdListener(String nodeId, String user, String password) { 
+ this.nodeId = nodeId; + this.user = user; + this.password = password; + this.searchCompleted = new CountDownLatch(1); + } + + @Override + public void nodeUP(TopologyMember topologyMember, boolean last) { + boolean isOurNodeId = nodeId != null && nodeId.equals(topologyMember.getNodeId()); + if (isOurNodeId && isActive(topologyMember.getLive())) { + isNodePresent = true; + } + if (isOurNodeId || last) { + searchCompleted.countDown(); + } + } + + public boolean awaitNodePresent(long timeout, TimeUnit unit) throws InterruptedException { + searchCompleted.await(timeout, unit); + return isNodePresent; + } + + /** + * In a cluster of replicated live/backup pairs if a backup crashes and then its live crashes the cluster will + * retain the topology information of the live such that when the live server restarts it will check the + * cluster to see if its nodeID is present (which it will be) and then it will activate as a backup rather than + * a live. To prevent this situation an additional check is necessary to see if the server with the matching + * nodeID is actually active or not which is done by attempting to make a connection to it. 
+ * + * @param transportConfiguration + * @return + */ + private boolean isActive(TransportConfiguration transportConfiguration) { + try (ServerLocator serverLocator = ActiveMQClient.createServerLocator(false, transportConfiguration); + ClientSessionFactory clientSessionFactory = serverLocator.createSessionFactory(); + ClientSession clientSession = clientSessionFactory.createSession(user, password, false, false, false, false, 0)) { + return true; + } catch (Exception e) { + logger.debug("isActive check failed", e); + return false; + } + } + + @Override + public void nodeDown(long eventUID, String nodeID) { + // no-op + } + } + + private static ServerLocatorInternal createLocator(Configuration configuration, + ClusterConnectionConfiguration config) throws ActiveMQException { + final ServerLocatorInternal locator; + if (config.getDiscoveryGroupName() != null) { + DiscoveryGroupConfiguration dg = configuration.getDiscoveryGroupConfigurations().get(config.getDiscoveryGroupName()); + + if (dg == null) { + throw ActiveMQMessageBundle.BUNDLE.noDiscoveryGroupFound(null); + } + locator = (ServerLocatorInternal) ActiveMQClient.createServerLocatorWithHA(dg); + } else { + TransportConfiguration[] tcConfigs = config.getStaticConnectors() != null ? 
configuration.getTransportConfigurations(config.getStaticConnectors()) : null; + + locator = (ServerLocatorInternal) ActiveMQClient.createServerLocatorWithHA(tcConfigs); + } + return locator; + } + +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeIdLocatorForReplication.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeIdLocatorForReplication.java new file mode 100644 index 00000000000..58cb32a4b95 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeIdLocatorForReplication.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.core.server.impl; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.Queue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.activemq.artemis.api.core.ActiveMQException; +import org.apache.activemq.artemis.api.core.Pair; +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.api.core.client.TopologyMember; +import org.apache.activemq.artemis.core.server.LiveNodeLocator; +import org.apache.activemq.artemis.utils.ConcurrentUtil; + +/** + * It looks for a live server in the cluster with a specific NodeID + */ +public class NamedLiveNodeIdLocatorForReplication extends LiveNodeLocator { + + private final Lock lock = new ReentrantLock(); + private final Condition condition = lock.newCondition(); + private final String nodeID; + private final long retryReplicationWait; + private final Queue<Pair<TransportConfiguration, TransportConfiguration>> liveConfigurations = new LinkedList<>(); + private final ArrayList<Pair<TransportConfiguration, TransportConfiguration>> triedConfigurations = new ArrayList<>(); + private boolean found; + + public NamedLiveNodeIdLocatorForReplication(String nodeID, + BackupRegistrationListener backupRegistrationListener, + long retryReplicationWait) { + super(backupRegistrationListener); + this.nodeID = nodeID; + this.retryReplicationWait = retryReplicationWait; + } + + @Override + public void locateNode() throws ActiveMQException { + locateNode(-1L); + } + + @Override + public void locateNode(long timeout) throws ActiveMQException { + try { + lock.lock(); + if (liveConfigurations.size() == 0) { + try { + if (timeout != -1L) { + ConcurrentUtil.await(condition, timeout); + } else { + while (liveConfigurations.size() == 0) { + condition.await(retryReplicationWait, TimeUnit.MILLISECONDS); + liveConfigurations.addAll(triedConfigurations); + triedConfigurations.clear(); + } + } + } catch
(InterruptedException e) { + //ignore + } + } + } finally { + lock.unlock(); + } + } + + @Override + public void nodeUP(TopologyMember topologyMember, boolean last) { + try { + lock.lock(); + if (nodeID.equals(topologyMember.getNodeId()) && topologyMember.getLive() != null) { + Pair liveConfiguration = new Pair<>(topologyMember.getLive(), topologyMember.getBackup()); + if (!liveConfigurations.contains(liveConfiguration)) { + liveConfigurations.add(liveConfiguration); + } + found = true; + condition.signal(); + } + } finally { + lock.unlock(); + } + } + + @Override + public void nodeDown(long eventUID, String nodeID) { + //no op + } + + @Override + public String getNodeID() { + return found ? nodeID : null; + } + + @Override + public Pair getLiveConfiguration() { + return liveConfigurations.peek(); + } + + @Override + public void notifyRegistrationFailed(boolean alreadyReplicating) { + try { + lock.lock(); + triedConfigurations.add(liveConfigurations.poll()); + super.notifyRegistrationFailed(alreadyReplicating); + } finally { + lock.unlock(); + } + } +} + diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeLocatorForReplication.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeLocatorForReplication.java index 624808d1f0d..c4775d27586 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeLocatorForReplication.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/NamedLiveNodeLocatorForReplication.java @@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.TransportConfiguration; import org.apache.activemq.artemis.api.core.client.TopologyMember; import org.apache.activemq.artemis.core.server.LiveNodeLocator; -import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum; import org.apache.activemq.artemis.utils.ConcurrentUtil; /** @@ -48,8 +47,10 @@ public class 
NamedLiveNodeLocatorForReplication extends LiveNodeLocator { private String nodeID; - public NamedLiveNodeLocatorForReplication(String backupGroupName, SharedNothingBackupQuorum quorumManager, long retryReplicationWait) { - super(quorumManager); + public NamedLiveNodeLocatorForReplication(String backupGroupName, + BackupRegistrationListener backupRegistrationListener, + long retryReplicationWait) { + super(backupRegistrationListener); this.backupGroupName = backupGroupName; this.retryReplicationWait = retryReplicationWait; } diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationBackupActivation.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationBackupActivation.java new file mode 100644 index 00000000000..4441bf00186 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationBackupActivation.java @@ -0,0 +1,599 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.activemq.artemis.core.server.impl;
+
+import javax.annotation.concurrent.GuardedBy;
+
+import java.util.Objects;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Consumer;
+
+import org.apache.activemq.artemis.api.core.ActiveMQException;
+import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
+import org.apache.activemq.artemis.api.core.Pair;
+import org.apache.activemq.artemis.api.core.SimpleString;
+import org.apache.activemq.artemis.api.core.TransportConfiguration;
+import org.apache.activemq.artemis.core.protocol.core.Channel;
+import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
+import org.apache.activemq.artemis.core.server.ActiveMQServer;
+import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
+import org.apache.activemq.artemis.core.server.LiveNodeLocator;
+import org.apache.activemq.artemis.core.server.NodeManager;
+import org.apache.activemq.artemis.core.server.cluster.ClusterControl;
+import org.apache.activemq.artemis.core.server.cluster.ClusterController;
+import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
+import org.apache.activemq.artemis.quorum.DistributedLock;
+import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
+import org.apache.activemq.artemis.quorum.UnavailableStateException;
+import org.jboss.logging.Logger;
+
+import static org.apache.activemq.artemis.core.server.impl.ReplicationObserver.ReplicationFailure;
+
+/**
+ * This activation can be used by a primary while trying to fail-back ie {@code failback == true} or
+ * by a natural-born backup ie {@code failback == false}.<br>
+ * <p>
+ * {@link #run()} starts the quorum service, replicates a live broker and, when the replication ends because
+ * the live is gone, tries to acquire the live lock named after the NodeID: on success this broker becomes
+ * live, otherwise it is stopped or restarted as backup depending on the failure.
+ */
+public final class ReplicationBackupActivation extends Activation implements DistributedPrimitiveManager.UnavailableManagerListener {
+
+   private static final Logger LOGGER = Logger.getLogger(ReplicationBackupActivation.class);
+
+   private final boolean wasLive;
+   private final ReplicationBackupPolicy policy;
+   private final ActiveMQServerImpl activeMQServer;
+   // This field is != null iff this node is a primary during a fail-back ie acting as a backup in order to become live again.
+   private final String expectedNodeID;
+   @GuardedBy("this")
+   private boolean closed;
+   private final DistributedPrimitiveManager distributedManager;
+   // Used for monitoring purposes
+   private volatile ReplicationObserver replicationObserver;
+   // Used for testing purposes
+   private volatile ReplicationEndpoint replicationEndpoint;
+   // Used for testing purposes: volatile because it's set by a different thread than the activation one
+   private volatile Consumer<ReplicationEndpoint> onReplicationEndpointCreation;
+   // Used to arbiter one-shot server stop/restart
+   private final AtomicBoolean stopping;
+
+   /**
+    * @throws IllegalStateException if {@code policy} requires a fail-back but {@code activeMQServer} has no NodeID
+    */
+   public ReplicationBackupActivation(final ActiveMQServerImpl activeMQServer,
+                                      final boolean wasLive,
+                                      final DistributedPrimitiveManager distributedManager,
+                                      final ReplicationBackupPolicy policy) {
+      this.wasLive = wasLive;
+      this.activeMQServer = activeMQServer;
+      if (policy.isTryFailback()) {
+         final SimpleString serverNodeID = activeMQServer.getNodeID();
+         if (serverNodeID == null || serverNodeID.isEmpty()) {
+            throw new IllegalStateException("A failback activation must be biased around a specific NodeID");
+         }
+         this.expectedNodeID = serverNodeID.toString();
+      } else {
+         this.expectedNodeID = null;
+      }
+      this.distributedManager = distributedManager;
+      this.policy = policy;
+      this.replicationObserver = null;
+      this.replicationEndpoint = null;
+      this.stopping = new AtomicBoolean(false);
+   }
+
+   /**
+    * used for testing purposes.
+    */
+   public DistributedPrimitiveManager getDistributedManager() {
+      return distributedManager;
+   }
+
+   /**
+    * Restarts this broker (as backup) when the quorum service becomes unavailable, unless already closed.
+    */
+   @Override
+   public void onUnavailableManagerEvent() {
+      synchronized (this) {
+         if (closed) {
+            return;
+         }
+      }
+      LOGGER.info("Unavailable quorum service detected: try restart server");
+      asyncRestartServer(activeMQServer, true);
+   }
+
+   /**
+    * This util class exists because {@link LiveNodeLocator} needs a {@link LiveNodeLocator.BackupRegistrationListener}
+    * to forward backup registration failure events: this is used to switch on/off backup registration event listening
+    * on an existing locator.
+    */
+   private static final class RegistrationFailureForwarder implements LiveNodeLocator.BackupRegistrationListener, AutoCloseable {
+
+      private static final LiveNodeLocator.BackupRegistrationListener NOOP_LISTENER = ignore -> {
+      };
+      private volatile LiveNodeLocator.BackupRegistrationListener listener = NOOP_LISTENER;
+
+      /**
+       * Starts forwarding the events to {@code listener}, until {@link #close()}.
+       */
+      public RegistrationFailureForwarder to(LiveNodeLocator.BackupRegistrationListener listener) {
+         this.listener = listener;
+         return this;
+      }
+
+      @Override
+      public void onBackupRegistrationFailed(boolean alreadyReplicating) {
+         listener.onBackupRegistrationFailed(alreadyReplicating);
+      }
+
+      /**
+       * Stops forwarding: further events are dropped.
+       */
+      @Override
+      public void close() {
+         listener = NOOP_LISTENER;
+      }
+   }
+
+   @Override
+   public void run() {
+      synchronized (this) {
+         if (closed) {
+            return;
+         }
+      }
+      try {
+         LOGGER.info("Trying to reach majority of quorum service nodes");
+         distributedManager.start();
+         LOGGER.info("Quorum service available: starting broker");
+         distributedManager.addUnavailableManagerListener(this);
+         // Stop the previous node manager and create a new one with NodeManager::replicatedBackup == true:
+         // NodeManager::start skip setup lock file with NodeID, until NodeManager::stopBackup is called.
+         activeMQServer.resetNodeManager();
+         activeMQServer.getNodeManager().stop();
+         // A primary needs to preserve NodeID across runs
+         activeMQServer.moveServerData(policy.getMaxSavedReplicatedJournalsSize(), policy.isTryFailback());
+         activeMQServer.getNodeManager().start();
+         if (!activeMQServer.initialisePart1(false)) {
+            return;
+         }
+         synchronized (this) {
+            if (closed) {
+               return;
+            }
+         }
+         final ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
+         clusterController.awaitConnectionToReplicationCluster();
+         activeMQServer.getBackupManager().start();
+         ActiveMQServerLogger.LOGGER.backupServerStarted(activeMQServer.getVersion().getFullVersion(),
+                                                         activeMQServer.getNodeManager().getNodeId());
+         activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
+         final DistributedLock liveLock = replicateAndFailover(clusterController);
+         if (liveLock == null) {
+            return;
+         }
+         startAsLive(liveLock);
+      } catch (Exception e) {
+         if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted()) {
+            // do not log these errors if the server is being stopped.
+            return;
+         }
+         ActiveMQServerLogger.LOGGER.initializationError(e);
+      }
+   }
+
+   /**
+    * Completes the activation as live, while holding {@code liveLock}.
+    *
+    * @throws ActiveMQIllegalStateException if the lock cannot be checked or isn't held anymore
+    */
+   private void startAsLive(final DistributedLock liveLock) throws Exception {
+      activeMQServer.setHAPolicy(policy.getLivePolicy());
+
+      synchronized (activeMQServer) {
+         if (!activeMQServer.isStarted()) {
+            liveLock.close();
+            return;
+         }
+         ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
+         // stopBackup is going to write the NodeID previously set on the NodeManager,
+         // because activeMQServer.resetNodeManager() has created a NodeManager with replicatedBackup == true.
+         activeMQServer.getNodeManager().stopBackup();
+         activeMQServer.getStorageManager().start();
+         activeMQServer.getBackupManager().activated();
+         // IMPORTANT:
+         // we're setting this activation JUST because it would allow the server to use its
+         // getActivationChannelHandler to handle replication
+         final ReplicationPrimaryActivation primaryActivation = new ReplicationPrimaryActivation(activeMQServer, distributedManager, policy.getLivePolicy());
+         liveLock.addListener(primaryActivation);
+         activeMQServer.setActivation(primaryActivation);
+         activeMQServer.initialisePart2(false);
+         // calling primaryActivation.onUnavailableLockEvent if !isHeldByCaller is necessary in case the lock was
+         // unavailable before liveLock.addListener: just throwing an exception won't stop the broker.
+         final boolean stillLive;
+         try {
+            stillLive = liveLock.isHeldByCaller();
+         } catch (UnavailableStateException e) {
+            LOGGER.warn(e);
+            primaryActivation.onUnavailableLockEvent();
+            throw new ActiveMQIllegalStateException("This server cannot check its role as a live: activation is failed");
+         }
+         if (!stillLive) {
+            primaryActivation.onUnavailableLockEvent();
+            throw new ActiveMQIllegalStateException("This server is not live anymore: activation is failed");
+         }
+         if (activeMQServer.getIdentity() != null) {
+            ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
+         } else {
+            ActiveMQServerLogger.LOGGER.serverIsLive();
+         }
+         activeMQServer.completeActivation(true);
+      }
+   }
+
+   /**
+    * Picks the locator: by NodeID on fail-back, otherwise by group name if configured, otherwise any live.
+    */
+   private LiveNodeLocator createLiveNodeLocator(final LiveNodeLocator.BackupRegistrationListener registrationListener) {
+      if (expectedNodeID != null) {
+         assert policy.isTryFailback();
+         return new NamedLiveNodeIdLocatorForReplication(expectedNodeID, registrationListener, policy.getRetryReplicationWait());
+      }
+      return policy.getGroupName() == null ?
+         new AnyLiveNodeLocatorForReplication(registrationListener, activeMQServer, policy.getRetryReplicationWait()) :
+         new NamedLiveNodeLocatorForReplication(policy.getGroupName(), registrationListener, policy.getRetryReplicationWait());
+   }
+
+   /**
+    * Keeps replicating a live until a replication failure decides what to do next.
+    *
+    * @return the acquired live lock if this broker has to become live, {@code null} if it has been closed,
+    * stopped or scheduled for a stop/restart
+    */
+   private DistributedLock replicateAndFailover(final ClusterController clusterController) throws ActiveMQException, InterruptedException {
+      final RegistrationFailureForwarder registrationFailureForwarder = new RegistrationFailureForwarder();
+      // node locator isn't stateless and contains a live-list of candidate nodes to connect to, hence
+      // it MUST be reused for each replicateLive attempt
+      final LiveNodeLocator nodeLocator = createLiveNodeLocator(registrationFailureForwarder);
+      clusterController.addClusterTopologyListenerForReplication(nodeLocator);
+      try {
+         while (true) {
+            synchronized (this) {
+               if (closed) {
+                  return null;
+               }
+            }
+            final ReplicationFailure failure = replicateLive(clusterController, nodeLocator, registrationFailureForwarder);
+            if (failure == null) {
+               // no live has been reached: wait a bit and retry
+               Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
+               continue;
+            }
+            if (!activeMQServer.isStarted()) {
+               return null;
+            }
+            LOGGER.debugf("ReplicationFailure = %s", failure);
+            boolean voluntaryFailOver = false;
+            switch (failure) {
+               case VoluntaryFailOver:
+                  voluntaryFailOver = true;
+                  // fall through: both kinds of failover try to acquire the live lock
+               case NonVoluntaryFailover:
+                  final DistributedLock liveLock = tryAcquireLiveLock();
+                  // from now on we're meant to stop:
+                  // - due to failover
+                  // - due to restart/stop
+                  assert stopping.get();
+                  if (liveLock != null) {
+                     return liveLock;
+                  }
+                  boolean restart = true;
+                  if (voluntaryFailOver && isFirstFailbackAttempt()) {
+                     restart = false;
+                     LOGGER.error("Failed to fail-back: stopping broker based on quorum results");
+                  } else {
+                     ActiveMQServerLogger.LOGGER.restartingAsBackupBasedOnQuorumVoteResults();
+                  }
+                  // let's ignore the stopping flag here, we're in control of it
+                  asyncRestartServer(activeMQServer, restart, false);
+                  return null;
+               case RegistrationError:
+                  LOGGER.error("Stopping broker because of critical registration error");
+                  asyncRestartServer(activeMQServer, false);
+                  return null;
+               case AlreadyReplicating:
+                  // can just retry here, data should be clean and nodeLocator
+                  // should remove the live node that has answered this
+                  LOGGER.info("Live broker was already replicating: retry sync with another live");
+                  continue;
+               case ClosedObserver:
+                  return null;
+               case BackupNotInSync:
+                  LOGGER.info("Replication failure while initial sync not yet completed: restart as backup");
+                  asyncRestartServer(activeMQServer, true);
+                  return null;
+               case WrongNodeId:
+                  LOGGER.error("Stopping broker because of wrong node ID communication from live: maybe a misbehaving live?");
+                  asyncRestartServer(activeMQServer, false);
+                  return null;
+               default:
+                  throw new AssertionError("Unsupported failure " + failure);
+            }
+         }
+      } finally {
+         silentExecution("Errored on cluster topology listener for replication cleanup", () -> clusterController.removeClusterTopologyListenerForReplication(nodeLocator));
+      }
+   }
+
+   /**
+    * {@code wasLive} is {@code true} only while transitioning from primary to backup.<br>
+    * If a natural-born backup becomes live and allows failback, while transitioning back to backup again
+    * {@code wasLive} is still {@code false}.<br>
+    * The check on {@link ReplicationBackupPolicy#isTryFailback()} is redundant but still useful for correctness.
+    * <p>
+    * In case of fail-back, any event that's going to restart this broker as backup (eg quorum service unavailable
+    * or some replication failures) will cause {@code wasLive} to be {@code false}, because the HA policy set isn't
+    * a primary anymore.
+    */
+   private boolean isFirstFailbackAttempt() {
+      return wasLive && policy.isTryFailback();
+   }
+
+   /**
+    * Tries to acquire the live lock named after the current NodeID, marking this activation as stopping.
+    *
+    * @return the held lock or {@code null} if this activation was already stopping, the lock cannot be obtained
+    * or it has not been acquired within the configured vote retries
+    */
+   private DistributedLock tryAcquireLiveLock() throws InterruptedException {
+      // disable quorum service unavailability handling and just treat this imperatively
+      if (!stopping.compareAndSet(false, true)) {
+         // already unavailable quorum service: fail fast
+         return null;
+      }
+      distributedManager.removeUnavailableManagerListener(this);
+      assert activeMQServer.getNodeManager().getNodeId() != null;
+      final String liveID = activeMQServer.getNodeManager().getNodeId().toString();
+      final int voteRetries = policy.getVoteRetries();
+      // widen before adding: Integer.MAX_VALUE + 1 as int would overflow to a negative value ie "retry forever"
+      final long maxAttempts = voteRetries >= 0 ? (voteRetries + 1L) : -1;
+      if (maxAttempts == -1) {
+         LOGGER.error("It's not safe to retry an infinite amount of time to acquire a live lock: please consider setting a vote-retries value");
+      }
+      final long voteRetryWait = policy.getVoteRetryWait();
+      final DistributedLock liveLock = getLock(distributedManager, liveID);
+      if (liveLock == null) {
+         return null;
+      }
+      // a negative maxAttempts means no limit
+      for (long attempt = 0; maxAttempts < 0 || attempt < maxAttempts; attempt++) {
+         try {
+            if (liveLock.tryLock(voteRetryWait, TimeUnit.MILLISECONDS)) {
+               LOGGER.debugf("%s live lock acquired after %d attempts.", liveID, (attempt + 1));
+               return liveLock;
+            }
+         } catch (UnavailableStateException e) {
+            LOGGER.warnf(e, "Failed to acquire live lock %s because of unavailable quorum service: stop trying", liveID);
+            distributedManager.stop();
+            return null;
+         }
+      }
+      LOGGER.warnf("Failed to acquire live lock %s after %d tries", liveID, maxAttempts);
+      distributedManager.stop();
+      return null;
+   }
+
+   /**
+    * @return the lock named {@code lockId} or {@code null} if {@code manager} isn't started or fails to provide it
+    */
+   private DistributedLock getLock(final DistributedPrimitiveManager manager,
+                                   final String lockId) throws InterruptedException {
+      if (!manager.isStarted()) {
+         return null;
+      }
+      try {
+         return manager.getDistributedLock(lockId);
+      } catch (ExecutionException e) {
+         LOGGER.warnf(e, "Errored while getting lock %s", lockId);
+         return null;
+      } catch (TimeoutException te) {
+         LOGGER.warnf(te, "Timeout while getting lock %s", lockId);
+         return null;
+      }
+   }
+
+   /**
+    * Creates a new observer: a fail-back one, bound to {@code expectedNodeID}, if the policy requires it.
+    */
+   private ReplicationObserver replicationObserver() {
+      if (policy.isTryFailback()) {
+         return ReplicationObserver.failbackObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool(), expectedNodeID);
+      }
+      return ReplicationObserver.failoverObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool());
+   }
+
+   /**
+    * Performs a single replication attempt, blocking until the replication fails.
+    *
+    * @return the reason why the replication has ended or {@code null} if no live has been reached
+    */
+   private ReplicationFailure replicateLive(final ClusterController clusterController,
+                                            final LiveNodeLocator liveLocator,
+                                            final RegistrationFailureForwarder registrationFailureForwarder) throws ActiveMQException {
+      try (ReplicationObserver replicationObserver = replicationObserver();
+           RegistrationFailureForwarder ignored = registrationFailureForwarder.to(replicationObserver)) {
+         this.replicationObserver = replicationObserver;
+         clusterController.addClusterTopologyListener(replicationObserver);
+         // ReplicationError notifies backup registration failures to live locator -> forwarder -> observer
+         final ReplicationError replicationError = new ReplicationError(liveLocator);
+         clusterController.addIncomingInterceptorForReplication(replicationError);
+         try {
+            final ClusterControl liveControl = tryLocateAndConnectToLive(liveLocator, clusterController);
+            if (liveControl == null) {
+               return null;
+            }
+            try {
+               final ReplicationEndpoint replicationEndpoint = tryAuthorizeAndAsyncRegisterAsBackupToLive(liveControl, replicationObserver);
+               if (replicationEndpoint == null) {
+                  return ReplicationFailure.RegistrationError;
+               }
+               this.replicationEndpoint = replicationEndpoint;
+               try {
+                  return replicationObserver.awaitReplicationFailure();
+               } finally {
+                  this.replicationEndpoint = null;
+                  ActiveMQServerImpl.stopComponent(replicationEndpoint);
+                  closeChannelOf(replicationEndpoint);
+               }
+            } finally {
+               silentExecution("Errored on live control close", liveControl::close);
+            }
+         } finally {
+            silentExecution("Errored on cluster topology listener cleanup", () -> clusterController.removeClusterTopologyListener(replicationObserver));
+            silentExecution("Errored while removing incoming interceptor for replication", () -> clusterController.removeIncomingInterceptorForReplication(replicationError));
+         }
+      } finally {
+         this.replicationObserver = null;
+      }
+   }
+
+   /**
+    * Runs a best-effort cleanup {@code task}: any failure is just logged at debug level.
+    */
+   private static void silentExecution(String debugErrorMessage, Runnable task) {
+      try {
+         task.run();
+      } catch (Throwable ignore) {
+         LOGGER.debug(debugErrorMessage, ignore);
+      }
+   }
+
+   /**
+    * Closes and detaches the channel of {@code replicationEndpoint}, if any.
+    */
+   private static void closeChannelOf(final ReplicationEndpoint replicationEndpoint) {
+      if (replicationEndpoint == null) {
+         return;
+      }
+      if (replicationEndpoint.getChannel() != null) {
+         silentExecution("Errored while closing replication endpoint channel", () -> replicationEndpoint.getChannel().close());
+         replicationEndpoint.setChannel(null);
+      }
+   }
+
+   private boolean asyncRestartServer(final ActiveMQServer server, boolean restart) {
+      return asyncRestartServer(server, restart, true);
+   }
+
+   /**
+    * Stops {@code server} on a new thread and, if {@code restart}, starts it again.
+    *
+    * @param checkStopping if {@code true} the stop/restart is performed only if no other one has been requested yet
+    * @return {@code false} if the request has been dropped because of {@code checkStopping}, {@code true} otherwise
+    */
+   private boolean asyncRestartServer(final ActiveMQServer server, boolean restart, boolean checkStopping) {
+      if (checkStopping) {
+         if (!stopping.compareAndSet(false, true)) {
+            return false;
+         }
+      }
+      new Thread(() -> {
+         if (server.getState() != ActiveMQServer.SERVER_STATE.STOPPED && server.getState() != ActiveMQServer.SERVER_STATE.STOPPING) {
+            try {
+               server.stop(!restart);
+               if (restart) {
+                  server.start();
+               }
+            } catch (Exception e) {
+               if (restart) {
+                  ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, server);
+               } else {
+                  ActiveMQServerLogger.LOGGER.errorStoppingServer(e);
+               }
+            }
+         }
+      }).start();
+      return true;
+   }
+
+   /**
+    * Waits for a live to be located and connects to it, trying its live connector first and its backup one next.
+    *
+    * @return the connection to the live or {@code null} if there is no candidate or none can be connected
+    * @throws RuntimeException if the locator has not found any live NodeID
+    */
+   private ClusterControl tryLocateAndConnectToLive(final LiveNodeLocator liveLocator,
+                                                    final ClusterController clusterController) throws ActiveMQException {
+      liveLocator.locateNode();
+      final Pair<TransportConfiguration, TransportConfiguration> possibleLive = liveLocator.getLiveConfiguration();
+      final String nodeID = liveLocator.getNodeID();
+      if (nodeID == null) {
+         throw new RuntimeException("Could not establish the connection with any live");
+      }
+      if (!policy.isTryFailback()) {
+         assert expectedNodeID == null;
+         activeMQServer.getNodeManager().setNodeID(nodeID);
+      } else {
+         assert expectedNodeID.equals(nodeID);
+      }
+      if (possibleLive == null) {
+         return null;
+      }
+      final ClusterControl liveControl = tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getA());
+      if (liveControl != null) {
+         return liveControl;
+      }
+      return tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getB());
+   }
+
+   /**
+    * @return the connection to {@code tc} or {@code null} if {@code tc} is {@code null} or the connection fails
+    */
+   private static ClusterControl tryConnectToNodeInReplicatedCluster(final ClusterController clusterController,
+                                                                     final TransportConfiguration tc) {
+      try {
+         if (tc != null) {
+            return clusterController.connectToNodeInReplicatedCluster(tc);
+         }
+      } catch (Exception e) {
+         LOGGER.debug(e.getMessage(), e);
+      }
+      return null;
+   }
+
+   @Override
+   public void close(final boolean permanently, final boolean restarting) throws Exception {
+      synchronized (this) {
+         closed = true;
+         // closing the observer unblocks an activation thread awaiting a replication failure
+         final ReplicationObserver replicationObserver = this.replicationObserver;
+         if (replicationObserver != null) {
+            replicationObserver.close();
+         }
+      }
+      //we have to check as the server policy may have changed
+      try {
+         if (activeMQServer.getHAPolicy().isBackup()) {
+            // To avoid a NPE caused by the stop
+            final NodeManager nodeManager = activeMQServer.getNodeManager();
+
+            activeMQServer.interruptActivationThread(nodeManager);
+
+            if (nodeManager != null) {
+               nodeManager.stopBackup();
+            }
+         }
+      } finally {
+         // this one needs to happen after interrupting the activation thread
+         // in order to unblock distributedManager::start
+         distributedManager.stop();
+      }
+   }
+
+   @Override
+   public void preStorageClose() throws Exception {
+      // TODO replication endpoint close?
+   }
+
+   /**
+    * Authorizes against the live, creates and starts the replication endpoint and announces this backup.
+    *
+    * @return the started endpoint or {@code null} on any failure, after having cleaned up what was created
+    */
+   private ReplicationEndpoint tryAuthorizeAndAsyncRegisterAsBackupToLive(final ClusterControl liveControl,
+                                                                          final ReplicationObserver liveObserver) {
+      ReplicationEndpoint replicationEndpoint = null;
+      try {
+         liveControl.getSessionFactory().setReconnectAttempts(1);
+         liveObserver.listenConnectionFailuresOf(liveControl.getSessionFactory());
+         liveControl.authorize();
+         replicationEndpoint = new ReplicationEndpoint(activeMQServer, policy.isTryFailback(), liveObserver);
+         final Consumer<ReplicationEndpoint> onReplicationEndpointCreation = this.onReplicationEndpointCreation;
+         if (onReplicationEndpointCreation != null) {
+            onReplicationEndpointCreation.accept(replicationEndpoint);
+         }
+         replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
+         connectToReplicationEndpoint(liveControl, replicationEndpoint);
+         replicationEndpoint.start();
+         liveControl.announceReplicatingBackupToLive(policy.isTryFailback(), policy.getClusterName());
+         return replicationEndpoint;
+      } catch (Exception e) {
+         ActiveMQServerLogger.LOGGER.replicationStartProblem(e);
+         ActiveMQServerImpl.stopComponent(replicationEndpoint);
+         closeChannelOf(replicationEndpoint);
+         return null;
+      }
+   }
+
+   /**
+    * Creates the replication channel on {@code liveControl} and binds it to {@code replicationEndpoint}.
+    *
+    * @return always {@code true}
+    */
+   private static boolean connectToReplicationEndpoint(final ClusterControl liveControl,
+                                                       final ReplicationEndpoint replicationEndpoint) {
+      final Channel replicationChannel = liveControl.createReplicationChannel();
+      replicationChannel.setHandler(replicationEndpoint);
+      replicationEndpoint.setChannel(replicationChannel);
+      return true;
+   }
+
+   @Override
+   public boolean isReplicaSync() {
+      // NOTE: this method is just for monitoring purposes, not suitable to perform logic!
+      // During a failover this backup won't have any active liveObserver and will report `false`!!
+      final ReplicationObserver liveObserver = this.replicationObserver;
+      if (liveObserver == null) {
+         return false;
+      }
+      return liveObserver.isBackupUpToDate();
+   }
+
+   /**
+    * @return the endpoint of the replication in progress or {@code null} if there is none
+    */
+   public ReplicationEndpoint getReplicationEndpoint() {
+      return replicationEndpoint;
+   }
+
+   /**
+    * This must be used just for testing purposes.
+    *
+    * @param onReplicationEndpointCreation called with each new replication endpoint, before it's started
+    * @throws NullPointerException if {@code onReplicationEndpointCreation} is {@code null}
+    */
+   public void spyReplicationEndpointCreation(Consumer<ReplicationEndpoint> onReplicationEndpointCreation) {
+      Objects.requireNonNull(onReplicationEndpointCreation);
+      this.onReplicationEndpointCreation = onReplicationEndpointCreation;
+   }
+}
diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationObserver.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationObserver.java new file mode 100644 index 00000000000..be2737c26c9 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationObserver.java @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.core.server.impl; + +import javax.annotation.concurrent.GuardedBy; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.api.core.ActiveMQException; +import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener; +import org.apache.activemq.artemis.api.core.client.SessionFailureListener; +import org.apache.activemq.artemis.api.core.client.TopologyMember; +import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal; +import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection; +import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage; +import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; +import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener; +import org.apache.activemq.artemis.core.server.NodeManager; +import org.apache.activemq.artemis.core.server.cluster.BackupManager; +import org.jboss.logging.Logger; + +final class ReplicationObserver implements ClusterTopologyListener, SessionFailureListener, BackupRegistrationListener, ReplicationEndpoint.ReplicationEndpointEventListener, AutoCloseable { + + private static final Logger LOGGER = Logger.getLogger(ReplicationObserver.class); + + public enum ReplicationFailure { + VoluntaryFailOver, BackupNotInSync, 
NonVoluntaryFailover, RegistrationError, AlreadyReplicating, ClosedObserver, WrongNodeId; + } + + private final NodeManager nodeManager; + private final BackupManager backupManager; + private final ScheduledExecutorService scheduledPool; + private final boolean failback; + private final String expectedNodeID; + private final CompletableFuture replicationFailure; + + @GuardedBy("this") + private ClientSessionFactoryInternal sessionFactory; + @GuardedBy("this") + private CoreRemotingConnection connection; + @GuardedBy("this") + private ScheduledFuture forcedFailover; + + private volatile String liveID; + private volatile boolean backupUpToDate; + private volatile boolean closed; + + /** + * This is a safety net in case the live sends the first {@link ReplicationLiveIsStoppingMessage} + * with code {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#STOP_CALLED} and crashes before sending the second with + * {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#FAIL_OVER}. + *

+ * If the second message does come within this dead line, we fail over anyway. + */ + public static final int WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG = 60; + + private ReplicationObserver(final NodeManager nodeManager, + final BackupManager backupManager, + final ScheduledExecutorService scheduledPool, + final boolean failback, + final String expectedNodeID) { + this.nodeManager = nodeManager; + this.backupManager = backupManager; + this.scheduledPool = scheduledPool; + this.failback = failback; + this.expectedNodeID = expectedNodeID; + this.replicationFailure = new CompletableFuture<>(); + + this.sessionFactory = null; + this.connection = null; + this.forcedFailover = null; + + this.liveID = null; + this.backupUpToDate = false; + this.closed = false; + } + + public static ReplicationObserver failbackObserver(final NodeManager nodeManager, + final BackupManager backupManager, + final ScheduledExecutorService scheduledPool, + final String expectedNodeID) { + Objects.requireNonNull(expectedNodeID); + return new ReplicationObserver(nodeManager, backupManager, scheduledPool, true, expectedNodeID); + } + + public static ReplicationObserver failoverObserver(final NodeManager nodeManager, + final BackupManager backupManager, + final ScheduledExecutorService scheduledPool) { + return new ReplicationObserver(nodeManager, backupManager, scheduledPool, false, null); + } + + private void onLiveDown(boolean voluntaryFailover) { + if (closed || replicationFailure.isDone()) { + return; + } + synchronized (this) { + if (closed || replicationFailure.isDone()) { + return; + } + stopForcedFailoverAfterDelay(); + unlistenConnectionFailures(); + if (!isRemoteBackupUpToDate()) { + replicationFailure.complete(ReplicationFailure.BackupNotInSync); + } else if (voluntaryFailover) { + replicationFailure.complete(ReplicationFailure.VoluntaryFailOver); + } else { + replicationFailure.complete(ReplicationFailure.NonVoluntaryFailover); + } + } + } + + @Override + public void nodeDown(long 
eventUID, String nodeID) { + // ignore it during a failback: + // a failing slave close all connections but the one used for replication + // triggering a nodeDown before the restarted master receive a STOP_CALLED from it. + // This can make master to fire a useless quorum vote during a normal failback. + if (failback) { + return; + } + if (nodeID.equals(liveID)) { + onLiveDown(false); + } + } + + @Override + public void nodeUP(TopologyMember member, boolean last) { + } + + /** + * if the connection to our replicated live goes down then decide on an action + */ + @Override + public void connectionFailed(ActiveMQException exception, boolean failedOver) { + onLiveDown(false); + } + + @Override + public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) { + connectionFailed(me, failedOver); + } + + @Override + public void beforeReconnect(ActiveMQException exception) { + //noop + } + + @Override + public void close() { + if (closed) { + return; + } + synchronized (this) { + if (closed) { + return; + } + unlistenConnectionFailures(); + closed = true; + replicationFailure.complete(ReplicationFailure.ClosedObserver); + } + } + + /** + * @param liveSessionFactory the session factory used to connect to the live server + */ + public synchronized void listenConnectionFailuresOf(final ClientSessionFactoryInternal liveSessionFactory) { + if (closed) { + throw new IllegalStateException("the observer is closed: cannot listen to any failures"); + } + if (sessionFactory != null || connection != null) { + throw new IllegalStateException("this observer is already listening to other session factory failures"); + } + this.sessionFactory = liveSessionFactory; + //belts and braces, there are circumstances where the connection listener doesn't get called but the session does. 
+ this.sessionFactory.addFailureListener(this); + connection = (CoreRemotingConnection) liveSessionFactory.getConnection(); + connection.addFailureListener(this); + } + + public synchronized void unlistenConnectionFailures() { + if (connection != null) { + connection.removeFailureListener(this); + connection = null; + } + if (sessionFactory != null) { + sessionFactory.removeFailureListener(this); + sessionFactory = null; + } + } + + @Override + public void onBackupRegistrationFailed(boolean alreadyReplicating) { + if (closed || replicationFailure.isDone()) { + return; + } + synchronized (this) { + if (closed || replicationFailure.isDone()) { + return; + } + stopForcedFailoverAfterDelay(); + unlistenConnectionFailures(); + replicationFailure.complete(alreadyReplicating ? ReplicationFailure.AlreadyReplicating : ReplicationFailure.RegistrationError); + } + } + + public ReplicationFailure awaitReplicationFailure() { + try { + return replicationFailure.get(); + } catch (Throwable e) { + return ReplicationFailure.ClosedObserver; + } + } + + private synchronized void scheduleForcedFailoverAfterDelay() { + if (forcedFailover != null) { + return; + } + forcedFailover = scheduledPool.schedule(() -> onLiveDown(false), WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG, TimeUnit.SECONDS); + } + + private synchronized void stopForcedFailoverAfterDelay() { + if (forcedFailover == null) { + return; + } + forcedFailover.cancel(false); + forcedFailover = null; + } + + @Override + public void onRemoteBackupUpToDate() { + if (backupUpToDate || closed || replicationFailure.isDone()) { + return; + } + synchronized (this) { + if (backupUpToDate || closed || replicationFailure.isDone()) { + return; + } + assert liveID != null; + backupManager.announceBackup(); + backupUpToDate = true; + } + } + + public boolean isBackupUpToDate() { + return backupUpToDate; + } + + public String getLiveID() { + return liveID; + } + + private boolean validateNodeId(String nodeID) { + if (nodeID == null) { + return 
false; + } + final String existingNodeId = this.liveID; + if (existingNodeId == null) { + if (!failback) { + return true; + } + return nodeID.equals(expectedNodeID); + } + return existingNodeId.equals(nodeID); + } + + @Override + public void onLiveNodeId(String nodeId) { + if (closed || replicationFailure.isDone()) { + return; + } + final String existingNodeId = this.liveID; + if (existingNodeId != null && existingNodeId.equals(nodeId)) { + return; + } + synchronized (this) { + if (closed || replicationFailure.isDone()) { + return; + } + if (!validateNodeId(nodeId)) { + stopForcedFailoverAfterDelay(); + unlistenConnectionFailures(); + replicationFailure.complete(ReplicationFailure.WrongNodeId); + } else if (liveID == null) { + liveID = nodeId; + nodeManager.setNodeID(nodeId); + } + } + } + + public boolean isRemoteBackupUpToDate() { + return backupUpToDate; + } + + @Override + public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) { + if (closed || replicationFailure.isDone()) { + return; + } + synchronized (this) { + if (closed || replicationFailure.isDone()) { + return; + } + switch (finalMessage) { + case STOP_CALLED: + scheduleForcedFailoverAfterDelay(); + break; + case FAIL_OVER: + onLiveDown(true); + break; + default: + LOGGER.errorf("unsupported LiveStopping type: %s", finalMessage); + } + } + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationPrimaryActivation.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationPrimaryActivation.java new file mode 100644 index 00000000000..aa4d0e7e5d2 --- /dev/null +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/ReplicationPrimaryActivation.java @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.core.server.impl; + +import javax.annotation.concurrent.GuardedBy; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.api.core.ActiveMQAlreadyReplicatingException; +import org.apache.activemq.artemis.api.core.ActiveMQException; +import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException; +import org.apache.activemq.artemis.api.core.Pair; +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.protocol.core.Channel; +import org.apache.activemq.artemis.core.protocol.core.ChannelHandler; +import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection; +import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl; +import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupRegistrationMessage; +import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupReplicationStartFailedMessage; +import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage; +import org.apache.activemq.artemis.core.remoting.CloseListener; +import org.apache.activemq.artemis.core.remoting.FailureListener; +import 
org.apache.activemq.artemis.core.remoting.server.RemotingService; +import org.apache.activemq.artemis.core.replication.ReplicationManager; +import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; +import org.apache.activemq.artemis.core.server.NodeManager; +import org.apache.activemq.artemis.core.server.cluster.ClusterConnection; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy; +import org.apache.activemq.artemis.quorum.DistributedLock; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; +import org.apache.activemq.artemis.quorum.UnavailableStateException; +import org.apache.activemq.artemis.spi.core.remoting.Acceptor; +import org.jboss.logging.Logger; + +import static org.apache.activemq.artemis.core.server.impl.ClusterTopologySearch.searchActiveLiveNodeId; + +/** + * This is going to be {@link #run()} just by natural born primary, at the first start. + * Both during a failover or a failback, {@link #run()} isn't going to be used, but only {@link #getActivationChannelHandler(Channel, Acceptor)}. 
+ */ +public class ReplicationPrimaryActivation extends LiveActivation implements DistributedLock.UnavailableLockListener { + + private static final Logger LOGGER = Logger.getLogger(ReplicationPrimaryActivation.class); + private static final long DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS = 20_000; + private static final long BLOCKING_CALLS_TIMEOUT_MILLIS = 5_000; + + private final ReplicationPrimaryPolicy policy; + + private final ActiveMQServerImpl activeMQServer; + + @GuardedBy("replicationLock") + private ReplicationManager replicationManager; + + private final Object replicationLock; + + private final DistributedPrimitiveManager distributedManager; + + private volatile boolean stoppingServer; + + public ReplicationPrimaryActivation(final ActiveMQServerImpl activeMQServer, + final DistributedPrimitiveManager distributedManager, + final ReplicationPrimaryPolicy policy) { + this.activeMQServer = activeMQServer; + this.policy = policy; + this.replicationLock = new Object(); + this.distributedManager = distributedManager; + } + + /** + * used for testing purposes. 
+ */ + public DistributedPrimitiveManager getDistributedManager() { + return distributedManager; + } + + @Override + public void freezeConnections(RemotingService remotingService) { + final ReplicationManager replicationManager = getReplicationManager(); + + if (remotingService != null && replicationManager != null) { + remotingService.freeze(null, replicationManager.getBackupTransportConnection()); + } else if (remotingService != null) { + remotingService.freeze(null, null); + } + } + + @Override + public void run() { + try { + final String nodeId = activeMQServer.getNodeManager().readNodeId().toString(); + + final DistributedLock liveLock = searchLiveOrAcquireLiveLock(nodeId, BLOCKING_CALLS_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS); + + if (liveLock == null) { + return; + } + + activeMQServer.initialisePart1(false); + + activeMQServer.initialisePart2(false); + + // must be registered before checking the caller + liveLock.addListener(this); + + // This control is placed here because initialisePart2 is going to load the journal that + // could pause the JVM for enough time to lose lock ownership + if (!liveLock.isHeldByCaller()) { + throw new IllegalStateException("This broker isn't live anymore, probably due to application pauses eg GC, OS etc: failing now"); + } + + activeMQServer.completeActivation(true); + + if (activeMQServer.getIdentity() != null) { + ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity()); + } else { + ActiveMQServerLogger.LOGGER.serverIsLive(); + } + } catch (Exception e) { + // async stop it, we don't need to await this to complete + distributedManager.stop(); + ActiveMQServerLogger.LOGGER.initializationError(e); + activeMQServer.callActivationFailureListeners(e); + } + } + + private DistributedLock searchLiveOrAcquireLiveLock(final String nodeId, + final long blockingCallTimeout, + final TimeUnit unit) throws ActiveMQException, InterruptedException { + if (policy.isCheckForLiveServer()) { + LOGGER.infof("Searching a live 
server with NodeID = %s", nodeId); + if (searchActiveLiveNodeId(policy.getClusterName(), nodeId, blockingCallTimeout, unit, activeMQServer.getConfiguration())) { + LOGGER.infof("Found a live server with NodeID = %s: restarting as backup", nodeId); + activeMQServer.setHAPolicy(policy.getBackupPolicy()); + return null; + } + } + startDistributedPrimitiveManager(); + return acquireDistributeLock(getDistributeLock(nodeId), blockingCallTimeout, unit); + } + + private void startDistributedPrimitiveManager() throws InterruptedException, ActiveMQException { + LOGGER.infof("Trying to reach the majority of quorum nodes in %d ms.", DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS); + try { + if (distributedManager.start(DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) { + return; + } + } catch (InterruptedException ie) { + throw ie; + } catch (Throwable t) { + LOGGER.debug(t); + } + assert !distributedManager.isStarted(); + throw new ActiveMQException("Cannot reach the majority of quorum nodes"); + } + + private DistributedLock getDistributeLock(final String nodeId) throws InterruptedException, ActiveMQException { + try { + return distributedManager.getDistributedLock(nodeId); + } catch (Throwable t) { + try { + distributedManager.stop(); + } catch (Throwable ignore) { + // don't care + } + if (t instanceof InterruptedException) { + throw (InterruptedException) t; + } + throw new ActiveMQException("Cannot obtain a live lock instance"); + } + } + + private DistributedLock acquireDistributeLock(final DistributedLock liveLock, + final long acquireLockTimeout, + final TimeUnit unit) throws InterruptedException, ActiveMQException { + try { + if (liveLock.tryLock(acquireLockTimeout, unit)) { + return liveLock; + } + } catch (UnavailableStateException e) { + LOGGER.debug(e); + } + try { + distributedManager.stop(); + } catch (Throwable ignore) { + // don't care + } + throw new ActiveMQException("Failed to become live"); + } + + @Override + public ChannelHandler 
getActivationChannelHandler(final Channel channel, final Acceptor acceptorUsed) { + if (stoppingServer) { + return null; + } + return packet -> { + if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) { + onBackupRegistration(channel, acceptorUsed, (BackupRegistrationMessage) packet); + } + }; + } + + private void onBackupRegistration(final Channel channel, + final Acceptor acceptorUsed, + final BackupRegistrationMessage msg) { + try { + startAsyncReplication(channel.getConnection(), acceptorUsed.getClusterConnection(), msg.getConnector(), msg.isFailBackRequest()); + } catch (ActiveMQAlreadyReplicatingException are) { + channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.ALREADY_REPLICATING)); + } catch (ActiveMQException e) { + LOGGER.debug("Failed to process backup registration packet", e); + channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.EXCEPTION)); + } + } + + private void startAsyncReplication(final CoreRemotingConnection remotingConnection, + final ClusterConnection clusterConnection, + final TransportConfiguration backupTransport, + final boolean isFailBackRequest) throws ActiveMQException { + synchronized (replicationLock) { + if (replicationManager != null) { + throw new ActiveMQAlreadyReplicatingException(); + } + if (!activeMQServer.isStarted()) { + throw new ActiveMQIllegalStateException(); + } + final ReplicationFailureListener listener = new ReplicationFailureListener(); + remotingConnection.addCloseListener(listener); + remotingConnection.addFailureListener(listener); + final ReplicationManager replicationManager = new ReplicationManager(activeMQServer, remotingConnection, clusterConnection.getCallTimeout(), policy.getInitialReplicationSyncTimeout(), activeMQServer.getIOExecutorFactory()); + this.replicationManager = replicationManager; + replicationManager.start(); + final Thread replicatingThread = new Thread(() -> 
replicate(replicationManager, clusterConnection, isFailBackRequest, backupTransport)); + replicatingThread.setName("async-replication-thread"); + replicatingThread.start(); + } + } + + private void replicate(final ReplicationManager replicationManager, + final ClusterConnection clusterConnection, + final boolean isFailBackRequest, + final TransportConfiguration backupTransport) { + try { + final String nodeID = activeMQServer.getNodeID().toString(); + activeMQServer.getStorageManager().startReplication(replicationManager, activeMQServer.getPagingManager(), nodeID, isFailBackRequest && policy.isAllowAutoFailBack(), policy.getInitialReplicationSyncTimeout()); + + clusterConnection.nodeAnnounced(System.currentTimeMillis(), nodeID, policy.getGroupName(), policy.getScaleDownGroupName(), new Pair<>(null, backupTransport), true); + + if (isFailBackRequest && policy.isAllowAutoFailBack()) { + awaitBackupAnnouncementOnFailbackRequest(clusterConnection); + } + } catch (Exception e) { + if (activeMQServer.getState() == ActiveMQServerImpl.SERVER_STATE.STARTED) { + /* + * The reasoning here is that the exception was either caused by (1) the + * (interaction with) the backup, or (2) by an IO Error at the storage. If (1), we + * can swallow the exception and ignore the replication request. If (2) the live + * will crash shortly. + */ + ActiveMQServerLogger.LOGGER.errorStartingReplication(e); + } + try { + ActiveMQServerImpl.stopComponent(replicationManager); + } catch (Exception amqe) { + ActiveMQServerLogger.LOGGER.errorStoppingReplication(amqe); + } finally { + synchronized (replicationLock) { + this.replicationManager = null; + } + } + } + } + + /** + * This is handling awaiting backup announcement before trying to failover. 
+ * This broker is a backup broker, acting as a live and ready to restart as a backup + */ + private void awaitBackupAnnouncementOnFailbackRequest(ClusterConnection clusterConnection) throws Exception { + final String nodeID = activeMQServer.getNodeID().toString(); + final BackupTopologyListener topologyListener = new BackupTopologyListener(nodeID, clusterConnection.getConnector()); + clusterConnection.addClusterTopologyListener(topologyListener); + try { + if (topologyListener.waitForBackup()) { + restartAsBackupAfterFailback(); + } else { + ActiveMQServerLogger.LOGGER.failbackMissedBackupAnnouncement(); + } + } finally { + clusterConnection.removeClusterTopologyListener(topologyListener); + } + } + + /** + * If {@link #asyncStopServer()} happens before this call, the restart just won't happen. + * If {@link #asyncStopServer()} happens after this call, will make the server to stop right after being restarted. + */ + private void restartAsBackupAfterFailback() throws Exception { + if (stoppingServer) { + return; + } + synchronized (this) { + if (stoppingServer) { + return; + } + distributedManager.stop(); + activeMQServer.fail(true); + ActiveMQServerLogger.LOGGER.restartingReplicatedBackupAfterFailback(); + activeMQServer.setHAPolicy(policy.getBackupPolicy()); + activeMQServer.start(); + } + } + + private void asyncStopServer() { + if (stoppingServer) { + return; + } + synchronized (this) { + if (stoppingServer) { + return; + } + stoppingServer = true; + new Thread(() -> { + try { + activeMQServer.stop(); + } catch (Exception e) { + ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer); + } + }).start(); + } + } + + @Override + public void onUnavailableLockEvent() { + LOGGER.error("Quorum UNAVAILABLE: async stopping broker."); + asyncStopServer(); + } + + private final class ReplicationFailureListener implements FailureListener, CloseListener { + + @Override + public void connectionFailed(ActiveMQException exception, boolean failedOver) { + 
onReplicationConnectionClose(); + } + + @Override + public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) { + connectionFailed(me, failedOver); + } + + @Override + public void connectionClosed() { + onReplicationConnectionClose(); + } + } + + private void onReplicationConnectionClose() { + ExecutorService executorService = activeMQServer.getThreadPool(); + if (executorService != null) { + synchronized (replicationLock) { + if (replicationManager == null) { + return; + } + } + executorService.execute(() -> { + synchronized (replicationLock) { + if (replicationManager == null) { + return; + } + // this is going to stop the replication manager + activeMQServer.getStorageManager().stopReplication(); + assert !replicationManager.isStarted(); + replicationManager = null; + } + }); + } + } + + @Override + public void close(boolean permanently, boolean restarting) throws Exception { + synchronized (replicationLock) { + replicationManager = null; + } + distributedManager.stop(); + // To avoid a NPE cause by the stop + final NodeManager nodeManager = activeMQServer.getNodeManager(); + if (nodeManager != null) { + if (permanently) { + nodeManager.crashLiveServer(); + } else { + nodeManager.pauseLiveServer(); + } + } + } + + @Override + public void sendLiveIsStopping() { + final ReplicationManager replicationManager = getReplicationManager(); + if (replicationManager == null) { + return; + } + replicationManager.sendLiveIsStopping(ReplicationLiveIsStoppingMessage.LiveStopping.STOP_CALLED); + // this pool gets a 'hard' shutdown, no need to manage the Future of this Runnable. 
+ activeMQServer.getScheduledPool().schedule(replicationManager::clearReplicationTokens, 30, TimeUnit.SECONDS); + } + + @Override + public ReplicationManager getReplicationManager() { + synchronized (replicationLock) { + return replicationManager; + } + } + + @Override + public boolean isReplicaSync() { + final ReplicationManager replicationManager = getReplicationManager(); + if (replicationManager == null) { + return false; + } + return !replicationManager.isSynchronizing(); + } +} diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingBackupActivation.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingBackupActivation.java index 0249cdfe53e..3876185803b 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingBackupActivation.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingBackupActivation.java @@ -32,6 +32,7 @@ import org.apache.activemq.artemis.core.protocol.core.Channel; import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage; import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; +import org.apache.activemq.artemis.core.replication.ReplicationEndpoint.ReplicationEndpointEventListener; import org.apache.activemq.artemis.core.server.ActivationParams; import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle; import org.apache.activemq.artemis.core.server.ActiveMQServer; @@ -54,7 +55,7 @@ import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAIL_OVER; import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.STOP; -public final class SharedNothingBackupActivation extends Activation { +public final class SharedNothingBackupActivation extends Activation implements ReplicationEndpointEventListener { 
private static final Logger logger = Logger.getLogger(SharedNothingBackupActivation.class); @@ -96,7 +97,7 @@ public void init() throws Exception { assert replicationEndpoint == null; activeMQServer.resetNodeManager(); backupUpToDate = false; - replicationEndpoint = new ReplicationEndpoint(activeMQServer, ioCriticalErrorListener, attemptFailBack, this); + replicationEndpoint = new ReplicationEndpoint(activeMQServer, attemptFailBack, this); } @Override @@ -156,9 +157,6 @@ public void run() { logger.debug("Starting backup manager"); activeMQServer.getBackupManager().start(); - logger.debug("Set backup Quorum"); - replicationEndpoint.setBackupQuorum(backupQuorum); - replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor()); EndpointConnector endpointConnector = new EndpointConnector(); @@ -461,7 +459,13 @@ public boolean isRemoteBackupUpToDate() { return backupUpToDate; } - public void setRemoteBackupUpToDate() { + @Override + public void onLiveNodeId(String nodeId) { + backupQuorum.liveIDSet(nodeId); + } + + @Override + public void onRemoteBackupUpToDate() { activeMQServer.getBackupManager().announceBackup(); backupUpToDate = true; backupSyncLatch.countDown(); @@ -470,7 +474,8 @@ public void setRemoteBackupUpToDate() { /** * @throws ActiveMQException */ - public void remoteFailOver(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException { + @Override + public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException { if (logger.isTraceEnabled()) { logger.trace("Remote fail-over, got message=" + finalMessage + ", backupUpToDate=" + backupUpToDate); @@ -526,4 +531,9 @@ private synchronized ReplicationEndpoint connectToReplicationEndpoint(final Clus return replicationEndpoint; } } + + @Override + public boolean isReplicaSync() { + return isRemoteBackupUpToDate(); + } } diff --git 
a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingLiveActivation.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingLiveActivation.java index 9de4be057ba..f876a7604b8 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingLiveActivation.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/impl/SharedNothingLiveActivation.java @@ -462,4 +462,13 @@ public void nodeDown(long eventUID, String nodeID) { private TransportConfiguration[] connectorNameListToArray(final List connectorNames) { return activeMQServer.getConfiguration().getTransportConfigurations(connectorNames); } + + @Override + public boolean isReplicaSync() { + final ReplicationManager replicationManager = getReplicationManager(); + if (replicationManager == null) { + return false; + } + return !replicationManager.isSynchronizing(); + } } diff --git a/artemis-server/src/main/resources/schema/artemis-configuration.xsd b/artemis-server/src/main/resources/schema/artemis-configuration.xsd index ad612d2f247..35e953c13e4 100644 --- a/artemis-server/src/main/resources/schema/artemis-configuration.xsd +++ b/artemis-server/src/main/resources/schema/artemis-configuration.xsd @@ -2605,7 +2605,7 @@ - + A key-value pair option for the DataSource @@ -2682,7 +2682,7 @@ - + @@ -2726,6 +2726,36 @@ + + + + + + The distributed-primitive-manager class name + + + + + + + A list of options for the distributed-primitive-manager + + + + + + + + A key-value pair option for the distributed-primitive-manager + + + + + + + + + @@ -2749,6 +2779,20 @@ + + + + A primary server configured to replicate. + + + + + + + A backup server configured to replicate. + + + @@ -3119,6 +3163,155 @@ + + + + + + It's the manager used to manager distributed locks used for this type of replication. 
+ + + + + + + used for replication, if set, (remote) backup servers will only pair with live servers with matching + group-name + + + + + + + Name of the cluster configuration to use for replication. This setting is only necessary in case you + configure multiple cluster connections. It is used by replicating backups and by live servers that + may attempt fail-back. + + + + + + + Whether to check the cluster for a (live) server using our own server ID when starting + up. This option is only necessary for performing 'fail-back' on replicating + servers. Strictly speaking this setting only applies to live servers and not to + backups. + + + + + + + The amount of time to wait for the replica to acknowledge it has received all the necessary data from + the replicating server at the final step of the initial replication synchronization process. + + + + + + + If we start as a replica and lose connection to the master, how many times should we attempt to vote + for quorum before restarting + + + + + + + How long to wait (in milliseconds) between each vote + + + + + + + If we start as a replica how long to wait (in milliseconds) before trying to replicate again after failing to find a replica + + + + + + + + + + + + It's the manager used to manage distributed locks used for this type of replication. + + + + + + + used for replication, if set, (remote) backup servers will only pair with live servers with matching + group-name + + + + + + + Name of the cluster configuration to use for replication. This setting is only necessary in case you + configure multiple cluster connections. It is used by replicating backups and by live servers that + may attempt fail-back. + + + + + + + This specifies how many times a replicated backup server can restart after moving its files on start. + Once there are this number of backup journal files the server will stop permanently after it fails
+ + + + + + + Whether a server will automatically stop when a another places a request to take over + its place. The use case is when a regular server stops and its backup takes over its + duties, later the main server restarts and requests the server (the former backup) to + stop operating. + + + + + + + If we have to start as a replicated server this is the amount of time to wait for the replica to + acknowledge it has received all the necessary data from the replicating server at the final step + of the initial replication synchronization process. + + + + + + + If we lose connection to the master, how many times should we attempt to vote for quorum before restarting + + + + + + + How long to wait (in milliseconds) between each vote + + + + + + + How long to wait (in milliseconds) before trying to replicate again after failing to find a replica + + + + + + diff --git a/artemis-server/src/test/java/org/apache/activemq/artemis/core/config/impl/HAPolicyConfigurationTest.java b/artemis-server/src/test/java/org/apache/activemq/artemis/core/config/impl/HAPolicyConfigurationTest.java index 425ff7b244c..1397602e401 100644 --- a/artemis-server/src/test/java/org/apache/activemq/artemis/core/config/impl/HAPolicyConfigurationTest.java +++ b/artemis-server/src/test/java/org/apache/activemq/artemis/core/config/impl/HAPolicyConfigurationTest.java @@ -17,7 +17,11 @@ package org.apache.activemq.artemis.core.config.impl; import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration; import org.apache.activemq.artemis.core.config.Configuration; import org.apache.activemq.artemis.core.config.FileDeploymentManager; import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; @@ -27,6 +31,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.LiveOnlyPolicy; import 
org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.ScaleDownPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy; @@ -35,11 +41,18 @@ import org.apache.activemq.artemis.core.server.impl.ColocatedActivation; import org.apache.activemq.artemis.core.server.impl.FileLockNodeManager; import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation; +import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation; +import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation; import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation; import org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation; import org.apache.activemq.artemis.core.server.impl.SharedStoreBackupActivation; import org.apache.activemq.artemis.core.server.impl.SharedStoreLiveActivation; +import org.apache.activemq.artemis.quorum.DistributedLock; +import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager; +import org.apache.activemq.artemis.quorum.UnavailableStateException; import org.apache.activemq.artemis.tests.util.ActiveMQTestBase; +import org.hamcrest.MatcherAssert; +import org.hamcrest.core.IsInstanceOf; import org.junit.Test; import static org.hamcrest.CoreMatchers.instanceOf; @@ -124,6 +137,242 @@ public void liveOnlyTest5() throws Exception { liveOnlyTest("live-only-hapolicy-config5.xml"); } + public static class FakeDistributedPrimitiveManager implements DistributedPrimitiveManager { + + private final Map config; + private boolean started; + private DistributedLock lock; + 
+ public FakeDistributedPrimitiveManager(Map config) { + this.config = config; + this.started = false; + } + + public Map getConfig() { + return config; + } + + @Override + public void addUnavailableManagerListener(UnavailableManagerListener listener) { + // no op + } + + @Override + public void removeUnavailableManagerListener(UnavailableManagerListener listener) { + // no op + } + + @Override + public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException { + started = true; + return true; + } + + @Override + public void start() throws InterruptedException, ExecutionException { + started = true; + } + + @Override + public boolean isStarted() { + return started; + } + + @Override + public void stop() { + started = false; + if (lock != null) { + lock.close(); + } + lock = null; + } + + @Override + public DistributedLock getDistributedLock(String lockId) { + if (!started) { + throw new IllegalStateException("need to start first"); + } + if (lock == null) { + lock = new DistributedLock() { + + private boolean held; + + @Override + public String getLockId() { + return lockId; + } + + @Override + public boolean isHeldByCaller() throws UnavailableStateException { + return held; + } + + @Override + public boolean tryLock() throws UnavailableStateException, InterruptedException { + if (held) { + return false; + } + held = true; + return true; + } + + @Override + public void unlock() throws UnavailableStateException { + held = false; + } + + @Override + public void addListener(UnavailableLockListener listener) { + + } + + @Override + public void removeListener(UnavailableLockListener listener) { + + } + + @Override + public void close() { + held = false; + } + }; + } else if (!lock.getLockId().equals(lockId)) { + throw new IllegalStateException("This shouldn't happen"); + } + return lock; + } + + @Override + public void close() { + stop(); + } + } + + private static void validateManagerConfig(Map config) { + 
assertEquals("127.0.0.1:6666", config.get("connect-string")); + assertEquals("16000", config.get("session-ms")); + assertEquals("2000", config.get("connection-ms")); + assertEquals("2", config.get("retries")); + assertEquals("2000", config.get("retries-ms")); + assertEquals("test", config.get("namespace")); + assertEquals("10", config.get("session-percent")); + assertEquals(7, config.size()); + } + + @Test + public void PrimaryReplicationTest() throws Exception { + Configuration configuration = createConfiguration("primary-hapolicy-config.xml"); + ActiveMQServerImpl server = new ActiveMQServerImpl(configuration); + try { + server.start(); + Activation activation = server.getActivation(); + assertTrue(activation instanceof ReplicationPrimaryActivation); + HAPolicy haPolicy = server.getHAPolicy(); + assertTrue(haPolicy instanceof ReplicationPrimaryPolicy); + ReplicationPrimaryPolicy policy = (ReplicationPrimaryPolicy) haPolicy; + assertFalse(policy.isAllowAutoFailBack()); + assertEquals(9876, policy.getInitialReplicationSyncTimeout()); + assertFalse(policy.canScaleDown()); + assertFalse(policy.isBackup()); + assertFalse(policy.isSharedStore()); + assertTrue(policy.isCheckForLiveServer()); + assertTrue(policy.isWaitForActivation()); + assertEquals("purple", policy.getGroupName()); + assertEquals("purple", policy.getBackupGroupName()); + assertEquals("abcdefg", policy.getClusterName()); + assertFalse(policy.useQuorumManager()); + // check failback companion backup policy + ReplicationBackupPolicy failbackPolicy = policy.getBackupPolicy(); + assertNotNull(failbackPolicy); + assertSame(policy, failbackPolicy.getLivePolicy()); + assertEquals(policy.getGroupName(), failbackPolicy.getGroupName()); + assertEquals(policy.getBackupGroupName(), failbackPolicy.getBackupGroupName()); + assertEquals(policy.getClusterName(), failbackPolicy.getClusterName()); + assertEquals(failbackPolicy.getMaxSavedReplicatedJournalsSize(), 
ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize()); + assertEquals(1, failbackPolicy.getVoteRetries()); + assertEquals(1000, failbackPolicy.getVoteRetryWait()); + assertTrue(failbackPolicy.isTryFailback()); + assertTrue(failbackPolicy.isBackup()); + assertFalse(failbackPolicy.isSharedStore()); + assertTrue(failbackPolicy.isWaitForActivation()); + assertFalse(failbackPolicy.useQuorumManager()); + assertEquals(12345, failbackPolicy.getRetryReplicationWait()); + // check scale-down properties + assertFalse(failbackPolicy.canScaleDown()); + assertNull(failbackPolicy.getScaleDownClustername()); + assertNull(failbackPolicy.getScaleDownGroupName()); + // validate manager + DistributedPrimitiveManager manager = ((ReplicationPrimaryActivation) activation).getDistributedManager(); + assertNotNull(manager); + assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName()); + MatcherAssert.assertThat(manager, IsInstanceOf.instanceOf(FakeDistributedPrimitiveManager.class)); + FakeDistributedPrimitiveManager forwardingManager = (FakeDistributedPrimitiveManager) manager; + // validate manager config + validateManagerConfig(forwardingManager.getConfig()); + } finally { + server.stop(); + } + } + + @Test + public void BackupReplicationTest() throws Exception { + Configuration configuration = createConfiguration("backup-hapolicy-config.xml"); + ActiveMQServerImpl server = new ActiveMQServerImpl(configuration); + try { + server.start(); + Activation activation = server.getActivation(); + assertTrue(activation instanceof ReplicationBackupActivation); + HAPolicy haPolicy = server.getHAPolicy(); + assertTrue(haPolicy instanceof ReplicationBackupPolicy); + ReplicationBackupPolicy policy = (ReplicationBackupPolicy) haPolicy; + assertEquals("tiddles", policy.getGroupName()); + assertEquals("tiddles", policy.getBackupGroupName()); + assertEquals("33rrrrr", policy.getClusterName()); + assertEquals(22, 
policy.getMaxSavedReplicatedJournalsSize()); + assertEquals(1, policy.getVoteRetries()); + assertEquals(1000, policy.getVoteRetryWait()); + assertFalse(policy.isTryFailback()); + assertTrue(policy.isBackup()); + assertFalse(policy.isSharedStore()); + assertTrue(policy.isWaitForActivation()); + assertFalse(policy.useQuorumManager()); + assertEquals(12345, policy.getRetryReplicationWait()); + // check scale-down properties + assertFalse(policy.canScaleDown()); + assertNull(policy.getScaleDownClustername()); + assertNull(policy.getScaleDownGroupName()); + // check failover companion live policy + ReplicationPrimaryPolicy failoverLivePolicy = policy.getLivePolicy(); + assertNotNull(failoverLivePolicy); + assertSame(policy, failoverLivePolicy.getBackupPolicy()); + assertFalse(failoverLivePolicy.isAllowAutoFailBack()); + assertEquals(9876, failoverLivePolicy.getInitialReplicationSyncTimeout()); + assertFalse(failoverLivePolicy.canScaleDown()); + assertFalse(failoverLivePolicy.isBackup()); + assertFalse(failoverLivePolicy.isSharedStore()); + assertFalse(failoverLivePolicy.isCheckForLiveServer()); + assertTrue(failoverLivePolicy.isWaitForActivation()); + assertEquals(policy.getGroupName(), failoverLivePolicy.getGroupName()); + assertEquals(policy.getClusterName(), failoverLivePolicy.getClusterName()); + assertEquals(policy.getBackupGroupName(), failoverLivePolicy.getBackupGroupName()); + assertFalse(failoverLivePolicy.useQuorumManager()); + // check scale-down properties + assertFalse(failoverLivePolicy.canScaleDown()); + assertNull(failoverLivePolicy.getScaleDownClustername()); + assertNull(failoverLivePolicy.getScaleDownGroupName()); + // validate manager + DistributedPrimitiveManager manager = ((ReplicationBackupActivation) activation).getDistributedManager(); + assertNotNull(manager); + assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName()); + MatcherAssert.assertThat(manager, 
IsInstanceOf.instanceOf(FakeDistributedPrimitiveManager.class)); + FakeDistributedPrimitiveManager forwardingManager = (FakeDistributedPrimitiveManager) manager; + // validate manager config + validateManagerConfig(forwardingManager.getConfig()); + } finally { + server.stop(); + } + } + @Test public void ReplicatedTest() throws Exception { Configuration configuration = createConfiguration("replicated-hapolicy-config.xml"); diff --git a/artemis-server/src/test/java/org/apache/activemq/artemis/tests/util/ActiveMQTestBase.java b/artemis-server/src/test/java/org/apache/activemq/artemis/tests/util/ActiveMQTestBase.java index adfceeed848..5a16f3e5f6d 100644 --- a/artemis-server/src/test/java/org/apache/activemq/artemis/tests/util/ActiveMQTestBase.java +++ b/artemis-server/src/test/java/org/apache/activemq/artemis/tests/util/ActiveMQTestBase.java @@ -113,6 +113,7 @@ import org.apache.activemq.artemis.core.remoting.impl.netty.NettyAcceptorFactory; import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnector; import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnectorFactory; +import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; import org.apache.activemq.artemis.core.server.ActiveMQComponent; import org.apache.activemq.artemis.core.server.ActiveMQServer; import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; @@ -129,6 +130,7 @@ import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; import org.apache.activemq.artemis.core.server.impl.AddressInfo; import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation; +import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation; import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation; import org.apache.activemq.artemis.core.settings.impl.AddressFullMessagePolicy; import org.apache.activemq.artemis.core.settings.impl.AddressSettings; @@ -1384,6 +1386,8 @@ public static final void 
waitForRemoteBackup(ClientSessionFactory sessionFactory if (isReplicated) { if (activation instanceof SharedNothingBackupActivation) { isRemoteUpToDate = backup.isReplicaSync(); + } else if (activation instanceof ReplicationBackupActivation) { + isRemoteUpToDate = backup.isReplicaSync(); } else { //we may have already failed over and changed the Activation if (actualServer.isStarted()) { @@ -2517,6 +2521,17 @@ public void run() { return !hadToInterrupt; } + protected static ReplicationEndpoint getReplicationEndpoint(ActiveMQServer server) { + final Activation activation = server.getActivation(); + if (activation instanceof SharedNothingBackupActivation) { + return ((SharedNothingBackupActivation) activation).getReplicationEndpoint(); + } + if (activation instanceof ReplicationBackupActivation) { + return ((ReplicationBackupActivation) activation).getReplicationEndpoint(); + } + return null; + } + // Private ------------------------------------------------------- // Inner classes ------------------------------------------------- diff --git a/artemis-server/src/test/resources/backup-hapolicy-config.xml b/artemis-server/src/test/resources/backup-hapolicy-config.xml new file mode 100644 index 00000000000..be552366760 --- /dev/null +++ b/artemis-server/src/test/resources/backup-hapolicy-config.xml @@ -0,0 +1,54 @@ + + + + + + + + + + tiddles + 22 + 33rrrrr + 9876 + 12345 + 1 + 1000 + false + + + org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager + + + + + + + + + + + + + + + + diff --git a/artemis-server/src/test/resources/primary-hapolicy-config.xml b/artemis-server/src/test/resources/primary-hapolicy-config.xml new file mode 100644 index 00000000000..5b88bcd80f8 --- /dev/null +++ b/artemis-server/src/test/resources/primary-hapolicy-config.xml @@ -0,0 +1,52 @@ + + + + + + + + purple + abcdefg + 9876 + 12345 + true + 1 + 1000 + + + 
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager + + + + + + + + + + + + + + + + + diff --git a/docs/user-manual/en/ha.md b/docs/user-manual/en/ha.md index b2f61d5d8bb..96045e95e40 100644 --- a/docs/user-manual/en/ha.md +++ b/docs/user-manual/en/ha.md @@ -98,6 +98,36 @@ or ``` +*Replication* allows too to configure 2 new roles to enable *pluggable quorum* provider configuration, by using: +```xml + + + + + +``` +to configure the classic *master* role, and +```xml + + + + + +``` +for the classic *slave* one. + +If *replication* is configured using such new roles some additional element is required to complete configuration, detailed later. + +### IMPORTANT NOTE ON PLUGGABLE QUORUM VOTE FEATURE + +This feature is still **EXPERIMENTAL** and not meant to be run in production yet. + +It means: +- its configuration can change until declared as **officially stable** +- it has to solve yet an inherent data misalignment issue with replication (it can happen with `classic` replication as well) + +More info about this issue are on [ARTEMIS-3340](https://issues.apache.org/jira/browse/ARTEMIS-3340). + ### Data Replication When using replication, the live and the backup servers do not share the @@ -199,16 +229,26 @@ Much like in the shared-store case, when the live server stops or crashes, its replicating backup will become active and take over its duties. Specifically, the backup will become active when it loses connection to its live server. This can be problematic because this can -also happen because of a temporary network problem. In order to address -this issue, the backup will try to determine whether it still can +also happen because of a temporary network problem. + +This issue is solved in 2 different ways depending on which replication roles are configured: +- **classic replication** (`master`/`slave` roles): backup will try to determine whether it still can connect to the other servers in the cluster. 
If it can connect to more than half the servers, it will become active, if more than half the servers also disappeared with the live, the backup will wait and try reconnecting with the live. This avoids a split brain situation. +- **pluggable quorum vote replication** (`primary`/`backup` roles): backup relies on a pluggable quorum provider + (configurable via `manager` xml element) to detect if there's any active live. + +> ***NOTE*** +> +> A backup in the **pluggable quorum vote replication** still need to carefully configure +> [connection-ttl](connection-ttl.md) in order to promptly issue a request to become live to the quorum service +> before failing-over. #### Configuration -To configure the live and backup servers to be a replicating pair, +To configure a classic replication's live and backup servers to be a replicating pair, configure the live server in ' `broker.xml` to have: ```xml @@ -235,6 +275,30 @@ The backup server must be similarly configured but as a `slave` ``` +To configure a pluggable quorum replication's primary and backup instead: + +```xml + + + + + +... + + + ... + + +``` +and +```xml + + + + + +``` + #### All Replication Configuration The following table lists all the `ha-policy` configuration elements for @@ -308,6 +372,142 @@ replica to acknowledge it has received all the necessary data. The default is 30,000 milliseconds. **Note:** during this interval any journal related operations will be blocked. +#### Pluggable Quorum Vote Replication configurations +Pluggable Quorum Vote replication configuration options are a bit different +from classic replication, mostly because of its customizable nature. + +[Apache curator](https://curator.apache.org/) is used by the default quorum provider. + +Below some example configurations to show how it works. 
+ +For `primary`: +```xml + + + + + org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager + + + + + true + + + +``` +And `backup`: +```xml + + + + + org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager + + + + + true + + + +``` +The configuration of `class-name` as follows +```xml +org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager +``` +isn't really needed, because Apache Curator is the default provider, but has been shown for completeness. + +The `properties` element, instead +```xml + + + +``` +Can specify a list of `property` elements in the form of key-value pairs, depending the ones +accepted by the specified `class-name` provider. + +Apache Curator's provider allow to configure these properties: + +- [`connect-string`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectString(java.lang.String)): (no default) +- [`session-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#sessionTimeoutMs(int)): (default is 18000 ms) +- [`session-percent`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#simulatedSessionExpirationPercent(int)): (default is 33); should be <= default, + see https://cwiki.apache.org/confluence/display/CURATOR/TN14 for more info +- [`connection-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectionTimeoutMs(int)): (default is 8000 ms) +- [`retries`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1) +- [`retries-ms`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1000 ms) +- [`namespace`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#namespace(java.lang.String)): 
(no default) + +Configuration of the [Apache Zookeeper](https://zookeeper.apache.org/) nodes is left to the user, but there are a few +**suggestions to improve the reliability of the quorum service**: +- broker `session-ms` must be `>= 2 * server tick time` and `<= 20 * server tick time` as per the + [Zookeeper 3.6.3 admin guide](https://zookeeper.apache.org/doc/r3.6.3/zookeeperAdmin.html): it directly impacts how fast a backup + can failover to an isolated/killed/unresponsive live; the higher, the slower. +- GC on broker machine should allow keeping GC pauses within 1/3 of `session-ms` in order to let the Zookeeper heartbeat protocol +work reliably: if that's not possible, it is better to increase `session-ms`, accepting a slower failover +- Zookeeper must have enough resources to keep GC (and OS) pauses much smaller than server tick time: please consider carefully if + broker and Zookeeper node should share the same physical machine, depending on the expected load of the broker +- network isolation protection requires configuring >=3 Zookeeper nodes + +#### *Important*: Notes on pluggable quorum replication configuration + +The first `classic` replication configuration that won't apply to the pluggable quorum replication +is `vote-on-replication-failure`, and configuring it produces a startup error: pluggable quorum replication +always behaves like `vote-on-replication-failure` `true`, i.e. shutting down a live broker (and its JVM) in case of quorum loss. + +The second deprecated `classic` replication configuration is `quorum-vote-wait`: given that the pluggable quorum vote replication +requires backup to have an always-on reliable quorum service, there's no need to specify the timeout to reach +the majority of quorum nodes. A backup remains inactive (i.e. JVM still up, console too, unable to sync with live, to failover etc.) +until the majority of quorum nodes is reachable again, re-activating when that happens.
+ +The only exception is with primary failing-back to an existing live backup using `true`: +if the quorum service isn't immediately available the primary (and its JVM) just stops, allowing fail-fast failing-back. + +There are a few *semantic differences* in other existing properties: +- `vote-retry-wait`: in `classic` replication it means how long to wait between each quorum vote try, while with pluggable quorum replication + it means how long to request failover on each attempt +- `vote-retries`: differently from `classic`, the number of vote attempts is `1 + vote-retries` (with classic it is just `vote-retries`). + Setting `0` means no retries, leaving backup to still perform an initial attempt. + +**Notes on replication configuration with [Apache curator](https://curator.apache.org/) quorum provider** + +As said some paragraphs above, `session-ms` affects the failover duration: a backup can +failover after `session-ms` expires or if the live broker voluntarily gives up its role +(e.g. during a fail-back/manual broker stop), in which case it happens immediately. + +For the former case (session expiration with live no longer present), the backup broker can detect an unresponsive live by using: +1. cluster connection PINGs (affected by [connection-ttl](connection-ttl.md) tuning) +2. closed TCP connection notification (depends on TCP configuration and networking stack/topology) + +These 2 cases have 2 different failover durations depending on different factors: +1. `connection-ttl` affects how much time of the expiring `session-ms` is used to just detect a missing live broker: the higher `connection-ttl`, + the slower it reacts; backup can attempt to failover for the remaining `session-ms - connection-ttl` +2.
`session-ms` expiration is immediately detected: backup must try to failover for >=`session-ms` to be sure to catch + the session expiration and complete failover + +The previous comments are meant to suggest to the careful reader that the minimum time to attempt to failover +cannot be below the full `session-ms` expiration. +In short, it means +``` + total failover attempt time > session-ms +``` +with +``` + total failover attempt time = vote-retry-wait * (vote-retries + 1) +``` +and by consequence: +``` + vote-retry-wait * (vote-retries + 1) > session-ms +``` +For example with `session-ms = 18000 ms`, safe values for failover timeout are: +```xml + 11 + 2000 +``` +Because `11 * 2000 = 22000 ms` that's bigger than `18000 ms`. + +There's no risk that a backup broker will stop attempting to failover too early, losing its chance to become live. + ### Shared Store When using a shared store, both live and backup servers share the *same* @@ -406,8 +606,32 @@ stop. This configuration would look like: ``` -In replication HA mode you need to set an extra property -`check-for-live-server` to `true` in the `master` configuration. If set +The same configuration option can be set for both replications, classic: +```xml + + + + true + + + +``` +and with pluggable quorum provider: +```xml + + + + + + + true + + + +``` + +In both replication HA modes you need to set an extra property +`check-for-live-server` to `true` in the `master`/`primary` configuration. If set to true, during start-up a live server will first search the cluster for another server using its nodeID. If it finds one, it will contact this server and try to "fail-back". Since this is a remote replication @@ -418,7 +642,7 @@ to shutdown for it to take over. This is necessary because otherwise the live server has no means to know whether there was a fail-over or not, and if there was if the server that took its duties is still running or not.
To configure this option at your `broker.xml` -configuration file as follows: +configuration file as follows, for classic replication: ```xml @@ -430,6 +654,29 @@ configuration file as follows: ``` +And pluggable quorum replication: + +```xml + + + + + + + true + + + +``` + +The key difference from classic replication is that if `master` cannot reach any +live server with its same nodeID, it's going straight to become live, while `primary` +request it to the quorum provider, searching again for any existing live if +the quorum provider is not available (eg connectivity loss, consensus absence) or +if there's another live broker with the same nodeID alive, in an endless loop. + +In short: a started `primary` cannot become live without consensus. + > **Warning** > > Be aware that if you restart a live server while after failover has diff --git a/pom.xml b/pom.xml index 68d04d29925..76dd3c4bab9 100644 --- a/pom.xml +++ b/pom.xml @@ -64,6 +64,8 @@ artemis-distribution tests artemis-features + artemis-quorum-api + artemis-quorum-ri ActiveMQ Artemis Parent @@ -105,6 +107,9 @@ 3.9.0 2.1.2 4.1.65.Final + 5.1.0 + + 3.6.3 2.0.39.Final @@ -849,6 +854,32 @@ jakarta.security.auth.message-api ${jakarta.security.auth.message-api.version} + + + org.apache.curator + curator-recipes + ${curator.version} + + + org.apache.curator + curator-framework + ${curator.version} + + + org.apache.curator + curator-client + ${curator.version} + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + org.apache.zookeeper + zookeeper-jute + ${zookeeper.version} + diff --git a/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailoverTest.java b/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailoverTest.java index 4ccb6f9310a..e4e705bf3ce 100644 --- a/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailoverTest.java +++ 
b/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailoverTest.java @@ -44,9 +44,9 @@ public class ScaleDownFailoverTest extends ClusterTestBase { public void setUp() throws Exception { super.setUp(); stopCount = 0; - setupLiveServer(0, isFileStorage(), false, isNetty(), true); - setupLiveServer(1, isFileStorage(), false, isNetty(), true); - setupLiveServer(2, isFileStorage(), false, isNetty(), true); + setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); + setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); + setupLiveServer(2, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration(); ScaleDownConfiguration scaleDownConfiguration2 = new ScaleDownConfiguration(); scaleDownConfiguration2.setEnabled(false); diff --git a/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailureTest.java b/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailureTest.java index e592b163717..1551c501be7 100644 --- a/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailureTest.java +++ b/tests/extra-tests/src/test/java/org/apache/activemq/artemis/tests/extras/byteman/ScaleDownFailureTest.java @@ -35,8 +35,8 @@ public class ScaleDownFailureTest extends ClusterTestBase { @Before public void setUp() throws Exception { super.setUp(); - setupLiveServer(0, isFileStorage(), false, isNetty(), true); - setupLiveServer(1, isFileStorage(), false, isNetty(), true); + setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); + setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); if (isGrouped()) { ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration(); scaleDownConfiguration.setGroupName("bill"); diff 
--git a/tests/integration-tests/pom.xml b/tests/integration-tests/pom.xml index d7e71e598e4..2e5b3ea4d56 100644 --- a/tests/integration-tests/pom.xml +++ b/tests/integration-tests/pom.xml @@ -51,6 +51,12 @@ test test-jar + + org.apache.activemq + artemis-quorum-ri + ${project.version} + test + org.apache.activemq.tests unit-tests diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/InfiniteRedeliveryTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/InfiniteRedeliveryTest.java index efbc8efb0e7..a69c44ec187 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/InfiniteRedeliveryTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/InfiniteRedeliveryTest.java @@ -93,12 +93,11 @@ protected void createReplicatedConfigs() throws Exception { backupConfig = createDefaultConfig(0, true); liveConfig = createDefaultConfig(0, true); - ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null); + configureReplicationPair(backupConnector, backupAcceptor, liveConnector); backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false); - ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true); - ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false); + nodeManager = new InVMNodeManager(true, backupConfig.getJournalLocation()); @@ -109,6 +108,14 @@ protected void createReplicatedConfigs() throws Exception { liveServer = createTestableServer(liveConfig, nodeManager); } + protected void configureReplicationPair(TransportConfiguration 
backupConnector, + TransportConfiguration backupAcceptor, + TransportConfiguration liveConnector) { + ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null); + ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true); + ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false); + } + @Before @Override diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/PluggableQuorumInfiniteRedeliveryTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/PluggableQuorumInfiniteRedeliveryTest.java new file mode 100644 index 00000000000..2fbacac2671 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/client/PluggableQuorumInfiniteRedeliveryTest.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.tests.integration.client; + +import java.util.Collections; + +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; +import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +public class PluggableQuorumInfiniteRedeliveryTest extends InfiniteRedeliveryTest { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private DistributedPrimitiveManagerConfiguration managerConfiguration; + + public PluggableQuorumInfiniteRedeliveryTest(String protocol, boolean useCLI) { + super(protocol, useCLI); + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + this.managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), + Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + } + + @Override + protected void configureReplicationPair(TransportConfiguration backupConnector, + TransportConfiguration backupAcceptor, + TransportConfiguration liveConnector) { + + ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, + liveConfig, liveConnector, null, + managerConfiguration, managerConfiguration); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()) + .setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true); + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterTestBase.java 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterTestBase.java index a6aeeaa5642..b364ad09eba 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterTestBase.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterTestBase.java @@ -17,6 +17,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.distribution; import java.io.File; +import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.net.URI; @@ -56,9 +57,12 @@ import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration; import org.apache.activemq.artemis.core.config.Configuration; import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration; import org.apache.activemq.artemis.core.postoffice.Binding; @@ -85,6 +89,7 @@ import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration; import org.apache.activemq.artemis.core.server.impl.AddressInfo; import org.apache.activemq.artemis.core.server.impl.InVMNodeManager; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; import 
org.apache.activemq.artemis.tests.util.ActiveMQTestBase; import org.apache.activemq.artemis.utils.PortCheckRule; import org.jboss.logging.Logger; @@ -92,9 +97,14 @@ import org.junit.Assert; import org.junit.Before; import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; public abstract class ClusterTestBase extends ActiveMQTestBase { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + private static final Logger log = Logger.getLogger(ClusterTestBase.class); private static final int[] PORTS = {TransportConstants.DEFAULT_PORT, TransportConstants.DEFAULT_PORT + 1, TransportConstants.DEFAULT_PORT + 2, TransportConstants.DEFAULT_PORT + 3, TransportConstants.DEFAULT_PORT + 4, TransportConstants.DEFAULT_PORT + 5, TransportConstants.DEFAULT_PORT + 6, TransportConstants.DEFAULT_PORT + 7, TransportConstants.DEFAULT_PORT + 8, TransportConstants.DEFAULT_PORT + 9,}; @@ -134,6 +144,21 @@ protected boolean isForceUniqueStorageManagerIds() { return true; } + private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null; + + private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() { + if (pluggableQuorumConfiguration != null) { + return pluggableQuorumConfiguration; + } + try { + pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + } catch (IOException ioException) { + log.error(ioException); + return null; + } + return pluggableQuorumConfiguration; + } + @Override @Before public void setUp() throws Exception { @@ -159,11 +184,19 @@ public void setUp() throws Exception { } + public enum HAType { + SharedStore, SharedNothingReplication, PluggableQuorumReplication + } + + protected HAType haType() { + return HAType.SharedNothingReplication; + } + /** * Whether the servers share the storage or not. 
*/ - protected boolean isSharedStore() { - return false; + protected final boolean isSharedStore() { + return HAType.SharedStore.equals(haType()); } @Override @@ -1481,14 +1514,14 @@ protected ActiveMQServer getServer(final int node) { } protected void setupServer(final int node, final boolean fileStorage, final boolean netty) throws Exception { - setupLiveServer(node, fileStorage, false, netty, false); + setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, false); } protected void setupLiveServer(final int node, final boolean fileStorage, final boolean netty, boolean isLive) throws Exception { - setupLiveServer(node, fileStorage, false, netty, isLive); + setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, isLive); } protected boolean isResolveProtocols() { @@ -1497,27 +1530,26 @@ protected boolean isResolveProtocols() { protected void setupLiveServer(final int node, final boolean fileStorage, - final boolean sharedStorage, + final HAType haType, final boolean netty, boolean liveOnly) throws Exception { if (servers[node] != null) { throw new IllegalArgumentException("Already a server at node " + node); } - HAPolicyConfiguration haPolicyConfiguration = null; + final HAPolicyConfiguration haPolicyConfiguration; if (liveOnly) { haPolicyConfiguration = new LiveOnlyPolicyConfiguration(); } else { - if (sharedStorage) - haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration(); - else - haPolicyConfiguration = new ReplicatedPolicyConfiguration(); + haPolicyConfiguration = haPolicyLiveConfiguration(haType); } Configuration configuration = createBasicConfig(node).setJournalMaxIO_AIO(1000).setThreadPoolMaxSize(10).clearAcceptorConfigurations().addAcceptorConfiguration(createTransportConfiguration(netty, true, generateParams(node, netty))).setHAPolicyConfiguration(haPolicyConfiguration).setResolveProtocols(isResolveProtocols()); ActiveMQServer server; + final boolean sharedStorage = HAType.SharedStore.equals(haType); + if 
(fileStorage) { if (sharedStorage) { server = createInVMFailoverServer(true, configuration, nodeManagers[node], node); @@ -1538,6 +1570,20 @@ protected void setupLiveServer(final int node, servers[node] = addServer(server); } + private HAPolicyConfiguration haPolicyLiveConfiguration(HAType haType) { + switch (haType) { + case SharedStore: + return new SharedStoreMasterPolicyConfiguration(); + case SharedNothingReplication: + return new ReplicatedPolicyConfiguration(); + case PluggableQuorumReplication: + return ReplicationPrimaryPolicyConfiguration.withDefault() + .setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration()); + default: + throw new AssertionError("Unsupported haType = " + haType); + } + } + /** * Server lacks a {@link ClusterConnectionConfiguration} necessary for the remote (replicating) * backup case. @@ -1549,14 +1595,14 @@ protected void setupLiveServer(final int node, * @param node * @param liveNode * @param fileStorage - * @param sharedStorage + * @param haType * @param netty * @throws Exception */ protected void setupBackupServer(final int node, final int liveNode, final boolean fileStorage, - final boolean sharedStorage, + final HAType haType, final boolean netty) throws Exception { if (servers[node] != null) { throw new IllegalArgumentException("Already a server at node " + node); @@ -1566,7 +1612,9 @@ protected void setupBackupServer(final int node, TransportConfiguration backupConfig = createTransportConfiguration(netty, false, generateParams(node, netty)); TransportConfiguration acceptorConfig = createTransportConfiguration(netty, true, generateParams(node, netty)); - Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(sharedStorage ? 
new SharedStoreSlavePolicyConfiguration() : new ReplicaPolicyConfiguration()); + final boolean sharedStorage = HAType.SharedStore.equals(haType); + + Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(haPolicyBackupConfiguration(haType)); ActiveMQServer server; @@ -1580,6 +1628,21 @@ protected void setupBackupServer(final int node, servers[node] = addServer(server); } + private HAPolicyConfiguration haPolicyBackupConfiguration(HAType haType) { + switch (haType) { + + case SharedStore: + return new SharedStoreSlavePolicyConfiguration(); + case SharedNothingReplication: + return new ReplicaPolicyConfiguration(); + case PluggableQuorumReplication: + return ReplicationBackupPolicyConfiguration.withDefault() + .setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration()); + default: + throw new AssertionError("Unsupported ha type = " + haType); + } + } + protected void setupLiveServerWithDiscovery(final int node, final String groupAddress, final int port, diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterWithBackupTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterWithBackupTest.java index ed0c637579f..5b0df53b776 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterWithBackupTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/ClusterWithBackupTest.java @@ -87,14 +87,14 @@ protected void setupCluster(final MessageLoadBalancingType messageLoadBalancingT protected void setupServers() throws Exception { // The backups - 
setupBackupServer(0, 3, isFileStorage(), true, isNetty()); - setupBackupServer(1, 4, isFileStorage(), true, isNetty()); - setupBackupServer(2, 5, isFileStorage(), true, isNetty()); + setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty()); // The lives - setupLiveServer(3, isFileStorage(), true, isNetty(), false); - setupLiveServer(4, isFileStorage(), true, isNetty(), false); - setupLiveServer(5, isFileStorage(), true, isNetty(), false); + setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false); } } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SimpleSymmetricClusterTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SimpleSymmetricClusterTest.java index fcadf9e638d..168bbefbefa 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SimpleSymmetricClusterTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SimpleSymmetricClusterTest.java @@ -41,14 +41,14 @@ public boolean isNetty() { @Test public void testSimpleWithBackup() throws Exception { // The backups - setupBackupServer(0, 3, isFileStorage(), true, isNetty()); - setupBackupServer(1, 4, isFileStorage(), true, isNetty()); - setupBackupServer(2, 5, isFileStorage(), true, isNetty()); + setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty()); // The lives 
- setupLiveServer(3, isFileStorage(), true, isNetty(), false); - setupLiveServer(4, isFileStorage(), true, isNetty(), false); - setupLiveServer(5, isFileStorage(), true, isNetty(), false); + setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false); setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 3, 4, 5); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SymmetricClusterWithBackupTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SymmetricClusterWithBackupTest.java index 4fcadcdc32e..9ac836bd1a0 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SymmetricClusterWithBackupTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/distribution/SymmetricClusterWithBackupTest.java @@ -453,18 +453,18 @@ protected void setupCluster(final MessageLoadBalancingType messageLoadBalancingT @Override protected void setupServers() throws Exception { // The backups - setupBackupServer(5, 0, isFileStorage(), true, isNetty()); - setupBackupServer(6, 1, isFileStorage(), true, isNetty()); - setupBackupServer(7, 2, isFileStorage(), true, isNetty()); - setupBackupServer(8, 3, isFileStorage(), true, isNetty()); - setupBackupServer(9, 4, isFileStorage(), true, isNetty()); + setupBackupServer(5, 0, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(6, 1, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(7, 2, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(8, 3, isFileStorage(), HAType.SharedStore, isNetty()); + setupBackupServer(9, 4, isFileStorage(), HAType.SharedStore, 
isNetty()); // The lives - setupLiveServer(0, isFileStorage(), true, isNetty(), false); - setupLiveServer(1, isFileStorage(), true, isNetty(), false); - setupLiveServer(2, isFileStorage(), true, isNetty(), false); - setupLiveServer(3, isFileStorage(), true, isNetty(), false); - setupLiveServer(4, isFileStorage(), true, isNetty(), false); + setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(2, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false); } @Override diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTest.java index b79c418329c..a13ef3dbbb3 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTest.java @@ -60,6 +60,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy; import org.apache.activemq.artemis.core.server.files.FileMoveManager; @@ -786,7 +788,7 @@ public void testFailBack() throws Exception { ((ReplicaPolicy) 
haPolicy).setMaxSavedReplicatedJournalsSize(1); } - simpleFailover(haPolicy instanceof ReplicaPolicy, doFailBack); + simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, doFailBack); } @Test(timeout = 120000) @@ -816,7 +818,9 @@ public void testFailBackLiveRestartsBackupIsGone() throws Exception { Thread.sleep(100); Assert.assertFalse("backup is not running", backupServer.isStarted()); - Assert.assertFalse("must NOT be a backup", liveServer.getServer().getHAPolicy() instanceof BackupPolicy); + final boolean isBackup = liveServer.getServer().getHAPolicy() instanceof BackupPolicy || + liveServer.getServer().getHAPolicy() instanceof ReplicationBackupPolicy; + Assert.assertFalse("must NOT be a backup", isBackup); adaptLiveConfigForReplicatedFailBack(liveServer); beforeRestart(liveServer); liveServer.start(); @@ -827,7 +831,8 @@ public void testFailBackLiveRestartsBackupIsGone() throws Exception { ClientSession session2 = createSession(sf, false, false); session2.start(); ClientConsumer consumer2 = session2.createConsumer(FailoverTestBase.ADDRESS); - boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy; + final boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy || + liveServer.getServer().getHAPolicy() instanceof ReplicationPrimaryPolicy; if (replication) receiveMessages(consumer2, 0, NUM_MESSAGES, true); assertNoMoreMessages(consumer2); @@ -838,7 +843,7 @@ public void testFailBackLiveRestartsBackupIsGone() throws Exception { public void testSimpleFailover() throws Exception { HAPolicy haPolicy = backupServer.getServer().getHAPolicy(); - simpleFailover(haPolicy instanceof ReplicaPolicy, false); + simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, false); } @Test(timeout = 120000) diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTestBase.java 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTestBase.java index a3e3dfc0b05..1bcd820a1b6 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTestBase.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/FailoverTestBase.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.net.ServerSocket; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -36,15 +37,19 @@ import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal; import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration; import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration; import org.apache.activemq.artemis.core.remoting.impl.invm.InVMConnector; import org.apache.activemq.artemis.core.remoting.impl.invm.InVMRegistry; import org.apache.activemq.artemis.core.server.NodeManager; +import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy; import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy; +import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy; import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; import org.apache.activemq.artemis.core.server.impl.InVMNodeManager; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer; import 
org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer; import org.apache.activemq.artemis.tests.util.ActiveMQTestBase; @@ -52,9 +57,13 @@ import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; public abstract class FailoverTestBase extends ActiveMQTestBase { // Constants ----------------------------------------------------- + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); protected static final SimpleString ADDRESS = new SimpleString("FailoverTestAddress"); @@ -216,7 +225,34 @@ protected void createReplicatedConfigs() throws Exception { } } + protected void createPluggableReplicatedConfigs() throws Exception { + final TransportConfiguration liveConnector = getConnectorTransportConfiguration(true); + final TransportConfiguration backupConnector = getConnectorTransportConfiguration(false); + final TransportConfiguration backupAcceptor = getAcceptorTransportConfiguration(false); + + backupConfig = createDefaultInVMConfig(); + liveConfig = createDefaultInVMConfig(); + + DistributedPrimitiveManagerConfiguration managerConfiguration = + new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), + Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + + ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null, managerConfiguration, managerConfiguration); + + backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false); + + setupHAPolicyConfiguration(); + nodeManager = createReplicatedBackupNodeManager(backupConfig); + + backupServer = createTestableServer(backupConfig); + + 
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true)); + + liveServer = createTestableServer(liveConfig); + } + protected void setupHAPolicyConfiguration() { + Assert.assertTrue(backupConfig.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration); ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true); ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false); } @@ -233,8 +269,13 @@ protected final void adaptLiveConfigForReplicatedFailBack(TestableServer server) configuration.getConnectorConfigurations().put(backupConnector.getName(), backupConnector); return; } - ReplicatedPolicy haPolicy = (ReplicatedPolicy) server.getServer().getHAPolicy(); - haPolicy.setCheckForLiveServer(true); + HAPolicy policy = server.getServer().getHAPolicy(); + if (policy instanceof ReplicatedPolicy) { + ((ReplicatedPolicy) policy).setCheckForLiveServer(true); + } else if (policy instanceof ReplicationPrimaryPolicy) { + Assert.assertTrue("Adapting won't work for the current configuration", ((ReplicationPrimaryPolicy) policy).isCheckForLiveServer()); + } + } @Override diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverReplicationTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverReplicationTest.java index 1b1388970de..cc187a2d435 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverReplicationTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverReplicationTest.java @@ -19,8 +19,8 @@ public class GroupingFailoverReplicationTest extends GroupingFailoverTestBase { @Override - protected boolean isSharedStore() { 
- return false; + protected HAType haType() { + return HAType.SharedNothingReplication; } } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverSharedServerTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverSharedServerTest.java index a2849447b6d..9d576683b38 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverSharedServerTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverSharedServerTest.java @@ -19,7 +19,7 @@ public class GroupingFailoverSharedServerTest extends GroupingFailoverTestBase { @Override - protected boolean isSharedStore() { - return true; + protected HAType haType() { + return HAType.SharedStore; } } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverTestBase.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverTestBase.java index de8c02c3e5b..2face9db413 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverTestBase.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/GroupingFailoverTestBase.java @@ -26,22 +26,26 @@ import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; import 
org.apache.activemq.artemis.core.server.cluster.impl.MessageLoadBalancingType; import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration; +import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation; import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation; import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase; import org.apache.activemq.artemis.tests.util.ActiveMQTestBase; +import org.apache.activemq.artemis.utils.Wait; import org.junit.Test; public abstract class GroupingFailoverTestBase extends ClusterTestBase { @Test public void testGroupingLocalHandlerFails() throws Exception { - setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty()); + setupBackupServer(2, 0, isFileStorage(), haType(), isNetty()); - setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false); + setupLiveServer(0, isFileStorage(), haType(), isNetty(), false); - setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false); + setupLiveServer(1, isFileStorage(), haType(), isNetty(), false); setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1); @@ -54,10 +58,18 @@ public void testGroupingLocalHandlerFails() throws Exception { setUpGroupHandler(GroupingHandlerConfiguration.TYPE.REMOTE, 1); setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2); - if (!isSharedStore()) { - ((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); - ((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2"); - ((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + switch (haType()) { + + case SharedNothingReplication: + ((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + 
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2"); + ((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + break; + case PluggableQuorumReplication: + ((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + ((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2"); + ((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + break; } startServers(0, 1, 2); @@ -129,11 +141,11 @@ public void waitForBackupTopologyAnnouncement(ClientSessionFactory sf) throws Ex @Test public void testGroupingLocalHandlerFailsMultipleGroups() throws Exception { - setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty()); + setupBackupServer(2, 0, isFileStorage(), haType(), isNetty()); - setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false); + setupLiveServer(0, isFileStorage(), haType(), isNetty(), false); - setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false); + setupLiveServer(1, isFileStorage(), haType(), isNetty(), false); setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1); @@ -147,10 +159,18 @@ public void testGroupingLocalHandlerFailsMultipleGroups() throws Exception { setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2); - if (!isSharedStore()) { - ((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); - ((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2"); - ((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + switch (haType()) { + + case SharedNothingReplication: + 
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + ((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2"); + ((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + break; + case PluggableQuorumReplication: + ((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + ((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2"); + ((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1"); + break; } startServers(0, 1, 2); @@ -187,9 +207,17 @@ public void testGroupingLocalHandlerFailsMultipleGroups() throws Exception { verifyReceiveAllWithGroupIDRoundRobin(0, 30, 0, 1); - if (!isSharedStore()) { - SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation(); - assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS)); + switch (haType()) { + case SharedNothingReplication: { + SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation(); + assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS)); + } + break; + case PluggableQuorumReplication: { + ReplicationBackupActivation backupActivation = (ReplicationBackupActivation) servers[2].getActivation(); + Wait.assertTrue(backupActivation::isReplicaSync, TimeUnit.SECONDS.toMillis(10)); + } + break; } closeSessionFactory(0); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/LiveVoteOnBackupFailureClusterTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/LiveVoteOnBackupFailureClusterTest.java index 
71281e6a400..e060e076a21 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/LiveVoteOnBackupFailureClusterTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/LiveVoteOnBackupFailureClusterTest.java @@ -49,14 +49,14 @@ protected void setupCluster(final MessageLoadBalancingType messageLoadBalancingT @Override protected void setupServers() throws Exception { // The backups - setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty()); - setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty()); - setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty()); + setupBackupServer(3, 0, isFileStorage(), haType(), isNetty()); + setupBackupServer(4, 1, isFileStorage(), haType(), isNetty()); + setupBackupServer(5, 2, isFileStorage(), haType(), isNetty()); // The lives - setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false); - setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false); - setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false); + setupLiveServer(0, isFileStorage(), haType(), isNetty(), false); + setupLiveServer(1, isFileStorage(), haType(), isNetty(), false); + setupLiveServer(2, isFileStorage(), haType(), isNetty(), false); //we need to know who is connected to who ((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group0"); @@ -71,9 +71,9 @@ protected void setupServers() throws Exception { ((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true); ((ReplicatedPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true); } - - protected boolean isSharedStorage() { - return false; + @Override + protected HAType haType() { + return HAType.SharedNothingReplication; } @Test diff --git 
a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/MultipleServerFailoverTestBase.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/MultipleServerFailoverTestBase.java index c13e2a7349f..a3f19cf412e 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/MultipleServerFailoverTestBase.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/MultipleServerFailoverTestBase.java @@ -16,7 +16,9 @@ */ package org.apache.activemq.artemis.tests.integration.cluster.failover; +import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.activemq.artemis.api.core.SimpleString; @@ -27,22 +29,46 @@ import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal; import org.apache.activemq.artemis.core.config.Configuration; import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration; import org.apache.activemq.artemis.core.server.ActiveMQServer; import org.apache.activemq.artemis.core.server.ActiveMQServerLogger; import org.apache.activemq.artemis.core.server.NodeManager; import org.apache.activemq.artemis.core.server.Queue; +import
org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; import org.apache.activemq.artemis.tests.util.Wait; import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer; import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer; import org.apache.activemq.artemis.tests.util.ActiveMQTestBase; import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils; import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null; + + private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() { + if (pluggableQuorumConfiguration != null) { + return pluggableQuorumConfiguration; + } + try { + pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + } catch (IOException ioException) { + throw new IllegalStateException("unable to create the locks folder of the pluggable quorum manager", ioException); + } + return pluggableQuorumConfiguration; + } + // Constants ----------------------------------------------------- // TODO: find a better solution for this @@ -67,7 +93,15 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase { public abstract boolean isNetty(); - public abstract boolean isSharedStore(); + public enum HAType { + SharedStore, SharedNothingReplication, PluggableQuorumReplication + } + + public abstract HAType haType(); + + protected final boolean isSharedStore() { + return HAType.SharedStore == haType(); + } public abstract String getNodeGroupName(); @@ -82,14 +116,22 @@ public void setUp() throws Exception { for (int
i = 0; i < getLiveServerCount(); i++) { HAPolicyConfiguration haPolicyConfiguration = null; - - if (isSharedStore()) { - haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration(); - } else { - haPolicyConfiguration = new ReplicatedPolicyConfiguration(); - if (getNodeGroupName() != null) { - ((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i); - } + switch (haType()) { + + case SharedStore: + haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration(); + break; + case SharedNothingReplication: + haPolicyConfiguration = new ReplicatedPolicyConfiguration(); + if (getNodeGroupName() != null) { + ((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i); + } + break; + case PluggableQuorumReplication: + haPolicyConfiguration = ReplicationPrimaryPolicyConfiguration.withDefault() + .setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration()) + .setGroupName(getNodeGroupName() != null ?
(getNodeGroupName() + "-" + i) : null); + break; } Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true, i)).setHAPolicyConfiguration(haPolicyConfiguration); @@ -126,13 +168,24 @@ public void setUp() throws Exception { for (int i = 0; i < getBackupServerCount(); i++) { HAPolicyConfiguration haPolicyConfiguration = null; - if (isSharedStore()) { - haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration(); - } else { - haPolicyConfiguration = new ReplicaPolicyConfiguration(); - if (getNodeGroupName() != null) { - ((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i); - } + switch (haType()) { + + case SharedStore: + haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration(); + break; + case SharedNothingReplication: + haPolicyConfiguration = new ReplicaPolicyConfiguration(); + if (getNodeGroupName() != null) { + ((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i); + } + break; + case PluggableQuorumReplication: + haPolicyConfiguration = ReplicationBackupPolicyConfiguration.withDefault() + .setVoteRetries(1) + .setVoteRetryWait(1000) + .setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration()) + .setGroupName(getNodeGroupName() != null ?
(getNodeGroupName() + "-" + i) : null); + break; } Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(false, i)).setHAPolicyConfiguration(haPolicyConfiguration); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NettyReplicationStopTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NettyReplicationStopTest.java index 9f4eb9971ba..c8afba5b4f6 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NettyReplicationStopTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NettyReplicationStopTest.java @@ -103,7 +103,7 @@ public void testReplicaStop() throws Exception { final int numMessages = 10; - ReplicationEndpoint endpoint = backupServer.getServer().getReplicationEndpoint(); + ReplicationEndpoint endpoint = getReplicationEndpoint(backupServer.getServer()); endpoint.pause(); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NetworkIsolationTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NetworkIsolationTest.java index aeea179e88d..3f8aabf8403 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NetworkIsolationTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/NetworkIsolationTest.java @@ -124,14 +124,14 @@ public void testDoNotActivateOnIsolation() throws Exception { liveServer.start(); - for (int i = 0; i < 1000 && backupServer.getServer().getReplicationEndpoint() != null && !backupServer.getServer().getReplicationEndpoint().isStarted(); i++) { + for (int i = 0; i < 1000 
&& getReplicationEndpoint(backupServer.getServer()) != null && !getReplicationEndpoint(backupServer.getServer()).isStarted(); i++) { Thread.sleep(10); } backupServer.getServer().getNetworkHealthCheck().clearAddresses(); // This will make sure the backup got synchronized after the network was activated again - Wait.assertTrue(() -> backupServer.getServer().getReplicationEndpoint().isStarted()); + Assert.assertTrue(getReplicationEndpoint(backupServer.getServer()).isStarted()); } finally { AssertionLoggerHandler.stopCapture(); } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicaTimeoutTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicaTimeoutTest.java index 91bbd20cfd7..d4a9c3d1e1e 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicaTimeoutTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicaTimeoutTest.java @@ -17,12 +17,10 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover; +import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.activemq.artemis.api.core.ActiveMQException; -import org.apache.activemq.artemis.api.core.Interceptor; import org.apache.activemq.artemis.api.core.QueueConfiguration; import org.apache.activemq.artemis.api.core.SimpleString; import org.apache.activemq.artemis.api.core.TransportConfiguration; @@ -34,16 +32,18 @@ import org.apache.activemq.artemis.core.config.Configuration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; import 
org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration; -import org.apache.activemq.artemis.core.protocol.core.Packet; import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl; +import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; import org.apache.activemq.artemis.core.server.NodeManager; +import org.apache.activemq.artemis.core.server.impl.Activation; import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; import org.apache.activemq.artemis.core.server.impl.InVMNodeManager; +import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation; import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation; import org.apache.activemq.artemis.tests.util.Wait; import org.apache.activemq.artemis.logs.AssertionLoggerHandler; -import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection; import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer; import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer; import org.apache.activemq.artemis.tests.util.ActiveMQTestBase; @@ -77,7 +77,9 @@ protected NodeManager createReplicatedBackupNodeManager(Configuration backupConf } protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception { - boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration; + boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicationBackupPolicyConfiguration || + config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || + config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration; return new SameProcessActiveMQServer(createInVMFailoverServer(true, config, nodeManager, isBackup ? 
2 : 1)); } @@ -119,6 +121,19 @@ protected void crash(TestableServer liveServer, liveServer.crash(true, true, sessions); } + protected void configureReplicationPair(Configuration backupConfig, + Configuration liveConfig, + TransportConfiguration backupConnector, + TransportConfiguration backupAcceptor, + TransportConfiguration liveConnector) throws IOException { + ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null); + ((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000); + ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000); + ((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false); + } + @Test//(timeout = 120000) public void testFailbackTimeout() throws Exception { AssertionLoggerHandler.startCapture(); @@ -134,19 +149,13 @@ public void testFailbackTimeout() throws Exception { Configuration backupConfig = createDefaultInVMConfig(); Configuration liveConfig = createDefaultInVMConfig(); - ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null); - ((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000); - ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000); + configureReplicationPair(backupConfig, liveConfig, backupConnector, backupAcceptor, liveConnector); backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)). 
setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false); liveConfig.setBindingsDirectory(getBindingsDir(0, false)).setJournalDirectory(getJournalDir(0, false)). setPagingDirectory(getPageDir(0, false)).setLargeMessagesDirectory(getLargeMessagesDir(0, false)).setSecurityEnabled(false); - ((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); - ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); - ((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false); - NodeManager nodeManager = createReplicatedBackupNodeManager(backupConfig); backupServer = createTestableServer(backupConfig, nodeManager); @@ -155,8 +164,6 @@ public void testFailbackTimeout() throws Exception { liveServer = createTestableServer(liveConfig, nodeManager); - AtomicBoolean ignoreIntercept = new AtomicBoolean(false); - final TestableServer theBackup = backupServer; liveServer.start(); @@ -174,23 +181,30 @@ public void testFailbackTimeout() throws Exception { Wait.assertTrue(backupServer.getServer()::isActive); - ignoreIntercept.set(true); - ((ActiveMQServerImpl) backupServer.getServer()).setAfterActivationCreated(new Runnable() { @Override public void run() { - //theBackup.getServer().getActivation() - - SharedNothingBackupActivation activation = (SharedNothingBackupActivation) theBackup.getServer().getActivation(); - activation.getReplicationEndpoint().addOutgoingInterceptorForReplication(new Interceptor() { - @Override - public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException { - if (ignoreIntercept.get() && packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) { + final Activation backupActivation = theBackup.getServer().getActivation(); + if (backupActivation instanceof SharedNothingBackupActivation) { + 
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backupActivation; + ReplicationEndpoint repEnd = activation.getReplicationEndpoint(); + repEnd.addOutgoingInterceptorForReplication((packet, connection) -> { + if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) { return false; } return true; - } - }); + }); + } else if (backupActivation instanceof ReplicationBackupActivation) { + ReplicationBackupActivation activation = (ReplicationBackupActivation) backupActivation; + activation.spyReplicationEndpointCreation(replicationEndpoint -> { + replicationEndpoint.addOutgoingInterceptorForReplication((packet, connection) -> { + if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) { + return false; + } + return true; + }); + }); + } } }); @@ -198,7 +212,9 @@ public boolean intercept(Packet packet, RemotingConnection connection) throws Ac Assert.assertTrue(Wait.waitFor(() -> AssertionLoggerHandler.findText("AMQ229114"))); - Wait.assertFalse(liveServer.getServer()::isStarted); + if (expectLiveSuicide()) { + Wait.assertFalse(liveServer.getServer()::isStarted); + } } finally { if (sf != null) { @@ -218,4 +234,8 @@ public boolean intercept(Packet packet, RemotingConnection connection) throws Ac } } + protected boolean expectLiveSuicide() { + return true; + } + } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedDistributionTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedDistributionTest.java index a7f058c1219..35f396672ea 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedDistributionTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedDistributionTest.java @@ -178,9 +178,9 @@ private void fail(final ClientSession session) throws 
InterruptedException { public void setUp() throws Exception { super.setUp(); - setupLiveServer(1, true, isSharedStore(), true, false); - setupLiveServer(3, true, isSharedStore(), true, false); - setupBackupServer(2, 3, true, isSharedStore(), true); + setupLiveServer(1, true, haType(), true, false); + setupLiveServer(3, true, haType(), true, false); + setupBackupServer(2, 3, true, haType(), true); final String address = ReplicatedDistributionTest.ADDRESS.toString(); // notice the abuse of the method call, '3' is not a backup for '1' @@ -210,7 +210,7 @@ public void setUp() throws Exception { } @Override - protected boolean isSharedStore() { - return false; + protected HAType haType() { + return HAType.SharedNothingReplication; } } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverExtraBackupsTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverExtraBackupsTest.java index 9e8d2902cd6..6712c74692b 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverExtraBackupsTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverExtraBackupsTest.java @@ -29,6 +29,7 @@ import org.apache.activemq.artemis.api.core.client.FailoverEventType; import org.apache.activemq.artemis.api.core.client.ServerLocator; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; import org.apache.activemq.artemis.core.server.ActiveMQServer; import org.apache.activemq.artemis.tests.util.Wait; import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer; @@ -51,8 +52,16 @@ private void 
waitForSync(ActiveMQServer server) throws Exception { @Override @Test public void testStartLiveFirst() throws Exception { - ((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0"); - ((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1"); + switch (haType()) { + case SharedNothingReplication: + ((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0"); + ((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1"); + break; + case PluggableQuorumReplication: + ((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0"); + ((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1"); + break; + } startServers(liveServers); backupServers.get(0).start(); @@ -85,8 +94,17 @@ private void startServers(List servers) throws Exception { @Override @Test public void testStartBackupFirst() throws Exception { - ((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0"); - ((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1"); + switch (haType()) { + case SharedNothingReplication: + ((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0"); + ((ReplicaPolicyConfiguration) 
backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1"); + break; + case PluggableQuorumReplication: + ((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0"); + ((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1"); + break; + } + startServers(backupServers); startServers(liveServers); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverTest.java index 82777a71db3..383f97b8d37 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedMultipleServerFailoverTest.java @@ -16,6 +16,9 @@ */ package org.apache.activemq.artemis.tests.integration.cluster.failover; +import java.util.Arrays; +import java.util.Collection; + import org.apache.activemq.artemis.api.core.QueueConfiguration; import org.apache.activemq.artemis.api.core.client.ClientConsumer; import org.apache.activemq.artemis.api.core.client.ClientMessage; @@ -25,9 +28,20 @@ import org.apache.activemq.artemis.api.core.client.ServerLocator; import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +@RunWith(Parameterized.class) public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailoverTestBase { + @Parameterized.Parameter + public HAType haType; + + 
@Parameterized.Parameters(name = "ha={0}") + public static Collection getParams() { + return Arrays.asList(new Object[][]{{HAType.SharedNothingReplication}, {HAType.PluggableQuorumReplication}}); + } + @Test public void testStartLiveFirst() throws Exception { for (TestableServer liveServer : liveServers) { @@ -140,8 +154,8 @@ public boolean isNetty() { } @Override - public boolean isSharedStore() { - return false; + public HAType haType() { + return haType; } @Override diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDistributionTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDistributionTest.java index ee972e92796..ea3accd1f43 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDistributionTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDistributionTest.java @@ -19,7 +19,7 @@ public class SharedStoreDistributionTest extends ReplicatedDistributionTest { @Override - protected boolean isSharedStore() { - return true; + protected HAType haType() { + return HAType.SharedStore; } } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDontWaitForActivationTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDontWaitForActivationTest.java index a4424e43f35..f23e59ee48d 100755 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDontWaitForActivationTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreDontWaitForActivationTest.java @@ -41,8 +41,8 @@ private void setupServers() throws Exception { 
// 1. configure 0 as backup of one to share the same node manager and file // storage locations - setupBackupServer(0, 1, isFileStorage(), true, isNetty()); - setupLiveServer(1, isFileStorage(), true, isNetty(), false); + setupBackupServer(0, 1, isFileStorage(), HAType.SharedStore, isNetty()); + setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false); // now reconfigure the HA policy for both servers to master with automatic // failover and wait-for-activation disabled. diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreMetricsLeakTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreMetricsLeakTest.java index 43c904bda17..c78fba54113 100755 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreMetricsLeakTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreMetricsLeakTest.java @@ -40,8 +40,8 @@ public void setUp() throws Exception { } private void setupServers() throws Exception { - setupLiveServer(0, isFileStorage(), true, isNetty(), false); - setupBackupServer(1, 0, isFileStorage(), true, isNetty()); + setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupBackupServer(1, 0, isFileStorage(), HAType.SharedStore, isNetty()); getServer(0).getConfiguration().setHAPolicyConfiguration(new SharedStoreMasterPolicyConfiguration().setFailoverOnServerShutdown(true)); getServer(0).getConfiguration().setMetricsConfiguration(new MetricsConfiguration().setJvmThread(false).setJvmGc(false).setJvmMemory(false).setPlugin(new SimpleMetricsPlugin().init(null))); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreScaleDownBackupTest.java 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreScaleDownBackupTest.java index fb4c5002d8b..9bd16e8b48d 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreScaleDownBackupTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/SharedStoreScaleDownBackupTest.java @@ -41,9 +41,9 @@ protected boolean isNetty() { public void setUp() throws Exception { super.setUp(); - setupLiveServer(0, isFileStorage(), true, isNetty(), false); - setupLiveServer(1, isFileStorage(), true, isNetty(), false); - setupBackupServer(2, 0, isFileStorage(), true, isNetty()); + setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false); + setupBackupServer(2, 0, isFileStorage(), HAType.SharedStore, isNetty()); setupClusterConnection("cluster0", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1); setupClusterConnection("cluster1", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 1, 0); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/StaticClusterWithBackupFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/StaticClusterWithBackupFailoverTest.java index fa39efd2acb..be5d841035e 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/StaticClusterWithBackupFailoverTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/StaticClusterWithBackupFailoverTest.java @@ -42,13 +42,13 @@ protected boolean isSharedStorage() { @Override protected void setupServers() throws Exception { // The backups - setupBackupServer(3, 0, 
isFileStorage(), isSharedStorage(), isNetty()); - setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty()); - setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty()); + setupBackupServer(3, 0, isFileStorage(), haType(), isNetty()); + setupBackupServer(4, 1, isFileStorage(), haType(), isNetty()); + setupBackupServer(5, 2, isFileStorage(), haType(), isNetty()); // The lives - setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false); - setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false); - setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false); + setupLiveServer(0, isFileStorage(), haType(), isNetty(), false); + setupLiveServer(1, isFileStorage(), haType(), isNetty(), false); + setupLiveServer(2, isFileStorage(), haType(), isNetty(), false); } } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumBackupAuthenticationTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumBackupAuthenticationTest.java new file mode 100644 index 00000000000..b1e6a94dc2a --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumBackupAuthenticationTest.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import java.util.Arrays; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.api.core.ActiveMQException; +import org.apache.activemq.artemis.api.core.Interceptor; +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.core.protocol.core.Packet; +import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl; +import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection; +import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTestBase; +import org.apache.activemq.artemis.tests.integration.cluster.failover.FakeServiceComponent; +import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils; +import org.apache.activemq.artemis.tests.util.Wait; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static java.util.Arrays.asList; + +@RunWith(Parameterized.class) +public class PluggableQuorumBackupAuthenticationTest extends FailoverTestBase { + + private static CountDownLatch registrationStarted; + + @Parameterized.Parameter + public boolean useNetty; + + @Parameterized.Parameters(name = "useNetty={0}") + public static Iterable getParams() { +
return asList(new Object[][]{{false}, {true}}); + } + + @Override + @Before + public void setUp() throws Exception { + startBackupServer = false; + registrationStarted = new CountDownLatch(1); + super.setUp(); + } + + @Test + public void testWrongPasswordSetting() throws Exception { + FakeServiceComponent fakeServiceComponent = new FakeServiceComponent("fake web server"); + Wait.assertTrue(liveServer.getServer()::isActive); + waitForServerToStart(liveServer.getServer()); + backupServer.start(); + backupServer.getServer().addExternalComponent(fakeServiceComponent, true); + assertTrue(registrationStarted.await(5, TimeUnit.SECONDS)); + /* + * can't intercept the message at the backup, so we intercept the registration message at the + * live. + */ + Wait.waitFor(() -> !backupServer.isStarted()); + assertFalse("backup should have stopped", backupServer.isStarted()); + Wait.assertFalse(fakeServiceComponent::isStarted); + backupServer.stop(); + liveServer.stop(); + } + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + backupConfig.setClusterPassword("crocodile"); + liveConfig.setIncomingInterceptorClassNames(Arrays.asList(NotifyingInterceptor.class.getName())); + backupConfig.setSecurityEnabled(true); + liveConfig.setSecurityEnabled(true); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + } + + @Override + protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) { + return useNetty ? getNettyAcceptorTransportConfiguration(live) : + TransportConfigurationUtils.getInVMAcceptor(live); + } + + @Override + protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) { + return useNetty ?
getNettyConnectorTransportConfiguration(live) : + TransportConfigurationUtils.getInVMConnector(live); + } + + public static final class NotifyingInterceptor implements Interceptor { + + @Override + public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException { + if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) { + registrationStarted.countDown(); + } else if (packet.getType() == PacketImpl.CLUSTER_CONNECT) { + registrationStarted.countDown(); + } + return true; + } + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumBackupSyncJournalTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumBackupSyncJournalTest.java new file mode 100644 index 00000000000..6f620a64f22 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumBackupSyncJournalTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.tests.integration.cluster.failover.BackupSyncJournalTest; + +public class PluggableQuorumBackupSyncJournalTest extends BackupSyncJournalTest { + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()) + .setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()) + .setMaxSavedReplicatedJournalsSize(2) + .setAllowFailBack(true); + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumExtraBackupReplicatedFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumExtraBackupReplicatedFailoverTest.java new file mode 100644 index 00000000000..e9f7576dc06 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumExtraBackupReplicatedFailoverTest.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import java.util.Arrays; + +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.core.server.ActiveMQServer; +import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTestBase; +import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer; +import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils; +import org.apache.activemq.artemis.tests.util.Wait; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class PluggableQuorumExtraBackupReplicatedFailoverTest extends FailoverTestBase { + + private static final String GROUP_NAME = "foo"; + + @Parameterized.Parameter + public boolean useGroupName; + + @Parameterized.Parameters(name = "useGroupName={0}") + public static Iterable<Object[]> getParams() { + return Arrays.asList(new Object[][]{{false}, {true}}); + } + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + if (useGroupName) { + ((ReplicationPrimaryPolicyConfiguration)
liveConfig.getHAPolicyConfiguration()).setGroupName(GROUP_NAME); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setGroupName(GROUP_NAME); + } + } + + @Override + protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) { + return TransportConfigurationUtils.getInVMAcceptor(live); + } + + @Override + protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) { + return TransportConfigurationUtils.getInVMConnector(live); + } + + @Test + public void testExtraBackupReplicates() throws Exception { + Configuration secondBackupConfig = backupConfig.copy(); + String secondBackupGroupName = ((ReplicationBackupPolicyConfiguration) secondBackupConfig.getHAPolicyConfiguration()).getGroupName(); + Assert.assertEquals(((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).getGroupName(), + secondBackupGroupName); + if (useGroupName) { + Assert.assertEquals(GROUP_NAME, secondBackupGroupName); + } else { + Assert.assertNull(secondBackupGroupName); + } + TestableServer secondBackupServer = createTestableServer(secondBackupConfig); + secondBackupConfig.setBindingsDirectory(getBindingsDir(1, true)) + .setJournalDirectory(getJournalDir(1, true)) + .setPagingDirectory(getPageDir(1, true)) + .setLargeMessagesDirectory(getLargeMessagesDir(1, true)) + .setSecurityEnabled(false); + + waitForRemoteBackupSynchronization(backupServer.getServer()); + + secondBackupServer.start(); + Thread.sleep(5000); + backupServer.stop(); + waitForSync(secondBackupServer.getServer()); + waitForRemoteBackupSynchronization(secondBackupServer.getServer()); + + } + + private void waitForSync(ActiveMQServer server) throws Exception { + Wait.waitFor(server::isReplicaSync); + } + +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumGroupingFailoverReplicationTest.java 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumGroupingFailoverReplicationTest.java new file mode 100644 index 00000000000..80ea3764708 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumGroupingFailoverReplicationTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase; +import org.apache.activemq.artemis.tests.integration.cluster.failover.GroupingFailoverTestBase; + +public class PluggableQuorumGroupingFailoverReplicationTest extends GroupingFailoverTestBase { + + @Override + protected ClusterTestBase.HAType haType() { + return HAType.PluggableQuorumReplication; + } + +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumNettyNoGroupNameReplicatedFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumNettyNoGroupNameReplicatedFailoverTest.java new file mode 100644 index 00000000000..eb2f1dc2a05 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumNettyNoGroupNameReplicatedFailoverTest.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.concurrent.TimeUnit; + +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpHandler; +import com.sun.net.httpserver.HttpServer; +import org.apache.activemq.artemis.api.core.QueueConfiguration; +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.api.core.client.ClientSession; +import org.apache.activemq.artemis.component.WebServerComponent; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.core.server.ActiveMQServer; +import org.apache.activemq.artemis.core.server.ServiceComponent; +import org.apache.activemq.artemis.dto.AppDTO; +import org.apache.activemq.artemis.dto.WebServerDTO; +import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTest; +import org.apache.activemq.artemis.tests.util.Wait; +import org.junit.Assert; +import org.junit.Test; + +public class PluggableQuorumNettyNoGroupNameReplicatedFailoverTest extends FailoverTest { + + protected void beforeWaitForRemoteBackupSynchronization() { + } + + private void waitForSync(ActiveMQServer server) throws Exception { + Wait.waitFor(server::isReplicaSync); + } + + /** + * Default maxSavedReplicatedJournalsSize is 2, this means the backup will fall back to replicated only twice, after this + * it is stopped permanently. 
+ */ + @Test(timeout = 120000) + public void testReplicatedFailback() throws Exception { + try { + beforeWaitForRemoteBackupSynchronization(); + + waitForSync(backupServer.getServer()); + + createSessionFactory(); + + ClientSession session = createSession(sf, true, true); + + session.createQueue(new QueueConfiguration(ADDRESS)); + + crash(session); + + liveServer.start(); + + waitForSync(liveServer.getServer()); + + waitForSync(backupServer.getServer()); + + waitForServerToStart(liveServer.getServer()); + + session = createSession(sf, true, true); + + crash(session); + + liveServer.start(); + + waitForSync(liveServer.getServer()); + + waitForSync(backupServer.getServer()); + + waitForServerToStart(liveServer.getServer()); + + session = createSession(sf, true, true); + + crash(session); + + liveServer.start(); + + waitForSync(liveServer.getServer()); + + liveServer.getServer().waitForActivation(5, TimeUnit.SECONDS); + + waitForSync(liveServer.getServer()); + + waitForServerToStart(backupServer.getServer()); + + assertTrue(backupServer.getServer().isStarted()); + + } finally { + if (sf != null) { + sf.close(); + } + try { + liveServer.getServer().stop(); + } catch (Throwable ignored) { + } + try { + backupServer.getServer().stop(); + } catch (Throwable ignored) { + } + } + } + + @Test + public void testReplicatedFailbackBackupFromLiveBackToBackup() throws Exception { + + InetSocketAddress address = new InetSocketAddress("127.0.0.1", 8787); + HttpServer httpServer = HttpServer.create(address, 100); + httpServer.start(); + + try { + httpServer.createContext("/", new HttpHandler() { + @Override + public void handle(HttpExchange t) throws IOException { + String response = "This is a unit test"; + t.sendResponseHeaders(200, response.length()); + OutputStream os = t.getResponseBody(); + os.write(response.getBytes()); + os.close(); + } + }); + WebServerDTO wdto = new WebServerDTO(); + AppDTO appDTO = new AppDTO(); + appDTO.war = "console.war"; + appDTO.url = "console"; + 
wdto.apps = new ArrayList<>(); + wdto.apps.add(appDTO); + wdto.bind = "http://localhost:0"; + wdto.path = "console"; + WebServerComponent webServerComponent = new WebServerComponent(); + webServerComponent.configure(wdto, ".", "."); + webServerComponent.start(); + + backupServer.getServer().getNetworkHealthCheck().parseURIList("http://localhost:8787"); + Assert.assertTrue(backupServer.getServer().getNetworkHealthCheck().isStarted()); + backupServer.getServer().addExternalComponent(webServerComponent, false); + // this is called when backup servers go from live back to backup + backupServer.getServer().fail(true); + Assert.assertTrue(backupServer.getServer().getNetworkHealthCheck().isStarted()); + Assert.assertTrue(backupServer.getServer().getExternalComponents().get(0).isStarted()); + ((ServiceComponent) (backupServer.getServer().getExternalComponents().get(0))).stop(true); + } finally { + httpServer.stop(0); + } + + } + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()) + .setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()) + .setMaxSavedReplicatedJournalsSize(2) + .setAllowFailBack(true); + } + + @Override + protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) { + return getNettyAcceptorTransportConfiguration(live); + } + + @Override + protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) { + return getNettyConnectorTransportConfiguration(live); + } + + @Override + protected void crash(boolean waitFailure, ClientSession...
sessions) throws Exception { + if (sessions.length > 0) { + for (ClientSession session : sessions) { + waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer()); + } + } else { + waitForRemoteBackup(null, 5, true, backupServer.getServer()); + } + super.crash(waitFailure, sessions); + } + + @Override + protected void crash(ClientSession... sessions) throws Exception { + if (sessions.length > 0) { + for (ClientSession session : sessions) { + waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer()); + } + } else { + waitForRemoteBackup(null, 5, true, backupServer.getServer()); + } + super.crash(sessions); + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumNettyReplicationStopTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumNettyReplicationStopTest.java new file mode 100644 index 00000000000..1381cad3c41 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumNettyReplicationStopTest.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.tests.integration.cluster.failover.NettyReplicationStopTest; + +public class PluggableQuorumNettyReplicationStopTest extends NettyReplicationStopTest { + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + } + +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumPageCleanupWhileReplicaCatchupTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumPageCleanupWhileReplicaCatchupTest.java new file mode 100644 index 00000000000..b5b8b3af027 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumPageCleanupWhileReplicaCatchupTest.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.tests.integration.cluster.failover.PageCleanupWhileReplicaCatchupTest; + +public class PluggableQuorumPageCleanupWhileReplicaCatchupTest extends PageCleanupWhileReplicaCatchupTest { + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + } + +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicaTimeoutTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicaTimeoutTest.java new file mode 100644 index 00000000000..4c15d7bd3bd --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicaTimeoutTest.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import java.io.IOException; +import java.util.Collections; + +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; +import org.apache.activemq.artemis.tests.integration.cluster.failover.ReplicaTimeoutTest; +import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +public class PluggableQuorumReplicaTimeoutTest extends ReplicaTimeoutTest { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Override + protected void configureReplicationPair(Configuration backupConfig, + Configuration liveConfig, + TransportConfiguration backupConnector, + TransportConfiguration backupAcceptor, + TransportConfiguration liveConnector) throws IOException { + DistributedPrimitiveManagerConfiguration 
managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + + ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, + liveConfig, liveConnector, null, + managerConfiguration, managerConfiguration); + ReplicationPrimaryPolicyConfiguration primaryConfiguration = ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()); + primaryConfiguration.setInitialReplicationSyncTimeout(1000); + primaryConfiguration.setCheckForLiveServer(true); + ReplicationBackupPolicyConfiguration backupConfiguration = ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()); + backupConfiguration.setInitialReplicationSyncTimeout(1000); + backupConfiguration.setMaxSavedReplicatedJournalsSize(2) + .setAllowFailBack(true); + } + + @Override + protected boolean expectLiveSuicide() { + return false; + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedDistributionTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedDistributionTest.java new file mode 100644 index 00000000000..afbd01e9e6a --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedDistributionTest.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import org.apache.activemq.artemis.tests.integration.cluster.failover.ReplicatedDistributionTest; + +public class PluggableQuorumReplicatedDistributionTest extends ReplicatedDistributionTest { + + @Override + protected HAType haType() { + return HAType.PluggableQuorumReplication; + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedLargeMessageFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedLargeMessageFailoverTest.java similarity index 63% rename from tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedLargeMessageFailoverTest.java rename to tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedLargeMessageFailoverTest.java index 7479257bd28..9d7cb4d3b24 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedLargeMessageFailoverTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedLargeMessageFailoverTest.java @@ -14,23 +14,31 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.activemq.artemis.tests.integration.cluster.failover; +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; import org.apache.activemq.artemis.api.core.client.ClientSession; -import org.apache.activemq.artemis.core.client.impl.ClientSessionInternal; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.tests.integration.cluster.failover.LargeMessageFailoverTest; -public class ReplicatedLargeMessageFailoverTest extends LargeMessageFailoverTest { +public class PluggableQuorumReplicatedLargeMessageFailoverTest extends LargeMessageFailoverTest { @Override protected void createConfigs() throws Exception { - createReplicatedConfigs(); + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); } @Override protected void crash(boolean waitFailure, ClientSession... sessions) throws Exception { if (sessions.length > 0) { for (ClientSession session : sessions) { - waitForRemoteBackup(((ClientSessionInternal) session).getSessionFactory(), 5, true, backupServer.getServer()); + waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer()); } } else { waitForRemoteBackup(null, 5, true, backupServer.getServer()); @@ -42,11 +50,12 @@ protected void crash(boolean waitFailure, ClientSession... sessions) throws Exce protected void crash(ClientSession... 
sessions) throws Exception { if (sessions.length > 0) { for (ClientSession session : sessions) { - waitForRemoteBackup(((ClientSessionInternal) session).getSessionFactory(), 5, true, backupServer.getServer()); + waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer()); } } else { waitForRemoteBackup(null, 5, true, backupServer.getServer()); } super.crash(sessions); } + } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedLargeMessageWithDelayFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedLargeMessageWithDelayFailoverTest.java similarity index 73% rename from tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedLargeMessageWithDelayFailoverTest.java rename to tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedLargeMessageWithDelayFailoverTest.java index cb6ae62fd16..5954d8992db 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ReplicatedLargeMessageWithDelayFailoverTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedLargeMessageWithDelayFailoverTest.java @@ -14,17 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.activemq.artemis.tests.integration.cluster.failover; +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; import org.apache.activemq.artemis.api.core.client.ClientSession; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; import org.apache.activemq.artemis.tests.integration.cluster.util.BackupSyncDelay; import org.junit.After; import org.junit.Before; -/** - * See {@link BackupSyncDelay} for the rationale about these 'WithDelay' tests. - */ -public class ReplicatedLargeMessageWithDelayFailoverTest extends ReplicatedLargeMessageFailoverTest { +public class PluggableQuorumReplicatedLargeMessageWithDelayFailoverTest extends PluggableQuorumReplicatedLargeMessageFailoverTest { private BackupSyncDelay syncDelay; @@ -60,10 +59,23 @@ protected void crash(boolean waitFailure, ClientSession... sessions) throws Exce super.crash(waitFailure, sessions); } + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()) + .setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + } + @Override @After public void tearDown() throws Exception { syncDelay.deliverUpToDateMsg(); super.tearDown(); } + } diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedPagingFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedPagingFailoverTest.java new file mode 100644 index 00000000000..d80460855ba --- /dev/null +++ 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/quorum/PluggableQuorumReplicatedPagingFailoverTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum; + +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.tests.integration.cluster.failover.PagingFailoverTest; + +public class PluggableQuorumReplicatedPagingFailoverTest extends PagingFailoverTest { + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/util/BackupSyncDelay.java 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/util/BackupSyncDelay.java index 9ddffd389a2..b0af71b3675 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/util/BackupSyncDelay.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/util/BackupSyncDelay.java @@ -32,6 +32,8 @@ import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationStartSyncMessage; import org.apache.activemq.artemis.core.replication.ReplicationEndpoint; import org.apache.activemq.artemis.core.server.ActiveMQServer; +import org.apache.activemq.artemis.core.server.impl.Activation; +import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation; import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation; import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection; @@ -94,8 +96,18 @@ public BackupSyncDelay(TestableServer backupServer, TestableServer liveServer) { public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException { if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) { try { - SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backup.getActivation(); - ReplicationEndpoint repEnd = activation.getReplicationEndpoint(); + Activation backupActivation = backup.getActivation(); + ReplicationEndpoint repEnd = null; + if (backupActivation instanceof SharedNothingBackupActivation) { + SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backupActivation; + repEnd = activation.getReplicationEndpoint(); + } else if (backupActivation instanceof ReplicationBackupActivation) { + ReplicationBackupActivation activation = (ReplicationBackupActivation) backupActivation; + repEnd = activation.getReplicationEndpoint(); + } + if (repEnd == null) { + throw new NullPointerException("replication endpoint isn't 
supposed to be null"); + } handler.addSubHandler(repEnd); Channel repChannel = repEnd.getChannel(); repChannel.setHandler(handler); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/openwire/OpenWireProtocolManagerTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/openwire/OpenWireProtocolManagerTest.java index 4ee2a6c76cd..e1d6ce8a079 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/openwire/OpenWireProtocolManagerTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/openwire/OpenWireProtocolManagerTest.java @@ -78,7 +78,7 @@ static final class DummyServer extends ActiveMQServerImpl { @Override public ClusterManager getClusterManager() { - return new ClusterManager(getExecutorFactory(), this, null, null, null, null, null, false); + return new ClusterManager(getExecutorFactory(), this, null, null, null, null, null, true); } @Override diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationFlowControlTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationFlowControlTest.java new file mode 100644 index 00000000000..11e43802462 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationFlowControlTest.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.activemq.artemis.tests.integration.replication; + +import java.io.IOException; +import java.util.Collections; + +import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +public class PluggableQuorumReplicationFlowControlTest extends SharedNothingReplicationFlowControlTest { + + private DistributedPrimitiveManagerConfiguration managerConfiguration; + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Before + public void init() throws IOException { + managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + } + + @Override + protected HAPolicyConfiguration createReplicationBackupConfiguration() { + ReplicationBackupPolicyConfiguration haPolicy = ReplicationBackupPolicyConfiguration.withDefault(); + haPolicy.setDistributedManagerConfiguration(managerConfiguration); + haPolicy.setClusterName("cluster"); + // fail-fast in order to let the backup to quickly retry syncing with primary + haPolicy.setVoteRetries(0); + return haPolicy; + } + + @Override + protected HAPolicyConfiguration createReplicationLiveConfiguration() { 
+ ReplicationPrimaryPolicyConfiguration haPolicy = ReplicationPrimaryPolicyConfiguration.withDefault(); + haPolicy.setDistributedManagerConfiguration(managerConfiguration); + haPolicy.setCheckForLiveServer(false); + return haPolicy; + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationOrderTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationOrderTest.java new file mode 100644 index 00000000000..f5ed720792b --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationOrderTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.tests.integration.replication; + +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static java.util.Arrays.asList; + +@RunWith(Parameterized.class) +public class PluggableQuorumReplicationOrderTest extends ReplicationOrderTest { + + @Parameterized.Parameter + public boolean useNetty; + + @Parameterized.Parameters(name = "useNetty={1}") + public static Iterable getParams() { + return asList(new Object[][]{{false}, {true}}); + } + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()) + .setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()) + .setMaxSavedReplicatedJournalsSize(2) + .setAllowFailBack(true); + } + + @Override + protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) { + return useNetty ? getNettyAcceptorTransportConfiguration(live) : + super.getAcceptorTransportConfiguration(live); + } + + @Override + protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) { + return useNetty ? 
getNettyConnectorTransportConfiguration(live) : + super.getConnectorTransportConfiguration(live); + } +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationTest.java new file mode 100644 index 00000000000..1d702b5b88b --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/PluggableQuorumReplicationTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.tests.integration.replication; + +import java.io.IOException; +import java.util.Collections; + +import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +public class PluggableQuorumReplicationTest extends SharedNothingReplicationTest { + + private DistributedPrimitiveManagerConfiguration managerConfiguration; + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Before + public void init() throws IOException { + managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + } + + @Override + protected HAPolicyConfiguration createReplicationLiveConfiguration() { + ReplicationPrimaryPolicyConfiguration haPolicy = ReplicationPrimaryPolicyConfiguration.withDefault(); + haPolicy.setDistributedManagerConfiguration(managerConfiguration); + haPolicy.setCheckForLiveServer(false); + return haPolicy; + } + + @Override + protected HAPolicyConfiguration createReplicationBackupConfiguration() { + ReplicationBackupPolicyConfiguration haPolicy = ReplicationBackupPolicyConfiguration.withDefault(); + haPolicy.setDistributedManagerConfiguration(managerConfiguration); + haPolicy.setClusterName("cluster"); + return haPolicy; + } + +} diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/ReplicationTest.java 
b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/ReplicationTest.java index 539e845927d..88da15e93d2 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/ReplicationTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/ReplicationTest.java @@ -18,6 +18,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; @@ -48,6 +49,7 @@ import org.apache.activemq.artemis.api.core.client.ServerLocator; import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration; import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration; import org.apache.activemq.artemis.core.io.IOCallback; import org.apache.activemq.artemis.core.io.SequentialFileFactory; @@ -85,6 +87,7 @@ import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl; import org.apache.activemq.artemis.core.settings.HierarchicalRepository; import org.apache.activemq.artemis.core.settings.impl.AddressSettings; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection; import org.apache.activemq.artemis.tests.util.ActiveMQTestBase; import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils; @@ -97,10 +100,26 @@ import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +@RunWith(Parameterized.class) public final class ReplicationTest extends ActiveMQTestBase { + @Rule + 
public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Parameterized.Parameter + public boolean pluggableQuorum; + + @Parameterized.Parameters(name = "PluggableQuorum={0}") + public static Iterable data() { + return Arrays.asList(new Object[][]{{true}, {false}}); + } + private ThreadFactory tFactory; private ExecutorService executor; private ExecutorFactory factory; @@ -144,7 +163,15 @@ private void setupServer(boolean useNetty, Configuration backupConfig = createDefaultInVMConfig().setHAPolicyConfiguration(new SharedStoreSlavePolicyConfiguration()).setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setIncomingInterceptorClassNames(incomingInterceptors.length > 0 ? Arrays.asList(incomingInterceptors) : new ArrayList()); - ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, liveAcceptor); + if (!pluggableQuorum) { + ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, liveAcceptor); + } else { + DistributedPrimitiveManagerConfiguration managerConfiguration = + new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), + Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString())); + + ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, liveAcceptor, managerConfiguration, managerConfiguration); + } if (extraConfig != null) { extraConfig.config(liveConfig, backupConfig); @@ -558,7 +585,7 @@ public void testReplicationLargeMessageFileClose() throws Exception { blockOnReplication(storage, manager); - LargeServerMessageImpl message1 = (LargeServerMessageImpl) backupServer.getReplicationEndpoint().getLargeMessages().get(Long.valueOf(500)); + 
LargeServerMessageImpl message1 = (LargeServerMessageImpl) getReplicationEndpoint(backupServer).getLargeMessages().get(Long.valueOf(500)); Assert.assertNotNull(message1); Assert.assertFalse(largeMsg.getAppendFile().isOpen()); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationFlowControlTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationFlowControlTest.java index f2a8a283097..1d923731d60 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationFlowControlTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationFlowControlTest.java @@ -47,6 +47,7 @@ import org.apache.activemq.artemis.core.client.impl.ServerLocatorImpl; import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration; import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; import org.apache.activemq.artemis.core.config.impl.ConfigurationImpl; @@ -173,7 +174,6 @@ public void testReplicationIfFlowControlled() throws Exception { SequentialFileFactory fileFactory; - File liveJournalDir = brokersFolder.getRoot().toPath().resolve("live").resolve("data").resolve("journal").toFile(); fileFactory = new MappedSequentialFileFactory(liveConfiguration.getJournalLocation(), liveConfiguration.getJournalFileSize(), false, liveConfiguration.getJournalBufferSize_NIO(), liveConfiguration.getJournalBufferTimeout_NIO(), null); JournalImpl liveMessageJournal = new JournalImpl(liveConfiguration.getJournalFileSize(), liveConfiguration.getJournalMinFiles(), 
liveConfiguration.getJournalPoolFiles(), liveConfiguration.getJournalCompactMinFiles(), liveConfiguration.getJournalCompactPercentage(), fileFactory, "activemq-data", "amq", fileFactory.getMaxIO()); @@ -355,6 +355,12 @@ public synchronized void close(boolean waitSync, boolean block) throws IOExcepti } } + protected HAPolicyConfiguration createReplicationLiveConfiguration() { + return new ReplicatedPolicyConfiguration() + .setVoteOnReplicationFailure(false) + .setCheckForLiveServer(false); + } + // Set a small call timeout and write buffer high water mark value to trigger replication flow control private Configuration createLiveConfiguration() throws Exception { Configuration conf = new ConfigurationImpl(); @@ -370,10 +376,7 @@ private Configuration createLiveConfiguration() throws Exception { conf.setClusterUser("mycluster"); conf.setClusterPassword("mypassword"); - ReplicatedPolicyConfiguration haPolicy = new ReplicatedPolicyConfiguration(); - haPolicy.setVoteOnReplicationFailure(false); - haPolicy.setCheckForLiveServer(false); - conf.setHAPolicyConfiguration(haPolicy); + conf.setHAPolicyConfiguration(createReplicationLiveConfiguration()); ClusterConnectionConfiguration ccconf = new ClusterConnectionConfiguration(); ccconf.setStaticConnectors(new ArrayList<>()).getStaticConnectors().add("backup"); @@ -387,6 +390,10 @@ private Configuration createLiveConfiguration() throws Exception { return conf; } + protected HAPolicyConfiguration createReplicationBackupConfiguration() { + return new ReplicaPolicyConfiguration().setClusterName("cluster"); + } + private Configuration createBackupConfiguration() throws Exception { Configuration conf = new ConfigurationImpl(); conf.setName("localhost::backup"); @@ -394,9 +401,7 @@ private Configuration createBackupConfiguration() throws Exception { File backupDir = brokersFolder.newFolder("backup"); conf.setBrokerInstance(backupDir); - ReplicaPolicyConfiguration haPolicy = new ReplicaPolicyConfiguration(); - 
haPolicy.setClusterName("cluster"); - conf.setHAPolicyConfiguration(haPolicy); + conf.setHAPolicyConfiguration(createReplicationBackupConfiguration()); conf.addAcceptorConfiguration("backup", "tcp://localhost:61617"); conf.addConnectorConfiguration("live", "tcp://localhost:61616"); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationTest.java index 7cc8bb93978..1d3317c40e4 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/replication/SharedNothingReplicationTest.java @@ -31,6 +31,7 @@ import org.apache.activemq.artemis.core.client.impl.ServerLocatorImpl; import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration; import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.HAPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; import org.apache.activemq.artemis.core.config.impl.ConfigurationImpl; @@ -237,6 +238,12 @@ public void addRecord(RecordInfo info) { Assert.assertTrue("The test is not valid, slow persister stopped being used", SlowMessagePersister._getInstance().used); } + protected HAPolicyConfiguration createReplicationLiveConfiguration() { + return new ReplicatedPolicyConfiguration() + .setVoteOnReplicationFailure(false) + .setCheckForLiveServer(false); + } + private Configuration createLiveConfiguration() throws Exception { Configuration conf = new ConfigurationImpl(); conf.setName("localhost::live"); @@ -251,10 +258,7 @@ private Configuration createLiveConfiguration() 
throws Exception { conf.setClusterUser("mycluster"); conf.setClusterPassword("mypassword"); - ReplicatedPolicyConfiguration haPolicy = new ReplicatedPolicyConfiguration(); - haPolicy.setVoteOnReplicationFailure(false); - haPolicy.setCheckForLiveServer(false); - conf.setHAPolicyConfiguration(haPolicy); + conf.setHAPolicyConfiguration(createReplicationLiveConfiguration()); ClusterConnectionConfiguration ccconf = new ClusterConnectionConfiguration(); ccconf.setStaticConnectors(new ArrayList<>()).getStaticConnectors().add("backup"); @@ -267,6 +271,10 @@ private Configuration createLiveConfiguration() throws Exception { return conf; } + protected HAPolicyConfiguration createReplicationBackupConfiguration() { + return new ReplicaPolicyConfiguration().setClusterName("cluster"); + } + private Configuration createBackupConfiguration() throws Exception { Configuration conf = new ConfigurationImpl(); conf.setName("localhost::backup"); @@ -274,9 +282,7 @@ private Configuration createBackupConfiguration() throws Exception { File backupDir = brokersFolder.newFolder("backup"); conf.setBrokerInstance(backupDir); - ReplicaPolicyConfiguration haPolicy = new ReplicaPolicyConfiguration(); - haPolicy.setClusterName("cluster"); - conf.setHAPolicyConfiguration(haPolicy); + conf.setHAPolicyConfiguration(createReplicationBackupConfiguration()); conf.addAcceptorConfiguration("backup", "tcp://localhost:61617"); conf.addConnectorConfiguration("live", "tcp://localhost:61616"); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/security/PluggableQuorumBasicSecurityManagerFailoverTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/security/PluggableQuorumBasicSecurityManagerFailoverTest.java new file mode 100644 index 00000000000..60f27a13c27 --- /dev/null +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/security/PluggableQuorumBasicSecurityManagerFailoverTest.java 
@@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.artemis.tests.integration.security; + +import java.util.Collections; + +import org.apache.activemq.artemis.api.core.ActiveMQException; +import org.apache.activemq.artemis.api.core.TransportConfiguration; +import org.apache.activemq.artemis.api.core.client.ClientSession; +import org.apache.activemq.artemis.api.core.client.ClientSessionFactory; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; +import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager; +import org.apache.activemq.artemis.spi.core.security.ActiveMQBasicSecurityManager; +import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTestBase; +import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils; +import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils; +import org.junit.Assert; +import org.junit.Test; + +public class PluggableQuorumBasicSecurityManagerFailoverTest extends 
FailoverTestBase { + + @Override + protected void createConfigs() throws Exception { + createPluggableReplicatedConfigs(); + } + + @Override + protected void createPluggableReplicatedConfigs() throws Exception { + final TransportConfiguration liveConnector = getConnectorTransportConfiguration(true); + final TransportConfiguration backupConnector = getConnectorTransportConfiguration(false); + final TransportConfiguration backupAcceptor = getAcceptorTransportConfiguration(false); + + backupConfig = createDefaultInVMConfig(); + liveConfig = createDefaultInVMConfig(); + + DistributedPrimitiveManagerConfiguration managerConfiguration = + new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), + Collections.singletonMap("locks-folder", + tmpFolder.newFolder("manager").toString())); + + ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, + liveConfig, liveConnector, null, + managerConfiguration, managerConfiguration); + + backupConfig + .setSecurityEnabled(true) + .setBindingsDirectory(getBindingsDir(0, true)) + .setJournalDirectory(getJournalDir(0, true)) + .setPagingDirectory(getPageDir(0, true)) + .setLargeMessagesDirectory(getLargeMessagesDir(0, true)); + + setupHAPolicyConfiguration(); + nodeManager = createReplicatedBackupNodeManager(backupConfig); + + backupServer = createTestableServer(backupConfig); + + backupServer.getServer().setSecurityManager(new ActiveMQBasicSecurityManager()); + + liveConfig + .setSecurityEnabled(true) + .clearAcceptorConfigurations() + .addAcceptorConfiguration(getAcceptorTransportConfiguration(true)); + + liveServer = createTestableServer(liveConfig); + + liveServer.getServer().setSecurityManager(new ActiveMQBasicSecurityManager()); + } + + @Override + protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) { + return TransportConfigurationUtils.getInVMAcceptor(live); + } + + @Override + protected TransportConfiguration 
getConnectorTransportConfiguration(final boolean live) { + return TransportConfigurationUtils.getInVMConnector(live); + } + + @Override + protected void setupHAPolicyConfiguration() { + ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true); + ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true); + } + + @Test + public void testFailover() throws Exception { + + liveServer.getServer().getActiveMQServerControl().addUser("foo", "bar", "baz", false); + + ClientSessionFactory cf = createSessionFactory(getServerLocator()); + ClientSession session = null; + + try { + session = cf.createSession("foo", "bar", false, true, true, false, 0); + } catch (ActiveMQException e) { + e.printStackTrace(); + Assert.fail("should not throw exception"); + } + + crash(session); + waitForServerToStart(backupServer.getServer()); + + try { + cf = createSessionFactory(getServerLocator()); + session = cf.createSession("foo", "bar", false, true, true, false, 0); + } catch (ActiveMQException e) { + e.printStackTrace(); + Assert.fail("should not throw exception"); + } + } +} + diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/server/ScaleDown3NodeTest.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/server/ScaleDown3NodeTest.java index 726b079e79f..d7e3061f841 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/server/ScaleDown3NodeTest.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/server/ScaleDown3NodeTest.java @@ -48,11 +48,11 @@ public class ScaleDown3NodeTest extends ClusterTestBase { @Before public void setUp() throws Exception { super.setUp(); - setupLiveServer(0, isFileStorage(), false, isNetty(), true); + setupLiveServer(0, isFileStorage(), 
HAType.SharedNothingReplication, isNetty(), true); servers[0].getConfiguration().setSecurityEnabled(true); - setupLiveServer(1, isFileStorage(), false, isNetty(), true); + setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); servers[1].getConfiguration().setSecurityEnabled(true); - setupLiveServer(2, isFileStorage(), false, isNetty(), true); + setupLiveServer(2, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true); servers[2].getConfiguration().setSecurityEnabled(true); LiveOnlyPolicyConfiguration haPolicyConfiguration0 = (LiveOnlyPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration(); ScaleDownConfiguration scaleDownConfiguration0 = new ScaleDownConfiguration(); diff --git a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/util/ReplicatedBackupUtils.java b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/util/ReplicatedBackupUtils.java index 1a38a6ac702..e2a4fcf8f47 100644 --- a/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/util/ReplicatedBackupUtils.java +++ b/tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/util/ReplicatedBackupUtils.java @@ -18,8 +18,11 @@ import org.apache.activemq.artemis.api.core.TransportConfiguration; import org.apache.activemq.artemis.core.config.Configuration; +import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration; import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration; +import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration; public final class ReplicatedBackupUtils { @@ -48,4 +51,30 @@ public static void configureReplicationPair(Configuration backupConfig, 
liveConfig.setName(LIVE_NODE_NAME).addConnectorConfiguration(LIVE_NODE_NAME, liveConnector).addConnectorConfiguration(BACKUP_NODE_NAME, backupConnector).setSecurityEnabled(false).addClusterConfiguration(ActiveMQTestBase.basicClusterConnectionConfig(LIVE_NODE_NAME, BACKUP_NODE_NAME)).setHAPolicyConfiguration(new ReplicatedPolicyConfiguration()); } + + + public static void configurePluggableQuorumReplicationPair(Configuration backupConfig, + TransportConfiguration backupConnector, + TransportConfiguration backupAcceptor, + Configuration liveConfig, + TransportConfiguration liveConnector, + TransportConfiguration liveAcceptor, + DistributedPrimitiveManagerConfiguration primaryManagerConfiguration, + DistributedPrimitiveManagerConfiguration backupManagerConfiguration) { + if (backupAcceptor != null) { + backupConfig.clearAcceptorConfigurations().addAcceptorConfiguration(backupAcceptor); + } + + if (liveAcceptor != null) { + liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(liveAcceptor); + } + + backupConfig.addConnectorConfiguration(BACKUP_NODE_NAME, backupConnector).addConnectorConfiguration(LIVE_NODE_NAME, liveConnector).addClusterConfiguration(ActiveMQTestBase.basicClusterConnectionConfig(BACKUP_NODE_NAME, LIVE_NODE_NAME)) + .setHAPolicyConfiguration(ReplicationBackupPolicyConfiguration.withDefault() + .setDistributedManagerConfiguration(backupManagerConfiguration)); + + liveConfig.setName(LIVE_NODE_NAME).addConnectorConfiguration(LIVE_NODE_NAME, liveConnector).addConnectorConfiguration(BACKUP_NODE_NAME, backupConnector).setSecurityEnabled(false).addClusterConfiguration(ActiveMQTestBase.basicClusterConnectionConfig(LIVE_NODE_NAME, BACKUP_NODE_NAME)) + .setHAPolicyConfiguration(ReplicationPrimaryPolicyConfiguration.withDefault() + .setDistributedManagerConfiguration(primaryManagerConfiguration)); + } } diff --git a/tests/smoke-tests/pom.xml b/tests/smoke-tests/pom.xml index ed9c53bcbfb..e7de05f31bc 100644 --- a/tests/smoke-tests/pom.xml +++ 
b/tests/smoke-tests/pom.xml @@ -161,6 +161,38 @@ test test-jar + + + org.apache.curator + curator-recipes + test + + + org.apache.curator + curator-client + test + + + org.apache.curator + curator-framework + test + + + org.apache.curator + curator-test + ${curator.version} + test + + + org.apache.zookeeper + zookeeper + test + + + org.apache.zookeeper + zookeeper-jute + test + @@ -592,6 +624,48 @@ + + + test-compile + create-zk-replication-primary + + create + + + + ${basedir}/target/classes/servers/zkReplicationPrimary + true + admin + admin + ${basedir}/target/zkReplicationPrimary + + + --java-options + -Djava.rmi.server.hostname=localhost + + + + + test-compile + create-zk-replication-backup + + create + + + + ${basedir}/target/classes/servers/zkReplicationBackup + true + admin + admin + ${basedir}/target/zkReplicationBackup + + + --java-options + -Djava.rmi.server.hostname=localhost + + + + test-compile create-replicated-failback-master2 diff --git a/tests/smoke-tests/src/main/resources/servers/zkReplicationBackup/broker.xml b/tests/smoke-tests/src/main/resources/servers/zkReplicationBackup/broker.xml new file mode 100644 index 00000000000..8b97bf9831b --- /dev/null +++ b/tests/smoke-tests/src/main/resources/servers/zkReplicationBackup/broker.xml @@ -0,0 +1,138 @@ + + + + + + backup + + ./data/bindings + + ./data/journal + + ./data/largemessages + + ./data/paging + + + + + + org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager + + + + + true + + + + + + + tcp://localhost:61916 + tcp://localhost:61616 + + + + + + tcp://localhost:61916 + + + admin + + password + + + + artemis + OFF + 1 + + primary + + + + + + + + + + + + + + + + + + + + + + + + + + + DLQ + ExpiryQueue + 0 + + -1 + 10 + PAGE + true + true + true + true + + + + DLQ + ExpiryQueue + 0 + + 10MB + 1MB + + 10 + PAGE + true + true + true + true + + + + +

+ + +
+
+ + + +
+ + + diff --git a/tests/smoke-tests/src/main/resources/servers/zkReplicationBackup/management.xml b/tests/smoke-tests/src/main/resources/servers/zkReplicationBackup/management.xml new file mode 100644 index 00000000000..14bbaf22187 --- /dev/null +++ b/tests/smoke-tests/src/main/resources/servers/zkReplicationBackup/management.xml @@ -0,0 +1,20 @@ + + + + + \ No newline at end of file diff --git a/tests/smoke-tests/src/main/resources/servers/zkReplicationPrimary/broker.xml b/tests/smoke-tests/src/main/resources/servers/zkReplicationPrimary/broker.xml new file mode 100644 index 00000000000..2095d208306 --- /dev/null +++ b/tests/smoke-tests/src/main/resources/servers/zkReplicationPrimary/broker.xml @@ -0,0 +1,137 @@ + + + + + + primary + + ./data/bindings + + ./data/journal + + ./data/largemessages + + ./data/paging + + + + + + org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager + + + + + true + + + + + + + tcp://localhost:61616 + tcp://localhost:61916 + + + + + tcp://localhost:61616 + + + admin + + password + + + + artemis + OFF + 1 + + backup + + + + + + + + + + + + + + + + + + + + + + + + + + + DLQ + ExpiryQueue + 0 + + -1 + 10 + PAGE + true + true + true + true + + + + DLQ + ExpiryQueue + 0 + + 10MB + 1MB + + 10 + PAGE + true + true + true + true + + + + +
+ + +
+
+ + + +
+
+
+
diff --git a/tests/smoke-tests/src/main/resources/servers/zkReplicationPrimary/management.xml b/tests/smoke-tests/src/main/resources/servers/zkReplicationPrimary/management.xml new file mode 100644 index 00000000000..576f1e59954 --- /dev/null +++ b/tests/smoke-tests/src/main/resources/servers/zkReplicationPrimary/management.xml @@ -0,0 +1,20 @@ + + + + + \ No newline at end of file diff --git a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/quorum/PluggableQuorumSinglePairTest.java b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/quorum/PluggableQuorumSinglePairTest.java new file mode 100644 index 00000000000..d5948f57a1c --- /dev/null +++ b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/quorum/PluggableQuorumSinglePairTest.java @@ -0,0 +1,276 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.activemq.artemis.tests.smoke.quorum; + +import javax.management.remote.JMXServiceURL; +import java.net.MalformedURLException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration; +import org.apache.activemq.artemis.api.core.management.ObjectNameBuilder; +import org.apache.activemq.artemis.tests.smoke.common.SmokeTestBase; +import org.apache.activemq.artemis.tests.smoke.utils.Jmx; +import org.apache.activemq.artemis.util.ServerUtil; +import org.apache.activemq.artemis.utils.Wait; +import org.jboss.logging.Logger; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.backupOf; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.containsExactNodeIds; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.decodeNetworkTopologyJson; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.liveOf; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.validateNetworkTopology; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withBackup; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withLive; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withMembers; +import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withNodes; + +@RunWith(Parameterized.class) +public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase { + + private static final Logger LOGGER = Logger.getLogger(PluggableQuorumSinglePairTest.class); + + private static final String JMX_SERVER_HOSTNAME = "localhost"; + private static final int JMX_PORT_PRIMARY = 10099; + private static 
final int JMX_PORT_BACKUP = 10199; + + private static final String PRIMARY_DATA_FOLDER = "ReplicationPrimary";; + private static final String BACKUP_DATA_FOLDER = "ReplicationBackup"; + + private static final int PRIMARY_PORT_OFFSET = 0; + private static final int BACKUP_PORT_OFFSET = PRIMARY_PORT_OFFSET + 100; + + public static class BrokerControl { + + final String name; + final ObjectNameBuilder objectNameBuilder; + final String dataFolder; + final JMXServiceURL jmxServiceURL; + final int portID; + + private BrokerControl(final String name, int jmxPort, String dataFolder, int portID) { + this.portID = portID; + this.dataFolder = dataFolder; + try { + jmxServiceURL = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://" + JMX_SERVER_HOSTNAME + ":" + jmxPort + "/jmxrmi"); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + this.objectNameBuilder = ObjectNameBuilder.create(ActiveMQDefaultConfiguration.getDefaultJmxDomain(), name, true); + this.name = name; + } + + public Process startServer(SmokeTestBase env, int millisTimeout) throws Exception { + return env.startServer(dataFolder, portID, millisTimeout); + } + + public void cleanupData() { + SmokeTestBase.cleanupData(dataFolder); + } + + public Optional isReplicaSync() throws Exception { + return Jmx.isReplicaSync(jmxServiceURL, objectNameBuilder); + } + + public Optional isBackup() throws Exception { + return Jmx.isBackup(jmxServiceURL, objectNameBuilder); + } + + public Optional getNodeID() throws Exception { + return Jmx.getNodeID(jmxServiceURL, objectNameBuilder); + } + + public Optional listNetworkTopology() throws Exception { + return Jmx.listNetworkTopology(jmxServiceURL, objectNameBuilder); + } + } + + @Parameterized.Parameter + public boolean forceKill; + + @Parameterized.Parameters(name = "forceKill={0}") + public static Iterable getParams() { + return Arrays.asList(new Object[][]{{false}, {true}}); + } + + private final BrokerControl primary; + private final BrokerControl backup; 
+ private final Collection brokers; + + public PluggableQuorumSinglePairTest(String brokerFolderPrefix) { + primary = new BrokerControl("primary", JMX_PORT_PRIMARY, brokerFolderPrefix + PRIMARY_DATA_FOLDER, PRIMARY_PORT_OFFSET); + backup = new BrokerControl("backup", JMX_PORT_BACKUP, brokerFolderPrefix + BACKUP_DATA_FOLDER, BACKUP_PORT_OFFSET); + brokers = Collections.unmodifiableList(Arrays.asList(primary, backup)); + } + + protected abstract boolean awaitAsyncSetupCompleted(long timeout, TimeUnit unit) throws InterruptedException; + + protected abstract void stopMajority() throws Exception; + + @Before + public void setup() throws Exception { + brokers.forEach(BrokerControl::cleanupData); + } + + @Override + @After + public void after() throws Exception { + super.after(); + } + + @Test + public void testBackupFailoverAndPrimaryFailback() throws Exception { + final int timeout = (int) TimeUnit.SECONDS.toMillis(30); + LOGGER.info("starting primary"); + Process primaryInstance = primary.startServer(this, timeout); + Assert.assertTrue(awaitAsyncSetupCompleted(timeout, TimeUnit.MILLISECONDS)); + Wait.assertTrue(() -> !primary.isBackup().orElse(true), timeout); + LOGGER.info("started primary"); + LOGGER.info("starting backup"); + Process backupInstance = backup.startServer(this, 0); + Wait.assertTrue(() -> backup.isBackup().orElse(false), timeout); + final String nodeID = primary.getNodeID().get(); + Assert.assertNotNull(nodeID); + LOGGER.infof("NodeID: %s", nodeID); + for (BrokerControl broker : brokers) { + Wait.assertTrue(() -> validateNetworkTopology(broker.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, Objects::nonNull)) + .and(withBackup(nodeID, Objects::nonNull)) + .and(withMembers(1)) + .and(withNodes(2))), timeout); + } + LOGGER.infof("primary topology is: %s", primary.listNetworkTopology().get()); + LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get()); + 
Assert.assertTrue(backup.isReplicaSync().get()); + LOGGER.infof("backup is synchronized with live"); + final String urlBackup = backupOf(nodeID, decodeNetworkTopologyJson(backup.listNetworkTopology().get())); + Assert.assertNotNull(urlBackup); + LOGGER.infof("backup: %s", urlBackup); + final String urlPrimary = liveOf(nodeID, decodeNetworkTopologyJson(primary.listNetworkTopology().get())); + Assert.assertNotNull(urlPrimary); + LOGGER.infof("primary: %s", urlPrimary); + Assert.assertNotEquals(urlPrimary, urlBackup); + LOGGER.info("killing primary"); + ServerUtil.killServer(primaryInstance, forceKill); + LOGGER.info("killed primary"); + Wait.assertTrue(() -> !backup.isBackup().orElse(true), timeout); + Wait.assertTrue(() -> validateNetworkTopology(backup.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, urlBackup::equals)) + .and(withBackup(nodeID, Objects::isNull)) + .and(withMembers(1)) + .and(withNodes(1))), timeout); + LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get()); + Assert.assertEquals(nodeID, backup.getNodeID().get()); + // wait a bit before restarting primary + LOGGER.info("waiting before starting primary"); + TimeUnit.SECONDS.sleep(4); + LOGGER.info("starting primary"); + primary.startServer(this, 0); + LOGGER.info("started primary"); + Wait.assertTrue(() -> backup.isBackup().orElse(false), timeout); + Assert.assertTrue(!primary.isBackup().get()); + for (BrokerControl broker : brokers) { + Wait.assertTrue(() -> validateNetworkTopology(broker.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, urlPrimary::equals)) + .and(withBackup(nodeID, urlBackup::equals)) + .and(withMembers(1)) + .and(withNodes(2))), timeout); + } + LOGGER.infof("primary topology is: %s", primary.listNetworkTopology().get()); + LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get()); + Assert.assertTrue(backup.isReplicaSync().get()); + LOGGER.infof("backup is 
synchronized with live"); + Assert.assertEquals(nodeID, primary.getNodeID().get()); + } + + @Test + public void testLivePrimarySuicideOnLostQuorum() throws Exception { + final int timeout = (int) TimeUnit.SECONDS.toMillis(30); + Process primaryInstance = primary.startServer(this, timeout); + Assert.assertTrue(awaitAsyncSetupCompleted(timeout, TimeUnit.MILLISECONDS)); + Wait.assertTrue(() -> !primary.isBackup().orElse(true), timeout); + final String nodeID = primary.getNodeID().get(); + Wait.assertTrue(() -> validateNetworkTopology(primary.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, Objects::nonNull)) + .and(withBackup(nodeID, Objects::isNull)) + .and(withMembers(1)) + .and(withNodes(1))), timeout); + final String urlLive = liveOf(nodeID, decodeNetworkTopologyJson(primary.listNetworkTopology().get())); + Assert.assertTrue(validateNetworkTopology(primary.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, urlLive::equals)) + .and(withBackup(nodeID, Objects::isNull)) + .and(withMembers(1)) + .and(withNodes(1)))); + stopMajority(); + Wait.waitFor(()-> !primaryInstance.isAlive(), timeout); + } + + @Test + public void testLiveBackupSuicideOnLostQuorum() throws Exception { + final int timeout = (int) TimeUnit.SECONDS.toMillis(30); + Process primaryInstance = primary.startServer(this, timeout); + Assert.assertTrue(awaitAsyncSetupCompleted(timeout, TimeUnit.MILLISECONDS)); + Wait.assertTrue(() -> !primary.isBackup().orElse(true), timeout); + Process backupInstance = backup.startServer(this, 0); + Wait.assertTrue(() -> backup.isBackup().orElse(false), timeout); + final String nodeID = primary.getNodeID().get(); + Assert.assertNotNull(nodeID); + for (BrokerControl broker : brokers) { + Wait.assertTrue(() -> validateNetworkTopology(broker.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, Objects::nonNull)) + .and(withBackup(nodeID, Objects::nonNull)) + 
.and(withMembers(1)) + .and(withNodes(2))), timeout); + } + Assert.assertTrue(backup.isReplicaSync().get()); + final String urlBackup = backupOf(nodeID, decodeNetworkTopologyJson(backup.listNetworkTopology().get())); + Assert.assertNotNull(urlBackup); + final String urlPrimary = liveOf(nodeID, decodeNetworkTopologyJson(primary.listNetworkTopology().get())); + Assert.assertNotNull(urlPrimary); + Assert.assertNotEquals(urlPrimary, urlBackup); + ServerUtil.killServer(primaryInstance, forceKill); + Wait.assertTrue(() -> !backup.isBackup().orElse(true), timeout); + Wait.assertTrue(() -> validateNetworkTopology(backup.listNetworkTopology().orElse(""), + containsExactNodeIds(nodeID) + .and(withLive(nodeID, urlBackup::equals)) + .and(withBackup(nodeID, Objects::isNull)) + .and(withMembers(1)) + .and(withNodes(1))), timeout); + Assert.assertEquals(nodeID, backup.getNodeID().get()); + stopMajority(); + Wait.waitFor(()-> !backupInstance.isAlive(), timeout); + } + +} + diff --git a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/quorum/ZookeeperPluggableQuorumSinglePairTest.java b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/quorum/ZookeeperPluggableQuorumSinglePairTest.java new file mode 100644 index 00000000000..df2123f7d6c --- /dev/null +++ b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/quorum/ZookeeperPluggableQuorumSinglePairTest.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.activemq.artemis.tests.smoke.quorum; + +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.curator.test.InstanceSpec; +import org.apache.curator.test.TestingCluster; +import org.apache.curator.test.TestingZooKeeperServer; +import org.jboss.logging.Logger; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +public class ZookeeperPluggableQuorumSinglePairTest extends PluggableQuorumSinglePairTest { + + private static final Logger LOGGER = Logger.getLogger(ZookeeperPluggableQuorumSinglePairTest.class); + private static final int BASE_SERVER_PORT = 6666; + // Beware: the server tick must be small enough that to let the session to be correctly expired + private static final int SERVER_TICK_MS = 100; + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + private TestingCluster testingServer; + private InstanceSpec[] clusterSpecs; + private int nodes; + + @Before + @Override + public void setup() throws Exception { + super.setup(); + nodes = 3; + clusterSpecs = new InstanceSpec[nodes]; + for (int i = 0; i < nodes; i++) { + clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1); + } + testingServer = new TestingCluster(clusterSpecs); + testingServer.start(); + Assert.assertEquals("127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668", testingServer.getConnectString()); + LOGGER.infof("Cluster of %d nodes on: %s", 3, testingServer.getConnectString()); + } + + @Override + @After + public void after() throws Exception { + try { + 
super.after(); + } finally { + testingServer.close(); + } + } + + public ZookeeperPluggableQuorumSinglePairTest() { + super("zk"); + } + + @Override + protected boolean awaitAsyncSetupCompleted(long timeout, TimeUnit unit) { + return true; + } + + @Override + protected void stopMajority() throws Exception { + List followers = testingServer.getServers(); + final int quorum = (nodes / 2) + 1; + for (int i = 0; i < quorum; i++) { + followers.get(i).stop(); + } + } +} diff --git a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/utils/Jmx.java b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/utils/Jmx.java new file mode 100644 index 00000000000..079e2f75b07 --- /dev/null +++ b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/utils/Jmx.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.activemq.artemis.tests.smoke.utils; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.management.MBeanServerInvocationHandler; +import javax.management.ObjectName; +import javax.management.remote.JMXConnector; +import javax.management.remote.JMXConnectorFactory; +import javax.management.remote.JMXServiceURL; +import java.io.StringReader; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Stream; + +import org.apache.activemq.artemis.api.core.Pair; +import org.apache.activemq.artemis.api.core.management.ActiveMQServerControl; +import org.apache.activemq.artemis.api.core.management.ObjectNameBuilder; +import org.jboss.logging.Logger; + +public class Jmx { + + private static final Logger LOGGER = Logger.getLogger(Jmx.class); + + @FunctionalInterface + public interface ThrowableFunction { + + R apply(T t) throws Throwable; + } + + private static Optional queryControl(JMXServiceURL serviceURI, + ObjectName objectName, + ThrowableFunction queryControl, + Class controlClass, + Function onThrowable) { + try { + try (JMXConnector jmx = JMXConnectorFactory.connect(serviceURI)) { + final C control = MBeanServerInvocationHandler.newProxyInstance(jmx.getMBeanServerConnection(), objectName, controlClass, false); + return Optional.ofNullable(queryControl.apply(control)); + } + } catch (Throwable t) { + return Optional.ofNullable(onThrowable.apply(t)); + } + } + + public static Optional isReplicaSync(JMXServiceURL serviceURI, ObjectNameBuilder builder) throws Exception { + return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::isReplicaSync, ActiveMQServerControl.class, throwable -> null); + } + + public static Optional isBackup(JMXServiceURL serviceURI, 
ObjectNameBuilder builder) throws Exception { + return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::isBackup, ActiveMQServerControl.class, throwable -> null); + } + + public static Optional getNodeID(JMXServiceURL serviceURI, ObjectNameBuilder builder) throws Exception { + return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::getNodeID, ActiveMQServerControl.class, throwable -> null); + } + + public static Optional listNetworkTopology(JMXServiceURL serviceURI, + ObjectNameBuilder builder) throws Exception { + return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::listNetworkTopology, ActiveMQServerControl.class, throwable -> null); + } + + public static Map> decodeNetworkTopologyJson(String networkTopologyJson) { + if (networkTopologyJson == null || networkTopologyJson.isEmpty()) { + return Collections.emptyMap(); + } + try (JsonReader jsonReader = Json.createReader(new StringReader(networkTopologyJson))) { + final JsonArray nodeIDs = jsonReader.readArray(); + final int nodeCount = nodeIDs.size(); + Map> networkTopology = new HashMap<>(nodeCount); + for (int i = 0; i < nodeCount; i++) { + final JsonObject nodePair = nodeIDs.getJsonObject(i); + try { + final String nodeID = nodePair.getString("nodeID"); + final String live = nodePair.getString("live"); + final String backup = nodePair.getString("backup", null); + networkTopology.put(nodeID, new Pair<>(live, backup)); + } catch (Exception e) { + LOGGER.warnf(e, "Error on %s", nodePair); + } + } + return networkTopology; + } + } + + private static long countMembers(Map> networkTopology) { + final long count = networkTopology.values().stream() + .map(Pair::getA).filter(live -> live != null && !live.isEmpty()) + .count(); + return count; + } + + private static long countNodes(Map> networkTopology) { + final long count = networkTopology.values().stream() + .flatMap(pair -> Stream.of(pair.getA(), 
pair.getB())) + .filter(liveOrBackup -> liveOrBackup != null && !liveOrBackup.isEmpty()) + .count(); + return count; + } + + public static boolean validateNetworkTopology(String networkTopologyJson, + Predicate>> checkTopology) { + final Map> networkTopology = decodeNetworkTopologyJson(networkTopologyJson); + return checkTopology.test(networkTopology); + } + + public static String backupOf(String nodeID, Map> networkTopology) { + return networkTopology.get(nodeID).getB(); + } + + public static String liveOf(String nodeID, Map> networkTopology) { + return networkTopology.get(nodeID).getA(); + } + + public static Predicate>> containsExactNodeIds(String... nodeID) { + Objects.requireNonNull(nodeID); + return topology -> topology.size() == nodeID.length && Stream.of(nodeID).allMatch(topology::containsKey); + } + + public static Predicate>> withMembers(int count) { + return topology -> countMembers(topology) == count; + } + + public static Predicate>> withNodes(int count) { + return topology -> countNodes(topology) == count; + } + + public static Predicate>> withBackup(String nodeId, Predicate compare) { + return topology -> compare.test(backupOf(nodeId, topology)); + } + + public static Predicate>> withLive(String nodeId, Predicate compare) { + return topology -> compare.test(liveOf(nodeId, topology)); + } +}