From f8e8084ae1e7190bb8cfa88f5f7a1b8add92eaff Mon Sep 17 00:00:00 2001 From: isa Date: Mon, 23 Mar 2020 13:30:32 +0430 Subject: [PATCH 01/38] HDDS-3249: renew ContainerCache.INSTANCE in order to test it in a fresh state --- .../container/common/utils/ContainerCache.java | 18 ++++++++++++++++-- .../common/{ => utils}/TestContainerCache.java | 6 ++---- 2 files changed, 18 insertions(+), 6 deletions(-) rename hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/{ => utils}/TestContainerCache.java (94%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 4ddb4e48792..9008cb72e4d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.container.common.utils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.collections.MapIterator; import org.apache.commons.collections.map.LRUMap; @@ -58,9 +59,22 @@ private ContainerCache(int maxSize, float loadFactor, boolean * @return A instance of {@link ContainerCache}. */ public synchronized static ContainerCache getInstance(Configuration conf) { - if (cache == null) { + return getInstance(conf, false); + } + + /** + * Return a singleton instance of {@link ContainerCache} + * that holds the DB handlers. and recreate it if renew is true + * This method is useful for testing because we need a fresh new instance for each test + * + * @param conf - Configuration. + * @return A instance of {@link ContainerCache}. + */ + @VisibleForTesting + synchronized static ContainerCache getInstance(Configuration conf, boolean renew) { + if (renew || cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, - OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); + OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); cache = new ContainerCache(cacheSize, LOAD_FACTOR, true); } return cache; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestContainerCache.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java similarity index 94% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestContainerCache.java rename to hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java index b6584d17017..9c20c0d454b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestContainerCache.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java @@ -16,13 +16,11 @@ * limitations under the License. 
*/ -package org.apache.hadoop.ozone.container.common; +package org.apache.hadoop.ozone.container.common.utils; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; -import org.apache.hadoop.ozone.container.common.utils.ReferenceCountedDB; import org.apache.hadoop.hdds.utils.MetadataStore; import org.apache.hadoop.hdds.utils.MetadataStoreBuilder; import org.junit.Assert; @@ -62,7 +60,7 @@ public void testContainerCacheEviction() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2); - ContainerCache cache = ContainerCache.getInstance(conf); + ContainerCache cache = ContainerCache.getInstance(conf, true); File containerDir1 = new File(root, "cont1"); File containerDir2 = new File(root, "cont2"); File containerDir3 = new File(root, "cont3"); From 49599c7abf497d7252302222a01a6975b48edae7 Mon Sep 17 00:00:00 2001 From: isa Date: Mon, 23 Mar 2020 13:40:44 +0430 Subject: [PATCH 02/38] correct style --- .../hadoop/ozone/container/common/utils/ContainerCache.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 9008cb72e4d..73b22efd9e1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -65,13 +65,15 @@ public synchronized static ContainerCache getInstance(Configuration conf) { /** * Return a singleton instance of {@link ContainerCache} * that holds the DB handlers. and recreate it if renew is true - * This method is useful for testing because we need a fresh new instance for each test + * This method is useful for testing because we need a fresh + * new instance for each test * * @param conf - Configuration. * @return A instance of {@link ContainerCache}. */ @VisibleForTesting - synchronized static ContainerCache getInstance(Configuration conf, boolean renew) { + synchronized static ContainerCache getInstance(Configuration conf, + boolean renew) { if (renew || cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); From c64d86f57e38ab19be58b3bc51f8bd1724b65746 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 23 Mar 2020 04:11:07 -0700 Subject: [PATCH 03/38] HDDS-3234. Fix retry interval default in Ozone client. 
 (#698)

---
 .../src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 +-
 hadoop-hdds/common/src/main/resources/ozone-default.xml         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index c88169893f3..fefcfca08aa 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -117,7 +117,7 @@ public final class ScmConfigKeys {
       "dfs.ratis.client.request.retry.interval";
   public static final TimeDuration
       DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT =
-      TimeDuration.valueOf(1000, TimeUnit.MILLISECONDS);
+      TimeDuration.valueOf(15000, TimeUnit.MILLISECONDS);
   public static final String DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY =
       "dfs.ratis.server.retry-cache.timeout.duration";
   public static final TimeDuration
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 69f62ad9374..ad400d766af 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -252,7 +252,7 @@
   <property>
     <name>dfs.ratis.client.request.retry.interval</name>
-    <value>1000ms</value>
+    <value>15000ms</value>
     <tag>OZONE, RATIS, MANAGEMENT</tag>
     <description>Interval between successive retries for a ratis client
      request.
    </description>
From f2e259b8f7e958819df35ed1df5295c47bf7e392 Mon Sep 17 00:00:00 2001
From: Sadanand Shenoy
Date: Mon, 23 Mar 2020 16:54:47 +0530
Subject: [PATCH 04/38] HDDS-3235. Change the default max retry count for
 Ozone client (#699)

---
 .../src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java | 2 +-
 hadoop-hdds/common/src/main/resources/ozone-default.xml        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index 2fa9ff91f6f..c71e0d66e52 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -143,7 +143,7 @@ public final class OzoneConfigKeys {
   public static final String OZONE_CLIENT_MAX_RETRIES =
       "ozone.client.max.retries";
-  public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 100;
+  public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 5;
   public static final String OZONE_CLIENT_RETRY_INTERVAL =
       "ozone.client.retry.interval";
   public static final TimeDuration OZONE_CLIENT_RETRY_INTERVAL_DEFAULT =
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index ad400d766af..ca107c0e869 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -412,7 +412,7 @@
   <property>
     <name>ozone.client.max.retries</name>
-    <value>100</value>
+    <value>5</value>
     <tag>OZONE, CLIENT</tag>
     <description>Maximum number of retries by Ozone Client on encountering
      exception while writing a key.
    </description>
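For reference, a minimal sketch of how a client application could override the two retry
defaults tuned in PATCH 03 and PATCH 04 above. The configuration keys and the
OzoneConfiguration/OzoneConfigKeys classes come from the patches themselves; the class
name and the chosen values are illustrative assumptions only, not part of the patch series.

  import java.util.concurrent.TimeUnit;

  import org.apache.hadoop.hdds.conf.OzoneConfiguration;
  import org.apache.hadoop.ozone.OzoneConfigKeys;

  // Hypothetical helper, not part of the patches above.
  public final class RetryDefaultsSketch {

    private RetryDefaultsSketch() {
    }

    public static OzoneConfiguration clientConf() {
      OzoneConfiguration conf = new OzoneConfiguration();
      // Allow more write retries than the new default of 5
      // (ozone.client.max.retries, PATCH 04).
      conf.setInt(OzoneConfigKeys.OZONE_CLIENT_MAX_RETRIES, 10);
      // Retry ratis client requests every 5 seconds instead of the new
      // 15000ms default (dfs.ratis.client.request.retry.interval, PATCH 03).
      conf.setTimeDuration("dfs.ratis.client.request.retry.interval",
          5, TimeUnit.SECONDS);
      return conf;
    }
  }

Taken together, the two patches trade tight, aggressive retry loops for fewer, more widely
spaced retries; a client that wants something closer to the old behaviour has to opt back
in explicitly, as sketched above.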
From db89958e4c926d88e4083b208cc59f9026c7eee0 Mon Sep 17 00:00:00 2001
From: isa
Date: Mon, 23 Mar 2020 23:58:50 +0430
Subject: [PATCH 05/38] Clean up ContainerCache after TestBlockDeletingService
 and TestContainerPersistence; also set defaultCache to null in
 BlockUtils.shutdownCache

---
 .../container/common/utils/ContainerCache.java     | 22 +++++-----------------
 .../container/keyvalue/helpers/BlockUtils.java     |  1 +
 .../container/common/TestBlockDeletingService.java |  2 ++
 .../common/impl/TestContainerPersistence.java      |  7 ++++++
 .../container/common/utils/TestContainerCache.java |  2 +-
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java
index 73b22efd9e1..5950e0b4824 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java
@@ -59,22 +59,7 @@ private ContainerCache(int maxSize, float loadFactor, boolean
    * @return A instance of {@link ContainerCache}.
    */
   public synchronized static ContainerCache getInstance(Configuration conf) {
-    return getInstance(conf, false);
-  }
-
-  /**
-   * Return a singleton instance of {@link ContainerCache}
-   * that holds the DB handlers. and recreate it if renew is true
-   * This method is useful for testing because we need a fresh
-   * new instance for each test
-   *
-   * @param conf - Configuration.
-   * @return A instance of {@link ContainerCache}.
-   */
-  @VisibleForTesting
-  synchronized static ContainerCache getInstance(Configuration conf,
-      boolean renew) {
-    if (renew || cache == null) {
+    if (cache == null) {
       int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE,
           OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT);
       cache = new ContainerCache(cacheSize, LOAD_FACTOR, true);
@@ -82,6 +67,10 @@ synchronized static ContainerCache getInstance(Configuration conf,
     return cache;
   }
 
+  public static void clearDefaultCache() {
+    cache = null;
+  }
+
   /**
    * Closes all the db instances and resets the cache.
*/ @@ -98,6 +87,7 @@ public void shutdownCache() { } // reset the cache cache.clear(); + cache = null; } finally { lock.unlock(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java index 35e0b0c15a7..d5e519db3d3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java @@ -93,6 +93,7 @@ public static void removeDB(KeyValueContainerData container, Configuration */ public static void shutdownCache(ContainerCache cache) { cache.shutdownCache(); + ContainerCache.clearDefaultCache(); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index c1f4d9f6dd3..415c49f8082 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -35,6 +35,7 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Handler; +import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; @@ -96,6 +97,7 @@ public static void init() throws IOException { @AfterClass public static void cleanup() throws IOException { FileUtils.deleteDirectory(testRoot); + BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java index 1b0f70f4e13..77b055c495a 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java @@ -20,6 +20,7 @@ import com.google.common.collect.Maps; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -38,6 +39,7 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; +import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import 
org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; @@ -150,6 +152,11 @@ public void cleanupDir() throws IOException { } } + @After + public void cleanupBlockUtil() { + BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); + } + private long getTestContainerID() { return ContainerTestHelper.getTestContainerID(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java index 9c20c0d454b..1d98be57138 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java @@ -60,7 +60,7 @@ public void testContainerCacheEviction() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2); - ContainerCache cache = ContainerCache.getInstance(conf, true); + ContainerCache cache = ContainerCache.getInstance(conf); File containerDir1 = new File(root, "cont1"); File containerDir2 = new File(root, "cont2"); File containerDir3 = new File(root, "cont3"); From f259a649b225180bf9c9cf06115b9de705500ed6 Mon Sep 17 00:00:00 2001 From: isa Date: Tue, 24 Mar 2020 00:40:33 +0430 Subject: [PATCH 06/38] remove unused import --- .../hadoop/ozone/container/common/utils/ContainerCache.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 5950e0b4824..242d296daed 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.container.common.utils; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.collections.MapIterator; import org.apache.commons.collections.map.LRUMap; From 081b9334bfb5f994d3be4a1a0735be3e8894131a Mon Sep 17 00:00:00 2001 From: isa Date: Tue, 24 Mar 2020 00:53:44 +0430 Subject: [PATCH 07/38] remove setting default instance to null in shutdown method --- .../hadoop/ozone/container/common/utils/ContainerCache.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 242d296daed..60a72a2f568 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -86,7 +86,6 @@ public void shutdownCache() { } // reset the cache cache.clear(); - cache = null; } finally { lock.unlock(); } From ad7cd6a0815d346227511847c3e5bb36176e11b3 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Singh Date: Tue, 24 Mar 2020 14:38:58 +0530 Subject: [PATCH 08/38] HDDS-2878. 
Refactor MiniOzoneLoadGenerator to add more load generators to chaos testing. (#438) --- .../hadoop/ozone/MiniOzoneChaosCluster.java | 6 + .../hadoop/ozone/MiniOzoneLoadGenerator.java | 265 ++++-------------- .../ozone/TestMiniChaosOzoneCluster.java | 31 +- .../loadgenerators/AgedLoadGenerator.java | 98 +++++++ .../ozone/loadgenerators/DataBuffer.java | 53 ++++ .../FilesystemLoadGenerator.java | 69 +++++ .../ozone/loadgenerators/LoadExecutors.java | 101 +++++++ .../ozone/loadgenerators/LoadGenerator.java | 37 +++ .../loadgenerators/RandomLoadGenerator.java | 68 +++++ .../apache/hadoop/ozone/utils/LoadBucket.java | 23 +- .../src/test/resources/log4j.properties | 1 + 11 files changed, 509 insertions(+), 243 deletions(-) create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java index 67923cc1571..22cb3b4dc14 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java @@ -242,6 +242,12 @@ protected void initializeConfiguration() throws IOException { 1, TimeUnit.SECONDS); conf.setTimeDuration(HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL, 1, TimeUnit.SECONDS); + conf.setInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_KEY, + 4); + conf.setInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_KEY, + 2); conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2); conf.setInt("hdds.scm.replication.thread.interval", 10 * 1000); conf.setInt("hdds.scm.replication.event.timeout", 20 * 1000); diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java index 521f172ae47..d1256b1670b 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java @@ -17,26 +17,23 @@ */ package org.apache.hadoop.ozone; -import org.apache.commons.lang3.RandomUtils; -import org.apache.hadoop.conf.StorageUnit; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.client.OzoneVolume; +import 
org.apache.hadoop.ozone.loadgenerators.FilesystemLoadGenerator; +import org.apache.hadoop.ozone.loadgenerators.AgedLoadGenerator; +import org.apache.hadoop.ozone.loadgenerators.RandomLoadGenerator; +import org.apache.hadoop.ozone.loadgenerators.DataBuffer; +import org.apache.hadoop.ozone.loadgenerators.LoadExecutors; +import org.apache.hadoop.ozone.loadgenerators.LoadGenerator; import org.apache.hadoop.ozone.utils.LoadBucket; -import org.apache.hadoop.ozone.utils.TestProbability; -import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; -import java.util.Optional; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.CompletableFuture; +import java.util.function.Function; import java.util.concurrent.TimeUnit; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; /** * A Simple Load generator for testing. @@ -46,213 +43,59 @@ public class MiniOzoneLoadGenerator { private static final Logger LOG = LoggerFactory.getLogger(MiniOzoneLoadGenerator.class); - private static String keyNameDelimiter = "_"; - - private ThreadPoolExecutor writeExecutor; - private int numThreads; - // number of buffer to be allocated, each is allocated with length which - // is multiple of 2, each buffer is populated with random data. - private int numBuffers; - private List buffers; - - private AtomicBoolean isIOThreadRunning; - - private final List ozoneBuckets; - - private final AtomicInteger agedFileWrittenIndex; - private final ExecutorService agedFileExecutor; - private final LoadBucket agedLoadBucket; - private final TestProbability agedWriteProbability; - - private final ThreadPoolExecutor fsExecutor; - private final LoadBucket fsBucket; - - MiniOzoneLoadGenerator(List bucket, - LoadBucket agedLoadBucket, LoadBucket fsBucket, - int numThreads, int numBuffers) { - this.ozoneBuckets = bucket; - this.numThreads = numThreads; - this.numBuffers = numBuffers; - this.writeExecutor = createExecutor(); - - this.agedFileWrittenIndex = new AtomicInteger(0); - this.agedFileExecutor = Executors.newSingleThreadExecutor(); - this.agedLoadBucket = agedLoadBucket; - this.agedWriteProbability = TestProbability.valueOf(10); - - this.fsExecutor = createExecutor(); - this.fsBucket = fsBucket; - - this.isIOThreadRunning = new AtomicBoolean(false); - - // allocate buffers and populate random data. - buffers = new ArrayList<>(); - for (int i = 0; i < numBuffers; i++) { - int size = (int) StorageUnit.KB.toBytes(1 << i); - ByteBuffer buffer = ByteBuffer.allocate(size); - buffer.put(RandomUtils.nextBytes(size)); - buffers.add(buffer); - } - } - - private ThreadPoolExecutor createExecutor() { - ThreadPoolExecutor executor = new ThreadPoolExecutor(numThreads, numThreads, - 100, TimeUnit.SECONDS, new ArrayBlockingQueue<>(1024), - new ThreadPoolExecutor.CallerRunsPolicy()); - executor.prestartAllCoreThreads(); - return executor; - - } - - // Start IO load on an Ozone bucket. 
- private void load(long runTimeMillis) { - long threadID = Thread.currentThread().getId(); - LOG.info("Started Mixed IO Thread:{}.", threadID); - String threadName = Thread.currentThread().getName(); - long startTime = Time.monotonicNow(); - - while (isIOThreadRunning.get() && - (Time.monotonicNow() < startTime + runTimeMillis)) { - LoadBucket bucket = - ozoneBuckets.get((int) (Math.random() * ozoneBuckets.size())); - try { - int index = RandomUtils.nextInt(); - ByteBuffer buffer = getBuffer(index); - String keyName = getKeyName(index, threadName); - bucket.writeKey(buffer, keyName); - - bucket.readKey(buffer, keyName); - - bucket.deleteKey(keyName); - } catch (Exception e) { - LOG.error("LOADGEN: Exiting due to exception", e); - break; - } + private final List loadExecutors; + + private final OzoneVolume volume; + private final OzoneConfiguration conf; + + MiniOzoneLoadGenerator(OzoneVolume volume, int numClients, int numThreads, + int numBuffers, OzoneConfiguration conf) + throws Exception { + DataBuffer buffer = new DataBuffer(numBuffers); + loadExecutors = new ArrayList<>(); + this.volume = volume; + this.conf = conf; + + // Random Load + String mixBucketName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); + volume.createBucket(mixBucketName); + List ozoneBuckets = new ArrayList<>(numClients); + for (int i = 0; i < numClients; i++) { + ozoneBuckets.add(new LoadBucket(volume.getBucket(mixBucketName), + conf)); } - // This will terminate other threads too. - isIOThreadRunning.set(false); - LOG.info("Terminating IO thread:{}.", threadID); - } - - private Optional randomKeyToRead() { - int currentIndex = agedFileWrittenIndex.get(); - return currentIndex != 0 - ? Optional.of(RandomUtils.nextInt(0, currentIndex)) - : Optional.empty(); - } - - private void startAgedLoad(long runTimeMillis) { - long threadID = Thread.currentThread().getId(); - LOG.info("AGED LOADGEN: Started Aged IO Thread:{}.", threadID); - String threadName = Thread.currentThread().getName(); - long startTime = Time.monotonicNow(); - - while (isIOThreadRunning.get() && - (Time.monotonicNow() < startTime + runTimeMillis)) { + RandomLoadGenerator loadGenerator = + new RandomLoadGenerator(buffer, ozoneBuckets); + loadExecutors.add(new LoadExecutors(numThreads, loadGenerator)); - String keyName = null; - try { - if (agedWriteProbability.isTrue()) { - int index = agedFileWrittenIndex.getAndIncrement(); - ByteBuffer buffer = getBuffer(index); - keyName = getKeyName(index, threadName); + // Aged Load + addLoads(numThreads, + bucket -> new AgedLoadGenerator(buffer, bucket)); - agedLoadBucket.writeKey(buffer, keyName); - } else { - Optional index = randomKeyToRead(); - if (index.isPresent()) { - ByteBuffer buffer = getBuffer(index.get()); - keyName = getKeyName(index.get(), threadName); - agedLoadBucket.readKey(buffer, keyName); - } - } - } catch (Throwable t) { - LOG.error("AGED LOADGEN: {} Exiting due to exception", keyName, t); - break; - } - } - // This will terminate other threads too. - isIOThreadRunning.set(false); - LOG.info("Terminating IO thread:{}.", threadID); + //Filesystem Load + addLoads(numThreads, + bucket -> new FilesystemLoadGenerator(buffer, bucket)); } - // Start IO load on an Ozone bucket. 
- private void startFsLoad(long runTimeMillis) { - long threadID = Thread.currentThread().getId(); - LOG.info("Started Filesystem IO Thread:{}.", threadID); - String threadName = Thread.currentThread().getName(); - long startTime = Time.monotonicNow(); - - while (isIOThreadRunning.get() && - (Time.monotonicNow() < startTime + runTimeMillis)) { - try { - int index = RandomUtils.nextInt(); - ByteBuffer buffer = getBuffer(index); - String keyName = getKeyName(index, threadName); - fsBucket.writeKey(true, buffer, keyName); - - fsBucket.readKey(true, buffer, keyName); - - fsBucket.deleteKey(true, keyName); - } catch (Exception e) { - LOG.error("LOADGEN: Exiting due to exception", e); - break; - } - } - // This will terminate other threads too. - isIOThreadRunning.set(false); - LOG.info("Terminating IO thread:{}.", threadID); + private void addLoads(int numThreads, + Function function) + throws Exception { + String bucketName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); + volume.createBucket(bucketName); + LoadBucket bucket = new LoadBucket(volume.getBucket(bucketName), conf); + LoadGenerator loadGenerator = function.apply(bucket); + loadExecutors.add(new LoadExecutors(numThreads, loadGenerator)); } void startIO(long time, TimeUnit timeUnit) { - List> writeFutures = new ArrayList<>(); - LOG.info("Starting MiniOzoneLoadGenerator for time {}:{} with {} buffers " + - "and {} threads", time, timeUnit, numBuffers, numThreads); - if (isIOThreadRunning.compareAndSet(false, true)) { - // Start the IO thread - for (int i = 0; i < numThreads; i++) { - writeFutures.add( - CompletableFuture.runAsync(() -> load(timeUnit.toMillis(time)), - writeExecutor)); - } - - for (int i = 0; i < numThreads; i++) { - writeFutures.add( - CompletableFuture.runAsync(() -> startAgedLoad( - timeUnit.toMillis(time)), agedFileExecutor)); - } - - for (int i = 0; i < numThreads; i++) { - writeFutures.add( - CompletableFuture.runAsync(() -> startFsLoad( - timeUnit.toMillis(time)), fsExecutor)); - } - - // Wait for IO to complete - for (CompletableFuture f : writeFutures) { - try { - f.get(); - } catch (Throwable t) { - LOG.error("startIO failed with exception", t); - } - } - } - } - - public void shutdownLoadGenerator() { - try { - writeExecutor.shutdown(); - writeExecutor.awaitTermination(1, TimeUnit.DAYS); - } catch (Exception e) { - LOG.error("error while closing ", e); - } - } - - private ByteBuffer getBuffer(int keyIndex) { - return buffers.get(keyIndex % numBuffers); + LOG.info("Starting MiniOzoneLoadGenerator for time {}:{}", time, timeUnit); + long runTime = timeUnit.toMillis(time); + // start and wait for executors to finish + loadExecutors.forEach(le -> le.startLoad(runTime)); + loadExecutors.forEach(LoadExecutors::waitForCompletion); } - private String getKeyName(int keyIndex, String threadName) { - return threadName + keyNameDelimiter + keyIndex; + void shutdownLoadGenerator() { + loadExecutors.forEach(LoadExecutors::shutdown); } } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java index 5d20a15f3bf..0fa9a149a85 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java @@ -20,7 +20,6 @@ import 
org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.client.ObjectStore; -import org.apache.hadoop.ozone.utils.LoadBucket; import org.apache.hadoop.ozone.client.OzoneVolume; import org.junit.BeforeClass; import org.junit.AfterClass; @@ -30,8 +29,6 @@ import picocli.CommandLine.Option; import picocli.CommandLine; -import java.util.ArrayList; -import java.util.List; import java.util.concurrent.TimeUnit; /** @@ -77,35 +74,13 @@ public static void init() throws Exception { cluster.waitForClusterToBeReady(); String volumeName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); - String bucketName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); ObjectStore store = cluster.getRpcClient().getObjectStore(); store.createVolume(volumeName); OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - List ozoneBuckets = new ArrayList<>(numClients); - for (int i = 0; i < numClients; i++) { - ozoneBuckets.add(new LoadBucket(volume.getBucket(bucketName), - configuration)); - } - - String agedBucketName = - RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - volume.createBucket(agedBucketName); - LoadBucket agedLoadBucket = - new LoadBucket(volume.getBucket(agedBucketName), configuration); - - String fsBucketName = - RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - volume.createBucket(fsBucketName); - LoadBucket fsBucket = - new LoadBucket(volume.getBucket(fsBucketName), configuration); - loadGenerator = - new MiniOzoneLoadGenerator(ozoneBuckets, agedLoadBucket, fsBucket, - numThreads, numBuffers); + new MiniOzoneLoadGenerator(volume, numClients, numThreads, + numBuffers, configuration); } /** @@ -140,6 +115,6 @@ public static void main(String... args) { @Test public void testReadWriteWithChaosCluster() { cluster.startChaos(5, 10, TimeUnit.SECONDS); - loadGenerator.startIO(1, TimeUnit.MINUTES); + loadGenerator.startIO(120, TimeUnit.SECONDS); } } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java new file mode 100644 index 00000000000..766343d6899 --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.ozone.loadgenerators;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.ozone.utils.LoadBucket;
+import org.apache.hadoop.ozone.utils.TestProbability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.Optional;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Aged Load Generator for Ozone.
+ *
+ * This load generator reads and writes keys to an Ozone bucket.
+ *
+ * The default write-to-read ratio is 10:90.
+ */
+public class AgedLoadGenerator extends LoadGenerator {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(AgedLoadGenerator.class);
+  private static String agedSuffix = "aged";
+
+  private final AtomicInteger agedFileWrittenIndex;
+  private final AtomicInteger agedFileAllocationIndex;
+  private final LoadBucket agedLoadBucket;
+  private final TestProbability agedWriteProbability;
+  private final DataBuffer dataBuffer;
+
+  public AgedLoadGenerator(DataBuffer data, LoadBucket agedLoadBucket) {
+    this.dataBuffer = data;
+    this.agedFileWrittenIndex = new AtomicInteger(0);
+    this.agedFileAllocationIndex = new AtomicInteger(0);
+    this.agedLoadBucket = agedLoadBucket;
+    this.agedWriteProbability = TestProbability.valueOf(10);
+  }
+
+  @Override
+  public String generateLoad() throws Exception {
+    if (agedWriteProbability.isTrue()) {
+      synchronized (agedFileAllocationIndex) {
+        int index = agedFileAllocationIndex.getAndIncrement();
+        ByteBuffer buffer = dataBuffer.getBuffer(index);
+        String keyName = getKeyName(index, agedSuffix);
+        agedLoadBucket.writeKey(buffer, keyName);
+        agedFileWrittenIndex.getAndIncrement();
+        return keyName;
+      }
+    } else {
+      Optional<Integer> index = randomKeyToRead();
+      if (index.isPresent()) {
+        ByteBuffer buffer = dataBuffer.getBuffer(index.get());
+        String keyName = getKeyName(index.get(), agedSuffix);
+        agedLoadBucket.readKey(buffer, keyName);
+        return keyName;
+      } else {
+        return "NoKey";
+      }
+    }
+  }
+
+  private Optional<Integer> randomKeyToRead() {
+    int currentIndex = agedFileWrittenIndex.get();
+    return currentIndex != 0
+        ? Optional.of(RandomUtils.nextInt(0, currentIndex))
+        : Optional.empty();
+  }
+
+  @Override
+  public void initialize() {
+    // Nothing to do here
+  }
+
+  @Override
+  public String name() {
+    return "Aged";
+  }
+}
diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java
new file mode 100644
index 00000000000..43126eec92e
--- /dev/null
+++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.loadgenerators;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.conf.StorageUnit;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * List of buffers used by the load generators.
+ */
+public class DataBuffer {
+  private List<ByteBuffer> buffers;
+  // number of buffers to be allocated; each is allocated with a length that
+  // is a power of 2, and each buffer is populated with random data.
+  private int numBuffers;
+
+  public DataBuffer(int numBuffers) {
+    // allocate buffers and populate random data.
+    this.numBuffers = numBuffers;
+    this.buffers = new ArrayList<>();
+    for (int i = 0; i < numBuffers; i++) {
+      int size = (int) StorageUnit.KB.toBytes(1 << i);
+      ByteBuffer buffer = ByteBuffer.allocate(size);
+      buffer.put(RandomUtils.nextBytes(size));
+      this.buffers.add(buffer);
+    }
+  }
+
+  public ByteBuffer getBuffer(int keyIndex) {
+    return buffers.get(keyIndex % numBuffers);
+  }
+
+}
diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java
new file mode 100644
index 00000000000..557c73bd5df
--- /dev/null
+++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.loadgenerators;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.ozone.utils.LoadBucket;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Filesystem load generator for Ozone.
+ *
+ * This load generator reads, writes and deletes data using the filesystem
+ * APIs.
+ */
+public class FilesystemLoadGenerator extends LoadGenerator {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FilesystemLoadGenerator.class);
+
+
+  private final LoadBucket fsBucket;
+  private final DataBuffer dataBuffer;
+
+  public FilesystemLoadGenerator(DataBuffer dataBuffer, LoadBucket fsBucket) {
+    this.dataBuffer = dataBuffer;
+    this.fsBucket = fsBucket;
+  }
+
+  @Override
+  public String generateLoad() throws Exception {
+    int index = RandomUtils.nextInt();
+    ByteBuffer buffer = dataBuffer.getBuffer(index);
+    String keyName = getKeyName(index, name());
+    fsBucket.writeKey(true, buffer, keyName);
+
+    fsBucket.readKey(true, buffer, keyName);
+
+    fsBucket.deleteKey(true, keyName);
+    return keyName;
+  }
+
+  @Override
+  public void initialize() {
+    // Nothing to do here
+  }
+
+  @Override
+  public String name() {
+    return "FileSystem";
+  }
+}
diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java
new file mode 100644
index 00000000000..5e34fb45590
--- /dev/null
+++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.loadgenerators;
+
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Load executors for Ozone; this class provides a pluggable
+ * executor for different load generators.
+ */
+public class LoadExecutors {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(LoadExecutors.class);
+
+  private final LoadGenerator generator;
+  private final int numThreads;
+  private final ExecutorService executor;
+  private final List<CompletableFuture<Void>> futures = new ArrayList<>();
+
+  public LoadExecutors(int numThreads, LoadGenerator generator) {
+    this.numThreads = numThreads;
+    this.generator = generator;
+    this.executor = Executors.newFixedThreadPool(numThreads);
+  }
+
+  private void load(long runTimeMillis) {
+    long threadID = Thread.currentThread().getId();
+    LOG.info("{} LOADGEN: Started IO Thread:{}.",
+        generator.name(), threadID);
+    long startTime = Time.monotonicNow();
+
+    while (Time.monotonicNow() - startTime < runTimeMillis) {
+
+      String keyName = null;
+      try {
+        keyName = generator.generateLoad();
+      } catch (Throwable t) {
+        LOG.error("{} LOADGEN: {} Exiting due to exception",
+            generator.name(), keyName, t);
+        ExitUtil.terminate(new ExitUtil.ExitException(1, t));
+        break;
+      }
+    }
+  }
+
+
+  public void startLoad(long time) {
+    LOG.info("Starting {} threads for {}", numThreads, generator.name());
+    generator.initialize();
+    for (int i = 0; i < numThreads; i++) {
+      futures.add(CompletableFuture.runAsync(
+          () -> load(time), executor));
+    }
+  }
+
+  public void waitForCompletion() {
+    // Wait for IO to complete
+    for (CompletableFuture<Void> f : futures) {
+      try {
+        f.get();
+      } catch (Throwable t) {
+        LOG.error("startIO failed with exception", t);
+      }
+    }
+  }
+
+  public void shutdown() {
+    try {
+      executor.shutdown();
+      executor.awaitTermination(1, TimeUnit.DAYS);
+    } catch (Exception e) {
+      LOG.error("error while closing ", e);
+    }
+  }
+}
diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java
new file mode 100644
index 00000000000..014a46f40a3
--- /dev/null
+++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.loadgenerators;
+
+/**
+ * Base class for load generators.
+ */
+public abstract class LoadGenerator {
+
+  private final String keyNameDelimiter = "_";
+
+  public abstract void initialize();
+
+  public abstract String generateLoad() throws Exception;
+
+  public abstract String name();
+
+  String getKeyName(int keyIndex, String prefix) {
+    return prefix + keyNameDelimiter + keyIndex;
+  }
+}
diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java
new file mode 100644
index 00000000000..a9fc41c2fcb
--- /dev/null
+++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.loadgenerators;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.ozone.utils.LoadBucket;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+
+/**
+ * Random load generator which writes, reads and deletes keys from
+ * the bucket.
+ */ +public class RandomLoadGenerator extends LoadGenerator { + private static final Logger LOG = + LoggerFactory.getLogger(RandomLoadGenerator.class); + + private final List ozoneBuckets; + private final DataBuffer dataBuffer; + + public RandomLoadGenerator(DataBuffer dataBuffer, List buckets) { + this.ozoneBuckets = buckets; + this.dataBuffer = dataBuffer; + } + + @Override + public String generateLoad() throws Exception { + LoadBucket bucket = + ozoneBuckets.get((int) (Math.random() * ozoneBuckets.size())); + int index = RandomUtils.nextInt(); + ByteBuffer buffer = dataBuffer.getBuffer(index); + String keyName = getKeyName(index, name()); + bucket.writeKey(buffer, keyName); + + bucket.readKey(buffer, keyName); + + bucket.deleteKey(keyName); + return keyName; + } + + public void initialize() { + // Nothing to do here + } + + @Override + public String name() { + return "Random"; + } +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java index 8b44fdc5786..2fb92d1885c 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java @@ -110,7 +110,7 @@ abstract class Op { } public void execute() throws Exception { - LOG.info("Going to {} key {}", this.opName, keyName); + LOG.info("Going to {}", this); try { if (fsOp) { Path p = new Path("/", keyName); @@ -119,9 +119,9 @@ public void execute() throws Exception { doBucketOp(keyName); } doPostOp(); - LOG.trace("Done: {} key {}", this.opName, keyName); + LOG.trace("Done: {}", this); } catch (Throwable t) { - LOG.error("Unable to {} key:{}", this.opName, keyName, t); + LOG.error("Unable to {}", this, t); throw t; } } @@ -132,7 +132,7 @@ public void execute() throws Exception { @Override public String toString() { - return "opType=" + opName; + return "opType=" + opName + " keyName=" + keyName; } } @@ -167,6 +167,11 @@ void doPostOp() throws IOException { os.close(); } } + + @Override + public String toString() { + return super.toString() + " buffer:" + buffer.limit(); + } } /** @@ -213,6 +218,11 @@ void doPostOp() throws IOException { is.close(); } } + + @Override + public String toString() { + return super.toString() + " buffer:" + buffer.limit(); + } } /** @@ -237,5 +247,10 @@ void doBucketOp(String key) throws IOException { void doPostOp() { // Nothing to do here } + + @Override + public String toString() { + return super.toString(); + } } } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties index a7684a5c317..11c6bf61431 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties @@ -24,6 +24,7 @@ log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR log4j.logger.org.apache.ratis.grpc.client.GrpcClientProtocolClient=WARN log4j.logger.org.apache.hadoop.ozone.utils=DEBUG,stdout,CHAOS +log4j.logger.org.apache.hadoop.ozone.loadgenerator=DEBUG,stdout,CHAOS log4j.appender.CHAOS.File=${chaoslogfilename} log4j.appender.CHAOS=org.apache.log4j.FileAppender log4j.appender.CHAOS.layout=org.apache.log4j.PatternLayout From 
cee9c6c0742f19254645be73c3cdff3b7983d584 Mon Sep 17 00:00:00 2001 From: isa Date: Tue, 24 Mar 2020 16:39:10 +0430 Subject: [PATCH 09/38] remove cache.isFull assertion from TestContainerCache --- .../ozone/container/common/utils/ContainerCache.java | 6 +----- .../ozone/container/keyvalue/helpers/BlockUtils.java | 1 - .../ozone/container/common/TestBlockDeletingService.java | 2 -- .../container/common/impl/TestContainerPersistence.java | 7 ------- .../ozone/container/common/utils/TestContainerCache.java | 2 -- 5 files changed, 1 insertion(+), 17 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 60a72a2f568..4ddb4e48792 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -60,16 +60,12 @@ private ContainerCache(int maxSize, float loadFactor, boolean public synchronized static ContainerCache getInstance(Configuration conf) { if (cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, - OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); + OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); cache = new ContainerCache(cacheSize, LOAD_FACTOR, true); } return cache; } - public static void clearDefaultCache() { - cache = null; - } - /** * Closes all the db instances and resets the cache. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java index d5e519db3d3..35e0b0c15a7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java @@ -93,7 +93,6 @@ public static void removeDB(KeyValueContainerData container, Configuration */ public static void shutdownCache(ContainerCache cache) { cache.shutdownCache(); - ContainerCache.clearDefaultCache(); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index 415c49f8082..c1f4d9f6dd3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -35,7 +35,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Handler; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; @@ -97,7 +96,6 @@ public static void init() throws IOException { @AfterClass public static void cleanup() throws IOException { 
FileUtils.deleteDirectory(testRoot); - BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java index 77b055c495a..1b0f70f4e13 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java @@ -20,7 +20,6 @@ import com.google.common.collect.Maps; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -39,7 +38,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; @@ -152,11 +150,6 @@ public void cleanupDir() throws IOException { } } - @After - public void cleanupBlockUtil() { - BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); - } - private long getTestContainerID() { return ContainerTestHelper.getTestContainerID(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java index 1d98be57138..0412536e660 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java @@ -91,8 +91,6 @@ public void testContainerCacheEviction() throws Exception { db3.close(); Assert.assertEquals(0, db3.getReferenceCount()); - Assert.assertTrue(cache.isFull()); - // add one more reference to ContainerCache and verify that it will not // evict the least recent entry as it has reference. ReferenceCountedDB db4 = cache.getDB(3, "RocksDB", From 512d607df157b2a3551bbbbaf6ddb4b0f7c20752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elek=20M=C3=A1rton?= Date: Tue, 24 Mar 2020 16:00:10 +0100 Subject: [PATCH 10/38] Revert "HDDS-3142. Create isolated enviornment for OM to test it without SCM. (#656)" This reverts commit 281faf3a2ee4cd624ece0ed05165c164d5b4774f. 
--- dev-support/byteman/mock-scm.btm | 34 ------ .../hdds/freon/FakeClusterTopology.java | 92 ---------------- .../FakeScmBlockLocationProtocolClient.java | 100 ------------------ ...akeScmContainerLocationProtocolClient.java | 76 ------------- .../hadoop/hdds/freon/package-info.java | 24 ----- ...ocationProtocolClientSideTranslatorPB.java | 7 +- .../hadoop/hdds/utils/db/DBProfile.java | 1 + 7 files changed, 2 insertions(+), 332 deletions(-) delete mode 100644 dev-support/byteman/mock-scm.btm delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java diff --git a/dev-support/byteman/mock-scm.btm b/dev-support/byteman/mock-scm.btm deleted file mode 100644 index a291e28f215..00000000000 --- a/dev-support/byteman/mock-scm.btm +++ /dev/null @@ -1,34 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -RULE mock scm block client -CLASS org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB -METHOD submitRequest -AT ENTRY -BIND client:org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB = $0; - result:org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$SCMBlockLocationResponse = org.apache.hadoop.hdds.freon.FakeScmBlockLocationProtocolClient.submitRequest($1); -IF true -DO return result; -ENDRULE - -RULE mock scm container client -CLASS org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB -METHOD submitRpcRequest -AT ENTRY -BIND client:org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB = $0; - result:org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos$ScmContainerLocationResponse = org.apache.hadoop.hdds.freon.FakeScmContainerLocationProtocolClient.submitRequest($1); -IF true -DO return result; -ENDRULE \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java deleted file mode 100644 index 347323a5f95..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.freon; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.UUID; - -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.Pipeline; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.Port; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.hdds.scm.pipeline.PipelineID; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Class to store pre-generated topology information for load-tests. - */ -public class FakeClusterTopology { - - private static final Logger LOGGER = - LoggerFactory.getLogger(FakeClusterTopology.class); - - public static final FakeClusterTopology INSTANCE = new FakeClusterTopology(); - - private List datanodes = new ArrayList<>(); - - private List pipelines = new ArrayList<>(); - - private Random random = new Random(); - - public FakeClusterTopology() { - try { - for (int i = 0; i < 9; i++) { - datanodes.add(createDatanode(i)); - if ((i + 1) % 3 == 0) { - pipelines.add(Pipeline.newBuilder() - .setId(PipelineID.randomId().getProtobuf()) - .setFactor(ReplicationFactor.THREE) - .setType(ReplicationType.RATIS) - .addMembers(getDatanode(i - 2)) - .addMembers(getDatanode(i - 1)) - .addMembers(getDatanode(i)) - .build()); - } - } - } catch (Exception ex) { - LOGGER.error("Can't initialize FakeClusterTopology", ex); - } - } - - private DatanodeDetailsProto createDatanode(int index) { - return DatanodeDetailsProto.newBuilder() - .setUuid(UUID.randomUUID().toString()) - .setHostName("localhost") - .setIpAddress("127.0.0.1") - .addPorts( - Port.newBuilder().setName("RATIS").setValue(1234)) - .build(); - } - - public DatanodeDetailsProto getDatanode(int i) { - return datanodes.get(i); - } - - public Pipeline getRandomPipeline() { - return pipelines.get(random.nextInt(pipelines.size())); - } - - public List getAllDatanodes() { - return datanodes; - } -} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java deleted file mode 100644 index f1e7e0f8893..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.freon; - -import java.io.IOException; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerBlockID; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.GetScmInfoResponseProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateBlockResponse; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto.Builder; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationRequest; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationResponse; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Status; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Type; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Fake SCM client to return a simulated block location. - */ -public final class FakeScmBlockLocationProtocolClient { - - private static final Logger LOGGER = - LoggerFactory.getLogger(FakeScmBlockLocationProtocolClient.class); - - public static final int BLOCK_PER_CONTAINER = 1000; - - private static AtomicLong counter = new AtomicLong(); - - private FakeScmBlockLocationProtocolClient() { - } - - public static SCMBlockLocationResponse submitRequest( - SCMBlockLocationRequest req) - throws IOException { - try { - if (req.getCmdType() == Type.GetScmInfo) { - return SCMBlockLocationResponse.newBuilder() - .setCmdType(req.getCmdType()) - .setStatus(Status.OK) - .setSuccess(true) - .setGetScmInfoResponse( - GetScmInfoResponseProto.newBuilder() - .setScmId("scm-id") - .setClusterId("cluster-id") - .build() - ) - .build(); - } else if (req.getCmdType() == Type.AllocateScmBlock) { - Builder allocateBlockResponse = - AllocateScmBlockResponseProto.newBuilder(); - for (int i = 0; - i < req.getAllocateScmBlockRequest().getNumBlocks(); i++) { - long seq = counter.incrementAndGet(); - - allocateBlockResponse.addBlocks(AllocateBlockResponse.newBuilder() - .setPipeline(FakeClusterTopology.INSTANCE.getRandomPipeline()) - .setContainerBlockID(ContainerBlockID.newBuilder() - .setContainerID(seq / BLOCK_PER_CONTAINER) - .setLocalID(seq)) - ); - } - return SCMBlockLocationResponse.newBuilder() - .setCmdType(req.getCmdType()) - .setStatus(Status.OK) - .setSuccess(true) - .setAllocateScmBlockResponse( - allocateBlockResponse - ) - .build(); - } else { - throw new IllegalArgumentException( - "Unsupported request. 
Fake answer is not implemented for " + req - .getCmdType()); - } - } catch (Exception ex) { - LOGGER.error("Error on creating fake SCM response", ex); - return null; - } - } - -} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java deleted file mode 100644 index 771f357d56f..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.freon; - -import java.io.IOException; - -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.Node; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.NodeQueryResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.NodeQueryResponseProto.Builder; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse.Status; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Fake SCM client to return a simulated block location. - */ -public final class FakeScmContainerLocationProtocolClient { - - private static final Logger LOGGER = - LoggerFactory.getLogger(FakeScmContainerLocationProtocolClient.class); - - private FakeScmContainerLocationProtocolClient() { - } - - public static ScmContainerLocationResponse submitRequest( - ScmContainerLocationRequest req) - throws IOException { - try { - if (req.getCmdType() == Type.QueryNode) { - Builder builder = NodeQueryResponseProto.newBuilder(); - for (DatanodeDetailsProto datanode : FakeClusterTopology.INSTANCE - .getAllDatanodes()) { - builder.addDatanodes(Node.newBuilder() - .setNodeID(datanode) - .addNodeStates(NodeState.HEALTHY) - .build()); - } - - return ScmContainerLocationResponse.newBuilder() - .setCmdType(Type.QueryNode) - .setStatus(Status.OK) - .setNodeQueryResponse(builder.build()) - .build(); - } else { - throw new IllegalArgumentException( - "Unsupported request. Fake answer is not implemented for " + req - .getCmdType()); - } - } catch (Exception ex) { - LOGGER.error("Error on creating fake SCM response", ex); - return null; - } - } - -} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java deleted file mode 100644 index 381c81100e4..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *
- * Freon related helper classes used for load testing. - */ - -/** - * Freon related helper classes used for load testing. - */ -package org.apache.hadoop.hdds.freon; \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index dffae117de2..7582b4402c8 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -111,18 +111,13 @@ private ScmContainerLocationResponse submitRequest( builderConsumer.accept(builder); ScmContainerLocationRequest wrapper = builder.build(); - response = submitRpcRequest(wrapper); + response = rpcProxy.submitRequest(NULL_RPC_CONTROLLER, wrapper); } catch (ServiceException ex) { throw ProtobufHelper.getRemoteException(ex); } return response; } - private ScmContainerLocationResponse submitRpcRequest( - ScmContainerLocationRequest wrapper) throws ServiceException { - return rpcProxy.submitRequest(NULL_RPC_CONTROLLER, wrapper); - } - /** * Asks SCM where a container should be allocated. SCM responds with the set * of datanodes that should be used creating this container. Ozone/SCM only diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java index b9b7ef76fc8..347b83b48cf 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java @@ -65,6 +65,7 @@ public ColumnFamilyOptions getColumnFamilyOptions() { new BlockBasedTableConfig() .setBlockCache(new LRUCache(blockCacheSize)) .setBlockSize(blockSize) + .setCacheIndexAndFilterBlocks(true) .setPinL0FilterAndIndexBlocksInCache(true) .setFilterPolicy(new BloomFilter())); } From 824938534891f90dc6676ec3064485e1a401e654 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Singh Date: Wed, 25 Mar 2020 15:43:36 +0530 Subject: [PATCH 11/38] HDDS-3250. Create a separate log file for Warnings and Errors in MiniOzoneChaosCluster. 
(#711) --- .../mini-chaos-tests/src/test/bin/start-chaos.sh | 4 ++++ .../mini-chaos-tests/src/test/resources/log4j.properties | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh index 1546bbd490d..c02fa962223 100755 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh @@ -19,6 +19,7 @@ date=$(date +"%Y-%m-%d-%H-%M-%S-%Z") logfiledirectory="/tmp/chaos-${date}/" completesuffix="complete.log" chaossuffix="chaos.log" +problemsuffix="problem.log" compilesuffix="compile.log" heapformat="dump.hprof" @@ -30,6 +31,8 @@ chaosfilename="${logfiledirectory}${chaossuffix}" compilefilename="${logfiledirectory}${compilesuffix}" #log goes to something like /tmp/2019-12-04--00-01-26-IST/dump.hprof heapdumpfile="${logfiledirectory}${heapformat}" +#log goes to something like /tmp/2019-12-04--00-01-26-IST/problem.log +problemfilename="${logfiledirectory}${problemsuffix}" #TODO: add gc log file details as well MVN_OPTS="-XX:+HeapDumpOnOutOfMemoryError " @@ -46,6 +49,7 @@ mvn exec:java \ -Dexec.mainClass="org.apache.hadoop.ozone.TestMiniChaosOzoneCluster" \ -Dexec.classpathScope=test \ -Dchaoslogfilename=${chaosfilename} \ + -Dproblemlogfilename=${problemfilename} \ -Dorg.apache.ratis.thirdparty.io.netty.allocator.useCacheForAllThreads=false \ -Dio.netty.leakDetection.level=advanced \ -Dio.netty.leakDetectionLevel=advanced \ diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties index 11c6bf61431..f491fadeb5f 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties @@ -11,7 +11,7 @@ # limitations under the License. # log4j configuration used during build and unit tests -log4j.rootLogger=INFO,stdout +log4j.rootLogger=INFO,stdout,PROBLEM log4j.threshold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout @@ -29,4 +29,11 @@ log4j.appender.CHAOS.File=${chaoslogfilename} log4j.appender.CHAOS=org.apache.log4j.FileAppender log4j.appender.CHAOS.layout=org.apache.log4j.PatternLayout log4j.appender.CHAOS.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n + +log4j.appender.PROBLEM.File=${problemlogfilename} +log4j.appender.PROBLEM.Threshold=WARN +log4j.appender.PROBLEM=org.apache.log4j.FileAppender +log4j.appender.PROBLEM.layout=org.apache.log4j.PatternLayout +log4j.appender.PROBLEM.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n + log4j.additivity.org.apache.hadoop.ozone.utils=false \ No newline at end of file From f6be7660a52ac0e7ebfa3818989c30c0b9f977ed Mon Sep 17 00:00:00 2001 From: avijayanhwx <14299376+avijayanhwx@users.noreply.github.com> Date: Wed, 25 Mar 2020 09:20:25 -0700 Subject: [PATCH 12/38] HDDS-3243. Recon should not have the ability to send Create/Close Container commands to Datanode. 
(#712) --- .../ozone/recon/TestReconAsPassiveScm.java | 11 +++++++++ .../ozone/recon/scm/ReconNodeManager.java | 23 ++++++++++++++++++- .../ReconStorageContainerManagerFacade.java | 1 + 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java index faf2c0aa302..eabf667e2d2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.RATIS; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CLOSE_CONTAINER; import static org.apache.hadoop.ozone.container.ozoneimpl.TestOzoneContainer.runTestOzoneContainerViaDataNode; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -41,7 +42,9 @@ import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.recon.scm.ReconNodeManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; import org.junit.After; import org.junit.Assert; @@ -126,6 +129,14 @@ public void testDatanodeRegistrationAndReports() throws Exception { // Verify Recon picked up the new container that was created. 
assertEquals(scmContainerManager.getContainerIDs(), reconContainerManager.getContainerIDs()); + + GenericTestUtils.LogCapturer logCapturer = + GenericTestUtils.LogCapturer.captureLogs(ReconNodeManager.LOG); + reconScm.getEventQueue().fireEvent(CLOSE_CONTAINER, + containerInfo.containerID()); + GenericTestUtils.waitFor(() -> logCapturer.getOutput() + .contains("Ignoring unsupported command closeContainerCommand"), + 1000, 20000); } @Test(timeout = 120000) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java index 2febf508571..9a3d5181f15 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.recon.scm; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type.reregisterCommand; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_SCM_NODE_DB; @@ -27,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.node.SCMNodeManager; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; @@ -41,21 +44,26 @@ import org.apache.hadoop.hdds.utils.MetadataStore; import org.apache.hadoop.hdds.utils.MetadataStoreBuilder; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.ozone.recon.ReconUtils; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableSet; + /** * Recon SCM's Node manager that includes persistence. */ public class ReconNodeManager extends SCMNodeManager { - private static final Logger LOG = LoggerFactory + public static final Logger LOG = LoggerFactory .getLogger(ReconNodeManager.class); private final MetadataStore nodeStore; + private final static Set ALLOWED_COMMANDS = + ImmutableSet.of(reregisterCommand); /** * Map that contains mapping between datanodes @@ -132,6 +140,19 @@ public long getLastHeartbeat(DatanodeDetails datanodeDetails) { return datanodeHeartbeatMap.getOrDefault(datanodeDetails.getUuid(), 0L); } + @Override + public void onMessage(CommandForDatanode commandForDatanode, + EventPublisher ignored) { + if (ALLOWED_COMMANDS.contains( + commandForDatanode.getCommand().getType())) { + super.onMessage(commandForDatanode, ignored); + } else { + LOG.info("Ignoring unsupported command {} for Datanode {}.", + commandForDatanode.getCommand().getType(), + commandForDatanode.getDatanodeId()); + } + } + /** * Send heartbeat to indicate the datanode is alive and doing well. 
* diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 7964d6ee17f..9cb6a3193f0 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -129,6 +129,7 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, ContainerActionsHandler actionsHandler = new ContainerActionsHandler(); ReconNewNodeHandler newNodeHandler = new ReconNewNodeHandler(nodeManager); + eventQueue.addHandler(SCMEvents.DATANODE_COMMAND, nodeManager); eventQueue.addHandler(SCMEvents.NODE_REPORT, nodeReportHandler); eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler); eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionHandler); From 4682babb6629f93d2d21103e0713fc74b17a0ff3 Mon Sep 17 00:00:00 2001 From: Vivek Ratnavel Subramanian Date: Wed, 25 Mar 2020 14:46:27 -0700 Subject: [PATCH 13/38] HDDS-3164. Add Recon endpoint to serve missing containers and its metadata. (#714) --- ...KeyService.java => ContainerEndpoint.java} | 54 +++++- .../api/types/MissingContainerMetadata.java | 76 ++++++++ .../api/types/MissingContainersResponse.java | 53 ++++++ .../recon/fsck/MissingContainerTask.java | 10 +- .../recon/spi/ContainerDBServiceProvider.java | 9 + .../impl/ContainerDBServiceProviderImpl.java | 13 +- .../webapps/recon/ozone-recon-web/api/db.json | 8 +- ...ervice.java => TestContainerEndpoint.java} | 173 +++++++++++++----- .../hadoop/ozone/recon/api/TestEndpoints.java | 34 +--- .../recon/fsck/TestMissingContainerTask.java | 36 ++-- .../types/GuiceInjectorUtilsForTests.java | 21 ++- 11 files changed, 389 insertions(+), 98 deletions(-) rename hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/{ContainerKeyService.java => ContainerEndpoint.java} (80%) create mode 100644 hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java create mode 100644 hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java rename hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/{TestContainerKeyService.java => TestContainerEndpoint.java} (67%) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerKeyService.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java similarity index 80% rename from hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerKeyService.java rename to hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index 17ae2b48102..b33db8dab89 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerKeyService.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -20,9 +20,11 @@ import java.io.IOException; import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.stream.Collectors; import javax.ws.rs.DefaultValue; @@ -37,6 +39,9 @@ import javax.inject.Inject; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hdds.scm.container.ContainerID; 
+import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; @@ -46,7 +51,10 @@ import org.apache.hadoop.ozone.recon.api.types.KeyMetadata; import org.apache.hadoop.ozone.recon.api.types.KeyMetadata.ContainerBlockMetadata; import org.apache.hadoop.ozone.recon.api.types.KeysResponse; +import org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata; +import org.apache.hadoop.ozone.recon.api.types.MissingContainersResponse; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconContainerManager; import org.apache.hadoop.ozone.recon.spi.ContainerDBServiceProvider; import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FETCH_COUNT; @@ -60,7 +68,7 @@ */ @Path("/containers") @Produces(MediaType.APPLICATION_JSON) -public class ContainerKeyService { +public class ContainerEndpoint { @Inject private ContainerDBServiceProvider containerDBServiceProvider; @@ -68,6 +76,14 @@ public class ContainerKeyService { @Inject private ReconOMMetadataManager omMetadataManager; + private ReconContainerManager containerManager; + + @Inject + public ContainerEndpoint(OzoneStorageContainerManager reconSCM) { + this.containerManager = + (ReconContainerManager) reconSCM.getContainerManager(); + } + /** * Return @{@link org.apache.hadoop.ozone.recon.api.types.ContainerMetadata} * for the containers starting from the given "prev-key" query param for the @@ -173,7 +189,6 @@ public Response getKeysForContainer( keyMetadata.getBlockIds().put(containerKeyPrefix.getKeyVersion(), blockIds); } - } } @@ -188,6 +203,41 @@ public Response getKeysForContainer( return Response.ok(keysResponse).build(); } + /** + * Return + * {@link org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata} + * for all missing containers. + * + * @return {@link Response} + */ + @GET + @Path("/missing") + public Response getMissingContainers() { + List missingContainers = new ArrayList<>(); + containerDBServiceProvider.getMissingContainers().forEach(container -> { + long containerID = container.getContainerId(); + try { + ContainerInfo containerInfo = + containerManager.getContainer(new ContainerID(containerID)); + long keyCount = containerInfo.getNumberOfKeys(); + UUID pipelineID = containerInfo.getPipelineID().getId(); + + // TODO: Find out which datanodes had replicas of this container + // and populate this list + List datanodes = Collections.emptyList(); + missingContainers.add(new MissingContainerMetadata(containerID, + container.getMissingSince(), keyCount, pipelineID, datanodes)); + } catch (IOException ioEx) { + throw new WebApplicationException(ioEx, + Response.Status.INTERNAL_SERVER_ERROR); + } + }); + MissingContainersResponse response = + new MissingContainersResponse(missingContainers.size(), + missingContainers); + return Response.ok(response).build(); + } + /** * Helper function to extract the blocks for a given container from a given * OM Key. 
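The ContainerEndpoint change above exposes missing containers as a plain JAX-RS GET resource, so it can be exercised like any other Recon REST call. A hypothetical client sketch (only the /containers/missing path comes from the @Path annotations above; the host, port, and /api prefix are assumptions about the Recon web server's deployment):

    import javax.ws.rs.client.Client;
    import javax.ws.rs.client.ClientBuilder;

    public class MissingContainersClientSketch {
      public static void main(String[] args) {
        Client client = ClientBuilder.newClient();
        // Placeholder address -- substitute the actual Recon host:port and
        // servlet prefix for your deployment.
        String json = client
            .target("http://localhost:9888/api/containers/missing")
            .request("application/json")
            .get(String.class);
        // The body is a MissingContainersResponse: a totalCount plus one
        // entry per container carrying containerID, missingSince, keys,
        // pipelineID and (for now always empty) datanodes.
        System.out.println(json);
        client.close();
      }
    }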
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java new file mode 100644 index 00000000000..f24bc5723ac --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.recon.api.types; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import java.util.List; +import java.util.UUID; + +/** + * Metadata object that represents a Missing Container. + */ +@XmlAccessorType(XmlAccessType.FIELD) +public class MissingContainerMetadata { + + @XmlElement(name = "containerID") + private long containerID; + + @XmlElement(name = "missingSince") + private long missingSince; + + @XmlElement(name = "keys") + private long keys; + + @XmlElement(name = "pipelineID") + private UUID pipelineID; + + @XmlElement(name = "datanodes") + private List datanodes; + + public MissingContainerMetadata(long containerID, long missingSince, + long keys, UUID pipelineID, + List datanodes) { + this.containerID = containerID; + this.missingSince = missingSince; + this.keys = keys; + this.pipelineID = pipelineID; + this.datanodes = datanodes; + } + + public long getContainerID() { + return containerID; + } + + public long getKeys() { + return keys; + } + + public List getDatanodes() { + return datanodes; + } + + public long getMissingSince() { + return missingSince; + } + + public UUID getPipelineID() { + return pipelineID; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java new file mode 100644 index 00000000000..dd8888c6fe4 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Collection; + +/** + * Class that represents the API Response structure of Missing Containers. + */ +public class MissingContainersResponse { + /** + * Total count of the missing containers. + */ + @JsonProperty("totalCount") + private long totalCount; + + /** + * A collection of missing containers. 
+ */ + @JsonProperty("containers") + private Collection containers; + + public MissingContainersResponse(long totalCount, + Collection + containers) { + this.totalCount = totalCount; + this.containers = containers; + } + + public long getTotalCount() { + return totalCount; + } + + public Collection getContainers() { + return containers; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java index 60d601e1fe2..6db20259f3c 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java @@ -22,6 +22,7 @@ import javax.inject.Inject; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; @@ -83,8 +84,15 @@ private void processContainer(ContainerID containerID, long currentTime) { try { Set containerReplicas = containerManager.getContainerReplicas(containerID); - if (CollectionUtils.isEmpty(containerReplicas)) { + // check if a container has 0 replicas or if all available replicas + // are marked UNHEALTHY. + boolean isAllUnhealthy = + containerReplicas.stream().allMatch(replica -> + replica.getState().equals(State.UNHEALTHY)); + if (CollectionUtils.isEmpty(containerReplicas) || isAllUnhealthy) { if (!missingContainersDao.existsById(containerID.getId())) { + LOG.info("Found a missing container with ID {}. Adding it to the " + + "database", containerID.getId()); MissingContainers newRecord = new MissingContainers(containerID.getId(), currentTime); missingContainersDao.insert(newRecord); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java index 449eb7d8561..03b66239f02 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java @@ -19,12 +19,14 @@ package org.apache.hadoop.ozone.recon.spi; import java.io.IOException; +import java.util.List; import java.util.Map; import org.apache.hadoop.hdds.annotation.InterfaceStability; import org.apache.hadoop.ozone.recon.api.types.ContainerKeyPrefix; import org.apache.hadoop.ozone.recon.api.types.ContainerMetadata; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.hadoop.ozone.recon.schema.tables.pojos.MissingContainers; /** * The Recon Container DB Service interface. @@ -161,4 +163,11 @@ void deleteContainerMapping(ContainerKeyPrefix containerKeyPrefix) * @param count no. of new containers to add to containers total count. */ void incrementContainerCountBy(long count); + + /** + * Get all the missing containers. + * + * @return List of MissingContainers. 
+ */ + List getMissingContainers(); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java index 7915e724766..11f8bfe00aa 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.sql.Timestamp; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import javax.inject.Inject; @@ -47,7 +48,9 @@ import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.hadoop.ozone.recon.schema.tables.daos.GlobalStatsDao; +import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; import org.hadoop.ozone.recon.schema.tables.pojos.GlobalStats; +import org.hadoop.ozone.recon.schema.tables.pojos.MissingContainers; import org.jooq.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +81,9 @@ public class ContainerDBServiceProviderImpl @Inject private ReconUtils reconUtils; + @Inject + private MissingContainersDao missingContainersDao; + @Inject public ContainerDBServiceProviderImpl(DBStore dbStore, Configuration sqlConfiguration) { @@ -141,9 +147,10 @@ private void initializeTables() { this.containerKeyCountTable = containerDbStore .getTable(CONTAINER_KEY_COUNT_TABLE, Long.class, Long.class); } catch (IOException e) { - LOG.error("Unable to create Container Key tables. {}", e); + LOG.error("Unable to create Container Key tables.", e); } } + /** * Concatenate the containerID and Key Prefix using a delimiter and store the * count into the container DB store. 
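Restating the detection rule from the MissingContainerTask hunk above as standalone code, since it is the heart of this patch: a container is flagged missing when it has no replicas at all, or when every replica it does have is UNHEALTHY. A minimal sketch with illustrative types (the real code works with ContainerReplica and ContainerReplicaProto.State):

    import java.util.Set;

    final class MissingRuleSketch {
      enum State { OPEN, CLOSING, CLOSED, UNHEALTHY }

      // Mirrors MissingContainerTask#processContainer: no replicas, or all
      // replicas UNHEALTHY. The isEmpty() guard is technically redundant --
      // allMatch() is true on an empty stream -- but it mirrors the source's
      // explicit CollectionUtils.isEmpty check.
      static boolean isMissing(Set<State> replicaStates) {
        return replicaStates.isEmpty()
            || replicaStates.stream().allMatch(s -> s == State.UNHEALTHY);
      }
    }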
@@ -351,6 +358,10 @@ public Map getContainers(int limit, return containers; } + public List getMissingContainers() { + return missingContainersDao.findAll(); + } + @Override public void deleteContainerMapping(ContainerKeyPrefix containerKeyPrefix) throws IOException { diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json index a89427174ad..ab807df415c 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json @@ -361,7 +361,9 @@ "localhost1.storage.enterprise.com", "localhost3.storage.enterprise.com", "localhost5.storage.enterprise.com" - ] + ], + "missingSince": 1578491371528, + "pipelineId": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982" }, { "id": 2, @@ -370,7 +372,9 @@ "localhost1.storage.enterprise.com", "localhost3.storage.enterprise.com", "localhost5.storage.enterprise.com" - ] + ], + "missingSince": 1578491471528, + "pipelineId": "04a5d908-ff01-4ce6-ad75-f3ec73dfc8a2" } ] }, diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerKeyService.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java similarity index 67% rename from hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerKeyService.java rename to hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java index 9cca5a71000..5b373ccadf7 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerKeyService.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -29,13 +29,16 @@ import java.util.Iterator; import java.util.List; import java.util.Map; - -import javax.sql.DataSource; import javax.ws.rs.core.Response; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; @@ -45,14 +48,25 @@ import org.apache.hadoop.ozone.recon.api.types.ContainersResponse; import org.apache.hadoop.ozone.recon.api.types.KeyMetadata; import org.apache.hadoop.ozone.recon.api.types.KeysResponse; +import org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata; +import org.apache.hadoop.ozone.recon.api.types.MissingContainersResponse; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconContainerManager; +import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.ContainerDBServiceProvider; +import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; +import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTask; import org.apache.hadoop.hdds.utils.db.Table; +import org.hadoop.ozone.recon.schema.ReconTaskSchemaDefinition; import org.hadoop.ozone.recon.schema.StatsSchemaDefinition; -import org.jooq.impl.DSL; -import org.jooq.impl.DefaultConfiguration; +import org.hadoop.ozone.recon.schema.UtilizationSchemaDefinition; +import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; +import org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao; +import org.hadoop.ozone.recon.schema.tables.pojos.MissingContainers; +import org.jooq.Configuration; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -60,33 +74,87 @@ import com.google.inject.Injector; /** - * Test for container key service. + * Test for container endpoint. 
*/ -public class TestContainerKeyService extends AbstractOMMetadataManagerTest { +public class TestContainerEndpoint extends AbstractOMMetadataManagerTest { private ContainerDBServiceProvider containerDbServiceProvider; - private Injector injector; - private OzoneManagerServiceProviderImpl ozoneManagerServiceProvider; - private ContainerKeyService containerKeyService; + private ContainerEndpoint containerEndpoint; private GuiceInjectorUtilsForTestsImpl guiceInjectorTest = new GuiceInjectorUtilsForTestsImpl(); private boolean isSetupDone = false; private ReconOMMetadataManager reconOMMetadataManager; + private MissingContainersDao missingContainersDao; + private ContainerID containerID = new ContainerID(1L); + private PipelineID pipelineID; + private long keyCount = 5L; private void initializeInjector() throws Exception { reconOMMetadataManager = getTestMetadataManager( initializeNewOmMetadataManager()); - ozoneManagerServiceProvider = getMockOzoneManagerServiceProvider(); + OzoneManagerServiceProviderImpl ozoneManagerServiceProvider = + mock(OzoneManagerServiceProviderImpl.class); Injector parentInjector = guiceInjectorTest.getInjector( ozoneManagerServiceProvider, reconOMMetadataManager, temporaryFolder); - injector = parentInjector.createChildInjector(new AbstractModule() { - @Override - protected void configure() { - containerKeyService = new ContainerKeyService(); - bind(ContainerKeyService.class).toInstance(containerKeyService); - } - }); + Pipeline pipeline = getRandomPipeline(); + pipelineID = pipeline.getId(); + + // Mock ReconStorageContainerManagerFacade and other SCM related methods + OzoneStorageContainerManager mockReconSCM = + mock(ReconStorageContainerManagerFacade.class); + ContainerManager mockContainerManager = + mock(ReconContainerManager.class); + + when(mockContainerManager.getContainer(containerID)).thenReturn( + new ContainerInfo.Builder() + .setContainerID(containerID.getId()) + .setNumberOfKeys(keyCount) + .setPipelineID(pipelineID) + .build() + ); + when(mockReconSCM.getContainerManager()) + .thenReturn(mockContainerManager); + + Injector injector = parentInjector.createChildInjector( + new AbstractModule() { + @Override + protected void configure() { + Configuration sqlConfiguration = + parentInjector.getInstance((Configuration.class)); + + try { + ReconTaskSchemaDefinition taskSchemaDefinition = parentInjector + .getInstance(ReconTaskSchemaDefinition.class); + taskSchemaDefinition.initializeSchema(); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + + ReconTaskStatusDao reconTaskStatusDao = + new ReconTaskStatusDao(sqlConfiguration); + + bind(ReconTaskStatusDao.class).toInstance(reconTaskStatusDao); + + StorageContainerServiceProvider mockScmServiceProvider = mock( + StorageContainerServiceProviderImpl.class); + bind(StorageContainerServiceProvider.class) + .toInstance(mockScmServiceProvider); + bind(OzoneStorageContainerManager.class) + .toInstance(mockReconSCM); + bind(ContainerEndpoint.class); + } + }); + containerEndpoint = injector.getInstance(ContainerEndpoint.class); + containerDbServiceProvider = injector.getInstance( + ContainerDBServiceProvider.class); + StatsSchemaDefinition schemaDefinition = injector.getInstance( + StatsSchemaDefinition.class); + schemaDefinition.initializeSchema(); + UtilizationSchemaDefinition utilizationSchemaDefinition = + injector.getInstance(UtilizationSchemaDefinition.class); + utilizationSchemaDefinition.initializeSchema(); + missingContainersDao = injector.getInstance(MissingContainersDao.class); } @Before @@ 
-94,17 +162,6 @@ public void setUp() throws Exception { // The following setup runs only once if (!isSetupDone) { initializeInjector(); - - DSL.using(new DefaultConfiguration().set( - injector.getInstance(DataSource.class))); - - containerDbServiceProvider = injector.getInstance( - ContainerDBServiceProvider.class); - - StatsSchemaDefinition schemaDefinition = injector.getInstance( - StatsSchemaDefinition.class); - schemaDefinition.initializeSchema(); - isSetupDone = true; } @@ -185,7 +242,7 @@ public void setUp() throws Exception { @Test public void testGetKeysForContainer() { - Response response = containerKeyService.getKeysForContainer(1L, -1, ""); + Response response = containerEndpoint.getKeysForContainer(1L, -1, ""); KeysResponse responseObject = (KeysResponse) response.getEntity(); KeysResponse.KeysResponseData data = responseObject.getKeysResponseData(); @@ -214,7 +271,7 @@ public void testGetKeysForContainer() { assertEquals(103, blockIds.get(0L).iterator().next().getLocalID()); assertEquals(104, blockIds.get(1L).iterator().next().getLocalID()); - response = containerKeyService.getKeysForContainer(3L, -1, ""); + response = containerEndpoint.getKeysForContainer(3L, -1, ""); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); keyMetadataList = data.getKeys(); @@ -222,7 +279,7 @@ public void testGetKeysForContainer() { assertEquals(0, data.getTotalCount()); // test if limit works as expected - response = containerKeyService.getKeysForContainer(1L, 1, ""); + response = containerEndpoint.getKeysForContainer(1L, 1, ""); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); keyMetadataList = data.getKeys(); @@ -233,7 +290,7 @@ public void testGetKeysForContainer() { @Test public void testGetKeysForContainerWithPrevKey() { // test if prev-key param works as expected - Response response = containerKeyService.getKeysForContainer( + Response response = containerEndpoint.getKeysForContainer( 1L, -1, "/sampleVol/bucketOne/key_one"); KeysResponse responseObject = @@ -253,7 +310,7 @@ public void testGetKeysForContainerWithPrevKey() { assertEquals(2, keyMetadata.getVersions().size()); assertEquals(2, keyMetadata.getBlockIds().size()); - response = containerKeyService.getKeysForContainer( + response = containerEndpoint.getKeysForContainer( 1L, -1, StringUtils.EMPTY); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); @@ -266,7 +323,7 @@ public void testGetKeysForContainerWithPrevKey() { assertEquals("key_one", keyMetadata.getKey()); // test for negative cases - response = containerKeyService.getKeysForContainer( + response = containerEndpoint.getKeysForContainer( 1L, -1, "/sampleVol/bucketOne/invalid_key"); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); @@ -274,7 +331,7 @@ public void testGetKeysForContainerWithPrevKey() { assertEquals(3, data.getTotalCount()); assertEquals(0, keyMetadataList.size()); - response = containerKeyService.getKeysForContainer( + response = containerEndpoint.getKeysForContainer( 5L, -1, ""); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); @@ -286,7 +343,7 @@ public void testGetKeysForContainerWithPrevKey() { @Test public void testGetContainers() { - Response response = containerKeyService.getContainers(-1, 0L); + Response response = containerEndpoint.getContainers(-1, 0L); ContainersResponse responseObject = (ContainersResponse) 
response.getEntity(); @@ -310,7 +367,7 @@ public void testGetContainers() { assertEquals(2L, containerMetadata.getNumberOfKeys()); // test if limit works as expected - response = containerKeyService.getContainers(1, 0L); + response = containerEndpoint.getContainers(1, 0L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); @@ -321,7 +378,7 @@ public void testGetContainers() { @Test public void testGetContainersWithPrevKey() { - Response response = containerKeyService.getContainers(1, 1L); + Response response = containerEndpoint.getContainers(1, 1L); ContainersResponse responseObject = (ContainersResponse) response.getEntity(); @@ -339,7 +396,7 @@ public void testGetContainersWithPrevKey() { assertEquals(1, containers.size()); assertEquals(2L, containerMetadata.getContainerID()); - response = containerKeyService.getContainers(-1, 0L); + response = containerEndpoint.getContainers(-1, 0L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); @@ -350,14 +407,14 @@ public void testGetContainersWithPrevKey() { assertEquals(1L, containerMetadata.getContainerID()); // test for negative cases - response = containerKeyService.getContainers(-1, 5L); + response = containerEndpoint.getContainers(-1, 5L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); assertEquals(0, containers.size()); assertEquals(2, data.getTotalCount()); - response = containerKeyService.getContainers(-1, -1L); + response = containerEndpoint.getContainers(-1, -1L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); @@ -365,9 +422,33 @@ public void testGetContainersWithPrevKey() { assertEquals(2, data.getTotalCount()); } - private OzoneManagerServiceProviderImpl getMockOzoneManagerServiceProvider() { - OzoneManagerServiceProviderImpl omServiceProviderMock = - mock(OzoneManagerServiceProviderImpl.class); - return omServiceProviderMock; + @Test + public void testGetMissingContainers() { + Response response = containerEndpoint.getMissingContainers(); + + MissingContainersResponse responseObject = + (MissingContainersResponse) response.getEntity(); + + assertEquals(0, responseObject.getTotalCount()); + assertEquals(Collections.EMPTY_LIST, responseObject.getContainers()); + + // Add missing containers to the database + long missingSince = System.currentTimeMillis(); + MissingContainers newRecord = + new MissingContainers(1L, missingSince); + missingContainersDao.insert(newRecord); + + response = containerEndpoint.getMissingContainers(); + responseObject = (MissingContainersResponse) response.getEntity(); + assertEquals(1, responseObject.getTotalCount()); + MissingContainerMetadata container = + responseObject.getContainers().stream().findFirst().orElse(null); + Assert.assertNotNull(container); + + assertEquals(containerID.getId(), container.getContainerID()); + assertEquals(keyCount, container.getKeys()); + assertEquals(pipelineID.getId(), container.getPipelineID()); + assertEquals(0, container.getDatanodes().size()); + assertEquals(missingSince, container.getMissingSince()); } } \ No newline at end of file diff --git 
a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java index 027db0daabd..6d5ea503f22 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java @@ -19,7 +19,6 @@ package org.apache.hadoop.ozone.recon.api; import com.google.inject.AbstractModule; -import com.google.inject.Guice; import com.google.inject.Injector; import com.google.inject.Singleton; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -50,9 +49,6 @@ import org.apache.hadoop.ozone.recon.api.types.DatanodesResponse; import org.apache.hadoop.ozone.recon.api.types.PipelineMetadata; import org.apache.hadoop.ozone.recon.api.types.PipelinesResponse; -import org.apache.hadoop.ozone.recon.persistence.AbstractSqlDatabaseTest; -import org.apache.hadoop.ozone.recon.persistence.DataSourceConfiguration; -import org.apache.hadoop.ozone.recon.persistence.JooqPersistenceModule; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; @@ -60,7 +56,6 @@ import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; import org.apache.hadoop.test.LambdaTestUtils; import org.hadoop.ozone.recon.schema.ReconTaskSchemaDefinition; -import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; import org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao; import org.jooq.Configuration; import org.junit.Assert; @@ -72,7 +67,6 @@ import static org.mockito.Mockito.when; import javax.ws.rs.core.Response; -import java.io.File; import java.io.IOException; import java.util.UUID; import java.util.concurrent.Callable; @@ -98,25 +92,6 @@ public class TestEndpoints extends AbstractOMMetadataManagerTest { private Pipeline pipeline; private void initializeInjector() throws IOException { - - File tempDir = temporaryFolder.newFolder(); - - AbstractSqlDatabaseTest.DataSourceConfigurationProvider - configurationProvider = - new AbstractSqlDatabaseTest.DataSourceConfigurationProvider(tempDir); - - JooqPersistenceModule persistenceModule = - new JooqPersistenceModule(configurationProvider); - - Injector sqlInjector = Guice.createInjector(persistenceModule, - new AbstractModule() { - @Override - public void configure() { - bind(DataSourceConfiguration.class) - .toProvider(configurationProvider); - } - }); - reconOMMetadataManager = getTestMetadataManager( initializeNewOmMetadataManager()); OzoneManagerServiceProviderImpl omServiceProviderMock = @@ -134,7 +109,7 @@ protected void configure() { pipelineId = pipeline.getId().getId().toString(); Configuration sqlConfiguration = - sqlInjector.getInstance((Configuration.class)); + parentInjector.getInstance((Configuration.class)); ContainerInfo containerInfo = new ContainerInfo.Builder() .setContainerID(containerId) @@ -147,17 +122,14 @@ protected void configure() { ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline(containerInfo, pipeline); - ReconTaskSchemaDefinition taskSchemaDefinition = sqlInjector + ReconTaskSchemaDefinition taskSchemaDefinition = parentInjector .getInstance(ReconTaskSchemaDefinition.class); taskSchemaDefinition.initializeSchema(); ReconTaskStatusDao reconTaskStatusDao = new ReconTaskStatusDao(sqlConfiguration); - 
MissingContainersDao missingContainersDao = - new MissingContainersDao(sqlConfiguration); bind(ReconTaskStatusDao.class).toInstance(reconTaskStatusDao); - bind(MissingContainersDao.class).toInstance(missingContainersDao); StorageContainerLocationProtocol mockScmClient = mock( StorageContainerLocationProtocol.class); @@ -431,6 +403,6 @@ private void waitAndCheckConditionAfterHeartbeat(Callable check) .setDatanodeDetails(datanodeDetailsProto) .build(); reconScm.getDatanodeProtocolServer().sendHeartbeat(heartbeatRequestProto); - LambdaTestUtils.await(30000, 2000, check); + LambdaTestUtils.await(30000, 1000, check); } } diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java index d546a33316c..639373c086c 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,18 +21,19 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import java.io.IOException; -import java.sql.SQLException; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.ozone.recon.persistence.AbstractSqlDatabaseTest; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; +import org.apache.hadoop.test.LambdaTestUtils; import org.hadoop.ozone.recon.schema.ReconTaskSchemaDefinition; import org.hadoop.ozone.recon.schema.UtilizationSchemaDefinition; import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; @@ -49,7 +50,7 @@ public class TestMissingContainerTask extends AbstractSqlDatabaseTest { @Test - public void testRun() throws IOException, SQLException, InterruptedException { + public void testRun() throws Exception { Configuration sqlConfiguration = getInjector().getInstance((Configuration.class)); @@ -64,13 +65,23 @@ public void testRun() throws IOException, SQLException, InterruptedException { ReconStorageContainerManagerFacade scmMock = mock(ReconStorageContainerManagerFacade.class); ContainerManager containerManagerMock = mock(ContainerManager.class); + ContainerReplica unhealthyReplicaMock = mock(ContainerReplica.class); + when(unhealthyReplicaMock.getState()).thenReturn(State.UNHEALTHY); + ContainerReplica healthyReplicaMock = mock(ContainerReplica.class); + when(healthyReplicaMock.getState()).thenReturn(State.CLOSED); when(scmMock.getContainerManager()).thenReturn(containerManagerMock); when(containerManagerMock.getContainerIDs()) .thenReturn(getMockContainerIDs(3)); + // return one HEALTHY and one UNHEALTHY replica for container ID 1 when(containerManagerMock.getContainerReplicas(new ContainerID(1L))) - .thenReturn(Collections.singleton(mock(ContainerReplica.class))); + .thenReturn(Collections.unmodifiableSet( + new HashSet<>( + 
Arrays.asList(healthyReplicaMock, unhealthyReplicaMock) + ))); + // return one UNHEALTHY replica for container ID 2 when(containerManagerMock.getContainerReplicas(new ContainerID(2L))) - .thenReturn(Collections.singleton(mock(ContainerReplica.class))); + .thenReturn(Collections.singleton(unhealthyReplicaMock)); + // return 0 replicas for container ID 3 when(containerManagerMock.getContainerReplicas(new ContainerID(3L))) .thenReturn(Collections.emptySet()); @@ -89,17 +100,20 @@ public void testRun() throws IOException, SQLException, InterruptedException { missingContainersDao); missingContainerTask.register(); missingContainerTask.start(); - Thread.sleep(5000L); + LambdaTestUtils.await(6000, 1000, () -> + (missingContainersTableHandle.findAll().size() == 2)); all = missingContainersTableHandle.findAll(); - Assert.assertEquals(1, all.size()); - Assert.assertEquals(3, all.get(0).getContainerId().longValue()); - + // Container IDs 2 and 3 should be present in the missing containers table + Set missingContainerIDs = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(2L, 3L)) + ); + Assert.assertTrue(all.stream().allMatch(r -> + missingContainerIDs.contains(r.getContainerId()))); ReconTaskStatus taskStatus = reconTaskStatusDao.findById(missingContainerTask.getTaskName()); Assert.assertTrue(taskStatus.getLastUpdatedTimestamp() > currentTime); - } private Set getMockContainerIDs(int num) { diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java index 834355e3e37..d147e58ade5 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -33,6 +33,8 @@ import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; import org.apache.hadoop.ozone.recon.spi.impl.ReconContainerDBProvider; import org.apache.hadoop.hdds.utils.db.DBStore; +import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; +import org.jooq.Configuration; import org.junit.Assert; import org.junit.rules.TemporaryFolder; @@ -71,7 +73,7 @@ default Injector getInjector( JooqPersistenceModule jooqPersistenceModule = new JooqPersistenceModule(configurationProvider); - return Guice.createInjector(jooqPersistenceModule, + Injector baseInjector = Guice.createInjector(jooqPersistenceModule, new AbstractModule() { @Override protected void configure() { @@ -93,13 +95,24 @@ protected void configure() { bind(DBStore.class).toProvider(ReconContainerDBProvider.class). 
in(Singleton.class); - bind(ContainerDBServiceProvider.class).to( - ContainerDBServiceProviderImpl.class).in(Singleton.class); } catch (IOException e) { Assert.fail(); } } }); + + return baseInjector.createChildInjector(new AbstractModule() { + @Override + protected void configure() { + Configuration sqlConfiguration = + baseInjector.getInstance((Configuration.class)); + MissingContainersDao missingContainersDao = + new MissingContainersDao(sqlConfiguration); + bind(MissingContainersDao.class).toInstance(missingContainersDao); + bind(ContainerDBServiceProvider.class).to( + ContainerDBServiceProviderImpl.class).in(Singleton.class); + } + }); } /** From 07fcb79e8253c19d9537772ab8f3d82c51a0220f Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Thu, 26 Mar 2020 19:59:45 +0100 Subject: [PATCH 14/38] HDDS-3284. ozonesecure-mr test fails due to lack of disk space (#725) --- hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config index 3786bba0a10..f0b7f5cb22d 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config @@ -99,6 +99,8 @@ YARN-SITE.XML_yarn.resourcemanager.system.metrics.publisher.enabled=true YARN-SITE.XML_yarn.log-aggregation-enable=true YARN-SITE.XML_yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds=3600 YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600 +YARN-SITE.XML_yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage=99 +YARN-SITE.XML_yarn.nodemanager.disk-health-checker.enable=false # Yarn LinuxContainer requires the /opt/hadoop/etc/hadoop to be owned by root and not modifiable by other users, # which prevents start.sh from changing the configurations based on docker-config From 3d285686944480499ab789bbe18866ae5874979c Mon Sep 17 00:00:00 2001 From: Neo Yang Date: Fri, 27 Mar 2020 13:17:42 +0800 Subject: [PATCH 15/38] HDDS-3074. Make the configuration of container scrub consistent. (#722) --- .../container/ozoneimpl/ContainerScrubberConfiguration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java index c7c55de45e1..bfc5c50b255 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java @@ -25,7 +25,7 @@ /** * This class defines configuration parameters for container scrubber. **/ -@ConfigGroup(prefix = "hdds.containerscrub") +@ConfigGroup(prefix = "hdds.container.scrub") public class ContainerScrubberConfiguration { @Config(key = "enabled", From 7d132ce38d5d8aeb3b72e770f99881888c2753ee Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Fri, 27 Mar 2020 15:29:59 +0800 Subject: [PATCH 16/38] HDDS-3179. 
Pipeline placement based on Topology does not have fallback (#678) --- .../hadoop/hdds/protocol/DatanodeDetails.java | 2 +- .../scm/pipeline/PipelinePlacementPolicy.java | 37 +++-- .../hdds/scm/container/MockNodeManager.java | 10 +- .../pipeline/TestPipelinePlacementPolicy.java | 145 ++++++++++++++---- 4 files changed, 145 insertions(+), 49 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index a235a4b2b46..28ed36d964b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -70,7 +70,7 @@ private DatanodeDetails(String uuid, String ipAddress, String hostName, this.certSerialId = certSerialId; } - protected DatanodeDetails(DatanodeDetails datanodeDetails) { + public DatanodeDetails(DatanodeDetails datanodeDetails) { super(datanodeDetails.getHostName(), datanodeDetails.getNetworkLocation(), datanodeDetails.getCost()); this.uuid = datanodeDetails.uuid; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 0f30449c975..e96b12026c3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -99,9 +99,8 @@ boolean meetCriteria(DatanodeDetails datanodeDetails, int nodesRequired) { try { pipeline = stateManager.getPipeline(pid); } catch (PipelineNotFoundException e) { - LOG.error("Pipeline not found in pipeline state manager during" + - " pipeline creation. PipelineID: " + pid + - " exception: " + e.getMessage()); + LOG.debug("Pipeline not found in pipeline state manager during" + + " pipeline creation. PipelineID: {}", pid, e); continue; } if (pipeline != null && @@ -282,26 +281,32 @@ public List getResultSet( LOG.debug("Second node chosen: {}", nextNode); } } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + - "rack based on rack awareness."); - } + LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + + "rack based on rack awareness. anchor: {}", anchor); } // Then choose nodes close to anchor based on network topology int nodesToFind = nodesRequired - results.size(); for (int x = 0; x < nodesToFind; x++) { // Pick remaining nodes based on the existence of rack awareness. - DatanodeDetails pick = rackAwareness - ? chooseNodeFromNetworkTopology( - nodeManager.getClusterNetworkTopologyMap(), anchor, exclude) - : fallBackPickNodes(healthyNodes, exclude); + DatanodeDetails pick = null; + if (rackAwareness) { + pick = chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, exclude); + } + // fall back protection + if (pick == null) { + pick = fallBackPickNodes(healthyNodes, exclude); + if (rackAwareness) { + LOG.debug("Failed to choose node based on topology. 
Fallback " + + "picks node as: {}", pick); + } + } + if (pick != null) { results.add(pick); exclude.add(pick); - if (LOG.isDebugEnabled()) { - LOG.debug("Remaining node chosen: {}", pick); - } + LOG.debug("Remaining node chosen: {}", pick); } } @@ -414,6 +419,10 @@ protected DatanodeDetails chooseNodeFromNetworkTopology( Node pick = networkTopology.chooseRandom( anchor.getNetworkLocation(), excluded); DatanodeDetails pickedNode = (DatanodeDetails) pick; + if (pickedNode == null) { + LOG.debug("Pick node is null, excluded nodes {}, anchor {}.", + excluded, anchor); + } return pickedNode; } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index cbeef7f67ab..f15bfdd7bcd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -94,6 +94,7 @@ public class MockNodeManager implements NodeManager { private ConcurrentMap> dnsToUuidMap; public MockNodeManager(NetworkTopologyImpl clusterMap, + List nodes, boolean initializeFakeNodes, int nodeCount) { this.healthyNodes = new LinkedList<>(); this.staleNodes = new LinkedList<>(); @@ -104,6 +105,13 @@ public MockNodeManager(NetworkTopologyImpl clusterMap, this.dnsToUuidMap = new ConcurrentHashMap<>(); this.aggregateStat = new SCMNodeStat(); this.clusterMap = clusterMap; + if (!nodes.isEmpty()) { + for (int x = 0; x < nodes.size(); x++) { + DatanodeDetails node = nodes.get(x); + register(node, null, null); + populateNodeMetric(node, x); + } + } if (initializeFakeNodes) { for (int x = 0; x < nodeCount; x++) { DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails(); @@ -116,7 +124,7 @@ public MockNodeManager(NetworkTopologyImpl clusterMap, } public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { - this(new NetworkTopologyImpl(new OzoneConfiguration()), + this(new NetworkTopologyImpl(new OzoneConfiguration()), new ArrayList<>(), initializeFakeNodes, nodeCount); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index daad80834c5..fafc4b0acec 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.MockNodeManager; @@ -35,6 +36,9 @@ import java.util.stream.Collectors; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; +import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; /** * Test for PipelinePlacementPolicy. 
@@ -43,25 +47,55 @@ public class TestPipelinePlacementPolicy { private MockNodeManager nodeManager; private OzoneConfiguration conf; private PipelinePlacementPolicy placementPolicy; + private NetworkTopologyImpl cluster; private static final int PIPELINE_PLACEMENT_MAX_NODES_COUNT = 10; + private List nodesWithOutRackAwareness = new ArrayList<>(); + private List nodesWithRackAwareness = new ArrayList<>(); + @Before public void init() throws Exception { - nodeManager = new MockNodeManager(true, - PIPELINE_PLACEMENT_MAX_NODES_COUNT); + cluster = initTopology(); + // start with nodes with rack awareness. + nodeManager = new MockNodeManager(cluster, getNodesWithRackAwareness(), + false, PIPELINE_PLACEMENT_MAX_NODES_COUNT); conf = new OzoneConfiguration(); conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 5); placementPolicy = new PipelinePlacementPolicy( nodeManager, new PipelineStateManager(), conf); } + private NetworkTopologyImpl initTopology() { + NodeSchema[] schemas = new NodeSchema[] + {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA}; + NodeSchemaManager.getInstance().init(schemas, true); + NetworkTopologyImpl topology = + new NetworkTopologyImpl(NodeSchemaManager.getInstance()); + return topology; + } + + private List getNodesWithRackAwareness() { + List datanodes = new ArrayList<>(); + for (Node node : NODES) { + DatanodeDetails datanode = overwriteLocationInNode( + getNodesWithoutRackAwareness(), node); + nodesWithRackAwareness.add(datanode); + datanodes.add(datanode); + } + return datanodes; + } + + private DatanodeDetails getNodesWithoutRackAwareness() { + DatanodeDetails node = MockDatanodeDetails.randomDatanodeDetails(); + nodesWithOutRackAwareness.add(node); + return node; + } + @Test - public void testChooseNodeBasedOnNetworkTopology() { - List healthyNodes = - nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); - DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + public void testChooseNodeBasedOnNetworkTopology() throws SCMException { + DatanodeDetails anchor = placementPolicy.chooseNode(nodesWithRackAwareness); // anchor should be removed from healthyNodes after being chosen. - Assert.assertFalse(healthyNodes.contains(anchor)); + Assert.assertFalse(nodesWithRackAwareness.contains(anchor)); List excludedNodes = new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT); @@ -69,10 +103,42 @@ public void testChooseNodeBasedOnNetworkTopology() { DatanodeDetails nextNode = placementPolicy.chooseNodeFromNetworkTopology( nodeManager.getClusterNetworkTopologyMap(), anchor, excludedNodes); Assert.assertFalse(excludedNodes.contains(nextNode)); - // nextNode should not be the same as anchor. + // next node should not be the same as anchor. Assert.assertTrue(anchor.getUuid() != nextNode.getUuid()); + // next node should be on the same rack based on topology. + Assert.assertEquals(anchor.getNetworkLocation(), + nextNode.getNetworkLocation()); } + @Test + public void testChooseNodeWithSingleNodeRack() throws SCMException { + // There is only one node on 3 racks altogether. 
+ List datanodes = new ArrayList<>(); + for (Node node : SINGLE_NODE_RACK) { + DatanodeDetails datanode = overwriteLocationInNode( + MockDatanodeDetails.randomDatanodeDetails(), node); + datanodes.add(datanode); + } + MockNodeManager localNodeManager = new MockNodeManager(initTopology(), + datanodes, false, datanodes.size()); + PipelinePlacementPolicy localPlacementPolicy = new PipelinePlacementPolicy( + localNodeManager, new PipelineStateManager(), conf); + int nodesRequired = HddsProtos.ReplicationFactor.THREE.getNumber(); + List results = localPlacementPolicy.chooseDatanodes( + new ArrayList<>(datanodes.size()), + new ArrayList<>(datanodes.size()), + nodesRequired, 0); + + Assert.assertEquals(nodesRequired, results.size()); + // 3 nodes should be on different racks. + Assert.assertNotEquals(results.get(0).getNetworkLocation(), + results.get(1).getNetworkLocation()); + Assert.assertNotEquals(results.get(0).getNetworkLocation(), + results.get(2).getNetworkLocation()); + Assert.assertNotEquals(results.get(1).getNetworkLocation(), + results.get(2).getNetworkLocation()); + } + @Test public void testChooseNodeBasedOnRackAwareness() { List healthyNodes = overWriteLocationInNodes( @@ -84,8 +150,9 @@ public void testChooseNodeBasedOnRackAwareness() { healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), topologyWithDifRacks, anchor); Assert.assertNotNull(nextNode); - Assert.assertFalse(anchor.getNetworkLocation().equals( - nextNode.getNetworkLocation())); + // next node should be on a different rack. + Assert.assertNotEquals(anchor.getNetworkLocation(), + nextNode.getNetworkLocation()); } @Test @@ -115,25 +182,25 @@ public void testFallBackPickNodes() { @Test public void testRackAwarenessNotEnabledWithFallBack() throws SCMException{ - List healthyNodes = - nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); - DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); - DatanodeDetails randomNode = placementPolicy.chooseNode(healthyNodes); + DatanodeDetails anchor = placementPolicy + .chooseNode(nodesWithOutRackAwareness); + DatanodeDetails randomNode = placementPolicy + .chooseNode(nodesWithOutRackAwareness); // rack awareness is not enabled. Assert.assertTrue(anchor.getNetworkLocation().equals( randomNode.getNetworkLocation())); NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( - healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), - topology, anchor); + nodesWithOutRackAwareness, new ArrayList<>( + PIPELINE_PLACEMENT_MAX_NODES_COUNT), topology, anchor); // RackAwareness should not be able to choose any node. Assert.assertNull(nextNode); // PlacementPolicy should still be able to pick a set of 3 nodes. int numOfNodes = HddsProtos.ReplicationFactor.THREE.getNumber(); List results = placementPolicy - .getResultSet(numOfNodes, healthyNodes); + .getResultSet(numOfNodes, nodesWithOutRackAwareness); Assert.assertEquals(numOfNodes, results.size()); // All nodes are on same rack. 
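Stepping back from the diff: the "fall back protection" added by HDDS-3179, and exercised by the tests above, reduces to one pattern — prefer the topology-aware candidate, and fall back to a random healthy node when the topology yields nothing. The sketch below is a minimal illustration of that pattern only; pickWithFallback and its surrounding class are hypothetical stand-ins, not the actual PipelinePlacementPolicy API.

// Minimal sketch of the fallback pattern; illustrative only, not the
// real PipelinePlacementPolicy code. The topology-aware pick may be
// null, which is exactly the case the patch now guards against.
import java.util.List;
import java.util.Random;

public final class FallbackPickSketch {
  private static final Random RANDOM = new Random();

  // Returns the rack-aware pick when one exists, otherwise any healthy
  // node; null only when the healthy list is exhausted.
  static <N> N pickWithFallback(N topologyPick, List<N> healthyNodes) {
    if (topologyPick != null) {
      return topologyPick;
    }
    if (healthyNodes.isEmpty()) {
      return null;
    }
    return healthyNodes.get(RANDOM.nextInt(healthyNodes.size()));
  }
}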
@@ -146,14 +213,20 @@ public void testRackAwarenessNotEnabledWithFallBack() throws SCMException{ private final static Node[] NODES = new NodeImpl[] { new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), new NodeImpl("h2", "/r1", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h3", "/r1", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h4", "/r1", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h5", "/r2", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h6", "/r2", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h7", "/r2", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h8", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h3", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h4", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h5", "/r3", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h6", "/r3", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h7", "/r4", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h8", "/r4", NetConstants.NODE_COST_DEFAULT), }; + // 3 racks with single node. + private final static Node[] SINGLE_NODE_RACK = new NodeImpl[] { + new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h2", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h3", "/r3", NetConstants.NODE_COST_DEFAULT) + }; private NetworkTopology createNetworkTopologyOnDifRacks() { NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); @@ -163,20 +236,26 @@ private NetworkTopology createNetworkTopologyOnDifRacks() { return topology; } + private DatanodeDetails overwriteLocationInNode( + DatanodeDetails datanode, Node node) { + DatanodeDetails result = DatanodeDetails.newBuilder() + .setUuid(datanode.getUuidString()) + .setHostName(datanode.getHostName()) + .setIpAddress(datanode.getIpAddress()) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.STANDALONE)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.RATIS)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.REST)) + .setNetworkLocation(node.getNetworkLocation()).build(); + return result; + } + private List overWriteLocationInNodes( List datanodes) { List results = new ArrayList<>(datanodes.size()); for (int i = 0; i < datanodes.size(); i++) { - DatanodeDetails datanode = datanodes.get(i); - DatanodeDetails result = DatanodeDetails.newBuilder() - .setUuid(datanode.getUuidString()) - .setHostName(datanode.getHostName()) - .setIpAddress(datanode.getIpAddress()) - .addPort(datanode.getPort(DatanodeDetails.Port.Name.STANDALONE)) - .addPort(datanode.getPort(DatanodeDetails.Port.Name.RATIS)) - .addPort(datanode.getPort(DatanodeDetails.Port.Name.REST)) - .setNetworkLocation(NODES[i].getNetworkLocation()).build(); - results.add(result); + DatanodeDetails datanode = overwriteLocationInNode( + datanodes.get(i), NODES[i]); + results.add(datanode); } return results; } From eece60420285330e21153d73d682c5eb3bc5458e Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Fri, 27 Mar 2020 12:25:33 -0700 Subject: [PATCH 17/38] HDDS-3281. 
Add timeouts to all robot tests (#723) --- .../dist/src/main/smoketest/auditparser/auditparser.robot | 1 + hadoop-ozone/dist/src/main/smoketest/basic/basic.robot | 1 + hadoop-ozone/dist/src/main/smoketest/env-compose.robot | 2 +- hadoop-ozone/dist/src/main/smoketest/freon/freon.robot | 1 + hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot | 1 + .../dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot | 2 +- hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot | 1 + hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot | 1 + hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot | 1 + hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/webui.robot | 1 + hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot | 1 + hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot | 1 + .../dist/src/main/smoketest/security/ozone-secure-fs.robot | 1 + .../dist/src/main/smoketest/security/ozone-secure-s3.robot | 1 + 24 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot b/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot index 1caae755694..4e90a44b529 100644 --- a/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot +++ b/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot @@ -18,6 +18,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${user} hadoop diff --git a/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot b/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot index c7b43ecd069..bbd19456ed1 100644 --- a/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot +++ b/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot @@ -17,6 +17,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${DATANODE_HOST} datanode diff --git a/hadoop-ozone/dist/src/main/smoketest/env-compose.robot b/hadoop-ozone/dist/src/main/smoketest/env-compose.robot index d529d7f02f2..d21eacaea50 100644 --- a/hadoop-ozone/dist/src/main/smoketest/env-compose.robot +++ b/hadoop-ozone/dist/src/main/smoketest/env-compose.robot @@ -16,7 +16,7 @@ *** Settings *** Documentation High level utilities to execute commands and tests in docker-compose based environments. 
Resource commonlib.robot - +Test Timeout 5 minutes *** Keywords *** diff --git a/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot b/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot index 6bf4dbbcab0..83cc8655402 100644 --- a/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot +++ b/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot @@ -17,6 +17,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Resource ../commonlib.robot +Test Timeout 5 minutes *** Test Cases *** Freon Randomkey Generator diff --git a/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot b/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot index c2bf7dba749..91e41fce609 100644 --- a/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot +++ b/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library BuiltIn Library String Resource ../commonlib.robot +Test Timeout 5 minutes Suite Setup Generate volume *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot b/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot index d468accff79..66804f9b512 100644 --- a/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot +++ b/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot @@ -17,7 +17,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Resource ../commonlib.robot - +Test Timeout 5 minutes *** Test Cases *** diff --git a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot index e890b6812fe..8c5a706961f 100644 --- a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot +++ b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library SSHLibrary Library Collections Resource ../commonlib.robot +Test Timeout 8 minutes *** Variables *** ${SECURITY_ENABLED} false diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot index 8d12a526ea4..3336b39603b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot @@ -18,6 +18,7 @@ Documentation Test ozone fs with hadoopfs Library OperatingSystem Library String Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${DATANODE_HOST} datanode diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot index 89472f2c3f5..8be67935b30 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot @@ -17,6 +17,7 @@ Documentation Ozonefs test Library OperatingSystem Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot b/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot index 28910ded575..621bbd0de83 100644 --- a/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot +++ b/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://recon:9888 diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot b/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot index d9c1671bcb3..004a4964591 100644 
--- a/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Keywords *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot b/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot index 8762d5dac6f..8af0b4c2ba2 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource ./commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot b/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot index 76cbbb85df3..17762bc3108 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot b/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot index ef7bc2d017e..76668716cdb 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot b/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot index 0b7f5d4aa8b..709c226559b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot index 292b3330657..c1b4953e152 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot index 33fda108a24..b3faf7e1aa3 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot index 95b71f4ceb6..6e22d4cfb70 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource 
../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot index f6146611144..40bcccb42de 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot b/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot index 4b2f88e25b8..180b6ed56dc 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource ./commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot index ed1173d7999..57fa9ac39e4 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot @@ -18,6 +18,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot index f411e0c3af6..77c22787f46 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot @@ -18,6 +18,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot index 3df74f9c36a..4e368591d30 100644 --- a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://s3g:9878 diff --git a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot index 9f5e1224503..5103e80279c 100644 --- a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot +++ b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot @@ -20,6 +20,7 @@ Library String Library BuiltIn Resource ../commonlib.robot Resource ../s3/commonawslib.robot +Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://s3g:9878 From a661a75f5d1fcbe964c7fb0d53f23cdfbaea2ecb Mon Sep 17 00:00:00 2001 From: Rakesh Radhakrishnan Date: Sat, 28 Mar 2020 09:47:54 +0530 Subject: [PATCH 18/38] HDDS-3288: Update default RPC handler SCM/OM count to 100 (#729) --- .../main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 +- hadoop-hdds/common/src/main/resources/ozone-default.xml | 4 ++-- .../main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index fefcfca08aa..c397bc5a472 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -221,7 +221,7 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_HANDLER_COUNT_KEY = "ozone.scm.handler.count.key"; - public static final int OZONE_SCM_HANDLER_COUNT_DEFAULT = 10; + public static final int OZONE_SCM_HANDLER_COUNT_DEFAULT = 100; public static final String OZONE_SCM_SECURITY_HANDLER_COUNT_KEY = "ozone.scm.security.handler.count.key"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ca107c0e869..65db7dfcd4e 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -505,7 +505,7 @@ ozone.om.handler.count.key - 20 + 100 OM, PERFORMANCE The number of RPC handler threads for OM service endpoints. @@ -918,7 +918,7 @@ ozone.scm.handler.count.key - 10 + 100 OZONE, MANAGEMENT, PERFORMANCE The number of RPC handler threads for each SCM service diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 51ff17f7e44..f46b30854e2 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -37,7 +37,7 @@ private OMConfigKeys() { public static final String OZONE_OM_HANDLER_COUNT_KEY = "ozone.om.handler.count.key"; - public static final int OZONE_OM_HANDLER_COUNT_DEFAULT = 20; + public static final int OZONE_OM_HANDLER_COUNT_DEFAULT = 100; public static final String OZONE_OM_INTERNAL_SERVICE_ID = "ozone.om.internal.service.id"; From 099ab62176107204a156b01472c6af69d255c38e Mon Sep 17 00:00:00 2001 From: Siddharth Date: Sat, 28 Mar 2020 11:29:34 -0700 Subject: [PATCH 19/38] HDDS-3273. getConf does not return all OM addresses. (#727) --- .../java/org/apache/hadoop/ozone/OmUtils.java | 32 +++++++++++++++++++ .../hadoop/ozone/freon/OzoneGetConf.java | 6 +++- .../org/apache/hadoop/ozone/TestOmUtils.java | 25 +++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java index 87522e3d650..3552e79f413 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java @@ -29,8 +29,12 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.stream.Collectors; @@ -89,6 +93,34 @@ public static InetSocketAddress getOmAddress(Configuration conf) { return NetUtils.createSocketAddr(getOmRpcAddress(conf)); } + /** + * Return list of OM addresses by service ids - when HA is enabled. 
+ * + * @param conf {@link Configuration} + * @return {service.id -> [{@link InetSocketAddress}]} + */ + public static Map> getOmHAAddressesById( + Configuration conf) { + Map> result = new HashMap<>(); + for (String serviceId : conf.getTrimmedStringCollection( + OZONE_OM_SERVICE_IDS_KEY)) { + if (!result.containsKey(serviceId)) { + result.put(serviceId, new ArrayList<>()); + } + for (String nodeId : getOMNodeIds(conf, serviceId)) { + String rpcAddr = getOmRpcAddress(conf, + addKeySuffixes(OZONE_OM_ADDRESS_KEY, serviceId, nodeId)); + if (rpcAddr != null) { + result.get(serviceId).add(NetUtils.createSocketAddr(rpcAddr)); + } else { + LOG.warn("Address undefined for nodeId: {} for service {}", nodeId, + serviceId); + } + } + } + return result; + } + /** * Retrieve the socket address that is used by OM. * @param conf diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java index 3c60e5956d4..83283d473eb 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java @@ -253,7 +253,11 @@ static class OzoneManagersCommandHandler extends CommandHandler { @Override public int doWorkInternal(OzoneGetConf tool, String[] args) throws IOException { - tool.printOut(OmUtils.getOmAddress(tool.getConf()).getHostName()); + if (OmUtils.isServiceIdsDefined(tool.getConf())) { + tool.printOut(OmUtils.getOmHAAddressesById(tool.getConf()).toString()); + } else { + tool.printOut(OmUtils.getOmAddress(tool.getConf()).getHostName()); + } return 0; } } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java index 7f374102754..cdbb786a77c 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; import org.apache.hadoop.io.IOUtils; import org.junit.Rule; @@ -31,9 +32,13 @@ import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; +import java.net.InetSocketAddress; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -113,6 +118,26 @@ public void createOMDirThrowsIfCannotCreate() throws IOException { // expecting exception } + @Test + public void testGetOmHAAddressesById() { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(OZONE_OM_SERVICE_IDS_KEY, "ozone1"); + conf.set("ozone.om.nodes.ozone1", "node1,node2,node3"); + conf.set("ozone.om.address.ozone1.node1", "1.1.1.1"); + conf.set("ozone.om.address.ozone1.node2", "1.1.1.2"); + conf.set("ozone.om.address.ozone1.node3", "1.1.1.3"); + Map> addresses = + OmUtils.getOmHAAddressesById(conf); + assertFalse(addresses.isEmpty()); + List rpcAddrs = addresses.get("ozone1"); + assertFalse(rpcAddrs.isEmpty()); + assertTrue(rpcAddrs.stream().anyMatch( + a -> a.getAddress().getHostAddress().equals("1.1.1.1"))); + 
assertTrue(rpcAddrs.stream().anyMatch( + a -> a.getAddress().getHostAddress().equals("1.1.1.2"))); + assertTrue(rpcAddrs.stream().anyMatch( + a -> a.getAddress().getHostAddress().equals("1.1.1.3"))); + } } class TestDBCheckpoint implements DBCheckpoint { From 9c829fbd30b0e47f24903297c68ab5c9a53d1c24 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 23 Mar 2020 04:11:07 -0700 Subject: [PATCH 20/38] HDDS-3234. Fix retry interval default in Ozone client. (#698) --- .../src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 +- hadoop-hdds/common/src/main/resources/ozone-default.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index c88169893f3..fefcfca08aa 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -117,7 +117,7 @@ public final class ScmConfigKeys { "dfs.ratis.client.request.retry.interval"; public static final TimeDuration DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT = - TimeDuration.valueOf(1000, TimeUnit.MILLISECONDS); + TimeDuration.valueOf(15000, TimeUnit.MILLISECONDS); public static final String DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY = "dfs.ratis.server.retry-cache.timeout.duration"; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 69f62ad9374..ad400d766af 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -252,7 +252,7 @@ dfs.ratis.client.request.retry.interval - 1000ms + 15000ms OZONE, RATIS, MANAGEMENT Interval between successive retries for a ratis client request. 
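Together with HDDS-3235 below, this tunes the client retry policy: fewer retries, spaced further apart. As a rough, hypothetical illustration (not part of the patch) of how the new 15-second default is resolved, a plain Hadoop Configuration lookup would look like this:

import org.apache.hadoop.conf.Configuration;
import java.util.concurrent.TimeUnit;

public class RetryIntervalExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Falls back to the patched 15000 ms default when the key is not
    // set explicitly in the site configuration.
    long retryIntervalMs = conf.getTimeDuration(
        "dfs.ratis.client.request.retry.interval",
        15000, TimeUnit.MILLISECONDS);
    System.out.println("retry interval = " + retryIntervalMs + " ms");
  }
}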
From 021bed6ba6e5db56b1ba752ed117431a813e4715 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 23 Mar 2020 16:54:47 +0530 Subject: [PATCH 21/38] HDDS-3235. Change the default max retry count for Ozone client (#699) --- .../src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java | 2 +- hadoop-hdds/common/src/main/resources/ozone-default.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 2fa9ff91f6f..c71e0d66e52 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -143,7 +143,7 @@ public final class OzoneConfigKeys { public static final String OZONE_CLIENT_MAX_RETRIES = "ozone.client.max.retries"; - public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 100; + public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 5; public static final String OZONE_CLIENT_RETRY_INTERVAL = "ozone.client.retry.interval"; public static final TimeDuration OZONE_CLIENT_RETRY_INTERVAL_DEFAULT = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ad400d766af..ca107c0e869 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -412,7 +412,7 @@ ozone.client.max.retries - 100 + 5 OZONE, CLIENT Maximum number of retries by Ozone Client on encountering exception while writing a key. From da528e02f3eb688ac79ec3cf307b784516cb0c3b Mon Sep 17 00:00:00 2001 From: Mukul Kumar Singh Date: Tue, 24 Mar 2020 14:38:58 +0530 Subject: [PATCH 22/38] HDDS-2878. Refactor MiniOzoneLoadGenerator to add more load generators to chaos testing.
(#438) --- .../hadoop/ozone/MiniOzoneChaosCluster.java | 6 + .../hadoop/ozone/MiniOzoneLoadGenerator.java | 265 ++++-------------- .../ozone/TestMiniChaosOzoneCluster.java | 31 +- .../loadgenerators/AgedLoadGenerator.java | 98 +++++++ .../ozone/loadgenerators/DataBuffer.java | 53 ++++ .../FilesystemLoadGenerator.java | 69 +++++ .../ozone/loadgenerators/LoadExecutors.java | 101 +++++++ .../ozone/loadgenerators/LoadGenerator.java | 37 +++ .../loadgenerators/RandomLoadGenerator.java | 68 +++++ .../apache/hadoop/ozone/utils/LoadBucket.java | 23 +- .../src/test/resources/log4j.properties | 1 + 11 files changed, 509 insertions(+), 243 deletions(-) create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java create mode 100644 hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java index 67923cc1571..22cb3b4dc14 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java @@ -242,6 +242,12 @@ protected void initializeConfiguration() throws IOException { 1, TimeUnit.SECONDS); conf.setTimeDuration(HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL, 1, TimeUnit.SECONDS); + conf.setInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_KEY, + 4); + conf.setInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_KEY, + 2); conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2); conf.setInt("hdds.scm.replication.thread.interval", 10 * 1000); conf.setInt("hdds.scm.replication.event.timeout", 20 * 1000); diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java index 521f172ae47..d1256b1670b 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java @@ -17,26 +17,23 @@ */ package org.apache.hadoop.ozone; -import org.apache.commons.lang3.RandomUtils; -import org.apache.hadoop.conf.StorageUnit; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.loadgenerators.FilesystemLoadGenerator; +import 
org.apache.hadoop.ozone.loadgenerators.AgedLoadGenerator; +import org.apache.hadoop.ozone.loadgenerators.RandomLoadGenerator; +import org.apache.hadoop.ozone.loadgenerators.DataBuffer; +import org.apache.hadoop.ozone.loadgenerators.LoadExecutors; +import org.apache.hadoop.ozone.loadgenerators.LoadGenerator; import org.apache.hadoop.ozone.utils.LoadBucket; -import org.apache.hadoop.ozone.utils.TestProbability; -import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; -import java.util.Optional; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.CompletableFuture; +import java.util.function.Function; import java.util.concurrent.TimeUnit; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; /** * A Simple Load generator for testing. @@ -46,213 +43,59 @@ public class MiniOzoneLoadGenerator { private static final Logger LOG = LoggerFactory.getLogger(MiniOzoneLoadGenerator.class); - private static String keyNameDelimiter = "_"; - - private ThreadPoolExecutor writeExecutor; - private int numThreads; - // number of buffer to be allocated, each is allocated with length which - // is multiple of 2, each buffer is populated with random data. - private int numBuffers; - private List buffers; - - private AtomicBoolean isIOThreadRunning; - - private final List ozoneBuckets; - - private final AtomicInteger agedFileWrittenIndex; - private final ExecutorService agedFileExecutor; - private final LoadBucket agedLoadBucket; - private final TestProbability agedWriteProbability; - - private final ThreadPoolExecutor fsExecutor; - private final LoadBucket fsBucket; - - MiniOzoneLoadGenerator(List bucket, - LoadBucket agedLoadBucket, LoadBucket fsBucket, - int numThreads, int numBuffers) { - this.ozoneBuckets = bucket; - this.numThreads = numThreads; - this.numBuffers = numBuffers; - this.writeExecutor = createExecutor(); - - this.agedFileWrittenIndex = new AtomicInteger(0); - this.agedFileExecutor = Executors.newSingleThreadExecutor(); - this.agedLoadBucket = agedLoadBucket; - this.agedWriteProbability = TestProbability.valueOf(10); - - this.fsExecutor = createExecutor(); - this.fsBucket = fsBucket; - - this.isIOThreadRunning = new AtomicBoolean(false); - - // allocate buffers and populate random data. - buffers = new ArrayList<>(); - for (int i = 0; i < numBuffers; i++) { - int size = (int) StorageUnit.KB.toBytes(1 << i); - ByteBuffer buffer = ByteBuffer.allocate(size); - buffer.put(RandomUtils.nextBytes(size)); - buffers.add(buffer); - } - } - - private ThreadPoolExecutor createExecutor() { - ThreadPoolExecutor executor = new ThreadPoolExecutor(numThreads, numThreads, - 100, TimeUnit.SECONDS, new ArrayBlockingQueue<>(1024), - new ThreadPoolExecutor.CallerRunsPolicy()); - executor.prestartAllCoreThreads(); - return executor; - - } - - // Start IO load on an Ozone bucket. 
- private void load(long runTimeMillis) { - long threadID = Thread.currentThread().getId(); - LOG.info("Started Mixed IO Thread:{}.", threadID); - String threadName = Thread.currentThread().getName(); - long startTime = Time.monotonicNow(); - - while (isIOThreadRunning.get() && - (Time.monotonicNow() < startTime + runTimeMillis)) { - LoadBucket bucket = - ozoneBuckets.get((int) (Math.random() * ozoneBuckets.size())); - try { - int index = RandomUtils.nextInt(); - ByteBuffer buffer = getBuffer(index); - String keyName = getKeyName(index, threadName); - bucket.writeKey(buffer, keyName); - - bucket.readKey(buffer, keyName); - - bucket.deleteKey(keyName); - } catch (Exception e) { - LOG.error("LOADGEN: Exiting due to exception", e); - break; - } + private final List loadExecutors; + + private final OzoneVolume volume; + private final OzoneConfiguration conf; + + MiniOzoneLoadGenerator(OzoneVolume volume, int numClients, int numThreads, + int numBuffers, OzoneConfiguration conf) + throws Exception { + DataBuffer buffer = new DataBuffer(numBuffers); + loadExecutors = new ArrayList<>(); + this.volume = volume; + this.conf = conf; + + // Random Load + String mixBucketName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); + volume.createBucket(mixBucketName); + List ozoneBuckets = new ArrayList<>(numClients); + for (int i = 0; i < numClients; i++) { + ozoneBuckets.add(new LoadBucket(volume.getBucket(mixBucketName), + conf)); } - // This will terminate other threads too. - isIOThreadRunning.set(false); - LOG.info("Terminating IO thread:{}.", threadID); - } - - private Optional randomKeyToRead() { - int currentIndex = agedFileWrittenIndex.get(); - return currentIndex != 0 - ? Optional.of(RandomUtils.nextInt(0, currentIndex)) - : Optional.empty(); - } - - private void startAgedLoad(long runTimeMillis) { - long threadID = Thread.currentThread().getId(); - LOG.info("AGED LOADGEN: Started Aged IO Thread:{}.", threadID); - String threadName = Thread.currentThread().getName(); - long startTime = Time.monotonicNow(); - - while (isIOThreadRunning.get() && - (Time.monotonicNow() < startTime + runTimeMillis)) { + RandomLoadGenerator loadGenerator = + new RandomLoadGenerator(buffer, ozoneBuckets); + loadExecutors.add(new LoadExecutors(numThreads, loadGenerator)); - String keyName = null; - try { - if (agedWriteProbability.isTrue()) { - int index = agedFileWrittenIndex.getAndIncrement(); - ByteBuffer buffer = getBuffer(index); - keyName = getKeyName(index, threadName); + // Aged Load + addLoads(numThreads, + bucket -> new AgedLoadGenerator(buffer, bucket)); - agedLoadBucket.writeKey(buffer, keyName); - } else { - Optional index = randomKeyToRead(); - if (index.isPresent()) { - ByteBuffer buffer = getBuffer(index.get()); - keyName = getKeyName(index.get(), threadName); - agedLoadBucket.readKey(buffer, keyName); - } - } - } catch (Throwable t) { - LOG.error("AGED LOADGEN: {} Exiting due to exception", keyName, t); - break; - } - } - // This will terminate other threads too. - isIOThreadRunning.set(false); - LOG.info("Terminating IO thread:{}.", threadID); + //Filesystem Load + addLoads(numThreads, + bucket -> new FilesystemLoadGenerator(buffer, bucket)); } - // Start IO load on an Ozone bucket. 
- private void startFsLoad(long runTimeMillis) { - long threadID = Thread.currentThread().getId(); - LOG.info("Started Filesystem IO Thread:{}.", threadID); - String threadName = Thread.currentThread().getName(); - long startTime = Time.monotonicNow(); - - while (isIOThreadRunning.get() && - (Time.monotonicNow() < startTime + runTimeMillis)) { - try { - int index = RandomUtils.nextInt(); - ByteBuffer buffer = getBuffer(index); - String keyName = getKeyName(index, threadName); - fsBucket.writeKey(true, buffer, keyName); - - fsBucket.readKey(true, buffer, keyName); - - fsBucket.deleteKey(true, keyName); - } catch (Exception e) { - LOG.error("LOADGEN: Exiting due to exception", e); - break; - } - } - // This will terminate other threads too. - isIOThreadRunning.set(false); - LOG.info("Terminating IO thread:{}.", threadID); + private void addLoads(int numThreads, + Function function) + throws Exception { + String bucketName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); + volume.createBucket(bucketName); + LoadBucket bucket = new LoadBucket(volume.getBucket(bucketName), conf); + LoadGenerator loadGenerator = function.apply(bucket); + loadExecutors.add(new LoadExecutors(numThreads, loadGenerator)); } void startIO(long time, TimeUnit timeUnit) { - List> writeFutures = new ArrayList<>(); - LOG.info("Starting MiniOzoneLoadGenerator for time {}:{} with {} buffers " + - "and {} threads", time, timeUnit, numBuffers, numThreads); - if (isIOThreadRunning.compareAndSet(false, true)) { - // Start the IO thread - for (int i = 0; i < numThreads; i++) { - writeFutures.add( - CompletableFuture.runAsync(() -> load(timeUnit.toMillis(time)), - writeExecutor)); - } - - for (int i = 0; i < numThreads; i++) { - writeFutures.add( - CompletableFuture.runAsync(() -> startAgedLoad( - timeUnit.toMillis(time)), agedFileExecutor)); - } - - for (int i = 0; i < numThreads; i++) { - writeFutures.add( - CompletableFuture.runAsync(() -> startFsLoad( - timeUnit.toMillis(time)), fsExecutor)); - } - - // Wait for IO to complete - for (CompletableFuture f : writeFutures) { - try { - f.get(); - } catch (Throwable t) { - LOG.error("startIO failed with exception", t); - } - } - } - } - - public void shutdownLoadGenerator() { - try { - writeExecutor.shutdown(); - writeExecutor.awaitTermination(1, TimeUnit.DAYS); - } catch (Exception e) { - LOG.error("error while closing ", e); - } - } - - private ByteBuffer getBuffer(int keyIndex) { - return buffers.get(keyIndex % numBuffers); + LOG.info("Starting MiniOzoneLoadGenerator for time {}:{}", time, timeUnit); + long runTime = timeUnit.toMillis(time); + // start and wait for executors to finish + loadExecutors.forEach(le -> le.startLoad(runTime)); + loadExecutors.forEach(LoadExecutors::waitForCompletion); } - private String getKeyName(int keyIndex, String threadName) { - return threadName + keyNameDelimiter + keyIndex; + void shutdownLoadGenerator() { + loadExecutors.forEach(LoadExecutors::shutdown); } } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java index 5d20a15f3bf..0fa9a149a85 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java @@ -20,7 +20,6 @@ import 
org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.client.ObjectStore; -import org.apache.hadoop.ozone.utils.LoadBucket; import org.apache.hadoop.ozone.client.OzoneVolume; import org.junit.BeforeClass; import org.junit.AfterClass; @@ -30,8 +29,6 @@ import picocli.CommandLine.Option; import picocli.CommandLine; -import java.util.ArrayList; -import java.util.List; import java.util.concurrent.TimeUnit; /** @@ -77,35 +74,13 @@ public static void init() throws Exception { cluster.waitForClusterToBeReady(); String volumeName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); - String bucketName = RandomStringUtils.randomAlphabetic(10).toLowerCase(); ObjectStore store = cluster.getRpcClient().getObjectStore(); store.createVolume(volumeName); OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - List ozoneBuckets = new ArrayList<>(numClients); - for (int i = 0; i < numClients; i++) { - ozoneBuckets.add(new LoadBucket(volume.getBucket(bucketName), - configuration)); - } - - String agedBucketName = - RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - volume.createBucket(agedBucketName); - LoadBucket agedLoadBucket = - new LoadBucket(volume.getBucket(agedBucketName), configuration); - - String fsBucketName = - RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - volume.createBucket(fsBucketName); - LoadBucket fsBucket = - new LoadBucket(volume.getBucket(fsBucketName), configuration); - loadGenerator = - new MiniOzoneLoadGenerator(ozoneBuckets, agedLoadBucket, fsBucket, - numThreads, numBuffers); + new MiniOzoneLoadGenerator(volume, numClients, numThreads, + numBuffers, configuration); } /** @@ -140,6 +115,6 @@ public static void main(String... args) { @Test public void testReadWriteWithChaosCluster() { cluster.startChaos(5, 10, TimeUnit.SECONDS); - loadGenerator.startIO(1, TimeUnit.MINUTES); + loadGenerator.startIO(120, TimeUnit.SECONDS); } } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java new file mode 100644 index 00000000000..766343d6899 --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/AgedLoadGenerator.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.loadgenerators; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.ozone.utils.LoadBucket; +import org.apache.hadoop.ozone.utils.TestProbability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Aged Load Generator for Ozone. + * + * This Load Generator reads and write key to an Ozone bucket. + * + * The default writes to read ratio is 10:90. + */ +public class AgedLoadGenerator extends LoadGenerator { + + private static final Logger LOG = + LoggerFactory.getLogger(AgedLoadGenerator.class); + private static String agedSuffix = "aged"; + + private final AtomicInteger agedFileWrittenIndex; + private final AtomicInteger agedFileAllocationIndex; + private final LoadBucket agedLoadBucket; + private final TestProbability agedWriteProbability; + private final DataBuffer dataBuffer; + + public AgedLoadGenerator(DataBuffer data, LoadBucket agedLoadBucket) { + this.dataBuffer = data; + this.agedFileWrittenIndex = new AtomicInteger(0); + this.agedFileAllocationIndex = new AtomicInteger(0); + this.agedLoadBucket = agedLoadBucket; + this.agedWriteProbability = TestProbability.valueOf(10); + } + + @Override + public String generateLoad() throws Exception { + if (agedWriteProbability.isTrue()) { + synchronized (agedFileAllocationIndex) { + int index = agedFileAllocationIndex.getAndIncrement(); + ByteBuffer buffer = dataBuffer.getBuffer(index); + String keyName = getKeyName(index, agedSuffix); + agedLoadBucket.writeKey(buffer, keyName); + agedFileWrittenIndex.getAndIncrement(); + return keyName; + } + } else { + Optional index = randomKeyToRead(); + if (index.isPresent()) { + ByteBuffer buffer = dataBuffer.getBuffer(index.get()); + String keyName = getKeyName(index.get(), agedSuffix); + agedLoadBucket.readKey(buffer, keyName); + return keyName; + } else { + return "NoKey"; + } + } + } + + private Optional randomKeyToRead() { + int currentIndex = agedFileWrittenIndex.get(); + return currentIndex != 0 + ? Optional.of(RandomUtils.nextInt(0, currentIndex)) + : Optional.empty(); + } + + @Override + public void initialize() { + // Nothing to do here + } + + @Override + public String name() { + return "Aged"; + } +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java new file mode 100644 index 00000000000..43126eec92e --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/DataBuffer.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.loadgenerators; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.conf.StorageUnit; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +/** + * List of buffers used by the load generators. + */ +public class DataBuffer { + private List buffers; + // number of buffer to be allocated, each is allocated with length which + // is multiple of 2, each buffer is populated with random data. + private int numBuffers; + + public DataBuffer(int numBuffers) { + // allocate buffers and populate random data. + this.numBuffers = numBuffers; + this.buffers = new ArrayList<>(); + for (int i = 0; i < numBuffers; i++) { + int size = (int) StorageUnit.KB.toBytes(1 << i); + ByteBuffer buffer = ByteBuffer.allocate(size); + buffer.put(RandomUtils.nextBytes(size)); + this.buffers.add(buffer); + } + } + + public ByteBuffer getBuffer(int keyIndex) { + return buffers.get(keyIndex % numBuffers); + } + +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java new file mode 100644 index 00000000000..557c73bd5df --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/FilesystemLoadGenerator.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.loadgenerators; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.ozone.utils.LoadBucket; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; + +/** + * Filesystem load generator for Ozone. + * + * This load generator read, writes and deletes data using the filesystem + * apis. 
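+ * Each iteration writes a key through the Ozone FileSystem interface,
+ * reads it back for verification and finally deletes it.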
+ */ +public class FilesystemLoadGenerator extends LoadGenerator { + private static final Logger LOG = + LoggerFactory.getLogger(FilesystemLoadGenerator.class); + + + private final LoadBucket fsBucket; + private final DataBuffer dataBuffer; + + public FilesystemLoadGenerator(DataBuffer dataBuffer, LoadBucket fsBucket) { + this.dataBuffer = dataBuffer; + this.fsBucket = fsBucket; + } + + @Override + public String generateLoad() throws Exception { + int index = RandomUtils.nextInt(); + ByteBuffer buffer = dataBuffer.getBuffer(index); + String keyName = getKeyName(index, name()); + fsBucket.writeKey(true, buffer, keyName); + + fsBucket.readKey(true, buffer, keyName); + + fsBucket.deleteKey(true, keyName); + return keyName; + } + + @Override + public void initialize() { + // Nothing to do here + } + + @Override + public String name() { + return "FileSystem"; + } +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java new file mode 100644 index 00000000000..5e34fb45590 --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadExecutors.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.loadgenerators; + +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** + * Load executors for Ozone, this class provides a plugable + * executor for different load generators. 
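+ * Each executor runs its generator loop on a fixed-size thread pool
+ * for the requested duration; any generator failure aborts the run
+ * via ExitUtil.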
+ */ +public class LoadExecutors { + private static final Logger LOG = + LoggerFactory.getLogger(LoadExecutors.class); + + private final LoadGenerator generator; + private final int numThreads; + private final ExecutorService executor; + private final List> futures = new ArrayList<>(); + + public LoadExecutors(int numThreads, LoadGenerator generator) { + this.numThreads = numThreads; + this.generator = generator; + this.executor = Executors.newFixedThreadPool(numThreads); + } + + private void load(long runTimeMillis) { + long threadID = Thread.currentThread().getId(); + LOG.info("{} LOADGEN: Started Aged IO Thread:{}.", + generator.name(), threadID); + long startTime = Time.monotonicNow(); + + while (Time.monotonicNow() - startTime < runTimeMillis) { + + String keyName = null; + try { + keyName = generator.generateLoad(); + } catch (Throwable t) { + LOG.error("{} LOADGEN: {} Exiting due to exception", + generator.name(), keyName, t); + ExitUtil.terminate(new ExitUtil.ExitException(1, t)); + break; + } + } + } + + + public void startLoad(long time) { + LOG.info("Starting {} threads for {}", numThreads, generator.name()); + generator.initialize(); + for (int i = 0; i < numThreads; i++) { + futures.add(CompletableFuture.runAsync( + () -> load(time), executor)); + } + } + + public void waitForCompletion() { + // Wait for IO to complete + for (CompletableFuture f : futures) { + try { + f.get(); + } catch (Throwable t) { + LOG.error("startIO failed with exception", t); + } + } + } + + public void shutdown() { + try { + executor.shutdown(); + executor.awaitTermination(1, TimeUnit.DAYS); + } catch (Exception e) { + LOG.error("error while closing ", e); + } + } +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java new file mode 100644 index 00000000000..014a46f40a3 --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/LoadGenerator.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.loadgenerators; + +/** + * Interface for load generator. 
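+ * Implementations provide initialize(), generateLoad() and name();
+ * getKeyName() builds key names from a prefix and a numeric index.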
+ */ +public abstract class LoadGenerator { + + private final String keyNameDelimiter = "_"; + + public abstract void initialize(); + + public abstract String generateLoad() throws Exception; + + public abstract String name(); + + String getKeyName(int keyIndex, String prefix) { + return prefix + keyNameDelimiter + keyIndex; + } +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java new file mode 100644 index 00000000000..a9fc41c2fcb --- /dev/null +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/loadgenerators/RandomLoadGenerator.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.loadgenerators; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.ozone.utils.LoadBucket; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; +import java.util.List; + +/** + * Random load generator which writes, read and deletes keys from + * the bucket. 
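+ * Each iteration picks one of the supplied buckets at random, writes
+ * a key, reads it back and finally deletes it.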
+ */ +public class RandomLoadGenerator extends LoadGenerator { + private static final Logger LOG = + LoggerFactory.getLogger(RandomLoadGenerator.class); + + private final List ozoneBuckets; + private final DataBuffer dataBuffer; + + public RandomLoadGenerator(DataBuffer dataBuffer, List buckets) { + this.ozoneBuckets = buckets; + this.dataBuffer = dataBuffer; + } + + @Override + public String generateLoad() throws Exception { + LoadBucket bucket = + ozoneBuckets.get((int) (Math.random() * ozoneBuckets.size())); + int index = RandomUtils.nextInt(); + ByteBuffer buffer = dataBuffer.getBuffer(index); + String keyName = getKeyName(index, name()); + bucket.writeKey(buffer, keyName); + + bucket.readKey(buffer, keyName); + + bucket.deleteKey(keyName); + return keyName; + } + + public void initialize() { + // Nothing to do here + } + + @Override + public String name() { + return "Random"; + } +} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java index 8b44fdc5786..2fb92d1885c 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/utils/LoadBucket.java @@ -110,7 +110,7 @@ abstract class Op { } public void execute() throws Exception { - LOG.info("Going to {} key {}", this.opName, keyName); + LOG.info("Going to {}", this); try { if (fsOp) { Path p = new Path("/", keyName); @@ -119,9 +119,9 @@ public void execute() throws Exception { doBucketOp(keyName); } doPostOp(); - LOG.trace("Done: {} key {}", this.opName, keyName); + LOG.trace("Done: {}", this); } catch (Throwable t) { - LOG.error("Unable to {} key:{}", this.opName, keyName, t); + LOG.error("Unable to {}", this, t); throw t; } } @@ -132,7 +132,7 @@ public void execute() throws Exception { @Override public String toString() { - return "opType=" + opName; + return "opType=" + opName + " keyName=" + keyName; } } @@ -167,6 +167,11 @@ void doPostOp() throws IOException { os.close(); } } + + @Override + public String toString() { + return super.toString() + " buffer:" + buffer.limit(); + } } /** @@ -213,6 +218,11 @@ void doPostOp() throws IOException { is.close(); } } + + @Override + public String toString() { + return super.toString() + " buffer:" + buffer.limit(); + } } /** @@ -237,5 +247,10 @@ void doBucketOp(String key) throws IOException { void doPostOp() { // Nothing to do here } + + @Override + public String toString() { + return super.toString(); + } } } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties index a7684a5c317..11c6bf61431 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties @@ -24,6 +24,7 @@ log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR log4j.logger.org.apache.ratis.grpc.client.GrpcClientProtocolClient=WARN log4j.logger.org.apache.hadoop.ozone.utils=DEBUG,stdout,CHAOS +log4j.logger.org.apache.hadoop.ozone.loadgenerator=DEBUG,stdout,CHAOS log4j.appender.CHAOS.File=${chaoslogfilename} log4j.appender.CHAOS=org.apache.log4j.FileAppender log4j.appender.CHAOS.layout=org.apache.log4j.PatternLayout From 
725ceed9785700ddcfab694436a8d424174ad8d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elek=20M=C3=A1rton?= Date: Tue, 24 Mar 2020 16:00:10 +0100 Subject: [PATCH 23/38] Revert "HDDS-3142. Create isolated enviornment for OM to test it without SCM. (#656)" This reverts commit 281faf3a2ee4cd624ece0ed05165c164d5b4774f. --- dev-support/byteman/mock-scm.btm | 34 ------ .../hdds/freon/FakeClusterTopology.java | 92 ---------------- .../FakeScmBlockLocationProtocolClient.java | 100 ------------------ ...akeScmContainerLocationProtocolClient.java | 76 ------------- .../hadoop/hdds/freon/package-info.java | 24 ----- ...ocationProtocolClientSideTranslatorPB.java | 7 +- .../hadoop/hdds/utils/db/DBProfile.java | 1 + 7 files changed, 2 insertions(+), 332 deletions(-) delete mode 100644 dev-support/byteman/mock-scm.btm delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java delete mode 100644 hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java diff --git a/dev-support/byteman/mock-scm.btm b/dev-support/byteman/mock-scm.btm deleted file mode 100644 index a291e28f215..00000000000 --- a/dev-support/byteman/mock-scm.btm +++ /dev/null @@ -1,34 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -RULE mock scm block client -CLASS org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB -METHOD submitRequest -AT ENTRY -BIND client:org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB = $0; - result:org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$SCMBlockLocationResponse = org.apache.hadoop.hdds.freon.FakeScmBlockLocationProtocolClient.submitRequest($1); -IF true -DO return result; -ENDRULE - -RULE mock scm container client -CLASS org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB -METHOD submitRpcRequest -AT ENTRY -BIND client:org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB = $0; - result:org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos$ScmContainerLocationResponse = org.apache.hadoop.hdds.freon.FakeScmContainerLocationProtocolClient.submitRequest($1); -IF true -DO return result; -ENDRULE \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java deleted file mode 100644 index 347323a5f95..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeClusterTopology.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.freon; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.UUID; - -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.Pipeline; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.Port; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.hdds.scm.pipeline.PipelineID; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Class to store pre-generated topology information for load-tests. - */ -public class FakeClusterTopology { - - private static final Logger LOGGER = - LoggerFactory.getLogger(FakeClusterTopology.class); - - public static final FakeClusterTopology INSTANCE = new FakeClusterTopology(); - - private List datanodes = new ArrayList<>(); - - private List pipelines = new ArrayList<>(); - - private Random random = new Random(); - - public FakeClusterTopology() { - try { - for (int i = 0; i < 9; i++) { - datanodes.add(createDatanode(i)); - if ((i + 1) % 3 == 0) { - pipelines.add(Pipeline.newBuilder() - .setId(PipelineID.randomId().getProtobuf()) - .setFactor(ReplicationFactor.THREE) - .setType(ReplicationType.RATIS) - .addMembers(getDatanode(i - 2)) - .addMembers(getDatanode(i - 1)) - .addMembers(getDatanode(i)) - .build()); - } - } - } catch (Exception ex) { - LOGGER.error("Can't initialize FakeClusterTopology", ex); - } - } - - private DatanodeDetailsProto createDatanode(int index) { - return DatanodeDetailsProto.newBuilder() - .setUuid(UUID.randomUUID().toString()) - .setHostName("localhost") - .setIpAddress("127.0.0.1") - .addPorts( - Port.newBuilder().setName("RATIS").setValue(1234)) - .build(); - } - - public DatanodeDetailsProto getDatanode(int i) { - return datanodes.get(i); - } - - public Pipeline getRandomPipeline() { - return pipelines.get(random.nextInt(pipelines.size())); - } - - public List getAllDatanodes() { - return datanodes; - } -} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java deleted file mode 100644 index f1e7e0f8893..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmBlockLocationProtocolClient.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.freon; - -import java.io.IOException; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerBlockID; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.GetScmInfoResponseProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateBlockResponse; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto.Builder; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationRequest; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationResponse; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Status; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Type; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Fake SCM client to return a simulated block location. - */ -public final class FakeScmBlockLocationProtocolClient { - - private static final Logger LOGGER = - LoggerFactory.getLogger(FakeScmBlockLocationProtocolClient.class); - - public static final int BLOCK_PER_CONTAINER = 1000; - - private static AtomicLong counter = new AtomicLong(); - - private FakeScmBlockLocationProtocolClient() { - } - - public static SCMBlockLocationResponse submitRequest( - SCMBlockLocationRequest req) - throws IOException { - try { - if (req.getCmdType() == Type.GetScmInfo) { - return SCMBlockLocationResponse.newBuilder() - .setCmdType(req.getCmdType()) - .setStatus(Status.OK) - .setSuccess(true) - .setGetScmInfoResponse( - GetScmInfoResponseProto.newBuilder() - .setScmId("scm-id") - .setClusterId("cluster-id") - .build() - ) - .build(); - } else if (req.getCmdType() == Type.AllocateScmBlock) { - Builder allocateBlockResponse = - AllocateScmBlockResponseProto.newBuilder(); - for (int i = 0; - i < req.getAllocateScmBlockRequest().getNumBlocks(); i++) { - long seq = counter.incrementAndGet(); - - allocateBlockResponse.addBlocks(AllocateBlockResponse.newBuilder() - .setPipeline(FakeClusterTopology.INSTANCE.getRandomPipeline()) - .setContainerBlockID(ContainerBlockID.newBuilder() - .setContainerID(seq / BLOCK_PER_CONTAINER) - .setLocalID(seq)) - ); - } - return SCMBlockLocationResponse.newBuilder() - .setCmdType(req.getCmdType()) - .setStatus(Status.OK) - .setSuccess(true) - .setAllocateScmBlockResponse( - allocateBlockResponse - ) - .build(); - } else { - throw new IllegalArgumentException( - "Unsupported request. 
Fake answer is not implemented for " + req - .getCmdType()); - } - } catch (Exception ex) { - LOGGER.error("Error on creating fake SCM response", ex); - return null; - } - } - -} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java deleted file mode 100644 index 771f357d56f..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/FakeScmContainerLocationProtocolClient.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.freon; - -import java.io.IOException; - -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.Node; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.NodeQueryResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.NodeQueryResponseProto.Builder; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse.Status; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Fake SCM client to return a simulated block location. - */ -public final class FakeScmContainerLocationProtocolClient { - - private static final Logger LOGGER = - LoggerFactory.getLogger(FakeScmContainerLocationProtocolClient.class); - - private FakeScmContainerLocationProtocolClient() { - } - - public static ScmContainerLocationResponse submitRequest( - ScmContainerLocationRequest req) - throws IOException { - try { - if (req.getCmdType() == Type.QueryNode) { - Builder builder = NodeQueryResponseProto.newBuilder(); - for (DatanodeDetailsProto datanode : FakeClusterTopology.INSTANCE - .getAllDatanodes()) { - builder.addDatanodes(Node.newBuilder() - .setNodeID(datanode) - .addNodeStates(NodeState.HEALTHY) - .build()); - } - - return ScmContainerLocationResponse.newBuilder() - .setCmdType(Type.QueryNode) - .setStatus(Status.OK) - .setNodeQueryResponse(builder.build()) - .build(); - } else { - throw new IllegalArgumentException( - "Unsupported request. Fake answer is not implemented for " + req - .getCmdType()); - } - } catch (Exception ex) { - LOGGER.error("Error on creating fake SCM response", ex); - return null; - } - } - -} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java deleted file mode 100644 index 381c81100e4..00000000000 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/freon/package-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *
- * Freon related helper classes used for load testing. - */ - -/** - * Freon related helper classes used for load testing. - */ -package org.apache.hadoop.hdds.freon; \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index dffae117de2..7582b4402c8 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -111,18 +111,13 @@ private ScmContainerLocationResponse submitRequest( builderConsumer.accept(builder); ScmContainerLocationRequest wrapper = builder.build(); - response = submitRpcRequest(wrapper); + response = rpcProxy.submitRequest(NULL_RPC_CONTROLLER, wrapper); } catch (ServiceException ex) { throw ProtobufHelper.getRemoteException(ex); } return response; } - private ScmContainerLocationResponse submitRpcRequest( - ScmContainerLocationRequest wrapper) throws ServiceException { - return rpcProxy.submitRequest(NULL_RPC_CONTROLLER, wrapper); - } - /** * Asks SCM where a container should be allocated. SCM responds with the set * of datanodes that should be used creating this container. Ozone/SCM only diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java index b9b7ef76fc8..347b83b48cf 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBProfile.java @@ -65,6 +65,7 @@ public ColumnFamilyOptions getColumnFamilyOptions() { new BlockBasedTableConfig() .setBlockCache(new LRUCache(blockCacheSize)) .setBlockSize(blockSize) + .setCacheIndexAndFilterBlocks(true) .setPinL0FilterAndIndexBlocksInCache(true) .setFilterPolicy(new BloomFilter())); } From d4e4be76b6336514b3e824b82271af8279fbdf29 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Singh Date: Wed, 25 Mar 2020 15:43:36 +0530 Subject: [PATCH 24/38] HDDS-3250. Create a separate log file for Warnings and Errors in MiniOzoneChaosCluster. 
(#711) --- .../mini-chaos-tests/src/test/bin/start-chaos.sh | 4 ++++ .../mini-chaos-tests/src/test/resources/log4j.properties | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh index 1546bbd490d..c02fa962223 100755 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/bin/start-chaos.sh @@ -19,6 +19,7 @@ date=$(date +"%Y-%m-%d-%H-%M-%S-%Z") logfiledirectory="/tmp/chaos-${date}/" completesuffix="complete.log" chaossuffix="chaos.log" +problemsuffix="problem.log" compilesuffix="compile.log" heapformat="dump.hprof" @@ -30,6 +31,8 @@ chaosfilename="${logfiledirectory}${chaossuffix}" compilefilename="${logfiledirectory}${compilesuffix}" #log goes to something like /tmp/2019-12-04--00-01-26-IST/dump.hprof heapdumpfile="${logfiledirectory}${heapformat}" +#log goes to something like /tmp/2019-12-04--00-01-26-IST/problem.log +problemfilename="${logfiledirectory}${problemsuffix}" #TODO: add gc log file details as well MVN_OPTS="-XX:+HeapDumpOnOutOfMemoryError " @@ -46,6 +49,7 @@ mvn exec:java \ -Dexec.mainClass="org.apache.hadoop.ozone.TestMiniChaosOzoneCluster" \ -Dexec.classpathScope=test \ -Dchaoslogfilename=${chaosfilename} \ + -Dproblemlogfilename=${problemfilename} \ -Dorg.apache.ratis.thirdparty.io.netty.allocator.useCacheForAllThreads=false \ -Dio.netty.leakDetection.level=advanced \ -Dio.netty.leakDetectionLevel=advanced \ diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties index 11c6bf61431..f491fadeb5f 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/resources/log4j.properties @@ -11,7 +11,7 @@ # limitations under the License. # log4j configuration used during build and unit tests -log4j.rootLogger=INFO,stdout +log4j.rootLogger=INFO,stdout,PROBLEM log4j.threshold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout @@ -29,4 +29,11 @@ log4j.appender.CHAOS.File=${chaoslogfilename} log4j.appender.CHAOS=org.apache.log4j.FileAppender log4j.appender.CHAOS.layout=org.apache.log4j.PatternLayout log4j.appender.CHAOS.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n + +log4j.appender.PROBLEM.File=${problemlogfilename} +log4j.appender.PROBLEM.Threshold=WARN +log4j.appender.PROBLEM=org.apache.log4j.FileAppender +log4j.appender.PROBLEM.layout=org.apache.log4j.PatternLayout +log4j.appender.PROBLEM.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n + log4j.additivity.org.apache.hadoop.ozone.utils=false \ No newline at end of file From 28d5d6ab4b177399184e3037c287c0cfcb36153c Mon Sep 17 00:00:00 2001 From: avijayanhwx <14299376+avijayanhwx@users.noreply.github.com> Date: Wed, 25 Mar 2020 09:20:25 -0700 Subject: [PATCH 25/38] HDDS-3243. Recon should not have the ability to send Create/Close Container commands to Datanode. 
(#712) --- .../ozone/recon/TestReconAsPassiveScm.java | 11 +++++++++ .../ozone/recon/scm/ReconNodeManager.java | 23 ++++++++++++++++++- .../ReconStorageContainerManagerFacade.java | 1 + 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java index faf2c0aa302..eabf667e2d2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.RATIS; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CLOSE_CONTAINER; import static org.apache.hadoop.ozone.container.ozoneimpl.TestOzoneContainer.runTestOzoneContainerViaDataNode; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -41,7 +42,9 @@ import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.recon.scm.ReconNodeManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; import org.junit.After; import org.junit.Assert; @@ -126,6 +129,14 @@ public void testDatanodeRegistrationAndReports() throws Exception { // Verify Recon picked up the new container that was created. 
assertEquals(scmContainerManager.getContainerIDs(), reconContainerManager.getContainerIDs()); + + GenericTestUtils.LogCapturer logCapturer = + GenericTestUtils.LogCapturer.captureLogs(ReconNodeManager.LOG); + reconScm.getEventQueue().fireEvent(CLOSE_CONTAINER, + containerInfo.containerID()); + GenericTestUtils.waitFor(() -> logCapturer.getOutput() + .contains("Ignoring unsupported command closeContainerCommand"), + 1000, 20000); } @Test(timeout = 120000) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java index 2febf508571..9a3d5181f15 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.recon.scm; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type.reregisterCommand; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_SCM_NODE_DB; @@ -27,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.node.SCMNodeManager; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; @@ -41,21 +44,26 @@ import org.apache.hadoop.hdds.utils.MetadataStore; import org.apache.hadoop.hdds.utils.MetadataStoreBuilder; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.ozone.recon.ReconUtils; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableSet; + /** * Recon SCM's Node manager that includes persistence. */ public class ReconNodeManager extends SCMNodeManager { - private static final Logger LOG = LoggerFactory + public static final Logger LOG = LoggerFactory .getLogger(ReconNodeManager.class); private final MetadataStore nodeStore; + private final static Set ALLOWED_COMMANDS = + ImmutableSet.of(reregisterCommand); /** * Map that contains mapping between datanodes @@ -132,6 +140,19 @@ public long getLastHeartbeat(DatanodeDetails datanodeDetails) { return datanodeHeartbeatMap.getOrDefault(datanodeDetails.getUuid(), 0L); } + @Override + public void onMessage(CommandForDatanode commandForDatanode, + EventPublisher ignored) { + if (ALLOWED_COMMANDS.contains( + commandForDatanode.getCommand().getType())) { + super.onMessage(commandForDatanode, ignored); + } else { + LOG.info("Ignoring unsupported command {} for Datanode {}.", + commandForDatanode.getCommand().getType(), + commandForDatanode.getDatanodeId()); + } + } + /** * Send heartbeat to indicate the datanode is alive and doing well. 
* diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 7964d6ee17f..9cb6a3193f0 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -129,6 +129,7 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, ContainerActionsHandler actionsHandler = new ContainerActionsHandler(); ReconNewNodeHandler newNodeHandler = new ReconNewNodeHandler(nodeManager); + eventQueue.addHandler(SCMEvents.DATANODE_COMMAND, nodeManager); eventQueue.addHandler(SCMEvents.NODE_REPORT, nodeReportHandler); eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler); eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionHandler); From 2f27380b0236d019eed15874342ab6832628c4d6 Mon Sep 17 00:00:00 2001 From: Vivek Ratnavel Subramanian Date: Wed, 25 Mar 2020 14:46:27 -0700 Subject: [PATCH 26/38] HDDS-3164. Add Recon endpoint to serve missing containers and its metadata. (#714) --- ...KeyService.java => ContainerEndpoint.java} | 54 +++++- .../api/types/MissingContainerMetadata.java | 76 ++++++++ .../api/types/MissingContainersResponse.java | 53 ++++++ .../recon/fsck/MissingContainerTask.java | 10 +- .../recon/spi/ContainerDBServiceProvider.java | 9 + .../impl/ContainerDBServiceProviderImpl.java | 13 +- .../webapps/recon/ozone-recon-web/api/db.json | 8 +- ...ervice.java => TestContainerEndpoint.java} | 173 +++++++++++++----- .../hadoop/ozone/recon/api/TestEndpoints.java | 34 +--- .../recon/fsck/TestMissingContainerTask.java | 36 ++-- .../types/GuiceInjectorUtilsForTests.java | 21 ++- 11 files changed, 389 insertions(+), 98 deletions(-) rename hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/{ContainerKeyService.java => ContainerEndpoint.java} (80%) create mode 100644 hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java create mode 100644 hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java rename hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/{TestContainerKeyService.java => TestContainerEndpoint.java} (67%) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerKeyService.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java similarity index 80% rename from hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerKeyService.java rename to hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index 17ae2b48102..b33db8dab89 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerKeyService.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -20,9 +20,11 @@ import java.io.IOException; import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.stream.Collectors; import javax.ws.rs.DefaultValue; @@ -37,6 +39,9 @@ import javax.inject.Inject; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hdds.scm.container.ContainerID; 
+import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; @@ -46,7 +51,10 @@ import org.apache.hadoop.ozone.recon.api.types.KeyMetadata; import org.apache.hadoop.ozone.recon.api.types.KeyMetadata.ContainerBlockMetadata; import org.apache.hadoop.ozone.recon.api.types.KeysResponse; +import org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata; +import org.apache.hadoop.ozone.recon.api.types.MissingContainersResponse; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconContainerManager; import org.apache.hadoop.ozone.recon.spi.ContainerDBServiceProvider; import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FETCH_COUNT; @@ -60,7 +68,7 @@ */ @Path("/containers") @Produces(MediaType.APPLICATION_JSON) -public class ContainerKeyService { +public class ContainerEndpoint { @Inject private ContainerDBServiceProvider containerDBServiceProvider; @@ -68,6 +76,14 @@ public class ContainerKeyService { @Inject private ReconOMMetadataManager omMetadataManager; + private ReconContainerManager containerManager; + + @Inject + public ContainerEndpoint(OzoneStorageContainerManager reconSCM) { + this.containerManager = + (ReconContainerManager) reconSCM.getContainerManager(); + } + /** * Return @{@link org.apache.hadoop.ozone.recon.api.types.ContainerMetadata} * for the containers starting from the given "prev-key" query param for the @@ -173,7 +189,6 @@ public Response getKeysForContainer( keyMetadata.getBlockIds().put(containerKeyPrefix.getKeyVersion(), blockIds); } - } } @@ -188,6 +203,41 @@ public Response getKeysForContainer( return Response.ok(keysResponse).build(); } + /** + * Return + * {@link org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata} + * for all missing containers. + * + * @return {@link Response} + */ + @GET + @Path("/missing") + public Response getMissingContainers() { + List missingContainers = new ArrayList<>(); + containerDBServiceProvider.getMissingContainers().forEach(container -> { + long containerID = container.getContainerId(); + try { + ContainerInfo containerInfo = + containerManager.getContainer(new ContainerID(containerID)); + long keyCount = containerInfo.getNumberOfKeys(); + UUID pipelineID = containerInfo.getPipelineID().getId(); + + // TODO: Find out which datanodes had replicas of this container + // and populate this list + List datanodes = Collections.emptyList(); + missingContainers.add(new MissingContainerMetadata(containerID, + container.getMissingSince(), keyCount, pipelineID, datanodes)); + } catch (IOException ioEx) { + throw new WebApplicationException(ioEx, + Response.Status.INTERNAL_SERVER_ERROR); + } + }); + MissingContainersResponse response = + new MissingContainersResponse(missingContainers.size(), + missingContainers); + return Response.ok(response).build(); + } + /** * Helper function to extract the blocks for a given container from a given * OM Key. 
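The new GET handler above is reachable under the class-level @Path("/containers") plus the method-level @Path("/missing"). A minimal sketch of probing it over HTTP follows; the host, port, and /api/v1 servlet prefix are assumptions for illustration and are not defined in this patch. A real client would bind the body to MissingContainersResponse with Jackson instead of printing raw JSON.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public final class MissingContainersProbe {
  public static void main(String[] args) throws Exception {
    // Base URL is an assumption; adjust to the Recon HTTP address of
    // your cluster.
    String url = "http://localhost:9888/api/v1/containers/missing";
    HttpClient client = HttpClient.newHttpClient();
    HttpRequest request =
        HttpRequest.newBuilder(URI.create(url)).GET().build();
    HttpResponse<String> response =
        client.send(request, HttpResponse.BodyHandlers.ofString());
    // Expected shape, per MissingContainersResponse above:
    // {"totalCount":1,"containers":[{"containerID":1, ...}]}
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }
}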
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java new file mode 100644 index 00000000000..f24bc5723ac --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainerMetadata.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.recon.api.types; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import java.util.List; +import java.util.UUID; + +/** + * Metadata object that represents a Missing Container. + */ +@XmlAccessorType(XmlAccessType.FIELD) +public class MissingContainerMetadata { + + @XmlElement(name = "containerID") + private long containerID; + + @XmlElement(name = "missingSince") + private long missingSince; + + @XmlElement(name = "keys") + private long keys; + + @XmlElement(name = "pipelineID") + private UUID pipelineID; + + @XmlElement(name = "datanodes") + private List datanodes; + + public MissingContainerMetadata(long containerID, long missingSince, + long keys, UUID pipelineID, + List datanodes) { + this.containerID = containerID; + this.missingSince = missingSince; + this.keys = keys; + this.pipelineID = pipelineID; + this.datanodes = datanodes; + } + + public long getContainerID() { + return containerID; + } + + public long getKeys() { + return keys; + } + + public List getDatanodes() { + return datanodes; + } + + public long getMissingSince() { + return missingSince; + } + + public UUID getPipelineID() { + return pipelineID; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java new file mode 100644 index 00000000000..dd8888c6fe4 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/MissingContainersResponse.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Collection; + +/** + * Class that represents the API Response structure of Missing Containers. + */ +public class MissingContainersResponse { + /** + * Total count of the missing containers. + */ + @JsonProperty("totalCount") + private long totalCount; + + /** + * A collection of missing containers. 
+ */ + @JsonProperty("containers") + private Collection containers; + + public MissingContainersResponse(long totalCount, + Collection + containers) { + this.totalCount = totalCount; + this.containers = containers; + } + + public long getTotalCount() { + return totalCount; + } + + public Collection getContainers() { + return containers; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java index 60d601e1fe2..6db20259f3c 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/MissingContainerTask.java @@ -22,6 +22,7 @@ import javax.inject.Inject; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; @@ -83,8 +84,15 @@ private void processContainer(ContainerID containerID, long currentTime) { try { Set containerReplicas = containerManager.getContainerReplicas(containerID); - if (CollectionUtils.isEmpty(containerReplicas)) { + // check if a container has 0 replicas or if all available replicas + // are marked UNHEALTHY. + boolean isAllUnhealthy = + containerReplicas.stream().allMatch(replica -> + replica.getState().equals(State.UNHEALTHY)); + if (CollectionUtils.isEmpty(containerReplicas) || isAllUnhealthy) { if (!missingContainersDao.existsById(containerID.getId())) { + LOG.info("Found a missing container with ID {}. Adding it to the " + + "database", containerID.getId()); MissingContainers newRecord = new MissingContainers(containerID.getId(), currentTime); missingContainersDao.insert(newRecord); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java index 449eb7d8561..03b66239f02 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ContainerDBServiceProvider.java @@ -19,12 +19,14 @@ package org.apache.hadoop.ozone.recon.spi; import java.io.IOException; +import java.util.List; import java.util.Map; import org.apache.hadoop.hdds.annotation.InterfaceStability; import org.apache.hadoop.ozone.recon.api.types.ContainerKeyPrefix; import org.apache.hadoop.ozone.recon.api.types.ContainerMetadata; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.hadoop.ozone.recon.schema.tables.pojos.MissingContainers; /** * The Recon Container DB Service interface. @@ -161,4 +163,11 @@ void deleteContainerMapping(ContainerKeyPrefix containerKeyPrefix) * @param count no. of new containers to add to containers total count. */ void incrementContainerCountBy(long count); + + /** + * Get all the missing containers. + * + * @return List of MissingContainers. 
+ */ + List getMissingContainers(); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java index 7915e724766..11f8bfe00aa 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ContainerDBServiceProviderImpl.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.sql.Timestamp; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import javax.inject.Inject; @@ -47,7 +48,9 @@ import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.hadoop.ozone.recon.schema.tables.daos.GlobalStatsDao; +import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; import org.hadoop.ozone.recon.schema.tables.pojos.GlobalStats; +import org.hadoop.ozone.recon.schema.tables.pojos.MissingContainers; import org.jooq.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +81,9 @@ public class ContainerDBServiceProviderImpl @Inject private ReconUtils reconUtils; + @Inject + private MissingContainersDao missingContainersDao; + @Inject public ContainerDBServiceProviderImpl(DBStore dbStore, Configuration sqlConfiguration) { @@ -141,9 +147,10 @@ private void initializeTables() { this.containerKeyCountTable = containerDbStore .getTable(CONTAINER_KEY_COUNT_TABLE, Long.class, Long.class); } catch (IOException e) { - LOG.error("Unable to create Container Key tables. {}", e); + LOG.error("Unable to create Container Key tables.", e); } } + /** * Concatenate the containerID and Key Prefix using a delimiter and store the * count into the container DB store. 
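The scrubbing rule introduced in MissingContainerTask above treats a container as missing when it has no replicas at all, or when every replica it does have is UNHEALTHY. A standalone sketch of that predicate is below; ReplicaState is a stand-in enum, not the protobuf State type used in the patch. Note that Stream#allMatch is vacuously true on an empty set, so the explicit emptiness check mirrors the CollectionUtils.isEmpty(...) call in the diff mostly for readability.

import java.util.Set;

public final class MissingContainerPredicate {
  // Stand-in for the protobuf ContainerReplicaProto.State used above.
  enum ReplicaState { OPEN, CLOSED, UNHEALTHY }

  static boolean isMissing(Set<ReplicaState> replicas) {
    // allMatch(...) already returns true for an empty set; the isEmpty
    // check is kept to match the structure of the patched code.
    boolean allUnhealthy = replicas.stream()
        .allMatch(state -> state == ReplicaState.UNHEALTHY);
    return replicas.isEmpty() || allUnhealthy;
  }

  public static void main(String[] args) {
    System.out.println(isMissing(Set.of()));                       // true
    System.out.println(isMissing(Set.of(ReplicaState.UNHEALTHY))); // true
    System.out.println(isMissing(
        Set.of(ReplicaState.CLOSED, ReplicaState.UNHEALTHY)));     // false
  }
}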
@@ -351,6 +358,10 @@ public Map getContainers(int limit, return containers; } + public List getMissingContainers() { + return missingContainersDao.findAll(); + } + @Override public void deleteContainerMapping(ContainerKeyPrefix containerKeyPrefix) throws IOException { diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json index a89427174ad..ab807df415c 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json @@ -361,7 +361,9 @@ "localhost1.storage.enterprise.com", "localhost3.storage.enterprise.com", "localhost5.storage.enterprise.com" - ] + ], + "missingSince": 1578491371528, + "pipelineId": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982" }, { "id": 2, @@ -370,7 +372,9 @@ "localhost1.storage.enterprise.com", "localhost3.storage.enterprise.com", "localhost5.storage.enterprise.com" - ] + ], + "missingSince": 1578491471528, + "pipelineId": "04a5d908-ff01-4ce6-ad75-f3ec73dfc8a2" } ] }, diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerKeyService.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java similarity index 67% rename from hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerKeyService.java rename to hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java index 9cca5a71000..5b373ccadf7 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerKeyService.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -29,13 +29,16 @@ import java.util.Iterator; import java.util.List; import java.util.Map; - -import javax.sql.DataSource; import javax.ws.rs.core.Response; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; @@ -45,14 +48,25 @@ import org.apache.hadoop.ozone.recon.api.types.ContainersResponse; import org.apache.hadoop.ozone.recon.api.types.KeyMetadata; import org.apache.hadoop.ozone.recon.api.types.KeysResponse; +import org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata; +import org.apache.hadoop.ozone.recon.api.types.MissingContainersResponse; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconContainerManager; +import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.ContainerDBServiceProvider; +import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; +import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperTask; import org.apache.hadoop.hdds.utils.db.Table; +import org.hadoop.ozone.recon.schema.ReconTaskSchemaDefinition; import org.hadoop.ozone.recon.schema.StatsSchemaDefinition; -import org.jooq.impl.DSL; -import org.jooq.impl.DefaultConfiguration; +import org.hadoop.ozone.recon.schema.UtilizationSchemaDefinition; +import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; +import org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao; +import org.hadoop.ozone.recon.schema.tables.pojos.MissingContainers; +import org.jooq.Configuration; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -60,33 +74,87 @@ import com.google.inject.Injector; /** - * Test for container key service. + * Test for container endpoint. 
*/ -public class TestContainerKeyService extends AbstractOMMetadataManagerTest { +public class TestContainerEndpoint extends AbstractOMMetadataManagerTest { private ContainerDBServiceProvider containerDbServiceProvider; - private Injector injector; - private OzoneManagerServiceProviderImpl ozoneManagerServiceProvider; - private ContainerKeyService containerKeyService; + private ContainerEndpoint containerEndpoint; private GuiceInjectorUtilsForTestsImpl guiceInjectorTest = new GuiceInjectorUtilsForTestsImpl(); private boolean isSetupDone = false; private ReconOMMetadataManager reconOMMetadataManager; + private MissingContainersDao missingContainersDao; + private ContainerID containerID = new ContainerID(1L); + private PipelineID pipelineID; + private long keyCount = 5L; private void initializeInjector() throws Exception { reconOMMetadataManager = getTestMetadataManager( initializeNewOmMetadataManager()); - ozoneManagerServiceProvider = getMockOzoneManagerServiceProvider(); + OzoneManagerServiceProviderImpl ozoneManagerServiceProvider = + mock(OzoneManagerServiceProviderImpl.class); Injector parentInjector = guiceInjectorTest.getInjector( ozoneManagerServiceProvider, reconOMMetadataManager, temporaryFolder); - injector = parentInjector.createChildInjector(new AbstractModule() { - @Override - protected void configure() { - containerKeyService = new ContainerKeyService(); - bind(ContainerKeyService.class).toInstance(containerKeyService); - } - }); + Pipeline pipeline = getRandomPipeline(); + pipelineID = pipeline.getId(); + + // Mock ReconStorageContainerManagerFacade and other SCM related methods + OzoneStorageContainerManager mockReconSCM = + mock(ReconStorageContainerManagerFacade.class); + ContainerManager mockContainerManager = + mock(ReconContainerManager.class); + + when(mockContainerManager.getContainer(containerID)).thenReturn( + new ContainerInfo.Builder() + .setContainerID(containerID.getId()) + .setNumberOfKeys(keyCount) + .setPipelineID(pipelineID) + .build() + ); + when(mockReconSCM.getContainerManager()) + .thenReturn(mockContainerManager); + + Injector injector = parentInjector.createChildInjector( + new AbstractModule() { + @Override + protected void configure() { + Configuration sqlConfiguration = + parentInjector.getInstance((Configuration.class)); + + try { + ReconTaskSchemaDefinition taskSchemaDefinition = parentInjector + .getInstance(ReconTaskSchemaDefinition.class); + taskSchemaDefinition.initializeSchema(); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + + ReconTaskStatusDao reconTaskStatusDao = + new ReconTaskStatusDao(sqlConfiguration); + + bind(ReconTaskStatusDao.class).toInstance(reconTaskStatusDao); + + StorageContainerServiceProvider mockScmServiceProvider = mock( + StorageContainerServiceProviderImpl.class); + bind(StorageContainerServiceProvider.class) + .toInstance(mockScmServiceProvider); + bind(OzoneStorageContainerManager.class) + .toInstance(mockReconSCM); + bind(ContainerEndpoint.class); + } + }); + containerEndpoint = injector.getInstance(ContainerEndpoint.class); + containerDbServiceProvider = injector.getInstance( + ContainerDBServiceProvider.class); + StatsSchemaDefinition schemaDefinition = injector.getInstance( + StatsSchemaDefinition.class); + schemaDefinition.initializeSchema(); + UtilizationSchemaDefinition utilizationSchemaDefinition = + injector.getInstance(UtilizationSchemaDefinition.class); + utilizationSchemaDefinition.initializeSchema(); + missingContainersDao = injector.getInstance(MissingContainersDao.class); } @Before @@ 
-94,17 +162,6 @@ public void setUp() throws Exception { // The following setup runs only once if (!isSetupDone) { initializeInjector(); - - DSL.using(new DefaultConfiguration().set( - injector.getInstance(DataSource.class))); - - containerDbServiceProvider = injector.getInstance( - ContainerDBServiceProvider.class); - - StatsSchemaDefinition schemaDefinition = injector.getInstance( - StatsSchemaDefinition.class); - schemaDefinition.initializeSchema(); - isSetupDone = true; } @@ -185,7 +242,7 @@ public void setUp() throws Exception { @Test public void testGetKeysForContainer() { - Response response = containerKeyService.getKeysForContainer(1L, -1, ""); + Response response = containerEndpoint.getKeysForContainer(1L, -1, ""); KeysResponse responseObject = (KeysResponse) response.getEntity(); KeysResponse.KeysResponseData data = responseObject.getKeysResponseData(); @@ -214,7 +271,7 @@ public void testGetKeysForContainer() { assertEquals(103, blockIds.get(0L).iterator().next().getLocalID()); assertEquals(104, blockIds.get(1L).iterator().next().getLocalID()); - response = containerKeyService.getKeysForContainer(3L, -1, ""); + response = containerEndpoint.getKeysForContainer(3L, -1, ""); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); keyMetadataList = data.getKeys(); @@ -222,7 +279,7 @@ public void testGetKeysForContainer() { assertEquals(0, data.getTotalCount()); // test if limit works as expected - response = containerKeyService.getKeysForContainer(1L, 1, ""); + response = containerEndpoint.getKeysForContainer(1L, 1, ""); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); keyMetadataList = data.getKeys(); @@ -233,7 +290,7 @@ public void testGetKeysForContainer() { @Test public void testGetKeysForContainerWithPrevKey() { // test if prev-key param works as expected - Response response = containerKeyService.getKeysForContainer( + Response response = containerEndpoint.getKeysForContainer( 1L, -1, "/sampleVol/bucketOne/key_one"); KeysResponse responseObject = @@ -253,7 +310,7 @@ public void testGetKeysForContainerWithPrevKey() { assertEquals(2, keyMetadata.getVersions().size()); assertEquals(2, keyMetadata.getBlockIds().size()); - response = containerKeyService.getKeysForContainer( + response = containerEndpoint.getKeysForContainer( 1L, -1, StringUtils.EMPTY); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); @@ -266,7 +323,7 @@ public void testGetKeysForContainerWithPrevKey() { assertEquals("key_one", keyMetadata.getKey()); // test for negative cases - response = containerKeyService.getKeysForContainer( + response = containerEndpoint.getKeysForContainer( 1L, -1, "/sampleVol/bucketOne/invalid_key"); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); @@ -274,7 +331,7 @@ public void testGetKeysForContainerWithPrevKey() { assertEquals(3, data.getTotalCount()); assertEquals(0, keyMetadataList.size()); - response = containerKeyService.getKeysForContainer( + response = containerEndpoint.getKeysForContainer( 5L, -1, ""); responseObject = (KeysResponse) response.getEntity(); data = responseObject.getKeysResponseData(); @@ -286,7 +343,7 @@ public void testGetKeysForContainerWithPrevKey() { @Test public void testGetContainers() { - Response response = containerKeyService.getContainers(-1, 0L); + Response response = containerEndpoint.getContainers(-1, 0L); ContainersResponse responseObject = (ContainersResponse) 
response.getEntity(); @@ -310,7 +367,7 @@ public void testGetContainers() { assertEquals(2L, containerMetadata.getNumberOfKeys()); // test if limit works as expected - response = containerKeyService.getContainers(1, 0L); + response = containerEndpoint.getContainers(1, 0L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); @@ -321,7 +378,7 @@ public void testGetContainers() { @Test public void testGetContainersWithPrevKey() { - Response response = containerKeyService.getContainers(1, 1L); + Response response = containerEndpoint.getContainers(1, 1L); ContainersResponse responseObject = (ContainersResponse) response.getEntity(); @@ -339,7 +396,7 @@ public void testGetContainersWithPrevKey() { assertEquals(1, containers.size()); assertEquals(2L, containerMetadata.getContainerID()); - response = containerKeyService.getContainers(-1, 0L); + response = containerEndpoint.getContainers(-1, 0L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); @@ -350,14 +407,14 @@ public void testGetContainersWithPrevKey() { assertEquals(1L, containerMetadata.getContainerID()); // test for negative cases - response = containerKeyService.getContainers(-1, 5L); + response = containerEndpoint.getContainers(-1, 5L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); assertEquals(0, containers.size()); assertEquals(2, data.getTotalCount()); - response = containerKeyService.getContainers(-1, -1L); + response = containerEndpoint.getContainers(-1, -1L); responseObject = (ContainersResponse) response.getEntity(); data = responseObject.getContainersResponseData(); containers = new ArrayList<>(data.getContainers()); @@ -365,9 +422,33 @@ public void testGetContainersWithPrevKey() { assertEquals(2, data.getTotalCount()); } - private OzoneManagerServiceProviderImpl getMockOzoneManagerServiceProvider() { - OzoneManagerServiceProviderImpl omServiceProviderMock = - mock(OzoneManagerServiceProviderImpl.class); - return omServiceProviderMock; + @Test + public void testGetMissingContainers() { + Response response = containerEndpoint.getMissingContainers(); + + MissingContainersResponse responseObject = + (MissingContainersResponse) response.getEntity(); + + assertEquals(0, responseObject.getTotalCount()); + assertEquals(Collections.EMPTY_LIST, responseObject.getContainers()); + + // Add missing containers to the database + long missingSince = System.currentTimeMillis(); + MissingContainers newRecord = + new MissingContainers(1L, missingSince); + missingContainersDao.insert(newRecord); + + response = containerEndpoint.getMissingContainers(); + responseObject = (MissingContainersResponse) response.getEntity(); + assertEquals(1, responseObject.getTotalCount()); + MissingContainerMetadata container = + responseObject.getContainers().stream().findFirst().orElse(null); + Assert.assertNotNull(container); + + assertEquals(containerID.getId(), container.getContainerID()); + assertEquals(keyCount, container.getKeys()); + assertEquals(pipelineID.getId(), container.getPipelineID()); + assertEquals(0, container.getDatanodes().size()); + assertEquals(missingSince, container.getMissingSince()); } } \ No newline at end of file diff --git 
a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java index 027db0daabd..6d5ea503f22 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestEndpoints.java @@ -19,7 +19,6 @@ package org.apache.hadoop.ozone.recon.api; import com.google.inject.AbstractModule; -import com.google.inject.Guice; import com.google.inject.Injector; import com.google.inject.Singleton; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -50,9 +49,6 @@ import org.apache.hadoop.ozone.recon.api.types.DatanodesResponse; import org.apache.hadoop.ozone.recon.api.types.PipelineMetadata; import org.apache.hadoop.ozone.recon.api.types.PipelinesResponse; -import org.apache.hadoop.ozone.recon.persistence.AbstractSqlDatabaseTest; -import org.apache.hadoop.ozone.recon.persistence.DataSourceConfiguration; -import org.apache.hadoop.ozone.recon.persistence.JooqPersistenceModule; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; @@ -60,7 +56,6 @@ import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; import org.apache.hadoop.test.LambdaTestUtils; import org.hadoop.ozone.recon.schema.ReconTaskSchemaDefinition; -import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; import org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao; import org.jooq.Configuration; import org.junit.Assert; @@ -72,7 +67,6 @@ import static org.mockito.Mockito.when; import javax.ws.rs.core.Response; -import java.io.File; import java.io.IOException; import java.util.UUID; import java.util.concurrent.Callable; @@ -98,25 +92,6 @@ public class TestEndpoints extends AbstractOMMetadataManagerTest { private Pipeline pipeline; private void initializeInjector() throws IOException { - - File tempDir = temporaryFolder.newFolder(); - - AbstractSqlDatabaseTest.DataSourceConfigurationProvider - configurationProvider = - new AbstractSqlDatabaseTest.DataSourceConfigurationProvider(tempDir); - - JooqPersistenceModule persistenceModule = - new JooqPersistenceModule(configurationProvider); - - Injector sqlInjector = Guice.createInjector(persistenceModule, - new AbstractModule() { - @Override - public void configure() { - bind(DataSourceConfiguration.class) - .toProvider(configurationProvider); - } - }); - reconOMMetadataManager = getTestMetadataManager( initializeNewOmMetadataManager()); OzoneManagerServiceProviderImpl omServiceProviderMock = @@ -134,7 +109,7 @@ protected void configure() { pipelineId = pipeline.getId().getId().toString(); Configuration sqlConfiguration = - sqlInjector.getInstance((Configuration.class)); + parentInjector.getInstance((Configuration.class)); ContainerInfo containerInfo = new ContainerInfo.Builder() .setContainerID(containerId) @@ -147,17 +122,14 @@ protected void configure() { ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline(containerInfo, pipeline); - ReconTaskSchemaDefinition taskSchemaDefinition = sqlInjector + ReconTaskSchemaDefinition taskSchemaDefinition = parentInjector .getInstance(ReconTaskSchemaDefinition.class); taskSchemaDefinition.initializeSchema(); ReconTaskStatusDao reconTaskStatusDao = new ReconTaskStatusDao(sqlConfiguration); - 
MissingContainersDao missingContainersDao = - new MissingContainersDao(sqlConfiguration); bind(ReconTaskStatusDao.class).toInstance(reconTaskStatusDao); - bind(MissingContainersDao.class).toInstance(missingContainersDao); StorageContainerLocationProtocol mockScmClient = mock( StorageContainerLocationProtocol.class); @@ -431,6 +403,6 @@ private void waitAndCheckConditionAfterHeartbeat(Callable check) .setDatanodeDetails(datanodeDetailsProto) .build(); reconScm.getDatanodeProtocolServer().sendHeartbeat(heartbeatRequestProto); - LambdaTestUtils.await(30000, 2000, check); + LambdaTestUtils.await(30000, 1000, check); } } diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java index d546a33316c..639373c086c 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestMissingContainerTask.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,18 +21,19 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import java.io.IOException; -import java.sql.SQLException; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.ozone.recon.persistence.AbstractSqlDatabaseTest; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; +import org.apache.hadoop.test.LambdaTestUtils; import org.hadoop.ozone.recon.schema.ReconTaskSchemaDefinition; import org.hadoop.ozone.recon.schema.UtilizationSchemaDefinition; import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; @@ -49,7 +50,7 @@ public class TestMissingContainerTask extends AbstractSqlDatabaseTest { @Test - public void testRun() throws IOException, SQLException, InterruptedException { + public void testRun() throws Exception { Configuration sqlConfiguration = getInjector().getInstance((Configuration.class)); @@ -64,13 +65,23 @@ public void testRun() throws IOException, SQLException, InterruptedException { ReconStorageContainerManagerFacade scmMock = mock(ReconStorageContainerManagerFacade.class); ContainerManager containerManagerMock = mock(ContainerManager.class); + ContainerReplica unhealthyReplicaMock = mock(ContainerReplica.class); + when(unhealthyReplicaMock.getState()).thenReturn(State.UNHEALTHY); + ContainerReplica healthyReplicaMock = mock(ContainerReplica.class); + when(healthyReplicaMock.getState()).thenReturn(State.CLOSED); when(scmMock.getContainerManager()).thenReturn(containerManagerMock); when(containerManagerMock.getContainerIDs()) .thenReturn(getMockContainerIDs(3)); + // return one HEALTHY and one UNHEALTHY replica for container ID 1 when(containerManagerMock.getContainerReplicas(new ContainerID(1L))) - .thenReturn(Collections.singleton(mock(ContainerReplica.class))); + .thenReturn(Collections.unmodifiableSet( + new HashSet<>( + 
Arrays.asList(healthyReplicaMock, unhealthyReplicaMock) + ))); + // return one UNHEALTHY replica for container ID 2 when(containerManagerMock.getContainerReplicas(new ContainerID(2L))) - .thenReturn(Collections.singleton(mock(ContainerReplica.class))); + .thenReturn(Collections.singleton(unhealthyReplicaMock)); + // return 0 replicas for container ID 3 when(containerManagerMock.getContainerReplicas(new ContainerID(3L))) .thenReturn(Collections.emptySet()); @@ -89,17 +100,20 @@ public void testRun() throws IOException, SQLException, InterruptedException { missingContainersDao); missingContainerTask.register(); missingContainerTask.start(); - Thread.sleep(5000L); + LambdaTestUtils.await(6000, 1000, () -> + (missingContainersTableHandle.findAll().size() == 2)); all = missingContainersTableHandle.findAll(); - Assert.assertEquals(1, all.size()); - Assert.assertEquals(3, all.get(0).getContainerId().longValue()); - + // Container IDs 2 and 3 should be present in the missing containers table + Set missingContainerIDs = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(2L, 3L)) + ); + Assert.assertTrue(all.stream().allMatch(r -> + missingContainerIDs.contains(r.getContainerId()))); ReconTaskStatus taskStatus = reconTaskStatusDao.findById(missingContainerTask.getTaskName()); Assert.assertTrue(taskStatus.getLastUpdatedTimestamp() > currentTime); - } private Set getMockContainerIDs(int num) { diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java index 834355e3e37..d147e58ade5 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/types/GuiceInjectorUtilsForTests.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -33,6 +33,8 @@ import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; import org.apache.hadoop.ozone.recon.spi.impl.ReconContainerDBProvider; import org.apache.hadoop.hdds.utils.db.DBStore; +import org.hadoop.ozone.recon.schema.tables.daos.MissingContainersDao; +import org.jooq.Configuration; import org.junit.Assert; import org.junit.rules.TemporaryFolder; @@ -71,7 +73,7 @@ default Injector getInjector( JooqPersistenceModule jooqPersistenceModule = new JooqPersistenceModule(configurationProvider); - return Guice.createInjector(jooqPersistenceModule, + Injector baseInjector = Guice.createInjector(jooqPersistenceModule, new AbstractModule() { @Override protected void configure() { @@ -93,13 +95,24 @@ protected void configure() { bind(DBStore.class).toProvider(ReconContainerDBProvider.class). 
in(Singleton.class); - bind(ContainerDBServiceProvider.class).to( - ContainerDBServiceProviderImpl.class).in(Singleton.class); } catch (IOException e) { Assert.fail(); } } }); + + return baseInjector.createChildInjector(new AbstractModule() { + @Override + protected void configure() { + Configuration sqlConfiguration = + baseInjector.getInstance((Configuration.class)); + MissingContainersDao missingContainersDao = + new MissingContainersDao(sqlConfiguration); + bind(MissingContainersDao.class).toInstance(missingContainersDao); + bind(ContainerDBServiceProvider.class).to( + ContainerDBServiceProviderImpl.class).in(Singleton.class); + } + }); } /** From 3da3258aeff4d10573153fb8994f5334804217d1 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Thu, 26 Mar 2020 19:59:45 +0100 Subject: [PATCH 27/38] HDDS-3284. ozonesecure-mr test fails due to lack of disk space (#725) --- hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config index 3786bba0a10..f0b7f5cb22d 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config @@ -99,6 +99,8 @@ YARN-SITE.XML_yarn.resourcemanager.system.metrics.publisher.enabled=true YARN-SITE.XML_yarn.log-aggregation-enable=true YARN-SITE.XML_yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds=3600 YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600 +YARN-SITE.XML_yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage=99 +YARN-SITE.XML_yarn.nodemanager.disk-health-checker.enable=false # Yarn LinuxContainer requires the /opt/hadoop/etc/hadoop to be owned by root and not modifiable by other users, # which prevents start.sh from changing the configurations based on docker-config From c19a3a5976c9c1b802d7f9d3eaf7e50053790e5f Mon Sep 17 00:00:00 2001 From: Neo Yang Date: Fri, 27 Mar 2020 13:17:42 +0800 Subject: [PATCH 28/38] HDDS-3074. Make the configuration of container scrub consistent. (#722) --- .../container/ozoneimpl/ContainerScrubberConfiguration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java index c7c55de45e1..bfc5c50b255 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScrubberConfiguration.java @@ -25,7 +25,7 @@ /** * This class defines configuration parameters for container scrubber. **/ -@ConfigGroup(prefix = "hdds.containerscrub") +@ConfigGroup(prefix = "hdds.container.scrub") public class ContainerScrubberConfiguration { @Config(key = "enabled", From 18a4b0554c9f44d30abb617f896bdca5fb924ee4 Mon Sep 17 00:00:00 2001 From: Li Cheng Date: Fri, 27 Mar 2020 15:29:59 +0800 Subject: [PATCH 29/38] HDDS-3179. 
Pipeline placement based on Topology does not have fallback (#678) --- .../hadoop/hdds/protocol/DatanodeDetails.java | 2 +- .../scm/pipeline/PipelinePlacementPolicy.java | 37 +++-- .../hdds/scm/container/MockNodeManager.java | 10 +- .../pipeline/TestPipelinePlacementPolicy.java | 145 ++++++++++++++---- 4 files changed, 145 insertions(+), 49 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index a235a4b2b46..28ed36d964b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -70,7 +70,7 @@ private DatanodeDetails(String uuid, String ipAddress, String hostName, this.certSerialId = certSerialId; } - protected DatanodeDetails(DatanodeDetails datanodeDetails) { + public DatanodeDetails(DatanodeDetails datanodeDetails) { super(datanodeDetails.getHostName(), datanodeDetails.getNetworkLocation(), datanodeDetails.getCost()); this.uuid = datanodeDetails.uuid; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index 0f30449c975..e96b12026c3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -99,9 +99,8 @@ boolean meetCriteria(DatanodeDetails datanodeDetails, int nodesRequired) { try { pipeline = stateManager.getPipeline(pid); } catch (PipelineNotFoundException e) { - LOG.error("Pipeline not found in pipeline state manager during" + - " pipeline creation. PipelineID: " + pid + - " exception: " + e.getMessage()); + LOG.debug("Pipeline not found in pipeline state manager during" + + " pipeline creation. PipelineID: {}", pid, e); continue; } if (pipeline != null && @@ -282,26 +281,32 @@ public List getResultSet( LOG.debug("Second node chosen: {}", nextNode); } } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + - "rack based on rack awareness."); - } + LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + + "rack based on rack awareness. anchor: {}", anchor); } // Then choose nodes close to anchor based on network topology int nodesToFind = nodesRequired - results.size(); for (int x = 0; x < nodesToFind; x++) { // Pick remaining nodes based on the existence of rack awareness. - DatanodeDetails pick = rackAwareness - ? chooseNodeFromNetworkTopology( - nodeManager.getClusterNetworkTopologyMap(), anchor, exclude) - : fallBackPickNodes(healthyNodes, exclude); + DatanodeDetails pick = null; + if (rackAwareness) { + pick = chooseNodeFromNetworkTopology( + nodeManager.getClusterNetworkTopologyMap(), anchor, exclude); + } + // fall back protection + if (pick == null) { + pick = fallBackPickNodes(healthyNodes, exclude); + if (rackAwareness) { + LOG.debug("Failed to choose node based on topology. 
Fallback " + + "picks node as: {}", pick); + } + } + if (pick != null) { results.add(pick); exclude.add(pick); - if (LOG.isDebugEnabled()) { - LOG.debug("Remaining node chosen: {}", pick); - } + LOG.debug("Remaining node chosen: {}", pick); } } @@ -414,6 +419,10 @@ protected DatanodeDetails chooseNodeFromNetworkTopology( Node pick = networkTopology.chooseRandom( anchor.getNetworkLocation(), excluded); DatanodeDetails pickedNode = (DatanodeDetails) pick; + if (pickedNode == null) { + LOG.debug("Pick node is null, excluded nodes {}, anchor {}.", + excluded, anchor); + } return pickedNode; } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index cbeef7f67ab..f15bfdd7bcd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -94,6 +94,7 @@ public class MockNodeManager implements NodeManager { private ConcurrentMap> dnsToUuidMap; public MockNodeManager(NetworkTopologyImpl clusterMap, + List nodes, boolean initializeFakeNodes, int nodeCount) { this.healthyNodes = new LinkedList<>(); this.staleNodes = new LinkedList<>(); @@ -104,6 +105,13 @@ public MockNodeManager(NetworkTopologyImpl clusterMap, this.dnsToUuidMap = new ConcurrentHashMap<>(); this.aggregateStat = new SCMNodeStat(); this.clusterMap = clusterMap; + if (!nodes.isEmpty()) { + for (int x = 0; x < nodes.size(); x++) { + DatanodeDetails node = nodes.get(x); + register(node, null, null); + populateNodeMetric(node, x); + } + } if (initializeFakeNodes) { for (int x = 0; x < nodeCount; x++) { DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails(); @@ -116,7 +124,7 @@ public MockNodeManager(NetworkTopologyImpl clusterMap, } public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { - this(new NetworkTopologyImpl(new OzoneConfiguration()), + this(new NetworkTopologyImpl(new OzoneConfiguration()), new ArrayList<>(), initializeFakeNodes, nodeCount); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index daad80834c5..fafc4b0acec 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.MockNodeManager; @@ -35,6 +36,9 @@ import java.util.stream.Collectors; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; +import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; /** * Test for PipelinePlacementPolicy. 
@@ -43,25 +47,55 @@ public class TestPipelinePlacementPolicy { private MockNodeManager nodeManager; private OzoneConfiguration conf; private PipelinePlacementPolicy placementPolicy; + private NetworkTopologyImpl cluster; private static final int PIPELINE_PLACEMENT_MAX_NODES_COUNT = 10; + private List nodesWithOutRackAwareness = new ArrayList<>(); + private List nodesWithRackAwareness = new ArrayList<>(); + @Before public void init() throws Exception { - nodeManager = new MockNodeManager(true, - PIPELINE_PLACEMENT_MAX_NODES_COUNT); + cluster = initTopology(); + // start with nodes with rack awareness. + nodeManager = new MockNodeManager(cluster, getNodesWithRackAwareness(), + false, PIPELINE_PLACEMENT_MAX_NODES_COUNT); conf = new OzoneConfiguration(); conf.setInt(OZONE_DATANODE_PIPELINE_LIMIT, 5); placementPolicy = new PipelinePlacementPolicy( nodeManager, new PipelineStateManager(), conf); } + private NetworkTopologyImpl initTopology() { + NodeSchema[] schemas = new NodeSchema[] + {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA}; + NodeSchemaManager.getInstance().init(schemas, true); + NetworkTopologyImpl topology = + new NetworkTopologyImpl(NodeSchemaManager.getInstance()); + return topology; + } + + private List getNodesWithRackAwareness() { + List datanodes = new ArrayList<>(); + for (Node node : NODES) { + DatanodeDetails datanode = overwriteLocationInNode( + getNodesWithoutRackAwareness(), node); + nodesWithRackAwareness.add(datanode); + datanodes.add(datanode); + } + return datanodes; + } + + private DatanodeDetails getNodesWithoutRackAwareness() { + DatanodeDetails node = MockDatanodeDetails.randomDatanodeDetails(); + nodesWithOutRackAwareness.add(node); + return node; + } + @Test - public void testChooseNodeBasedOnNetworkTopology() { - List healthyNodes = - nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); - DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); + public void testChooseNodeBasedOnNetworkTopology() throws SCMException { + DatanodeDetails anchor = placementPolicy.chooseNode(nodesWithRackAwareness); // anchor should be removed from healthyNodes after being chosen. - Assert.assertFalse(healthyNodes.contains(anchor)); + Assert.assertFalse(nodesWithRackAwareness.contains(anchor)); List excludedNodes = new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT); @@ -69,10 +103,42 @@ public void testChooseNodeBasedOnNetworkTopology() { DatanodeDetails nextNode = placementPolicy.chooseNodeFromNetworkTopology( nodeManager.getClusterNetworkTopologyMap(), anchor, excludedNodes); Assert.assertFalse(excludedNodes.contains(nextNode)); - // nextNode should not be the same as anchor. + // next node should not be the same as anchor. Assert.assertTrue(anchor.getUuid() != nextNode.getUuid()); + // next node should be on the same rack based on topology. + Assert.assertEquals(anchor.getNetworkLocation(), + nextNode.getNetworkLocation()); } + @Test + public void testChooseNodeWithSingleNodeRack() throws SCMException { + // There is only one node on 3 racks altogether. 
+ List datanodes = new ArrayList<>(); + for (Node node : SINGLE_NODE_RACK) { + DatanodeDetails datanode = overwriteLocationInNode( + MockDatanodeDetails.randomDatanodeDetails(), node); + datanodes.add(datanode); + } + MockNodeManager localNodeManager = new MockNodeManager(initTopology(), + datanodes, false, datanodes.size()); + PipelinePlacementPolicy localPlacementPolicy = new PipelinePlacementPolicy( + localNodeManager, new PipelineStateManager(), conf); + int nodesRequired = HddsProtos.ReplicationFactor.THREE.getNumber(); + List results = localPlacementPolicy.chooseDatanodes( + new ArrayList<>(datanodes.size()), + new ArrayList<>(datanodes.size()), + nodesRequired, 0); + + Assert.assertEquals(nodesRequired, results.size()); + // 3 nodes should be on different racks. + Assert.assertNotEquals(results.get(0).getNetworkLocation(), + results.get(1).getNetworkLocation()); + Assert.assertNotEquals(results.get(0).getNetworkLocation(), + results.get(2).getNetworkLocation()); + Assert.assertNotEquals(results.get(1).getNetworkLocation(), + results.get(2).getNetworkLocation()); + } + @Test public void testChooseNodeBasedOnRackAwareness() { List healthyNodes = overWriteLocationInNodes( @@ -84,8 +150,9 @@ public void testChooseNodeBasedOnRackAwareness() { healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), topologyWithDifRacks, anchor); Assert.assertNotNull(nextNode); - Assert.assertFalse(anchor.getNetworkLocation().equals( - nextNode.getNetworkLocation())); + // next node should be on a different rack. + Assert.assertNotEquals(anchor.getNetworkLocation(), + nextNode.getNetworkLocation()); } @Test @@ -115,25 +182,25 @@ public void testFallBackPickNodes() { @Test public void testRackAwarenessNotEnabledWithFallBack() throws SCMException{ - List healthyNodes = - nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); - DatanodeDetails anchor = placementPolicy.chooseNode(healthyNodes); - DatanodeDetails randomNode = placementPolicy.chooseNode(healthyNodes); + DatanodeDetails anchor = placementPolicy + .chooseNode(nodesWithOutRackAwareness); + DatanodeDetails randomNode = placementPolicy + .chooseNode(nodesWithOutRackAwareness); // rack awareness is not enabled. Assert.assertTrue(anchor.getNetworkLocation().equals( randomNode.getNetworkLocation())); NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); DatanodeDetails nextNode = placementPolicy.chooseNodeBasedOnRackAwareness( - healthyNodes, new ArrayList<>(PIPELINE_PLACEMENT_MAX_NODES_COUNT), - topology, anchor); + nodesWithOutRackAwareness, new ArrayList<>( + PIPELINE_PLACEMENT_MAX_NODES_COUNT), topology, anchor); // RackAwareness should not be able to choose any node. Assert.assertNull(nextNode); // PlacementPolicy should still be able to pick a set of 3 nodes. int numOfNodes = HddsProtos.ReplicationFactor.THREE.getNumber(); List results = placementPolicy - .getResultSet(numOfNodes, healthyNodes); + .getResultSet(numOfNodes, nodesWithOutRackAwareness); Assert.assertEquals(numOfNodes, results.size()); // All nodes are on same rack. 
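The single-node-rack test above asserts rack diversity with three pairwise assertNotEquals calls. A compact standalone illustration of the same check is below; Node here is a hypothetical stand-in carrying only the network location, not the DatanodeDetails type from the patch. Collecting racks into a Set and comparing sizes is equivalent to the pairwise assertions but works for any replication factor.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public final class DistinctRackCheck {
  // Stand-in for DatanodeDetails with only the field the check needs.
  record Node(String host, String rack) { }

  static boolean onDistinctRacks(List<Node> placed) {
    Set<String> racks = placed.stream()
        .map(Node::rack)
        .collect(Collectors.toSet());
    // Duplicate racks collapse in the Set, so equal sizes means every
    // placed node landed on its own rack.
    return racks.size() == placed.size();
  }

  public static void main(String[] args) {
    List<Node> placement = List.of(
        new Node("h1", "/r1"), new Node("h2", "/r2"), new Node("h3", "/r3"));
    System.out.println(onDistinctRacks(placement)); // true
  }
}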
@@ -146,14 +213,20 @@ public void testRackAwarenessNotEnabledWithFallBack() throws SCMException{ private final static Node[] NODES = new NodeImpl[] { new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), new NodeImpl("h2", "/r1", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h3", "/r1", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h4", "/r1", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h5", "/r2", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h6", "/r2", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h7", "/r2", NetConstants.NODE_COST_DEFAULT), - new NodeImpl("h8", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h3", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h4", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h5", "/r3", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h6", "/r3", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h7", "/r4", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h8", "/r4", NetConstants.NODE_COST_DEFAULT), }; + // 3 racks with single node. + private final static Node[] SINGLE_NODE_RACK = new NodeImpl[] { + new NodeImpl("h1", "/r1", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h2", "/r2", NetConstants.NODE_COST_DEFAULT), + new NodeImpl("h3", "/r3", NetConstants.NODE_COST_DEFAULT) + }; private NetworkTopology createNetworkTopologyOnDifRacks() { NetworkTopology topology = new NetworkTopologyImpl(new Configuration()); @@ -163,20 +236,26 @@ private NetworkTopology createNetworkTopologyOnDifRacks() { return topology; } + private DatanodeDetails overwriteLocationInNode( + DatanodeDetails datanode, Node node) { + DatanodeDetails result = DatanodeDetails.newBuilder() + .setUuid(datanode.getUuidString()) + .setHostName(datanode.getHostName()) + .setIpAddress(datanode.getIpAddress()) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.STANDALONE)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.RATIS)) + .addPort(datanode.getPort(DatanodeDetails.Port.Name.REST)) + .setNetworkLocation(node.getNetworkLocation()).build(); + return result; + } + private List overWriteLocationInNodes( List datanodes) { List results = new ArrayList<>(datanodes.size()); for (int i = 0; i < datanodes.size(); i++) { - DatanodeDetails datanode = datanodes.get(i); - DatanodeDetails result = DatanodeDetails.newBuilder() - .setUuid(datanode.getUuidString()) - .setHostName(datanode.getHostName()) - .setIpAddress(datanode.getIpAddress()) - .addPort(datanode.getPort(DatanodeDetails.Port.Name.STANDALONE)) - .addPort(datanode.getPort(DatanodeDetails.Port.Name.RATIS)) - .addPort(datanode.getPort(DatanodeDetails.Port.Name.REST)) - .setNetworkLocation(NODES[i].getNetworkLocation()).build(); - results.add(result); + DatanodeDetails datanode = overwriteLocationInNode( + datanodes.get(i), NODES[i]); + results.add(datanode); } return results; } From 42d8d257f40933525303f4e8923f99a4e5b25c26 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Fri, 27 Mar 2020 12:25:33 -0700 Subject: [PATCH 30/38] HDDS-3281. 
Add timeouts to all robot tests (#723) --- .../dist/src/main/smoketest/auditparser/auditparser.robot | 1 + hadoop-ozone/dist/src/main/smoketest/basic/basic.robot | 1 + hadoop-ozone/dist/src/main/smoketest/env-compose.robot | 2 +- hadoop-ozone/dist/src/main/smoketest/freon/freon.robot | 1 + hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot | 1 + .../dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot | 2 +- hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot | 1 + hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot | 1 + hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot | 1 + hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot | 1 + hadoop-ozone/dist/src/main/smoketest/s3/webui.robot | 1 + hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot | 1 + hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot | 1 + .../dist/src/main/smoketest/security/ozone-secure-fs.robot | 1 + .../dist/src/main/smoketest/security/ozone-secure-s3.robot | 1 + 24 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot b/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot index 1caae755694..4e90a44b529 100644 --- a/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot +++ b/hadoop-ozone/dist/src/main/smoketest/auditparser/auditparser.robot @@ -18,6 +18,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${user} hadoop diff --git a/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot b/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot index c7b43ecd069..bbd19456ed1 100644 --- a/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot +++ b/hadoop-ozone/dist/src/main/smoketest/basic/basic.robot @@ -17,6 +17,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${DATANODE_HOST} datanode diff --git a/hadoop-ozone/dist/src/main/smoketest/env-compose.robot b/hadoop-ozone/dist/src/main/smoketest/env-compose.robot index d529d7f02f2..d21eacaea50 100644 --- a/hadoop-ozone/dist/src/main/smoketest/env-compose.robot +++ b/hadoop-ozone/dist/src/main/smoketest/env-compose.robot @@ -16,7 +16,7 @@ *** Settings *** Documentation High level utilities to execute commands and tests in docker-compose based environments. 
Resource commonlib.robot - +Test Timeout 5 minutes *** Keywords *** diff --git a/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot b/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot index 6bf4dbbcab0..83cc8655402 100644 --- a/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot +++ b/hadoop-ozone/dist/src/main/smoketest/freon/freon.robot @@ -17,6 +17,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Resource ../commonlib.robot +Test Timeout 5 minutes *** Test Cases *** Freon Randomkey Generator diff --git a/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot b/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot index c2bf7dba749..91e41fce609 100644 --- a/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot +++ b/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library BuiltIn Library String Resource ../commonlib.robot +Test Timeout 5 minutes Suite Setup Generate volume *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot b/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot index d468accff79..66804f9b512 100644 --- a/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot +++ b/hadoop-ozone/dist/src/main/smoketest/om-ratis/testOMAdminCmd.robot @@ -17,7 +17,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Resource ../commonlib.robot - +Test Timeout 5 minutes *** Test Cases *** diff --git a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot index e890b6812fe..8c5a706961f 100644 --- a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot +++ b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library SSHLibrary Library Collections Resource ../commonlib.robot +Test Timeout 8 minutes *** Variables *** ${SECURITY_ENABLED} false diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot index 8d12a526ea4..3336b39603b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/hadoopo3fs.robot @@ -18,6 +18,7 @@ Documentation Test ozone fs with hadoopfs Library OperatingSystem Library String Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${DATANODE_HOST} datanode diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot index 89472f2c3f5..8be67935b30 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot @@ -17,6 +17,7 @@ Documentation Ozonefs test Library OperatingSystem Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot b/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot index 28910ded575..621bbd0de83 100644 --- a/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot +++ b/hadoop-ozone/dist/src/main/smoketest/recon/recon-api.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://recon:9888 diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot b/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot index d9c1671bcb3..004a4964591 100644 
--- a/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Keywords *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot b/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot index 8762d5dac6f..8af0b4c2ba2 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/awss3.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource ./commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot b/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot index 76cbbb85df3..17762bc3108 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/bucketcreate.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot b/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot index ef7bc2d017e..76668716cdb 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/buckethead.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot b/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot index 0b7f5d4aa8b..709c226559b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/bucketlist.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot index 292b3330657..c1b4953e152 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectcopy.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot index 33fda108a24..b3faf7e1aa3 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectdelete.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot index 95b71f4ceb6..6e22d4cfb70 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectmultidelete.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource 
../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot b/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot index f6146611144..40bcccb42de 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot b/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot index 4b2f88e25b8..180b6ed56dc 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/webui.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Resource ../commonlib.robot Resource ./commonawslib.robot +Test Timeout 5 minutes Suite Setup Setup s3 tests *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot index ed1173d7999..57fa9ac39e4 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/datanode.robot @@ -18,6 +18,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot index f411e0c3af6..77c22787f46 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmcli/pipeline.robot @@ -18,6 +18,7 @@ Documentation Smoketest ozone cluster startup Library OperatingSystem Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** diff --git a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot index 3df74f9c36a..4e368591d30 100644 --- a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot @@ -19,6 +19,7 @@ Library OperatingSystem Library String Library BuiltIn Resource ../commonlib.robot +Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://s3g:9878 diff --git a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot index 9f5e1224503..5103e80279c 100644 --- a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot +++ b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-s3.robot @@ -20,6 +20,7 @@ Library String Library BuiltIn Resource ../commonlib.robot Resource ../s3/commonawslib.robot +Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://s3g:9878 From 37281acaf9c258095854f1b9e7b91303478e0fc9 Mon Sep 17 00:00:00 2001 From: Rakesh Radhakrishnan Date: Sat, 28 Mar 2020 09:47:54 +0530 Subject: [PATCH 31/38] HDDS-3288: Update default RPC handler SCM/OM count to 100 (#729) --- .../main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 +- hadoop-hdds/common/src/main/resources/ozone-default.xml | 4 ++-- .../main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index fefcfca08aa..c397bc5a472 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -221,7 +221,7 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_HANDLER_COUNT_KEY = "ozone.scm.handler.count.key"; - public static final int OZONE_SCM_HANDLER_COUNT_DEFAULT = 10; + public static final int OZONE_SCM_HANDLER_COUNT_DEFAULT = 100; public static final String OZONE_SCM_SECURITY_HANDLER_COUNT_KEY = "ozone.scm.security.handler.count.key"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index ca107c0e869..65db7dfcd4e 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -505,7 +505,7 @@ ozone.om.handler.count.key - 20 + 100 OM, PERFORMANCE The number of RPC handler threads for OM service endpoints. @@ -918,7 +918,7 @@ ozone.scm.handler.count.key - 10 + 100 OZONE, MANAGEMENT, PERFORMANCE The number of RPC handler threads for each SCM service diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 51ff17f7e44..f46b30854e2 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -37,7 +37,7 @@ private OMConfigKeys() { public static final String OZONE_OM_HANDLER_COUNT_KEY = "ozone.om.handler.count.key"; - public static final int OZONE_OM_HANDLER_COUNT_DEFAULT = 20; + public static final int OZONE_OM_HANDLER_COUNT_DEFAULT = 100; public static final String OZONE_OM_INTERNAL_SERVICE_ID = "ozone.om.internal.service.id"; From 562ac8b7afc50be76128ea67789b9f0282a79bcd Mon Sep 17 00:00:00 2001 From: Siddharth Date: Sat, 28 Mar 2020 11:29:34 -0700 Subject: [PATCH 32/38] HDDS-3273. getConf does not return all OM addresses. (#727) --- .../java/org/apache/hadoop/ozone/OmUtils.java | 32 +++++++++++++++++++ .../hadoop/ozone/freon/OzoneGetConf.java | 6 +++- .../org/apache/hadoop/ozone/TestOmUtils.java | 25 +++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java index 87522e3d650..3552e79f413 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java @@ -29,8 +29,12 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.stream.Collectors; @@ -89,6 +93,34 @@ public static InetSocketAddress getOmAddress(Configuration conf) { return NetUtils.createSocketAddr(getOmRpcAddress(conf)); } + /** + * Return list of OM addresses by service ids - when HA is enabled. 
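+   * For example, with {@code ozone.om.service.ids=ozone1} and an
+   * {@code ozone.om.address.ozone1.<nodeId>} entry per node id (the setup
+   * exercised in TestOmUtils#testGetOmHAAddressesById below), the returned
+   * map keys {@code "ozone1"} to the three resolved RPC addresses.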
+ * + * @param conf {@link Configuration} + * @return {service.id -> [{@link InetSocketAddress}]} + */ + public static Map> getOmHAAddressesById( + Configuration conf) { + Map> result = new HashMap<>(); + for (String serviceId : conf.getTrimmedStringCollection( + OZONE_OM_SERVICE_IDS_KEY)) { + if (!result.containsKey(serviceId)) { + result.put(serviceId, new ArrayList<>()); + } + for (String nodeId : getOMNodeIds(conf, serviceId)) { + String rpcAddr = getOmRpcAddress(conf, + addKeySuffixes(OZONE_OM_ADDRESS_KEY, serviceId, nodeId)); + if (rpcAddr != null) { + result.get(serviceId).add(NetUtils.createSocketAddr(rpcAddr)); + } else { + LOG.warn("Address undefined for nodeId: {} for service {}", nodeId, + serviceId); + } + } + } + return result; + } + /** * Retrieve the socket address that is used by OM. * @param conf diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java index 3c60e5956d4..83283d473eb 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/freon/OzoneGetConf.java @@ -253,7 +253,11 @@ static class OzoneManagersCommandHandler extends CommandHandler { @Override public int doWorkInternal(OzoneGetConf tool, String[] args) throws IOException { - tool.printOut(OmUtils.getOmAddress(tool.getConf()).getHostName()); + if (OmUtils.isServiceIdsDefined(tool.getConf())) { + tool.printOut(OmUtils.getOmHAAddressesById(tool.getConf()).toString()); + } else { + tool.printOut(OmUtils.getOmAddress(tool.getConf()).getHostName()); + } return 0; } } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java index 7f374102754..cdbb786a77c 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; import org.apache.hadoop.io.IOUtils; import org.junit.Rule; @@ -31,9 +32,13 @@ import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; +import java.net.InetSocketAddress; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -113,6 +118,26 @@ public void createOMDirThrowsIfCannotCreate() throws IOException { // expecting exception } + @Test + public void testGetOmHAAddressesById() { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(OZONE_OM_SERVICE_IDS_KEY, "ozone1"); + conf.set("ozone.om.nodes.ozone1", "node1,node2,node3"); + conf.set("ozone.om.address.ozone1.node1", "1.1.1.1"); + conf.set("ozone.om.address.ozone1.node2", "1.1.1.2"); + conf.set("ozone.om.address.ozone1.node3", "1.1.1.3"); + Map> addresses = + OmUtils.getOmHAAddressesById(conf); + assertFalse(addresses.isEmpty()); + List rpcAddrs = addresses.get("ozone1"); + assertFalse(rpcAddrs.isEmpty()); + assertTrue(rpcAddrs.stream().anyMatch( + a -> a.getAddress().getHostAddress().equals("1.1.1.1"))); + 
assertTrue(rpcAddrs.stream().anyMatch( + a -> a.getAddress().getHostAddress().equals("1.1.1.2"))); + assertTrue(rpcAddrs.stream().anyMatch( + a -> a.getAddress().getHostAddress().equals("1.1.1.3"))); + } } class TestDBCheckpoint implements DBCheckpoint { From c2a0d4882d4608545d87ea58a80c05ca1024562f Mon Sep 17 00:00:00 2001 From: isa Date: Mon, 23 Mar 2020 13:30:32 +0430 Subject: [PATCH 33/38] HDDS-3249: renew ContainerCache.INSTANCE in order to test it in a fresh state --- .../container/common/utils/ContainerCache.java | 18 ++++++++++++++++-- .../common/{ => utils}/TestContainerCache.java | 6 ++---- 2 files changed, 18 insertions(+), 6 deletions(-) rename hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/{ => utils}/TestContainerCache.java (94%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 4ddb4e48792..9008cb72e4d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.container.common.utils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.collections.MapIterator; import org.apache.commons.collections.map.LRUMap; @@ -58,9 +59,22 @@ private ContainerCache(int maxSize, float loadFactor, boolean * @return A instance of {@link ContainerCache}. */ public synchronized static ContainerCache getInstance(Configuration conf) { - if (cache == null) { + return getInstance(conf, false); + } + + /** + * Return a singleton instance of {@link ContainerCache} + * that holds the DB handlers. and recreate it if renew is true + * This method is useful for testing because we need a fresh new instance for each test + * + * @param conf - Configuration. + * @return A instance of {@link ContainerCache}. + */ + @VisibleForTesting + synchronized static ContainerCache getInstance(Configuration conf, boolean renew) { + if (renew || cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, - OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); + OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); cache = new ContainerCache(cacheSize, LOAD_FACTOR, true); } return cache; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestContainerCache.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java similarity index 94% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestContainerCache.java rename to hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java index b6584d17017..9c20c0d454b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestContainerCache.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java @@ -16,13 +16,11 @@ * limitations under the License. 
*/ -package org.apache.hadoop.ozone.container.common; +package org.apache.hadoop.ozone.container.common.utils; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; -import org.apache.hadoop.ozone.container.common.utils.ReferenceCountedDB; import org.apache.hadoop.hdds.utils.MetadataStore; import org.apache.hadoop.hdds.utils.MetadataStoreBuilder; import org.junit.Assert; @@ -62,7 +60,7 @@ public void testContainerCacheEviction() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2); - ContainerCache cache = ContainerCache.getInstance(conf); + ContainerCache cache = ContainerCache.getInstance(conf, true); File containerDir1 = new File(root, "cont1"); File containerDir2 = new File(root, "cont2"); File containerDir3 = new File(root, "cont3"); From aa7cfac41495d9fc280c26d8a7ac08ba5fcdcb99 Mon Sep 17 00:00:00 2001 From: isa Date: Mon, 23 Mar 2020 13:40:44 +0430 Subject: [PATCH 34/38] correct style --- .../hadoop/ozone/container/common/utils/ContainerCache.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 9008cb72e4d..73b22efd9e1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -65,13 +65,15 @@ public synchronized static ContainerCache getInstance(Configuration conf) { /** * Return a singleton instance of {@link ContainerCache} * that holds the DB handlers. and recreate it if renew is true - * This method is useful for testing because we need a fresh new instance for each test + * This method is useful for testing because we need a fresh + * new instance for each test * * @param conf - Configuration. * @return A instance of {@link ContainerCache}. 
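+   * Typical test usage (as exercised in TestContainerCache below):
+   * {@code ContainerCache cache = ContainerCache.getInstance(conf, true);}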
*/ @VisibleForTesting - synchronized static ContainerCache getInstance(Configuration conf, boolean renew) { + synchronized static ContainerCache getInstance(Configuration conf, + boolean renew) { if (renew || cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); From bbdd30d5030350cc0086d64de4917b402ff1116b Mon Sep 17 00:00:00 2001 From: isa Date: Mon, 23 Mar 2020 23:58:50 +0430 Subject: [PATCH 35/38] cleanup ContainerCache after TestBlockDeletingService and TestContainerPersistence also set defaultCache to null on BlockUtils.shutdownCache --- .../common/utils/ContainerCache.java | 22 +++++-------------- .../keyvalue/helpers/BlockUtils.java | 1 + .../common/TestBlockDeletingService.java | 2 ++ .../common/impl/TestContainerPersistence.java | 7 ++++++ .../common/utils/TestContainerCache.java | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 73b22efd9e1..5950e0b4824 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -59,22 +59,7 @@ private ContainerCache(int maxSize, float loadFactor, boolean * @return A instance of {@link ContainerCache}. */ public synchronized static ContainerCache getInstance(Configuration conf) { - return getInstance(conf, false); - } - - /** - * Return a singleton instance of {@link ContainerCache} - * that holds the DB handlers. and recreate it if renew is true - * This method is useful for testing because we need a fresh - * new instance for each test - * - * @param conf - Configuration. - * @return A instance of {@link ContainerCache}. - */ - @VisibleForTesting - synchronized static ContainerCache getInstance(Configuration conf, - boolean renew) { - if (renew || cache == null) { + if (cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); cache = new ContainerCache(cacheSize, LOAD_FACTOR, true); @@ -82,6 +67,10 @@ synchronized static ContainerCache getInstance(Configuration conf, return cache; } + public static void clearDefaultCache() { + cache = null; + } + /** * Closes all the db instances and resets the cache. 
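   * As changed below, shutdown now also drops the singleton so a later
   * getInstance call starts fresh; tests invoke it via
   * {@code BlockUtils.shutdownCache(ContainerCache.getInstance(conf))}
   * in {@code @After} / {@code @AfterClass} hooks.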
*/ @@ -98,6 +87,7 @@ public void shutdownCache() { } // reset the cache cache.clear(); + cache = null; } finally { lock.unlock(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java index 35e0b0c15a7..d5e519db3d3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java @@ -93,6 +93,7 @@ public static void removeDB(KeyValueContainerData container, Configuration */ public static void shutdownCache(ContainerCache cache) { cache.shutdownCache(); + ContainerCache.clearDefaultCache(); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index c1f4d9f6dd3..415c49f8082 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -35,6 +35,7 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Handler; +import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; @@ -96,6 +97,7 @@ public static void init() throws IOException { @AfterClass public static void cleanup() throws IOException { FileUtils.deleteDirectory(testRoot); + BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java index 1b0f70f4e13..77b055c495a 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java @@ -20,6 +20,7 @@ import com.google.common.collect.Maps; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -38,6 +39,7 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; +import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import 
org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; @@ -150,6 +152,11 @@ public void cleanupDir() throws IOException { } } + @After + public void cleanupBlockUtil() { + BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); + } + private long getTestContainerID() { return ContainerTestHelper.getTestContainerID(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java index 9c20c0d454b..1d98be57138 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java @@ -60,7 +60,7 @@ public void testContainerCacheEviction() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2); - ContainerCache cache = ContainerCache.getInstance(conf, true); + ContainerCache cache = ContainerCache.getInstance(conf); File containerDir1 = new File(root, "cont1"); File containerDir2 = new File(root, "cont2"); File containerDir3 = new File(root, "cont3"); From 74e58044330096e94901b0c5c5d181cb860655a4 Mon Sep 17 00:00:00 2001 From: isa Date: Tue, 24 Mar 2020 00:40:33 +0430 Subject: [PATCH 36/38] remove unused import --- .../hadoop/ozone/container/common/utils/ContainerCache.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 5950e0b4824..242d296daed 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.container.common.utils; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.collections.MapIterator; import org.apache.commons.collections.map.LRUMap; From af28759413dfb805da2b7e7a741326227a630a88 Mon Sep 17 00:00:00 2001 From: isa Date: Tue, 24 Mar 2020 00:53:44 +0430 Subject: [PATCH 37/38] remove setting default instance to null in shutdown method --- .../hadoop/ozone/container/common/utils/ContainerCache.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 242d296daed..60a72a2f568 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -86,7 +86,6 @@ public void shutdownCache() { } // reset the cache cache.clear(); - cache = null; } finally { lock.unlock(); } From 744d4ead5c220b735b7d3c5134c2990303430d13 Mon Sep 17 00:00:00 2001 From: isa Date: Tue, 24 Mar 2020 16:39:10 +0430 Subject: [PATCH 38/38] remove cache.isFull assertion 
from TestContainerCache --- .../ozone/container/common/utils/ContainerCache.java | 6 +----- .../ozone/container/keyvalue/helpers/BlockUtils.java | 1 - .../ozone/container/common/TestBlockDeletingService.java | 2 -- .../container/common/impl/TestContainerPersistence.java | 7 ------- .../ozone/container/common/utils/TestContainerCache.java | 2 -- 5 files changed, 1 insertion(+), 17 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 60a72a2f568..4ddb4e48792 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -60,16 +60,12 @@ private ContainerCache(int maxSize, float loadFactor, boolean public synchronized static ContainerCache getInstance(Configuration conf) { if (cache == null) { int cacheSize = conf.getInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, - OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); + OzoneConfigKeys.OZONE_CONTAINER_CACHE_DEFAULT); cache = new ContainerCache(cacheSize, LOAD_FACTOR, true); } return cache; } - public static void clearDefaultCache() { - cache = null; - } - /** * Closes all the db instances and resets the cache. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java index d5e519db3d3..35e0b0c15a7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java @@ -93,7 +93,6 @@ public static void removeDB(KeyValueContainerData container, Configuration */ public static void shutdownCache(ContainerCache cache) { cache.shutdownCache(); - ContainerCache.clearDefaultCache(); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index 415c49f8082..c1f4d9f6dd3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -35,7 +35,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Handler; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; @@ -97,7 +96,6 @@ public static void init() throws IOException { @AfterClass public static void cleanup() throws IOException { FileUtils.deleteDirectory(testRoot); - BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); } /** diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java index 77b055c495a..1b0f70f4e13 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java @@ -20,7 +20,6 @@ import com.google.common.collect.Maps; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -39,7 +38,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; @@ -152,11 +150,6 @@ public void cleanupDir() throws IOException { } } - @After - public void cleanupBlockUtil() { - BlockUtils.shutdownCache(ContainerCache.getInstance(new Configuration())); - } - private long getTestContainerID() { return ContainerTestHelper.getTestContainerID(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java index 1d98be57138..0412536e660 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestContainerCache.java @@ -91,8 +91,6 @@ public void testContainerCacheEviction() throws Exception { db3.close(); Assert.assertEquals(0, db3.getReferenceCount()); - Assert.assertTrue(cache.isFull()); - // add one more reference to ContainerCache and verify that it will not // evict the least recent entry as it has reference. ReferenceCountedDB db4 = cache.getDB(3, "RocksDB",