From d5b7747181115060f5912294122802934b42a493 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Tue, 31 Aug 2021 17:06:45 -0700 Subject: [PATCH 1/2] HDFS-16200: Improve NameNode failover --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 ++ .../blockmanagement/DatanodeManager.java | 19 +++++- .../blockmanagement/TestDatanodeManager.java | 59 +++++++++++++++++++ 3 files changed, 80 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index ac0363f1c42e2..28d903dedb725 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -882,6 +882,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.datanode.ec.reconstruction.validation"; public static final boolean DFS_DN_EC_RECONSTRUCTION_VALIDATION_VALUE = false; + public static final String DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY = + "dfs.disable.datanode.topology.sort"; + public static final boolean DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY_DEFAULT = false; + + public static final String DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY = "dfs.datanode.directoryscan.throttle.limit.ms.per.sec"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 68ee16ca6f74b..2b97b2bdb8bf2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -169,6 +169,12 @@ public class DatanodeManager { /** The number of stale storages */ private volatile int numStaleStorages; + /** Enable/disable topology sorting for datanodes. Enable for colocated clusters + * while disable for compute/storage separate clusters since they won't be in + * the same host or rack. + */ + private final boolean topologySortDisabled; + /** * Number of blocks to check for each postponedMisreplicatedBlocks iteration */ @@ -237,6 +243,10 @@ public class DatanodeManager { this.namesystem = namesystem; this.blockManager = blockManager; + this.topologySortDisabled = conf.getBoolean( + DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY, + DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY_DEFAULT); + this.useDfsNetworkTopology = conf.getBoolean( DFSConfigKeys.DFS_USE_DFS_NETWORK_TOPOLOGY_KEY, DFSConfigKeys.DFS_USE_DFS_NETWORK_TOPOLOGY_DEFAULT); @@ -587,11 +597,14 @@ private void sortLocatedStripedBlock(final LocatedBlock lb, */ private void sortLocatedBlock(final LocatedBlock lb, String targetHost, Comparator comparator) { - // As it is possible for the separation of node manager and datanode, - // here we should get node but not datanode only . + // Resolving topology is expensive especially by calling external script. + // If datanode and nodemanager are not colocated, we can disable resolving topology + // since they will not locate on the same host and rack. + // Otherwise, it resolves nodemanager topology and compares/sorts the datanodes + // by comparing the distances between nodemanager and the datanodes. boolean nonDatanodeReader = false; Node client = getDatanodeByHost(targetHost); - if (client == null) { + if (client == null && !this.topologySortDisabled) { nonDatanodeReader = true; List hosts = new ArrayList<>(1); hosts.add(targetHost); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java index 5f5452ac16d59..7a53df72145d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java @@ -310,6 +310,65 @@ public void testSortLocatedBlocks() throws IOException, URISyntaxException { HelperFunction(null, 0); } + /** + * This test tests the case of disabling topology sorting + */ + @Test + public void testDisableTopologySorting() throws IOException { + Configuration conf = new Configuration(); + conf.setBoolean(DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY, true); + conf.setClass( + CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, + TestDatanodeManager.MyResolver.class, DNSToSwitchMapping.class); + + GenericTestUtils.LogCapturer log = GenericTestUtils.LogCapturer.captureLogs(DatanodeManager.LOG); + + FSNamesystem fsn = Mockito.mock(FSNamesystem.class); + Mockito.when(fsn.hasWriteLock()).thenReturn(true); + DatanodeManager dm = mockDatanodeManager(fsn, conf); + + // register 5 datanodes, each with different storage ID and type + DatanodeInfo[] locs = new DatanodeInfo[5]; + String[] storageIDs = new String[5]; + StorageType[] storageTypes = new StorageType[]{ + StorageType.ARCHIVE, + StorageType.DEFAULT, + StorageType.DISK, + StorageType.RAM_DISK, + StorageType.SSD + }; + for (int i = 0; i < 5; i++) { + // register new datanode + String uuid = "UUID-" + i; + String ip = "IP-" + i; + DatanodeRegistration dr = Mockito.mock(DatanodeRegistration.class); + Mockito.when(dr.getDatanodeUuid()).thenReturn(uuid); + Mockito.when(dr.getIpAddr()).thenReturn(ip); + Mockito.when(dr.getXferAddr()).thenReturn(ip + ":9000"); + Mockito.when(dr.getXferPort()).thenReturn(9000); + Mockito.when(dr.getSoftwareVersion()).thenReturn("version1"); + dm.registerDatanode(dr); + + // get location and storage information + locs[i] = dm.getDatanode(uuid); + storageIDs[i] = "storageID-" + i; + } + + // create LocatedBlock with above locations + ExtendedBlock b = new ExtendedBlock("somePoolID", 1234); + LocatedBlock block = new LocatedBlock(b, locs, storageIDs, storageTypes); + List blocks = new ArrayList<>(); + blocks.add(block); + + final String targetIp = "random_ip"; + + // sort block locations + dm.sortLocatedBlocks(targetIp, blocks); + + // No node topology resolution + assertFalse(log.getOutput().contains("Node Resolution failed")); + } + /** * Execute a functional topology script and make sure that helper * function works correctly From 2d5afe17da9ecf3b57ab3a2f2b579954354e46c2 Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Tue, 31 Aug 2021 20:05:36 -0700 Subject: [PATCH 2/2] Support disabling datanode topology sorting for write to avoid resolving topolgy for client hosts Summary: This is to didable resolving topology for write traffic. When addBlock() is called, topology info will not be used to choose the DataNodes when dfs.disable.datanode.topology.sort is set to true. Test Plan: unit test Reviewers: O2125 User ekanth: Add reviewers, O2121 Project Hadoop-HDFS Dev: Add blocking reviewers, jingzhao, qifan Reviewed By: O2121 Project Hadoop-HDFS Dev: Add blocking reviewers, jingzhao, qifan Subscribers: O2133 Project Data Foundation - Hadoop Ecosystem: Add subscribers, O2131 Project Hadoop-HDFS Dev: Add subscribers JIRA Issues: HDFS-1732 Differential Revision: https://code.uberinternal.com/D6522861 --- .../server/blockmanagement/BlockManager.java | 16 +++++ .../blockmanagement/DatanodeManager.java | 13 +---- .../server/namenode/FSDirWriteFileOp.java | 2 +- .../hdfs/server/namenode/FSNamesystem.java | 2 +- .../blockmanagement/TestDatanodeManager.java | 12 +++- .../hdfs/server/namenode/TestAddBlock.java | 58 +++++++++++++++++++ 6 files changed, 86 insertions(+), 17 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ead915f1d38de..9008bf0edd289 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -294,6 +294,10 @@ public long getTotalECBlockGroups() { return blocksMap.getECBlockGroups(); } + public boolean isTopologySortDisabled() { + return this.topologySortDisabled; + } + /** * redundancyRecheckInterval is how often namenode checks for new * reconstruction work. @@ -458,12 +462,24 @@ public long getTotalECBlockGroups() { /** Storages accessible from multiple DNs. */ private final ProvidedStorageMap providedStorageMap; + /** + * Enable/disable topology sorting for datanodes. Enable for colocated clusters + * while disable for compute/storage separate clusters since they won't be in + * the same host or rack. + */ + private final boolean topologySortDisabled; + public BlockManager(final Namesystem namesystem, boolean haEnabled, final Configuration conf) throws IOException { this.namesystem = namesystem; datanodeManager = new DatanodeManager(this, namesystem, conf); heartbeatManager = datanodeManager.getHeartbeatManager(); this.blockIdManager = new BlockIdManager(this); + + this.topologySortDisabled = conf.getBoolean( + DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY, + DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY_DEFAULT); + blocksPerPostpondedRescan = (int)Math.min(Integer.MAX_VALUE, datanodeManager.getBlocksPerPostponedMisreplicatedBlocksRescan()); rescannedMisreplicatedBlocks = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 2b97b2bdb8bf2..f14959670811c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -169,12 +169,6 @@ public class DatanodeManager { /** The number of stale storages */ private volatile int numStaleStorages; - /** Enable/disable topology sorting for datanodes. Enable for colocated clusters - * while disable for compute/storage separate clusters since they won't be in - * the same host or rack. - */ - private final boolean topologySortDisabled; - /** * Number of blocks to check for each postponedMisreplicatedBlocks iteration */ @@ -242,11 +236,6 @@ public class DatanodeManager { final Configuration conf) throws IOException { this.namesystem = namesystem; this.blockManager = blockManager; - - this.topologySortDisabled = conf.getBoolean( - DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY, - DFSConfigKeys.DFS_DISABLE_DATANODE_TOPOLOGY_SORT_KEY_DEFAULT); - this.useDfsNetworkTopology = conf.getBoolean( DFSConfigKeys.DFS_USE_DFS_NETWORK_TOPOLOGY_KEY, DFSConfigKeys.DFS_USE_DFS_NETWORK_TOPOLOGY_DEFAULT); @@ -604,7 +593,7 @@ private void sortLocatedBlock(final LocatedBlock lb, String targetHost, // by comparing the distances between nodemanager and the datanodes. boolean nonDatanodeReader = false; Node client = getDatanodeByHost(targetHost); - if (client == null && !this.topologySortDisabled) { + if (client == null && !blockManager.isTopologySortDisabled()) { nonDatanodeReader = true; List hosts = new ArrayList<>(1); hosts.add(targetHost); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java index 0d9c6aeeb9c45..52678f19a0122 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java @@ -277,7 +277,7 @@ static DatanodeStorageInfo[] chooseTargetForNewBlock( // If client locality is ignored, clientNode remains 'null' to indicate if (!ignoreClientLocality) { clientNode = bm.getDatanodeManager().getDatanodeByHost(r.clientMachine); - if (clientNode == null) { + if (clientNode == null && !bm.isTopologySortDisabled()) { clientNode = getClientNode(bm, r.clientMachine); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 7ccaae9773e1f..77f9ea87f7450 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3081,7 +3081,7 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, readUnlock("getAdditionalDatanode"); } - if (clientnode == null) { + if (clientnode == null && !blockManager.isTopologySortDisabled()) { clientnode = FSDirWriteFileOp.getClientNode(blockManager, clientMachine); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java index 7a53df72145d3..73f308e5c966e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java @@ -36,6 +36,7 @@ import java.util.Random; import java.util.Set; +import org.apache.hadoop.test.GenericTestUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -84,8 +85,14 @@ public class TestDatanodeManager { final int NUM_ITERATIONS = 500; private static DatanodeManager mockDatanodeManager( - FSNamesystem fsn, Configuration conf) throws IOException { + FSNamesystem fsn, Configuration conf) throws IOException { + return mockDatanodeManager(fsn, conf, false); + } + + private static DatanodeManager mockDatanodeManager( + FSNamesystem fsn, Configuration conf, boolean topologySortDisabled) throws IOException { BlockManager bm = Mockito.mock(BlockManager.class); + Mockito.when(bm.isTopologySortDisabled()).thenReturn(topologySortDisabled); BlockReportLeaseManager blm = new BlockReportLeaseManager(conf); Mockito.when(bm.getBlockReportLeaseManager()).thenReturn(blm); DatanodeManager dm = new DatanodeManager(bm, fsn, conf); @@ -325,8 +332,7 @@ public void testDisableTopologySorting() throws IOException { FSNamesystem fsn = Mockito.mock(FSNamesystem.class); Mockito.when(fsn.hasWriteLock()).thenReturn(true); - DatanodeManager dm = mockDatanodeManager(fsn, conf); - + DatanodeManager dm = mockDatanodeManager(fsn, conf, true); // register 5 datanodes, each with different storage ID and type DatanodeInfo[] locs = new DatanodeInfo[5]; String[] storageIDs = new String[5]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java index 9e9890f10316a..771cad2b30fca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java @@ -18,21 +18,35 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.anyList; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import java.io.IOException; import java.util.EnumSet; +import org.apache.commons.lang.reflect.FieldUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSOutputStream; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.io.EnumSetWritable; +import org.apache.hadoop.net.DNSToSwitchMapping; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -155,4 +169,48 @@ public void testAddBlockUC() throws Exception { } } } + + @Test + public void testAddBlockWithoutTopology() throws Exception { + final String src = "/testAddBlockWithoutTopology"; + + final FSNamesystem ns = cluster.getNamesystem(); + final NamenodeProtocols nn = cluster.getNameNodeRpc(); + + // create file + HdfsFileStatus fileStatus = nn.create(src, FsPermission.getFileDefault(), + "clientName", + new EnumSetWritable(EnumSet.of(CreateFlag.CREATE)), + true, (short) 1, 1024, null, null, null); + + FieldUtils.writeField(ns.getBlockManager(), "topologySortDisabled", true, true); + DNSToSwitchMapping mockMapping = mock(DNSToSwitchMapping.class); + DNSToSwitchMapping origMapping = (DNSToSwitchMapping) FieldUtils.readField( + ns.getBlockManager().getDatanodeManager(), + "dnsToSwitchMapping", + true); + FieldUtils.writeField(ns.getBlockManager().getDatanodeManager(), + "dnsToSwitchMapping", + mockMapping, + true); + + LocatedBlock locatedBlock = nn.addBlock(src, "clientName", null, null, + HdfsConstants.GRANDFATHER_INODE_ID, null, null); + + assertEquals(1, locatedBlock.getLocations().length); + + LocatedBlock additionalLocatedBlock = nn.getAdditionalDatanode( + src, fileStatus.getFileId(), locatedBlock.getBlock(), + locatedBlock.getLocations(), locatedBlock.getStorageIDs(), + new DatanodeInfo[0],2, "clientName"); + + assertEquals(3, additionalLocatedBlock.getLocations().length); + + verify(mockMapping, never()).resolve(anyList()); + FieldUtils.writeField(ns.getBlockManager().getDatanodeManager(), + "dnsToSwitchMapping", + origMapping, + true); + FieldUtils.writeField(ns.getBlockManager(), "topologySortDisabled", false, true); + } }