diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java index e8d80e5dc924..fd8cf05b294e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/StorageContainerNodeProtocol.java @@ -72,24 +72,20 @@ RegisteredCommand register(DatanodeDetails datanodeDetails, * TODO: Cleanup and update tests, HDDS-9642. * * @param datanodeDetails - Datanode ID. - * @param layoutVersionInfo - Layout Version Proto. * @return Commands to be sent to the datanode. */ - default List processHeartbeat(DatanodeDetails datanodeDetails, - LayoutVersionProto layoutVersionInfo) { - return processHeartbeat(datanodeDetails, layoutVersionInfo, null); + default List processHeartbeat(DatanodeDetails datanodeDetails) { + return processHeartbeat(datanodeDetails, null); }; /** * Send heartbeat to indicate the datanode is alive and doing well. * @param datanodeDetails - Datanode ID. - * @param layoutVersionInfo - Layout Version Proto. * @param queueReport - The CommandQueueReportProto report from the * heartbeating datanode. * @return Commands to be sent to the datanode. */ List processHeartbeat(DatanodeDetails datanodeDetails, - LayoutVersionProto layoutVersionInfo, CommandQueueReportProto queueReport); /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index cc5fb9aa776e..152c8fd659d9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -511,19 +511,15 @@ private boolean updateDnsToUuidMap( * Send heartbeat to indicate the datanode is alive and doing well. * * @param datanodeDetails - DatanodeDetailsProto. - * @param layoutInfo - Layout Version Proto. * @return SCMheartbeat response. */ @Override public List processHeartbeat(DatanodeDetails datanodeDetails, - LayoutVersionProto layoutInfo, - CommandQueueReportProto queueReport) { + CommandQueueReportProto queueReport) { Preconditions.checkNotNull(datanodeDetails, "Heartbeat is missing " + "DatanodeDetails."); try { nodeStateManager.updateLastHeartbeatTime(datanodeDetails); - nodeStateManager.updateLastKnownLayoutVersion(datanodeDetails, - layoutInfo); metrics.incNumHBProcessed(); updateDatanodeOpState(datanodeDetails); } catch (NodeNotFoundException e) { @@ -686,6 +682,15 @@ public void processLayoutVersionReport(DatanodeDetails datanodeDetails, layoutVersionReport.toString().replaceAll("\n", "\\\\n")); } + try { + nodeStateManager.updateLastKnownLayoutVersion(datanodeDetails, + layoutVersionReport); + } catch (NodeNotFoundException e) { + LOG.error("SCM trying to process Layout Version from an " + + "unregistered node {}.", datanodeDetails); + return; + } + // Software layout version is hardcoded to the SCM. int scmSlv = scmLayoutVersionManager.getSoftwareLayoutVersion(); int dnSlv = layoutVersionReport.getSoftwareLayoutVersion(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java index 38db618ef539..b6dc6f599bd6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java @@ -130,8 +130,7 @@ public List dispatch(SCMHeartbeatRequestProto heartbeat) { commandQueueReport = heartbeat.getCommandQueueReport(); } // should we dispatch heartbeat through eventPublisher? - commands = nodeManager.processHeartbeat(datanodeDetails, - layoutVersion, commandQueueReport); + commands = nodeManager.processHeartbeat(datanodeDetails, commandQueueReport); if (heartbeat.hasNodeReport()) { LOG.debug("Dispatching Node Report."); eventPublisher.fireEvent( diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index 480f82976f40..84f3684ab7cc 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -772,13 +772,11 @@ private synchronized void addEntryTodnsToUuidMap( * Send heartbeat to indicate the datanode is alive and doing well. * * @param datanodeDetails - Datanode ID. - * @param layoutInfo - DataNode Layout info * @param commandQueueReportProto - Command Queue Report Proto * @return SCMheartbeat response list */ @Override public List processHeartbeat(DatanodeDetails datanodeDetails, - LayoutVersionProto layoutInfo, CommandQueueReportProto commandQueueReportProto) { return null; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java index 2bd13d4489ef..9649159de3f1 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java @@ -428,7 +428,6 @@ public RegisteredCommand register(DatanodeDetails datanodeDetails, @Override public List processHeartbeat(DatanodeDetails datanodeDetails, - LayoutVersionProto layoutInfo, CommandQueueReportProto commandQueueReportProto) { return null; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java index 8dd6914e6449..0e98267d3725 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java @@ -25,7 +25,6 @@ import java.util.Arrays; import java.util.List; import java.util.UUID; -import java.util.concurrent.TimeoutException; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsConfigKeys; @@ -35,7 +34,6 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto; import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.HddsTestUtils; @@ -67,7 +65,6 @@ import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.SCMTestUtils; -import org.apache.hadoop.ozone.upgrade.LayoutVersionManager; import org.apache.hadoop.test.PathUtils; import org.apache.commons.io.IOUtils; import org.apache.ozone.test.GenericTestUtils; @@ -79,7 +76,6 @@ import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; -import static org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.toLayoutVersionProto; import static org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager.maxLayoutVersion; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -182,7 +178,7 @@ ContainerManager createContainerManager() */ @Test public void testContainerPlacementCapacity() throws IOException, - InterruptedException, TimeoutException { + InterruptedException { final int nodeCount = 4; final long capacity = 10L * OzoneConsts.GB; final long used = 2L * OzoneConsts.GB; @@ -201,11 +197,6 @@ public void testContainerPlacementCapacity() throws IOException, List datanodes = HddsTestUtils .getListOfRegisteredDatanodeDetails(scmNodeManager, nodeCount); XceiverClientManager xceiverClientManager = null; - LayoutVersionManager versionManager = - scmNodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = - toLayoutVersionProto(versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); try { for (DatanodeDetails datanodeDetails : datanodes) { UUID dnId = datanodeDetails.getUuid(); @@ -221,7 +212,7 @@ public void testContainerPlacementCapacity() throws IOException, Arrays.asList(report), emptyList()); datanodeInfo.updateStorageReports( nodeReportProto.getStorageReportList()); - scmNodeManager.processHeartbeat(datanodeDetails, layoutInfo); + scmNodeManager.processHeartbeat(datanodeDetails); } //TODO: wait for heartbeat to be processed diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java index 332d762a4cdc..09f0dd59b9f9 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java @@ -41,7 +41,6 @@ import java.util.ArrayList; import static java.util.Collections.singletonList; -import static org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.defaultLayoutVersionProto; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -165,7 +164,7 @@ public void testNodesCanBeDecommissionedAndRecommissioned() // Attempt to decommission on dn(9) which has another instance at // dn(11) with identical ports. - nodeManager.processHeartbeat(dns.get(9), defaultLayoutVersionProto()); + nodeManager.processHeartbeat(dns.get(9)); DatanodeDetails duplicatePorts = dns.get(9); decom.decommissionNodes(singletonList(duplicatePorts.getIpAddress())); assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING, @@ -237,7 +236,7 @@ public void testNodesCanBeDecommissionedAndRecommissionedMixedPorts() // Now decommission one of the DNs with the duplicate port DatanodeDetails expectedDN = dns.get(9); - nodeManager.processHeartbeat(expectedDN, defaultLayoutVersionProto()); + nodeManager.processHeartbeat(expectedDN); decom.decommissionNodes(singletonList( expectedDN.getIpAddress() + ":" + ratisPort)); @@ -287,7 +286,7 @@ public void testNodesCanBePutIntoMaintenanceAndRecommissioned() // Attempt to enable maintenance on dn(9) which has another instance at // dn(11) with identical ports. - nodeManager.processHeartbeat(dns.get(9), defaultLayoutVersionProto()); + nodeManager.processHeartbeat(dns.get(9)); DatanodeDetails duplicatePorts = dns.get(9); decom.startMaintenanceNodes(singletonList(duplicatePorts.getIpAddress()), 100); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 930774a54bf3..56cf936d2298 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -73,7 +73,6 @@ import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; -import org.apache.hadoop.ozone.upgrade.LayoutVersionManager; import org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.Time; @@ -222,17 +221,12 @@ public void testScmHeartbeat() throws IOException, InterruptedException, AuthenticationException { try (SCMNodeManager nodeManager = createNodeManager(getConf())) { - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); int registeredNodes = 5; // Send some heartbeats from different nodes. for (int x = 0; x < registeredNodes; x++) { DatanodeDetails datanodeDetails = HddsTestUtils .createRandomDatanodeAndRegister(nodeManager); - nodeManager.processHeartbeat(datanodeDetails, layoutInfo); + nodeManager.processHeartbeat(datanodeDetails); } //TODO: wait for heartbeat to be processed @@ -379,7 +373,7 @@ private void assertPipelineClosedAfterLayoutHeartbeat( allNodes); // node sends incorrect layout. - nodeManager.processHeartbeat(node, layout); + nodeManager.processLayoutVersionReport(node, layout); // Its pipelines should be closed then removed, meaning there is not // enough nodes for factor 3 pipelines. @@ -447,8 +441,10 @@ public void testScmLayoutOnRegister() assertPipelineCreationFailsWithNotEnoughNodes(1); // Heartbeat bad MLV nodes back to healthy. - nodeManager.processHeartbeat(badMlvNode1, CORRECT_LAYOUT_PROTO); - nodeManager.processHeartbeat(badMlvNode2, CORRECT_LAYOUT_PROTO); + nodeManager.processLayoutVersionReport(badMlvNode1, CORRECT_LAYOUT_PROTO); + nodeManager.processLayoutVersionReport(badMlvNode2, CORRECT_LAYOUT_PROTO); + nodeManager.processHeartbeat(badMlvNode1); + nodeManager.processHeartbeat(badMlvNode2); // After moving out of healthy readonly, pipeline creation should be // triggered. @@ -561,14 +557,8 @@ public void testScmShutdown() SCMNodeManager nodeManager = createNodeManager(conf); DatanodeDetails datanodeDetails = HddsTestUtils .createRandomDatanodeAndRegister(nodeManager); - LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - nodeManager.close(); - // These should never be processed. - nodeManager.processHeartbeat(datanodeDetails, layoutInfo); + nodeManager.processHeartbeat(datanodeDetails); // Let us just wait for 2 seconds to prove that HBs are not processed. Thread.sleep(2 * 1000); @@ -591,16 +581,10 @@ public void testScmHealthyNodeCount() final int count = 10; try (SCMNodeManager nodeManager = createNodeManager(conf)) { - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - for (int x = 0; x < count; x++) { DatanodeDetails datanodeDetails = HddsTestUtils .createRandomDatanodeAndRegister(nodeManager); - nodeManager.processHeartbeat(datanodeDetails, layoutInfo); + nodeManager.processHeartbeat(datanodeDetails); } //TODO: wait for heartbeat to be processed Thread.sleep(4 * 1000); @@ -660,12 +644,6 @@ public void testSetNodeOpStateAndCommandFired() DatanodeDetails dn = HddsTestUtils.createRandomDatanodeAndRegister( nodeManager); - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - final LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - long expiry = System.currentTimeMillis() / 1000 + 1000; nodeManager.setNodeOperationalState(dn, HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE, expiry); @@ -673,7 +651,7 @@ public void testSetNodeOpStateAndCommandFired() // If found mismatch, leader SCM fires a SetNodeOperationalStateCommand // to update the opState persisted in Datanode. scm.getScmContext().updateLeaderAndTerm(true, 1); - List commands = nodeManager.processHeartbeat(dn, layoutInfo); + List commands = nodeManager.processHeartbeat(dn); assertEquals(SetNodeOperationalStateCommand.class, commands.get(0).getClass()); @@ -682,7 +660,7 @@ public void testSetNodeOpStateAndCommandFired() // If found mismatch, follower SCM update its own opState according // to the heartbeat, and no SCMCommand will be fired. scm.getScmContext().updateLeaderAndTerm(false, 2); - commands = nodeManager.processHeartbeat(dn, layoutInfo); + commands = nodeManager.processHeartbeat(dn); assertEquals(0, commands.size()); @@ -716,11 +694,6 @@ public void testScmDetectStaleAndDeadNode() try (SCMNodeManager nodeManager = createNodeManager(conf)) { - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); List nodeList = createNodeSet(nodeManager, nodeCount); @@ -728,18 +701,18 @@ public void testScmDetectStaleAndDeadNode() nodeManager); // Heartbeat once - nodeManager.processHeartbeat(staleNode, layoutInfo); + nodeManager.processHeartbeat(staleNode); // Heartbeat all other nodes. for (DatanodeDetails dn : nodeList) { - nodeManager.processHeartbeat(dn, layoutInfo); + nodeManager.processHeartbeat(dn); } // Wait for 2 seconds .. and heartbeat good nodes again. Thread.sleep(2 * 1000); for (DatanodeDetails dn : nodeList) { - nodeManager.processHeartbeat(dn, layoutInfo); + nodeManager.processHeartbeat(dn); } // Wait for 2 seconds, wait a total of 4 seconds to make sure that the @@ -762,7 +735,7 @@ public void testScmDetectStaleAndDeadNode() Thread.sleep(1000); // heartbeat good nodes again. for (DatanodeDetails dn : nodeList) { - nodeManager.processHeartbeat(dn, layoutInfo); + nodeManager.processHeartbeat(dn); } // 6 seconds is the dead window for this test , so we wait a total of @@ -821,18 +794,13 @@ public void testScmHandleJvmPause() deadNodeInterval, SECONDS); try (SCMNodeManager nodeManager = createNodeManager(conf)) { - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); DatanodeDetails node1 = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); DatanodeDetails node2 = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); - nodeManager.processHeartbeat(node1, layoutInfo); - nodeManager.processHeartbeat(node2, layoutInfo); + nodeManager.processHeartbeat(node1); + nodeManager.processHeartbeat(node2); // Sleep so that heartbeat processing thread gets to run. Thread.sleep(1000); @@ -878,7 +846,7 @@ public void testScmHandleJvmPause() assertEquals(2, nodeManager.getNodeCount(NodeStatus.inServiceHealthy())); // Step 5 : heartbeat for node1 - nodeManager.processHeartbeat(node1, layoutInfo); + nodeManager.processHeartbeat(node1); // Step 6 : wait for health check process to run Thread.sleep(1000); @@ -1001,8 +969,6 @@ public void testProcessCommandQueueReport() SCMNodeManager nodeManager = new SCMNodeManager(conf, scmStorageConfig, eventPublisher, new NetworkTopologyImpl(conf), scmContext, lvm); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - lvm.getMetadataLayoutVersion(), lvm.getSoftwareLayoutVersion()); DatanodeDetails node1 = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); @@ -1022,7 +988,7 @@ scmStorageConfig, eventPublisher, new NetworkTopologyImpl(conf), assertEquals(5, nodeManager.getTotalDatanodeCommandCount( node1, SCMCommandProto.Type.deleteBlocksCommand)); - nodeManager.processHeartbeat(node1, layoutInfo, + nodeManager.processHeartbeat(node1, CommandQueueReportProto.newBuilder() .addCommand(SCMCommandProto.Type.replicateContainerCommand) .addCount(123) @@ -1052,7 +1018,7 @@ scmStorageConfig, eventPublisher, new NetworkTopologyImpl(conf), // Send another report missing an earlier entry, and ensure it is not // still reported as a stale value. - nodeManager.processHeartbeat(node1, layoutInfo, + nodeManager.processHeartbeat(node1, CommandQueueReportProto.newBuilder() .addCommand(SCMCommandProto.Type.closeContainerCommand) .addCount(11) @@ -1129,7 +1095,7 @@ public void testCommandCount() public void testScmCheckForErrorOnNullDatanodeDetails() throws IOException, AuthenticationException { try (SCMNodeManager nodeManager = createNodeManager(getConf())) { NullPointerException npe = assertThrows(NullPointerException.class, - () -> nodeManager.processHeartbeat(null, null)); + () -> nodeManager.processHeartbeat(null)); assertThat(npe).hasMessage("Heartbeat is missing DatanodeDetails."); } } @@ -1198,20 +1164,15 @@ public void testScmClusterIsInExpectedState1() * Cluster state: Healthy: All nodes are heartbeat-ing like normal. */ try (SCMNodeManager nodeManager = createNodeManager(conf)) { - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); DatanodeDetails healthyNode = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); DatanodeDetails staleNode = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); DatanodeDetails deadNode = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); - nodeManager.processHeartbeat(healthyNode, layoutInfo); - nodeManager.processHeartbeat(staleNode, layoutInfo); - nodeManager.processHeartbeat(deadNode, layoutInfo); + nodeManager.processHeartbeat(healthyNode); + nodeManager.processHeartbeat(staleNode); + nodeManager.processHeartbeat(deadNode); // Sleep so that heartbeat processing thread gets to run. Thread.sleep(500); @@ -1237,12 +1198,12 @@ public void testScmClusterIsInExpectedState1() * the 3 second windows. */ - nodeManager.processHeartbeat(healthyNode, layoutInfo); - nodeManager.processHeartbeat(staleNode, layoutInfo); - nodeManager.processHeartbeat(deadNode, layoutInfo); + nodeManager.processHeartbeat(healthyNode); + nodeManager.processHeartbeat(staleNode); + nodeManager.processHeartbeat(deadNode); Thread.sleep(1500); - nodeManager.processHeartbeat(healthyNode, layoutInfo); + nodeManager.processHeartbeat(healthyNode); Thread.sleep(2 * 1000); assertEquals(1, nodeManager.getNodeCount(NodeStatus.inServiceHealthy())); @@ -1263,10 +1224,10 @@ public void testScmClusterIsInExpectedState1() * staleNode to move to stale state and deadNode to move to dead state. */ - nodeManager.processHeartbeat(healthyNode, layoutInfo); - nodeManager.processHeartbeat(staleNode, layoutInfo); + nodeManager.processHeartbeat(healthyNode); + nodeManager.processHeartbeat(staleNode); Thread.sleep(1500); - nodeManager.processHeartbeat(healthyNode, layoutInfo); + nodeManager.processHeartbeat(healthyNode); Thread.sleep(2 * 1000); // 3.5 seconds have elapsed for stale node, so it moves into Stale. @@ -1298,9 +1259,9 @@ public void testScmClusterIsInExpectedState1() * Cluster State : let us heartbeat all the nodes and verify that we get * back all the nodes in healthy state. */ - nodeManager.processHeartbeat(healthyNode, layoutInfo); - nodeManager.processHeartbeat(staleNode, layoutInfo); - nodeManager.processHeartbeat(deadNode, layoutInfo); + nodeManager.processHeartbeat(healthyNode); + nodeManager.processHeartbeat(staleNode); + nodeManager.processHeartbeat(deadNode); Thread.sleep(500); //Assert all nodes are healthy. assertEquals(3, nodeManager.getAllNodes().size()); @@ -1319,13 +1280,9 @@ public void testScmClusterIsInExpectedState1() private void heartbeatNodeSet(SCMNodeManager manager, List list, int sleepDuration) throws InterruptedException { - LayoutVersionManager versionManager = manager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); while (!Thread.currentThread().isInterrupted()) { for (DatanodeDetails dn : list) { - manager.processHeartbeat(dn, layoutInfo); + manager.processHeartbeat(dn); } Thread.sleep(sleepDuration); } @@ -1408,16 +1365,10 @@ public void testScmClusterIsInExpectedState2() } }; - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - // No Thread just one time HBs the node manager, so that these will be // marked as dead nodes eventually. for (DatanodeDetails dn : deadNodeList) { - nodeManager.processHeartbeat(dn, layoutInfo); + nodeManager.processHeartbeat(dn); } @@ -1544,11 +1495,6 @@ public void testScmStatsFromNodeReport() final long remaining = capacity - used; List dnList = new ArrayList<>(nodeCount); try (SCMNodeManager nodeManager = createNodeManager(conf)) { - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); EventQueue eventQueue = (EventQueue) scm.getEventQueue(); for (int x = 0; x < nodeCount; x++) { @@ -1561,7 +1507,7 @@ public void testScmStatsFromNodeReport() .createStorageReport(dnId, storagePath, capacity, used, free, null); nodeManager.register(dn, HddsTestUtils.createNodeReport( Arrays.asList(report), emptyList()), null); - nodeManager.processHeartbeat(dn, layoutInfo); + nodeManager.processHeartbeat(dn); } //TODO: wait for EventQueue to be processed eventQueue.processAll(8000L); @@ -1653,12 +1599,7 @@ public void tesVolumeInfoFromNodeReport() } nodeManager.register(dn, HddsTestUtils.createNodeReport(reports, emptyList()), null); - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - nodeManager.processHeartbeat(dn, layoutInfo); + nodeManager.processHeartbeat(dn); //TODO: wait for EventQueue to be processed eventQueue.processAll(8000L); @@ -1711,12 +1652,7 @@ public void testScmNodeReportUpdate() nodeReportHandler.onMessage( new NodeReportFromDatanode(datanodeDetails, nodeReportProto), publisher); - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - nodeManager.processHeartbeat(datanodeDetails, layoutInfo); + nodeManager.processHeartbeat(datanodeDetails); Thread.sleep(100); } @@ -1791,13 +1727,7 @@ public void testScmNodeReportUpdate() foundRemaining = nodeManager.getStats().getRemaining().get(); assertEquals(0, foundRemaining); - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); - - nodeManager.processHeartbeat(datanodeDetails, layoutInfo); + nodeManager.processHeartbeat(datanodeDetails); // Wait up to 5 seconds so that the dead node becomes healthy // Verify usage info should be updated. @@ -1846,14 +1776,9 @@ public void testHandlingSCMCommandEvent() new CloseContainerCommand(1L, PipelineID.randomId()))); - LayoutVersionManager versionManager = - nodemanager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); eq.processAll(1000L); List command = - nodemanager.processHeartbeat(datanodeDetails, layoutInfo); + nodemanager.processHeartbeat(datanodeDetails); // With dh registered, SCM will send create pipeline command to dn assertThat(command.size()).isGreaterThanOrEqualTo(1); assertTrue(command.get(0).getClass().equals( @@ -1983,16 +1908,11 @@ public void testGetNodeInfo() Arrays.asList(report), emptyList()), HddsTestUtils.getRandomPipelineReports()); - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = toLayoutVersionProto( - versionManager.getMetadataLayoutVersion(), - versionManager.getSoftwareLayoutVersion()); nodeManager.register(datanodeDetails, HddsTestUtils.createNodeReport(Arrays.asList(report), emptyList()), - HddsTestUtils.getRandomPipelineReports(), layoutInfo); - nodeManager.processHeartbeat(datanodeDetails, layoutInfo); + HddsTestUtils.getRandomPipelineReports()); + nodeManager.processHeartbeat(datanodeDetails); if (i == 5) { nodeManager.setNodeOperationalState(datanodeDetails, HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeMetrics.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeMetrics.java index 53f20d531ef8..20c6aa2de37a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeMetrics.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeMetrics.java @@ -26,8 +26,6 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.LayoutVersionProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; @@ -38,7 +36,6 @@ import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; import org.apache.hadoop.metrics2.MetricsRecordBuilder; -import org.apache.hadoop.ozone.upgrade.LayoutVersionManager; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -98,16 +95,8 @@ public static void teardown() throws IOException { @Test public void testHBProcessing() throws InterruptedException { long hbProcessed = getCounter("NumHBProcessed"); - createNodeReport(); - - LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = LayoutVersionProto.newBuilder() - .setSoftwareLayoutVersion(versionManager.getSoftwareLayoutVersion()) - .setMetadataLayoutVersion(versionManager.getMetadataLayoutVersion()) - .build(); - nodeManager.processHeartbeat(registeredDatanode, layoutInfo); - + nodeManager.processHeartbeat(registeredDatanode); assertEquals(hbProcessed + 1, getCounter("NumHBProcessed"), "NumHBProcessed"); } @@ -117,17 +106,8 @@ public void testHBProcessing() throws InterruptedException { */ @Test public void testHBProcessingFailure() { - long hbProcessedFailed = getCounter("NumHBProcessingFailed"); - - LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = LayoutVersionProto.newBuilder() - .setSoftwareLayoutVersion(versionManager.getSoftwareLayoutVersion()) - .setMetadataLayoutVersion(versionManager.getMetadataLayoutVersion()) - .build(); - nodeManager.processHeartbeat(MockDatanodeDetails - .randomDatanodeDetails(), layoutInfo); - + nodeManager.processHeartbeat(MockDatanodeDetails.randomDatanodeDetails()); assertEquals(hbProcessedFailed + 1, getCounter("NumHBProcessingFailed"), "NumHBProcessingFailed"); } @@ -252,13 +232,7 @@ public void testNodeCountAndInfoMetricsReported() throws Exception { getMetrics(SCMNodeMetrics.class.getSimpleName())); assertGauge("TotalUsed", 10L, getMetrics(SCMNodeMetrics.class.getSimpleName())); - - LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager(); - LayoutVersionProto layoutInfo = LayoutVersionProto.newBuilder() - .setSoftwareLayoutVersion(versionManager.getSoftwareLayoutVersion()) - .setMetadataLayoutVersion(versionManager.getMetadataLayoutVersion()) - .build(); - nodeManager.processHeartbeat(registeredDatanode, layoutInfo); + nodeManager.processHeartbeat(registeredDatanode); sleep(4000); metricsSource = getMetrics(SCMNodeMetrics.SOURCE_NAME); assertGauge("InServiceHealthyReadonlyNodes", 0, metricsSource); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestStatisticsUpdate.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestStatisticsUpdate.java index 8aff3dd28aaa..0ef28f658d45 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestStatisticsUpdate.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestStatisticsUpdate.java @@ -22,8 +22,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.protocol.proto @@ -40,7 +38,6 @@ import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.container.common.SCMTestUtils; -import org.apache.hadoop.ozone.upgrade.LayoutVersionManager; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -122,19 +119,13 @@ public void testStatisticsUpdate() throws Exception { //TODO: Support logic to mark a node as dead in NodeManager. - LayoutVersionManager versionManager = nodeManager.getLayoutVersionManager(); - StorageContainerDatanodeProtocolProtos.LayoutVersionProto layoutInfo = - StorageContainerDatanodeProtocolProtos.LayoutVersionProto.newBuilder() - .setSoftwareLayoutVersion(versionManager.getSoftwareLayoutVersion()) - .setMetadataLayoutVersion(versionManager.getMetadataLayoutVersion()) - .build(); - nodeManager.processHeartbeat(datanode2, layoutInfo); + nodeManager.processHeartbeat(datanode2); Thread.sleep(1000); - nodeManager.processHeartbeat(datanode2, layoutInfo); + nodeManager.processHeartbeat(datanode2); Thread.sleep(1000); - nodeManager.processHeartbeat(datanode2, layoutInfo); + nodeManager.processHeartbeat(datanode2); Thread.sleep(1000); - nodeManager.processHeartbeat(datanode2, layoutInfo); + nodeManager.processHeartbeat(datanode2); //THEN statistics in SCM should changed. stat = nodeManager.getStats(); assertEquals(200L, stat.getCapacity().get()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java index 0a865043356f..92a6fd455d8b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java @@ -371,13 +371,11 @@ public RegisteredCommand register(DatanodeDetails dd, * Send heartbeat to indicate the datanode is alive and doing well. * * @param dd - Datanode Details. - * @param layoutInfo - Layout Version Proto * @param commandQueueReportProto - Command Queue Report Proto * @return SCMheartbeat response list */ @Override public List processHeartbeat(DatanodeDetails dd, - LayoutVersionProto layoutInfo, CommandQueueReportProto commandQueueReportProto) { return null; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 3e9d15d490b2..07787b87f375 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.IncrementalContainerReportProto; @@ -85,7 +84,6 @@ import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; -import org.apache.hadoop.ozone.upgrade.LayoutVersionManager; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.ExitUtil; @@ -514,17 +512,8 @@ public void testBlockDeletingThrottling() throws Exception { GenericTestUtils.waitFor(() -> { NodeManager nodeManager = cluster.getStorageContainerManager() .getScmNodeManager(); - LayoutVersionManager versionManager = - nodeManager.getLayoutVersionManager(); - StorageContainerDatanodeProtocolProtos.LayoutVersionProto layoutInfo - = StorageContainerDatanodeProtocolProtos.LayoutVersionProto - .newBuilder() - .setSoftwareLayoutVersion(versionManager.getSoftwareLayoutVersion()) - .setMetadataLayoutVersion(versionManager.getMetadataLayoutVersion()) - .build(); List commands = nodeManager.processHeartbeat( - nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(0), - layoutInfo); + nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(0)); if (commands != null) { for (SCMCommand cmd : commands) { if (cmd.getType() == SCMCommandProto.Type.deleteBlocksCommand) { diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java index 2db858616156..ab919b7d9719 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java @@ -220,12 +220,11 @@ public void onMessage(CommandForDatanode commandForDatanode, * Send heartbeat to indicate the datanode is alive and doing well. * * @param datanodeDetails - DatanodeDetailsProto. - * @param layoutInfo - Layout Version Proto * @return SCMheartbeat response. */ @Override public List processHeartbeat(DatanodeDetails datanodeDetails, - LayoutVersionProto layoutInfo, CommandQueueReportProto queueReport) { + CommandQueueReportProto queueReport) { List cmds = new ArrayList<>(); long currentTime = Time.now(); if (needUpdate(datanodeDetails, currentTime)) { @@ -237,8 +236,7 @@ public List processHeartbeat(DatanodeDetails datanodeDetails, } // Update heartbeat map with current time datanodeHeartbeatMap.put(datanodeDetails.getUuid(), Time.now()); - cmds.addAll(super.processHeartbeat(datanodeDetails, - layoutInfo, queueReport)); + cmds.addAll(super.processHeartbeat(datanodeDetails, queueReport)); return cmds.stream() .filter(c -> ALLOWED_COMMANDS.contains(c.getType())) .collect(toList()); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java index 80ec025f91f8..99bb482cb51e 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java @@ -22,7 +22,6 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_NAMES; -import static org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.defaultLayoutVersionProto; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_DIRS; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -138,8 +137,7 @@ public void testReconNodeDB() throws IOException, NodeNotFoundException { // Upon processing the heartbeat, the illegal command should be filtered out List returnedCmds = - reconNodeManager.processHeartbeat(datanodeDetails, - defaultLayoutVersionProto()); + reconNodeManager.processHeartbeat(datanodeDetails); assertEquals(1, returnedCmds.size()); assertEquals(SCMCommandProto.Type.reregisterCommand, returnedCmds.get(0).getType()); @@ -148,8 +146,7 @@ public void testReconNodeDB() throws IOException, NodeNotFoundException { datanodeDetails.setPersistedOpState( HddsProtos.NodeOperationalState.DECOMMISSIONED); datanodeDetails.setPersistedOpStateExpiryEpochSec(12345L); - reconNodeManager.processHeartbeat(datanodeDetails, - defaultLayoutVersionProto()); + reconNodeManager.processHeartbeat(datanodeDetails); // Check both persistedOpState and NodeStatus#operationalState assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED, dnDetails.getPersistedOpState()); @@ -238,8 +235,7 @@ public void testDatanodeUpdate() throws IOException { datanodeDetails.setHostName("hostname2"); // Upon processing the heartbeat, the illegal command should be filtered out List returnedCmds = - reconNodeManager.processHeartbeat(datanodeDetails, - defaultLayoutVersionProto()); + reconNodeManager.processHeartbeat(datanodeDetails); assertEquals(1, returnedCmds.size()); assertEquals(SCMCommandProto.Type.reregisterCommand, returnedCmds.get(0).getType());