diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3bee85223f5f2..9accdc01a34c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -20,6 +20,9 @@ Trunk (Unreleased)
 
     HDFS-7985. WebHDFS should be always enabled. (Li Lu via wheat9)
 
+    HDFS-7281. Missing block is marked as corrupted block (Ming Ma via
+    Yongjun Zhang)
+
   NEW FEATURES
 
     HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 1db1356678e87..53ffe0bc354e3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -849,7 +849,8 @@ private LocatedBlock createLocatedBlock(final BlockInfoContiguous blk, final lon
     }
 
     final int numNodes = blocksMap.numNodes(blk);
-    final boolean isCorrupt = numCorruptNodes == numNodes;
+    final boolean isCorrupt = numCorruptNodes != 0 &&
+        numCorruptNodes == numNodes;
     final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
     final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
     int j = 0;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 0cfe31a28213b..ac77394857d5d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -531,6 +531,7 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
     int missing = 0;
     int corrupt = 0;
     long missize = 0;
+    long corruptSize = 0;
     int underReplicatedPerFile = 0;
     int misReplicatedPerFile = 0;
     StringBuilder report = new StringBuilder();
@@ -570,10 +571,11 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
       // count corrupt blocks
       boolean isCorrupt = lBlk.isCorrupt();
       if (isCorrupt) {
+        res.addCorrupt(block.getNumBytes());
         corrupt++;
-        res.corruptBlocks++;
-        out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() +
-            " block " + block.getBlockName()+"\n");
+        corruptSize += block.getNumBytes();
+        out.print("\n" + path + ": CORRUPT blockpool " +
+            block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
       }
 
       // count minimally replicated blocks
@@ -619,7 +621,11 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
       // report
       String blkName = block.toString();
       report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
-      if (totalReplicasPerBlock == 0) {
+      if (totalReplicasPerBlock == 0 && !isCorrupt) {
+        // If the block is corrupted, it means all its available replicas are
+        // corrupted. We don't mark it as missing given these available replicas
+        // might still be accessible as the block might be incorrectly marked as
+        // corrupted by client machines.
         report.append(" MISSING!");
         res.addMissing(block.toString(), block.getNumBytes());
         missing++;
@@ -674,9 +680,15 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
 
     // count corrupt file & move or delete if necessary
     if ((missing > 0) || (corrupt > 0)) {
-      if (!showFiles && (missing > 0)) {
-        out.print("\n" + path + ": MISSING " + missing
-            + " blocks of total size " + missize + " B.");
+      if (!showFiles) {
+        if (missing > 0) {
+          out.print("\n" + path + ": MISSING " + missing
+              + " blocks of total size " + missize + " B.");
+        }
+        if (corrupt > 0) {
+          out.print("\n" + path + ": CORRUPT " + corrupt
+              + " blocks of total size " + corruptSize + " B.");
+        }
       }
       res.corruptFiles++;
       if (isOpen) {
@@ -688,9 +700,16 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
     }
 
     if (showFiles) {
-      if (missing > 0) {
-        out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
-      }  else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
+      if (missing > 0 || corrupt > 0) {
+        if (missing > 0) {
+          out.print(" MISSING " + missing + " blocks of total size " +
+              missize + " B\n");
+        }
+        if (corrupt > 0) {
+          out.print(" CORRUPT " + corrupt + " blocks of total size " +
+              corruptSize + " B\n");
+        }
+      } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
         out.print(" OK\n");
       }
       if (showBlocks) {
@@ -956,6 +975,7 @@ static class Result {
     long missingSize = 0L;
     long corruptFiles = 0L;
     long corruptBlocks = 0L;
+    long corruptSize = 0L;
     long excessiveReplicas = 0L;
     long missingReplicas = 0L;
     long decommissionedReplicas = 0L;
@@ -998,7 +1018,13 @@ void addMissing(String id, long size) {
       missingIds.add(id);
       missingSize += size;
     }
-    
+
+    /** Add a corrupt block. */
+    void addCorrupt(long size) {
+      corruptBlocks++;
+      corruptSize += size;
+    }
+
     /** Return the actual replication factor. */
     float getReplicationFactor() {
       if (totalBlocks == 0)
@@ -1051,7 +1077,8 @@ public String toString() {
               "\n MISSING SIZE:\t\t").append(missingSize).append(" B");
         }
         if (corruptBlocks > 0) {
-          res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
+          res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks).append(
+              "\n CORRUPT SIZE:\t\t").append(corruptSize).append(" B");
         }
       }
       res.append("\n ********************************");
@@ -1086,7 +1113,8 @@ public String toString() {
       }
       res.append("\n Default replication factor:\t").append(replication)
           .append("\n Average block replication:\t").append(
-          getReplicationFactor()).append("\n Corrupt blocks:\t\t").append(
+          getReplicationFactor()).append("\n Missing blocks:\t\t").append(
+          missingIds.size()).append("\n Corrupt blocks:\t\t").append(
           corruptBlocks).append("\n Missing replicas:\t\t").append(
           missingReplicas);
       if (totalReplicas > 0) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 8fe273b05908f..1ce09e1767343 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -120,7 +120,10 @@ public class TestFsck {
       "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s" +
       "cmd=getfileinfo\\ssrc=\\/\\sdst=null\\s" +
       "perm=null\\s" + "proto=.*");
-  
+
+  static final Pattern numMissingBlocksPattern = Pattern.compile(
+      ".*Missing blocks:\t\t([0123456789]*).*");
+
   static final Pattern numCorruptBlocksPattern = Pattern.compile(
       ".*Corrupt blocks:\t\t([0123456789]*).*");
 
@@ -360,19 +363,27 @@ public void testFsckMove() throws Exception {
       // Wait for fsck to discover all the missing blocks
       while (true) {
         outStr = runFsck(conf, 1, false, "/");
+        String numMissing = null;
         String numCorrupt = null;
         for (String line : outStr.split(LINE_SEPARATOR)) {
-          Matcher m = numCorruptBlocksPattern.matcher(line);
+          Matcher m = numMissingBlocksPattern.matcher(line);
+          if (m.matches()) {
+            numMissing = m.group(1);
+          }
+          m = numCorruptBlocksPattern.matcher(line);
           if (m.matches()) {
             numCorrupt = m.group(1);
+          }
+          if (numMissing != null && numCorrupt != null) {
             break;
           }
         }
-        if (numCorrupt == null) {
-          throw new IOException("failed to find number of corrupt " +
-              "blocks in fsck output.");
+        if (numMissing == null || numCorrupt == null) {
+          throw new IOException("failed to find number of missing or corrupt" +
+              " blocks in fsck output.");
         }
-        if (numCorrupt.equals(Integer.toString(totalMissingBlocks))) {
+        if (numMissing.equals(Integer.toString(totalMissingBlocks))) {
+          assertTrue(numCorrupt.equals(Integer.toString(0)));
           assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
           break;
         }