Skip to content

Commit

Permalink
HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too verbose and hurt performance.
Browse files Browse the repository at this point in the history
Contributed by Rui Li
  • Loading branch information
Kai Zheng authored and Zhe Zhang committed Sep 22, 2015
1 parent 1080c37 commit 7bff8ca
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 3 deletions.
3 changes: 3 additions & 0 deletions hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt
Expand Up @@ -444,3 +444,6 @@


HDFS-9091. Erasure Coding: Provide DistributedFilesystem API to
getAllErasureCodingPolicies. (Rakesh R via zhz)

HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too
verbose and hurt performance. (Rui Li via Kai Zheng)
Expand Up @@ -1057,9 +1057,7 @@ protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block,
} }
} }
if (chosenNode == null) { if (chosenNode == null) {
DFSClient.LOG.warn("No live nodes contain block " + block.getBlock() + reportLostBlock(block, ignoredNodes);
" after checking nodes = " + Arrays.toString(nodes) +
", ignoredNodes = " + ignoredNodes);
return null; return null;
} }
final String dnAddr = final String dnAddr =
Expand All @@ -1071,6 +1069,17 @@ protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block,
return new DNAddrPair(chosenNode, targetAddr, storageType); return new DNAddrPair(chosenNode, targetAddr, storageType);
} }


/**
 * Logs a warning stating that no live node contains the given block.
 *
 * The message lists the block itself, the locations recorded on the block
 * and the supplied collection of ignored nodes. Subclasses may override this
 * to change how (or how often) the warning is emitted.
 *
 * @param lostBlock the block being reported; its locations are printed as
 *                  the nodes that were checked
 * @param ignoredNodes printed verbatim in the message as the ignored nodes
 */
protected void reportLostBlock(LocatedBlock lostBlock,
    Collection<DatanodeInfo> ignoredNodes) {
  final DatanodeInfo[] locations = lostBlock.getLocations();
  final StringBuilder message =
      new StringBuilder("No live nodes contain block ");
  message.append(lostBlock.getBlock());
  message.append(" after checking nodes = ")
      .append(Arrays.toString(locations));
  message.append(", ignoredNodes = ").append(ignoredNodes);
  DFSClient.LOG.warn(message.toString());
}

private static String getBestNodeDNAddrPairErrorString( private static String getBestNodeDNAddrPairErrorString(
DatanodeInfo nodes[], AbstractMap<DatanodeInfo, DatanodeInfo nodes[], AbstractMap<DatanodeInfo,
DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) { DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) {
Expand Down
Expand Up @@ -45,8 +45,11 @@
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException; import java.io.InterruptedIOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
Expand Down Expand Up @@ -154,6 +157,17 @@ void skip() {
private StripeRange curStripeRange; private StripeRange curStripeRange;
private final CompletionService<Void> readingService; private final CompletionService<Void> readingService;


/**
 * Datanode UUIDs for which a lost-block warning has already been logged in
 * striping mode. All other striping blocks on these nodes can be considered
 * lost too, and we don't want to log a warning for each of them; this keeps
 * the log from being too verbose. Refer to HDFS-8920.
 *
 * To minimize the overhead, only the datanodeUuid of each node is stored in
 * this set. The set is backed by a ConcurrentHashMap.
 */
private final Set<String> warnedNodes = Collections.newSetFromMap(
new ConcurrentHashMap<String, Boolean>());

DFSStripedInputStream(DFSClient dfsClient, String src, DFSStripedInputStream(DFSClient dfsClient, String src,
boolean verifyChecksum, ErasureCodingPolicy ecPolicy, boolean verifyChecksum, ErasureCodingPolicy ecPolicy,
LocatedBlocks locatedBlocks) throws IOException { LocatedBlocks locatedBlocks) throws IOException {
Expand Down Expand Up @@ -527,6 +541,26 @@ protected void fetchBlockByteRange(LocatedBlock block, long start,
} }
} }


/**
 * Warns about a lost block in striping mode while suppressing repeated
 * warnings for datanodes that have already been reported.
 *
 * If the block has locations, their datanode UUIDs are added to
 * {@code warnedNodes} and a warning is logged only when at least one of
 * them had not been recorded before. If the block has no locations, the
 * generic warning of the superclass is emitted instead.
 *
 * @param lostBlock the block being reported
 * @param ignoredNodes printed verbatim in the message as the ignored nodes
 */
@Override
protected void reportLostBlock(LocatedBlock lostBlock,
    Collection<DatanodeInfo> ignoredNodes) {
  DatanodeInfo[] nodes = lostBlock.getLocations();
  if (nodes == null || nodes.length == 0) {
    // Nothing to de-duplicate on: fall back to the generic warning.
    super.reportLostBlock(lostBlock, ignoredNodes);
    return;
  }

  List<String> dnUUIDs = new ArrayList<>(nodes.length);
  for (DatanodeInfo node : nodes) {
    dnUUIDs.add(node.getDatanodeUuid());
  }

  // addAll() returns true only if at least one UUID was not yet in the set.
  // Recording and testing in a single pass (instead of containsAll()
  // followed by addAll()) avoids the check-then-act window in which
  // several readers could all pass the check and log the same warning.
  if (warnedNodes.addAll(dnUUIDs)) {
    DFSClient.LOG.warn(Arrays.toString(nodes) + " are unavailable and " +
        "all striping blocks on them are lost. " +
        "IgnoredNodes = " + ignoredNodes);
  }
}

/** /**
* The reader for reading a complete {@link AlignedStripe}. Note that an * The reader for reading a complete {@link AlignedStripe}. Note that an
* {@link AlignedStripe} may cross multiple stripes with cellSize width. * {@link AlignedStripe} may cross multiple stripes with cellSize width.
Expand Down

0 comments on commit 7bff8ca

Please sign in to comment.