Skip to content

Commit

Permalink
Changes following code review comments for DRILL-6880
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben-Zvi authored and Boaz Ben-Zvi committed Jan 9, 2019
1 parent 4bc5623 commit 4c9dd8d
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 60 deletions.
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;


import com.sun.codemodel.JExpression;
import org.apache.drill.common.expression.ErrorCollector; import org.apache.drill.common.expression.ErrorCollector;
import org.apache.drill.common.expression.ErrorCollectorImpl; import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.LogicalExpression; import org.apache.drill.common.expression.LogicalExpression;
Expand Down Expand Up @@ -129,6 +130,8 @@ public class ChainedHashTable {
private RecordBatch incomingProbe; private RecordBatch incomingProbe;
private final RecordBatch outgoing; private final RecordBatch outgoing;


private enum setupWork {DO_BUILD, DO_PROBE, CHECK_BOTH_NULLS};

public ChainedHashTable(HashTableConfig htConfig, FragmentContext context, BufferAllocator allocator, public ChainedHashTable(HashTableConfig htConfig, FragmentContext context, BufferAllocator allocator,
RecordBatch incomingBuild, RecordBatch incomingProbe, RecordBatch outgoing) { RecordBatch incomingBuild, RecordBatch incomingProbe, RecordBatch outgoing) {


Expand Down Expand Up @@ -216,15 +219,16 @@ public HashTable createAndSetupHashTable(TypedFieldId[] outKeyFieldIds) throws C
i++; i++;
} }


// Only in case of an equi-join: Generate a special method to check if both the new key and the existing key (in this HT bucket) are nulls // Only in case of a join: Generate a special method to check if both the new key and the existing key (in this HT bucket) are nulls
// (used by Hash-Join to avoid creating a long hash-table chain of null keys, which can lead to O(n^2) work on that chain.) // (used by Hash-Join to avoid creating a long hash-table chain of null keys, which can lead to useless O(n^2) work on that chain.)
// The logic is: Nulls match on build, and don't match on probe. Note that this logic covers outer joins as well.
setupIsKeyMatchInternal(cgInner, bothKeysNullIncomingBuildMapping, bothKeysNullHtableMapping, keyExprsBuild, setupIsKeyMatchInternal(cgInner, bothKeysNullIncomingBuildMapping, bothKeysNullHtableMapping, keyExprsBuild,
htConfig.getComparators(), htKeyFieldIds); htConfig.getComparators(), htKeyFieldIds, setupWork.CHECK_BOTH_NULLS);
// generate code for isKeyMatch(), setValue(), getHash() and outputRecordKeys() // generate code for isKeyMatch(), setValue(), getHash() and outputRecordKeys()
setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, KeyMatchHtableMapping, keyExprsBuild, setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, KeyMatchHtableMapping, keyExprsBuild,
htConfig.getComparators(), htKeyFieldIds); htConfig.getComparators(), htKeyFieldIds, setupWork.DO_BUILD);
setupIsKeyMatchInternal(cgInner, KeyMatchIncomingProbeMapping, KeyMatchHtableProbeMapping, keyExprsProbe, setupIsKeyMatchInternal(cgInner, KeyMatchIncomingProbeMapping, KeyMatchHtableProbeMapping, keyExprsProbe,
htConfig.getComparators(), htKeyFieldIds); htConfig.getComparators(), htKeyFieldIds, setupWork.DO_PROBE);


setupSetValue(cgInner, keyExprsBuild, htKeyFieldIds); setupSetValue(cgInner, keyExprsBuild, htKeyFieldIds);
if (outgoing != null) { if (outgoing != null) {
Expand All @@ -235,8 +239,8 @@ public HashTable createAndSetupHashTable(TypedFieldId[] outKeyFieldIds) throws C
} }
setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds); setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds);


setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild, false); setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild);
setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe, true); setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe);


HashTable ht = context.getImplementationClass(top); HashTable ht = context.getImplementationClass(top);
ht.setup(htConfig, allocator, incomingBuild.getContainer(), incomingProbe, outgoing, htContainerOrig, context, cgInner); ht.setup(htConfig, allocator, incomingBuild.getContainer(), incomingProbe, outgoing, htContainerOrig, context, cgInner);
Expand All @@ -245,15 +249,19 @@ public HashTable createAndSetupHashTable(TypedFieldId[] outKeyFieldIds) throws C
} }


private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet incomingMapping, MappingSet htableMapping, private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet incomingMapping, MappingSet htableMapping,
LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds) { LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds, setupWork work) {

boolean checkIfBothNulls = work == setupWork.CHECK_BOTH_NULLS;


boolean isProbe = incomingMapping == KeyMatchIncomingProbeMapping; // Regular key matching may return false in the middle (i.e., some pair of columns did not match), and true only if all matched;
boolean areBothNulls = incomingMapping == bothKeysNullIncomingBuildMapping; // but "both nulls" check returns the opposite logic (i.e., true when one pair of nulls is found, need check no more)
JExpression midPointResult = checkIfBothNulls ? JExpr.TRUE : JExpr.FALSE;
JExpression finalResult = checkIfBothNulls ? JExpr.FALSE : JExpr.TRUE;


cg.setMappingSet(incomingMapping); cg.setMappingSet(incomingMapping);


if (keyExprs == null || keyExprs.length == 0 || if (keyExprs == null || keyExprs.length == 0 ||
areBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. for Hash-Aggr, or non-equi join checkIfBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. for Hash-Aggr, or non-equi join
cg.getEvalBlock()._return(JExpr.FALSE); cg.getEvalBlock()._return(JExpr.FALSE);
return; return;
} }
Expand All @@ -269,29 +277,29 @@ private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet in


JConditional jc; JConditional jc;


if ( areBothNulls || isProbe ) { if ( work != setupWork.DO_BUILD ) { // BUILD runs this logic in a separate method - areBothKeysNull()
// codegen for the special case when both columns are null (i.e., return early - as equal or not) // codegen for the special case when both columns are null (i.e., return early with midPointResult)
if (comparators.get(i) == Comparator.EQUALS if (comparators.get(i) == Comparator.EQUALS
&& left.isOptional() && right.isOptional()) { && left.isOptional() && right.isOptional()) {
jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)). jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)).
cand(right.getIsSet().eq(JExpr.lit(0)))); cand(right.getIsSet().eq(JExpr.lit(0))));
jc._then()._return( areBothNulls ? JExpr.TRUE : JExpr.FALSE); jc._then()._return(midPointResult);
} }
} }
if ( ! areBothNulls ) { if ( ! checkIfBothNulls ) { // generate comparison code (at least one of the two columns' values is non-null)
final LogicalExpression f = FunctionGenerationHelper.getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry()); final LogicalExpression f = FunctionGenerationHelper.getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry());


HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE); HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE);


// check if two values are not equal (comparator result != 0) // check if two values are not equal (comparator result != 0)
jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));


jc._then()._return(JExpr.FALSE); jc._then()._return(midPointResult);
} }
} }


// All key expressions compared equal, so return TRUE // All key expressions compared equal, so return TRUE
cg.getEvalBlock()._return( areBothNulls ? JExpr.FALSE : JExpr.TRUE); cg.getEvalBlock()._return(finalResult);
} }


private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] keyExprs, private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] keyExprs,
Expand Down Expand Up @@ -321,8 +329,8 @@ private void setupOutputRecordKeys(ClassGenerator<HashTable> cg, TypedFieldId[]
} }
} }


private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs)
boolean isProbe) throws SchemaChangeException { throws SchemaChangeException {


cg.setMappingSet(incomingMapping); cg.setMappingSet(incomingMapping);


Expand Down
Expand Up @@ -87,9 +87,6 @@ public abstract class HashTableTemplate implements HashTable {
// current available (free) slot globally across all batch holders // current available (free) slot globally across all batch holders
private int freeIndex = 0; private int freeIndex = 0;


// Placeholder for the current index while probing the hash table
private IndexPointer currentIdxHolder;

private BufferAllocator allocator; private BufferAllocator allocator;


// The incoming build side record batch // The incoming build side record batch
Expand Down Expand Up @@ -205,39 +202,38 @@ protected void setup() throws SchemaChangeException {
setupInterior(incomingBuild, incomingProbe, outgoing, htContainer); setupInterior(incomingBuild, incomingProbe, outgoing, htContainer);
} }


// Check if the key at the currentIdx position in hash table matches the key // Check if the key at the current Index position in hash table matches the key
// at the incomingRowIdx. if the key does not match, update the // at the incomingRowIdx.
// currentIdxHolder with the index of the next link.
private boolean isKeyMatch(int incomingRowIdx, private boolean isKeyMatch(int incomingRowIdx,
IndexPointer currentIdxHolder, int currentIndex,
boolean isProbe) throws SchemaChangeException { boolean isProbe) throws SchemaChangeException {
int currentIdxWithinBatch = currentIdxHolder.value & BATCH_MASK; int currentIdxWithinBatch = currentIndex & BATCH_MASK;
boolean match;


if (currentIdxWithinBatch >= batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount()) { if (currentIdxWithinBatch >= batchHolders.get((currentIndex >>> 16) & BATCH_MASK).getTargetBatchRowCount()) {
logger.debug("Batch size = {}, incomingRowIdx = {}, currentIdxWithinBatch = {}.", logger.debug("Batch size = {}, incomingRowIdx = {}, currentIdxWithinBatch = {}.",
batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount(), incomingRowIdx, currentIdxWithinBatch); batchHolders.get((currentIndex >>> 16) & BATCH_MASK).getTargetBatchRowCount(), incomingRowIdx, currentIdxWithinBatch);
} }
assert (currentIdxWithinBatch < batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount()); assert (currentIdxWithinBatch < batchHolders.get((currentIndex >>> 16) & BATCH_MASK).getTargetBatchRowCount());
assert (incomingRowIdx < HashTable.BATCH_SIZE); assert (incomingRowIdx < HashTable.BATCH_SIZE);


if (isProbe) { if (isProbe) {
match = isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch); return isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch);
} else {
// in case (of a hash-join only) where both the new incoming key and the current are null, treat them as
// a match; i.e. the new would be added into the helper (but not the Hash-Table !), though it would never
// be used (not putting it into the helper would take a bigger code change, and some performance cost, hence
// not worth it). In the past such a new null key was added into the Hash-Table (i.e., no match), which
// created long costly chains - SEE DRILL-6880)
if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return true; }

match = isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch);
} }


if (!match) { // in case of a hash-join build, where both the new incoming key and the current key are null, treat them as
currentIdxHolder.value = links.getAccessor().get(currentIdxWithinBatch); // a match; i.e. the new would be added into the helper (but not the Hash-Table !), though it would never
} // be used (not putting it into the helper would take a bigger code change, and some performance cost, hence
return match; // not worth it). In the past such a new null key was added into the Hash-Table (i.e., no match), which
// created long costly chains - SEE DRILL-6880)
if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return true; }

return isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch);
}

// This method should only be called following a "false" return from isKeyMatch()
// It returns the next index in the hash chain (if any) so that next call to isKeyMatch() would compare the next link
private int nextLinkInHashChain(int currentIndex) {
return links.getAccessor().get(currentIndex & BATCH_MASK);
} }


// Insert a new <key1, key2...keyN> entry coming from the incoming batch into the hash table // Insert a new <key1, key2...keyN> entry coming from the incoming batch into the hash table
Expand Down Expand Up @@ -524,7 +520,6 @@ public void setup(HashTableConfig htConfig, BufferAllocator allocator, VectorCon
throw new IllegalStateException("Unexpected schema change", e); throw new IllegalStateException("Unexpected schema change", e);
} }


currentIdxHolder = new IndexPointer();
} }


@Override @Override
Expand Down Expand Up @@ -674,16 +669,15 @@ public PutStatus put(int incomingRowIdx, IndexPointer htIdxHolder, int hashCode,


// if startIdx is non-empty, follow the hash chain links until we find a matching // if startIdx is non-empty, follow the hash chain links until we find a matching
// key or reach the end of the chain (and remember the last link there) // key or reach the end of the chain (and remember the last link there)
for ( currentIdxHolder.value = startIdx; for ( int currentIndex = startIdx;
currentIdxHolder.value != EMPTY_SLOT; currentIndex != EMPTY_SLOT;
/* isKeyMatch() below also advances the currentIdxHolder to the next link */) { currentIndex = lastEntryBatch.nextLinkInHashChain(currentIndex)) {

// remember the current link, which would be the last when the next link is empty // remember the current link, which would be the last when the next link is empty
lastEntryBatch = batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK); lastEntryBatch = batchHolders.get((currentIndex >>> 16) & BATCH_MASK);
lastEntryIdxWithinBatch = currentIdxHolder.value & BATCH_MASK; lastEntryIdxWithinBatch = currentIndex & BATCH_MASK;


if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIdxHolder, false)) { if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIndex, false)) {
htIdxHolder.value = currentIdxHolder.value; htIdxHolder.value = currentIndex;
return PutStatus.KEY_PRESENT; return PutStatus.KEY_PRESENT;
} }
} }
Expand Down Expand Up @@ -751,12 +745,15 @@ public PutStatus put(int incomingRowIdx, IndexPointer htIdxHolder, int hashCode,
@Override @Override
public int probeForKey(int incomingRowIdx, int hashCode) throws SchemaChangeException { public int probeForKey(int incomingRowIdx, int hashCode) throws SchemaChangeException {
int bucketIndex = getBucketIndex(hashCode, numBuckets()); int bucketIndex = getBucketIndex(hashCode, numBuckets());

int startIdx = startIndices.getAccessor().get(bucketIndex);
for ( currentIdxHolder.value = startIndices.getAccessor().get(bucketIndex); BatchHolder lastEntryBatch = null;
currentIdxHolder.value != EMPTY_SLOT; ) {
BatchHolder bh = batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK); for ( int currentIndex = startIdx;
if (bh.isKeyMatch(incomingRowIdx, currentIdxHolder, true /* isProbe */)) { currentIndex != EMPTY_SLOT;
return currentIdxHolder.value; currentIndex = lastEntryBatch.nextLinkInHashChain(currentIndex)) {
lastEntryBatch = batchHolders.get((currentIndex >>> 16) & BATCH_MASK);
if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIndex, true /* isProbe */)) {
return currentIndex;
} }
} }
return -1; return -1;
Expand Down

0 comments on commit 4c9dd8d

Please sign in to comment.