Skip to content

Commit

Permalink
ARROW-6738: [Java] Fix problems with current union comparison logic
Browse files Browse the repository at this point in the history
There are some problems with the current union comparison logic. For example:
1. For type check, we should not require fields to be equal. It is possible that two vectors' value ranges are equal but their fields are different.
2. We should not compare the number of sub vectors, as it is possible that two union vectors have different numbers of sub vectors, but have equal values in the range.

Closes #5544 from liyafan82/fly_0930_share and squashes the following commits:

d6ef3d2 <liyafan82>  Refine test case
c008289 <liyafan82>  Resolve test failure after rebasing
c515393 <liyafan82>  Rule out the change for union type comparison
bab7402 <liyafan82>  Compare fields for all vectors except union vectors
5b2225e <liyafan82>  Fix the bug with decimal vector
4d8b570 <liyafan82>  Fix problems with current union comparison logic

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
  • Loading branch information
liyafan82 authored and kszucs committed Feb 7, 2020
1 parent b5a9839 commit 195643f
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 23 deletions.
2 changes: 1 addition & 1 deletion java/vector/src/main/codegen/templates/UnionVector.java
Expand Up @@ -550,7 +550,7 @@ public Iterator<ValueVector> iterator() {
return vectors.iterator();
}

private ValueVector getVector(int index) {
public ValueVector getVector(int index) {
int type = typeBuffer.getByte(index * TYPE_WIDTH);
switch (MinorType.values()[type]) {
case NULL:
Expand Down
Expand Up @@ -109,8 +109,10 @@ public Boolean visit(BaseFixedWidthVector left, Range range) {
}

@Override
protected ApproxEqualsVisitor createInnerVisitor(ValueVector left, ValueVector right) {
return new ApproxEqualsVisitor(left, right, floatDiffFunction.clone(), doubleDiffFunction.clone());
protected ApproxEqualsVisitor createInnerVisitor(
ValueVector left, ValueVector right,
BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
return new ApproxEqualsVisitor(left, right, floatDiffFunction.clone(), doubleDiffFunction.clone(), typeComparator);
}

private boolean float4ApproxEquals(Range range) {
Expand Down
Expand Up @@ -24,7 +24,6 @@
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
Expand Down Expand Up @@ -193,29 +192,33 @@ public Boolean visit(NullVector left, Range range) {
return true;
}

/**
* Creates a visitor to visit child vectors.
* It is used for complex vector types.
* @return the visitor for child vecors.
*/
protected RangeEqualsVisitor createInnerVisitor(ValueVector leftInner, ValueVector rightInner) {
protected RangeEqualsVisitor createInnerVisitor(
ValueVector leftInner, ValueVector rightInner,
BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
return new RangeEqualsVisitor(leftInner, rightInner, typeComparator);
}

protected boolean compareUnionVectors(Range range) {
UnionVector leftVector = (UnionVector) left;
UnionVector rightVector = (UnionVector) right;

List<FieldVector> leftChildren = leftVector.getChildrenFromFields();
List<FieldVector> rightChildren = rightVector.getChildrenFromFields();

if (leftChildren.size() != rightChildren.size()) {
return false;
}

for (int k = 0; k < leftChildren.size(); k++) {
RangeEqualsVisitor visitor = createInnerVisitor(leftChildren.get(k), rightChildren.get(k));
if (!visitor.rangeEquals(range)) {
Range subRange = new Range(0, 0, 1);
for (int i = 0; i < range.getLength(); i++) {
subRange.setLeftStart(range.getLeftStart() + i).setRightStart(range.getRightStart() + i);
ValueVector leftSubVector = leftVector.getVector(range.getLeftStart() + i);
ValueVector rightSubVector = rightVector.getVector(range.getRightStart() + i);

if (leftSubVector == null || rightSubVector == null) {
if (leftSubVector == rightSubVector) {
continue;
} else {
return false;
}
}
TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector);
RangeEqualsVisitor visitor =
createInnerVisitor(leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left));
if (!visitor.rangeEquals(subRange)) {
return false;
}
}
Expand All @@ -232,7 +235,8 @@ protected boolean compareStructVectors(Range range) {
}

for (String name : leftChildNames) {
RangeEqualsVisitor visitor = createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name));
RangeEqualsVisitor visitor =
createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name), /*type comparator*/ null);
if (!visitor.rangeEquals(range)) {
return false;
}
Expand Down Expand Up @@ -311,7 +315,8 @@ protected boolean compareListVectors(Range range) {
ListVector leftVector = (ListVector) left;
ListVector rightVector = (ListVector) right;

RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector());
RangeEqualsVisitor innerVisitor =
createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
Range innerRange = new Range();

for (int i = 0; i < range.getLength(); i++) {
Expand Down Expand Up @@ -357,7 +362,8 @@ protected boolean compareFixedSizeListVectors(Range range) {
}

int listSize = leftVector.getListSize();
RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector());
RangeEqualsVisitor innerVisitor =
createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
Range innerRange = new Range(0, 0, listSize);

for (int i = 0; i < range.getLength(); i++) {
Expand Down
Expand Up @@ -282,6 +282,57 @@ public void testUnionVectorRangeEquals() {
}
}

/**
* Test comparing two union vectors.
* The two vectors are different in total, but have a range with equal values.
*/
@Test
public void testUnionVectorSubRangeEquals() {
try (final UnionVector vector1 = new UnionVector("union", allocator, null, null);
final UnionVector vector2 = new UnionVector("union", allocator, null, null);) {

final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
uInt4Holder.value = 10;
uInt4Holder.isSet = 1;

final NullableIntHolder intHolder = new NullableIntHolder();
intHolder.value = 20;
intHolder.isSet = 1;

vector1.setType(0, Types.MinorType.UINT4);
vector1.setSafe(0, uInt4Holder);

vector1.setType(1, Types.MinorType.INT);
vector1.setSafe(1, intHolder);

vector1.setType(2, Types.MinorType.INT);
vector1.setSafe(2, intHolder);

vector1.setType(3, Types.MinorType.INT);
vector1.setSafe(3, intHolder);

vector1.setValueCount(4);

vector2.setType(0, Types.MinorType.UINT4);
vector2.setSafe(0, uInt4Holder);

vector2.setType(1, Types.MinorType.INT);
vector2.setSafe(1, intHolder);

vector2.setType(2, Types.MinorType.INT);
vector2.setSafe(2, intHolder);

vector2.setType(3, Types.MinorType.UINT4);
vector2.setSafe(3, uInt4Holder);

vector2.setValueCount(4);

RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
assertFalse(visitor.rangeEquals(new Range(0, 0, 4)));
assertTrue(visitor.rangeEquals(new Range(1, 1, 2)));
}
}

@Ignore
@Test
public void testEqualsWithOutTypeCheck() {
Expand Down

0 comments on commit 195643f

Please sign in to comment.