Skip to content
Permalink
Browse files

[SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in ArrowColumnVector

## What changes were proposed in this pull request?
This PR changes the usage of `MapVector` in the Spark codebase to use `NullableMapVector`.

`MapVector` is an internal Arrow class that is not supposed to be used directly. We should use `NullableMapVector` instead.

## How was this patch tested?

Existing test.

Author: Li Jin <ice.xelloss@gmail.com>

Closes #20239 from icexelloss/arrow-map-vector.

(cherry picked from commit 4e6f8fb)
Signed-off-by: hyukjinkwon <gurwls223@gmail.com>
  • Loading branch information...
icexelloss authored and HyukjinKwon committed Jan 17, 2018
1 parent 79ccd0c commit 6e509fde3f056316f46c71b672a7d69adb1b4f8e
@@ -247,8 +247,8 @@ public ArrowColumnVector(ValueVector vector) {

childColumns = new ArrowColumnVector[1];
childColumns[0] = new ArrowColumnVector(listVector.getDataVector());
} else if (vector instanceof MapVector) {
MapVector mapVector = (MapVector) vector;
} else if (vector instanceof NullableMapVector) {
NullableMapVector mapVector = (NullableMapVector) vector;
accessor = new StructAccessor(mapVector);

childColumns = new ArrowColumnVector[mapVector.size()];
@@ -553,9 +553,16 @@ final int getArrayOffset(int rowId) {
}
}

/**
* Accessor installed for struct (map) vectors. Any call to a "get" method will throw
* UnsupportedOperationException.
*
* Accessing struct values in an ArrowColumnVector does not go through this accessor. Instead, it
* uses the getStruct() method defined in the parent class. Any call to a "get" method in this
* class is therefore a bug in the code.
*/
private static class StructAccessor extends ArrowVectorAccessor {

StructAccessor(MapVector vector) {
StructAccessor(NullableMapVector vector) {
super(vector);
}
}
@@ -322,6 +322,42 @@ class ArrowColumnVectorSuite extends SparkFunSuite {
allocator.close()
}

test("non nullable struct") {
  val childAllocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
  val structType = new StructType().add("int", IntegerType).add("long", LongType)

  // Build the Arrow field with nullable = false, then materialize its vector.
  val structField = ArrowUtils.toArrowField("struct", structType, nullable = false, null)
  val structVector = structField.createVector(childAllocator).asInstanceOf[NullableMapVector]
  structVector.allocateNew()

  val ints = structVector.getChildByOrdinal(0).asInstanceOf[IntVector]
  val longs = structVector.getChildByOrdinal(1).asInstanceOf[BigIntVector]

  // Row 0: both children hold a value.
  structVector.setIndexDefined(0)
  ints.setSafe(0, 1)
  longs.setSafe(0, 1L)

  // Row 1: the struct slot is defined, but its "long" child is null.
  structVector.setIndexDefined(1)
  ints.setSafe(1, 2)
  longs.setNull(1)

  structVector.setValueCount(2)

  val column = new ArrowColumnVector(structVector)
  assert(column.dataType === structType)
  assert(column.numNulls === 0)

  val firstRow = column.getStruct(0, 2)
  assert(firstRow.getInt(0) === 1)
  assert(firstRow.getLong(1) === 1L)

  val secondRow = column.getStruct(1, 2)
  assert(secondRow.getInt(0) === 2)
  assert(secondRow.isNullAt(1))

  column.close()
  childAllocator.close()
}

test("struct") {
val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
val schema = new StructType().add("int", IntegerType).add("long", LongType)

0 comments on commit 6e509fd

Please sign in to comment.
You can’t perform that action at this time.