Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-9368][SQL] Support get(ordinal, dataType) generic getter in UnsafeRow. #7682

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import java.util.HashSet;
import java.util.Set;

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.*;
import org.apache.spark.unsafe.PlatformDependent;
import org.apache.spark.unsafe.array.ByteArrayMethods;
import org.apache.spark.unsafe.bitset.BitSetMethods;
Expand Down Expand Up @@ -235,6 +235,41 @@ public Object get(int ordinal) {
throw new UnsupportedOperationException();
}

@Override
public Object get(int ordinal, DataType dataType) {
if (dataType instanceof NullType) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found a bug here: we should also check whether the row isNullAt(ordinal), in which case we should return null.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reminds me: there's a potentially subtle pitfall with UnsafeRow if someone calls a primitive type-specific accessor without first checking the nullability: in that case, we'll currently return 0 instead of throwing an error.

I'm going to add some assertions to try to see if there are any places where we make this mistake.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, it looks like our existing row behavior is to just return the zero-value of the given type for null inputs (e.g. getFloat on a null column returns 0.0f whereas the generic getter returns null). For some reason, it looks like UnsafeRow was returning NaN instead of 0 in those cases, leading to a confusing bug. I'm going to fix this inconsistency in a separate patch.

return null;
} else if (dataType instanceof BooleanType) {
return getBoolean(ordinal);
} else if (dataType instanceof ByteType) {
return getByte(ordinal);
} else if (dataType instanceof ShortType) {
return getShort(ordinal);
} else if (dataType instanceof IntegerType) {
return getInt(ordinal);
} else if (dataType instanceof LongType) {
return getLong(ordinal);
} else if (dataType instanceof FloatType) {
return getFloat(ordinal);
} else if (dataType instanceof DoubleType) {
return getDouble(ordinal);
} else if (dataType instanceof DecimalType) {
return getDecimal(ordinal);
} else if (dataType instanceof DateType) {
return getInt(ordinal);
} else if (dataType instanceof TimestampType) {
return getLong(ordinal);
} else if (dataType instanceof BinaryType) {
return getBinary(ordinal);
} else if (dataType instanceof StringType) {
return getUTF8String(ordinal);
} else if (dataType instanceof StructType) {
return getStruct(ordinal, ((StructType) dataType).size());
} else {
throw new UnsupportedOperationException("Unsupported data type " + dataType.simpleString());
}
}

@Override
public boolean isNullAt(int ordinal) {
assertIndexIsValid(ordinal);
Expand Down Expand Up @@ -436,4 +471,19 @@ public String toString() {
public boolean anyNull() {
return BitSetMethods.anySet(baseObject, baseOffset, bitSetWidthInBytes / 8);
}

/**
* Writes the content of this row into a memory address, identified by an object and an offset.
* The target memory address must already been allocated, and have enough space to hold all the
* bytes in this string.
*/
public void writeToMemory(Object target, long targetOffset) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this method is unused? Are there other outstanding changes that you need to push?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR, but I'm using this in my struct type support PR.

PlatformDependent.copyMemory(
baseObject,
baseOffset,
target,
targetOffset,
sizeInBytes
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ abstract class InternalRow extends Serializable {

def numFields: Int

def get(ordinal: Int): Any
def get(ordinal: Int): Any = get(ordinal, null)

def genericGet(ordinal: Int): Any = get(ordinal, null)

def get(ordinal: Int, dataType: DataType): Any = get(ordinal)
def get(ordinal: Int, dataType: DataType): Any

def getAs[T](ordinal: Int, dataType: DataType): T = get(ordinal, dataType).asInstanceOf[T]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ class JoinedRow extends InternalRow {
if (i < row1.numFields) row1.getBinary(i) else row2.getBinary(i - row1.numFields)
}

override def get(i: Int): Any =
override def get(i: Int, dataType: DataType): Any =
if (i < row1.numFields) row1.get(i) else row2.get(i - row1.numFields)

override def isNullAt(i: Int): Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ final class SpecificMutableRow(val values: Array[MutableValue]) extends MutableR
values(i).isNull = true
}

override def get(i: Int): Any = values(i).boxed
override def get(i: Int, dataType: DataType): Any = values(i).boxed

override def getStruct(ordinal: Int, numFields: Int): InternalRow = {
values(ordinal).boxed.asInstanceOf[InternalRow]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
public void setNullAt(int i) { nullBits[i] = true; }
public boolean isNullAt(int i) { return nullBits[i]; }

public Object get(int i) {
public Object get(int i, ${classOf[DataType].getName} dataType) {
if (isNullAt(i)) return null;
switch (i) {
$getCases
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class GenericInternalRow(protected[sql] val values: Array[Any]) extends Internal

override def numFields: Int = values.length

override def get(i: Int): Any = values(i)
override def get(i: Int, dataType: DataType): Any = values(i)

override def getStruct(ordinal: Int, numFields: Int): InternalRow = {
values(ordinal).asInstanceOf[InternalRow]
Expand Down Expand Up @@ -130,7 +130,7 @@ class GenericMutableRow(val values: Array[Any]) extends MutableRow {

override def numFields: Int = values.length

override def get(i: Int): Any = values(i)
override def get(i: Int, dataType: DataType): Any = values(i)

override def getStruct(ordinal: Int, numFields: Int): InternalRow = {
values(ordinal).asInstanceOf[InternalRow]
Expand Down