Skip to content

Commit

Permalink
[SPARK-41358][SQL] Refactor ColumnVectorUtils#populate method to us…
Browse files Browse the repository at this point in the history
…e `PhysicalDataType` instead of `DataType`

### What changes were proposed in this pull request?
The main change of this pr is refactor `ColumnVectorUtils#populate` method to use `PhysicalDataType` instead of `DataType`.

### Why are the changes needed?
Simplify type match.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Existing ColumnVectorUtilsSuite

Closes apache#38873 from LuciferYang/SPARK-41358.

Authored-by: yangjie01 <yangjie01@baidu.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
LuciferYang authored and beliefer committed Dec 18, 2022
1 parent a1cd9fe commit a564b40
Showing 1 changed file with 14 additions and 17 deletions.
Expand Up @@ -28,6 +28,7 @@
import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.types.*;
import org.apache.spark.sql.catalyst.util.DateTimeUtils;
import org.apache.spark.sql.types.*;
import org.apache.spark.sql.vectorized.ColumnarArray;
Expand All @@ -48,31 +49,32 @@ public class ColumnVectorUtils {
*/
public static void populate(ConstantColumnVector col, InternalRow row, int fieldIdx) {
DataType t = col.dataType();
PhysicalDataType pdt = t.physicalDataType();

if (row.isNullAt(fieldIdx)) {
col.setNull();
} else {
if (t == DataTypes.BooleanType) {
if (pdt instanceof PhysicalBooleanType) {
col.setBoolean(row.getBoolean(fieldIdx));
} else if (t == DataTypes.BinaryType) {
} else if (pdt instanceof PhysicalBinaryType) {
col.setBinary(row.getBinary(fieldIdx));
} else if (t == DataTypes.ByteType) {
} else if (pdt instanceof PhysicalByteType) {
col.setByte(row.getByte(fieldIdx));
} else if (t == DataTypes.ShortType) {
} else if (pdt instanceof PhysicalShortType) {
col.setShort(row.getShort(fieldIdx));
} else if (t == DataTypes.IntegerType) {
} else if (pdt instanceof PhysicalIntegerType) {
col.setInt(row.getInt(fieldIdx));
} else if (t == DataTypes.LongType) {
} else if (pdt instanceof PhysicalLongType) {
col.setLong(row.getLong(fieldIdx));
} else if (t == DataTypes.FloatType) {
} else if (pdt instanceof PhysicalFloatType) {
col.setFloat(row.getFloat(fieldIdx));
} else if (t == DataTypes.DoubleType) {
} else if (pdt instanceof PhysicalDoubleType) {
col.setDouble(row.getDouble(fieldIdx));
} else if (t == DataTypes.StringType) {
} else if (pdt instanceof PhysicalStringType) {
UTF8String v = row.getUTF8String(fieldIdx);
col.setUtf8String(v);
} else if (t instanceof DecimalType) {
DecimalType dt = (DecimalType) t;
} else if (pdt instanceof PhysicalDecimalType) {
PhysicalDecimalType dt = (PhysicalDecimalType) pdt;
Decimal d = row.getDecimal(fieldIdx, dt.precision(), dt.scale());
if (dt.precision() <= Decimal.MAX_INT_DIGITS()) {
col.setInt((int)d.toUnscaledLong());
Expand All @@ -83,14 +85,9 @@ public static void populate(ConstantColumnVector col, InternalRow row, int field
byte[] bytes = integer.toByteArray();
col.setBinary(bytes);
}
} else if (t instanceof CalendarIntervalType) {
} else if (pdt instanceof PhysicalCalendarIntervalType) {
// The value of `numRows` is irrelevant.
col.setCalendarInterval((CalendarInterval) row.get(fieldIdx, t));
} else if (t instanceof DateType || t instanceof YearMonthIntervalType) {
col.setInt(row.getInt(fieldIdx));
} else if (t instanceof TimestampType || t instanceof TimestampNTZType ||
t instanceof DayTimeIntervalType) {
col.setLong(row.getLong(fieldIdx));
} else {
throw new RuntimeException(String.format("DataType %s is not supported" +
" in column vectorized reader.", t.sql()));
Expand Down

0 comments on commit a564b40

Please sign in to comment.