-
Notifications
You must be signed in to change notification settings - Fork 397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GLUTEN-4772][VL] Support empty map/array literal #4771
Changes from 10 commits
e7ed77d
32053b0
a7cff81
e218a77
112fedb
2e3889e
5b83029
5800076
4d5bda4
e7efd20
3a28733
5bdd9fa
2c630ae
28dc0ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,16 +60,16 @@ RowVectorPtr makeRowVector(const std::vector<VectorPtr>& children) { | |
return std::make_shared<RowVector>(children[0]->pool(), rowType, BufferPtr(nullptr), vectorSize, children); | ||
} | ||
|
||
ArrayVectorPtr makeEmptyArrayVector(memory::MemoryPool* pool) { | ||
ArrayVectorPtr makeEmptyArrayVector(memory::MemoryPool* pool, const TypePtr& elementType) { | ||
BufferPtr offsets = allocateOffsets(1, pool); | ||
BufferPtr sizes = allocateOffsets(1, pool); | ||
return std::make_shared<ArrayVector>(pool, ARRAY(UNKNOWN()), nullptr, 1, offsets, sizes, nullptr); | ||
return std::make_shared<ArrayVector>(pool, ARRAY(elementType), nullptr, 1, offsets, sizes, nullptr); | ||
} | ||
|
||
MapVectorPtr makeEmptyMapVector(memory::MemoryPool* pool) { | ||
MapVectorPtr makeEmptyMapVector(memory::MemoryPool* pool, const TypePtr& keyType, const TypePtr& valueType) { | ||
BufferPtr offsets = allocateOffsets(1, pool); | ||
BufferPtr sizes = allocateOffsets(1, pool); | ||
return std::make_shared<MapVector>(pool, MAP(UNKNOWN(), UNKNOWN()), nullptr, 1, offsets, sizes, nullptr, nullptr); | ||
return std::make_shared<MapVector>(pool, MAP(keyType, valueType), nullptr, 1, offsets, sizes, nullptr, nullptr); | ||
} | ||
|
||
RowVectorPtr makeEmptyRowVector(memory::MemoryPool* pool) { | ||
|
@@ -351,10 +351,21 @@ std::shared_ptr<const core::ConstantTypedExpr> SubstraitVeloxExprConverter::toVe | |
auto constantVector = BaseVector::wrapInConstant(1, 0, literalsToArrayVector(substraitLit)); | ||
return std::make_shared<const core::ConstantTypedExpr>(constantVector); | ||
} | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kEmptyList: { | ||
auto elementType = SubstraitParser::parseType(substraitLit.empty_list().type()); | ||
auto constantVector = BaseVector::wrapInConstant(1, 0, makeEmptyArrayVector(pool_, elementType)); | ||
return std::make_shared<const core::ConstantTypedExpr>(constantVector); | ||
} | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kMap: { | ||
auto constantVector = BaseVector::wrapInConstant(1, 0, literalsToMapVector(substraitLit)); | ||
return std::make_shared<const core::ConstantTypedExpr>(constantVector); | ||
} | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kEmptyMap: { | ||
auto keyType = SubstraitParser::parseType(substraitLit.empty_map().key()); | ||
auto valueType = SubstraitParser::parseType(substraitLit.empty_map().value()); | ||
auto constantVector = BaseVector::wrapInConstant(1, 0, makeEmptyMapVector(pool_, keyType, valueType)); | ||
return std::make_shared<const core::ConstantTypedExpr>(constantVector); | ||
} | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kStruct: { | ||
auto constantVector = BaseVector::wrapInConstant(1, 0, literalsToRowVector(substraitLit)); | ||
return std::make_shared<const core::ConstantTypedExpr>(constantVector); | ||
|
@@ -382,33 +393,34 @@ std::shared_ptr<const core::ConstantTypedExpr> SubstraitVeloxExprConverter::toVe | |
ArrayVectorPtr SubstraitVeloxExprConverter::literalsToArrayVector(const ::substrait::Expression::Literal& literal) { | ||
auto childSize = literal.list().values().size(); | ||
if (childSize == 0) { | ||
return makeEmptyArrayVector(pool_); | ||
return makeEmptyArrayVector(pool_, UNKNOWN()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it's better to throw for unexpected behavior. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1, if the child size is 0 that means it is a empty list, we should not go into this method. @WangGuangxin can you address this comment ? |
||
} | ||
auto childTypeCase = literal.list().values(0).literal_type_case(); | ||
auto childLiteral = literal.list().values(0); | ||
auto elementAtFunc = [&](vector_size_t idx) { return literal.list().values(idx); }; | ||
auto childVector = literalsToVector(childTypeCase, childSize, literal, elementAtFunc); | ||
auto childVector = literalsToVector(childLiteral, childSize, literal, elementAtFunc); | ||
return makeArrayVector(childVector); | ||
} | ||
|
||
MapVectorPtr SubstraitVeloxExprConverter::literalsToMapVector(const ::substrait::Expression::Literal& literal) { | ||
auto childSize = literal.map().key_values().size(); | ||
if (childSize == 0) { | ||
return makeEmptyMapVector(pool_); | ||
return makeEmptyMapVector(pool_, UNKNOWN(), UNKNOWN()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. |
||
} | ||
auto keyTypeCase = literal.map().key_values(0).key().literal_type_case(); | ||
auto valueTypeCase = literal.map().key_values(0).value().literal_type_case(); | ||
auto& keyLiteral = literal.map().key_values(0).key(); | ||
auto& valueLiteral = literal.map().key_values(0).value(); | ||
auto keyAtFunc = [&](vector_size_t idx) { return literal.map().key_values(idx).key(); }; | ||
auto valueAtFunc = [&](vector_size_t idx) { return literal.map().key_values(idx).value(); }; | ||
auto keyVector = literalsToVector(keyTypeCase, childSize, literal, keyAtFunc); | ||
auto valueVector = literalsToVector(valueTypeCase, childSize, literal, valueAtFunc); | ||
auto keyVector = literalsToVector(keyLiteral, childSize, literal, keyAtFunc); | ||
auto valueVector = literalsToVector(valueLiteral, childSize, literal, valueAtFunc); | ||
return makeMapVector(keyVector, valueVector); | ||
} | ||
|
||
VectorPtr SubstraitVeloxExprConverter::literalsToVector( | ||
::substrait::Expression_Literal::LiteralTypeCase childTypeCase, | ||
const ::substrait::Expression::Literal& childLiteral, | ||
vector_size_t childSize, | ||
const ::substrait::Expression::Literal& literal, | ||
std::function<::substrait::Expression::Literal(vector_size_t /* idx */)> elementAtFunc) { | ||
auto childTypeCase = childLiteral.literal_type_case(); | ||
switch (childTypeCase) { | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kNull: { | ||
auto veloxType = SubstraitParser::parseType(literal.null()); | ||
|
@@ -456,6 +468,15 @@ VectorPtr SubstraitVeloxExprConverter::literalsToVector( | |
} | ||
return rowVector; | ||
} | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kEmptyList: { | ||
auto elementType = SubstraitParser::parseType(childLiteral.empty_list().type()); | ||
return BaseVector::wrapInConstant(1, 0, makeEmptyArrayVector(pool_, elementType)); | ||
} | ||
case ::substrait::Expression_Literal::LiteralTypeCase::kEmptyMap: { | ||
auto keyType = SubstraitParser::parseType(childLiteral.empty_map().key()); | ||
auto valueType = SubstraitParser::parseType(childLiteral.empty_map().value()); | ||
return BaseVector::wrapInConstant(1, 0, makeEmptyMapVector(pool_, keyType, valueType)); | ||
} | ||
default: | ||
auto veloxType = getScalarType(elementAtFunc(0)); | ||
if (veloxType) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
|
||
import io.substrait.proto.Expression; | ||
import io.substrait.proto.Expression.Literal.Builder; | ||
import io.substrait.proto.Type; | ||
import org.apache.spark.sql.catalyst.util.ArrayData; | ||
|
||
public class ListLiteralNode extends LiteralNodeWithValue<ArrayData> { | ||
|
@@ -33,13 +34,18 @@ protected void updateLiteralBuilder(Builder literalBuilder, ArrayData array) { | |
Object[] elements = array.array(); | ||
TypeNode elementType = ((ListNode) getTypeNode()).getNestedType(); | ||
|
||
Expression.Literal.List.Builder listBuilder = Expression.Literal.List.newBuilder(); | ||
for (Object element : elements) { | ||
LiteralNode elementNode = ExpressionBuilder.makeLiteral(element, elementType); | ||
Expression.Literal elementExpr = elementNode.getLiteral(); | ||
listBuilder.addValues(elementExpr); | ||
if (elements.length > 0) { | ||
Expression.Literal.List.Builder listBuilder = Expression.Literal.List.newBuilder(); | ||
for (Object element : elements) { | ||
LiteralNode elementNode = ExpressionBuilder.makeLiteral(element, elementType); | ||
Expression.Literal elementExpr = elementNode.getLiteral(); | ||
listBuilder.addValues(elementExpr); | ||
} | ||
literalBuilder.setList(listBuilder.build()); | ||
} else { | ||
Type.List.Builder listTypeBuilder = Type.List.newBuilder(); | ||
listTypeBuilder.setType(elementType.toProtobuf()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if if element type is NullType, we will still fallback it right ? e.g., There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's related to #2996. It will not fallback once NullType is supported |
||
literalBuilder.setEmptyList(listTypeBuilder.build()); | ||
} | ||
|
||
literalBuilder.setList(listBuilder.build()); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know why does it fallback ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, it's not related to empty literal, but NullType support.
I'll fix it in another PR, otherwise this PR is too big.