Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions be/src/core/data_type/data_type_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ namespace ErrorCodes {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

DataTypeArray::DataTypeArray(const DataTypePtr& nested_) : nested {nested_} {}
DataTypeArray::DataTypeArray(const DataTypePtr& nested_) {
DataTypePtr nullable_nested = make_nullable(nested_);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrapping every array child in make_nullable() changes the type metadata you export later. DataTypeArray::to_protobuf() uses nested->is_nullable(), and FE's FoldConstantRuleOnBE.convertToNereidsType() reconstructs ArrayType from that flag. After this change, a folded expression like CAST([1] AS ARRAY<INT NOT NULL>) will round-trip back to FE as ARRAY<INT> because BE now always reports contains_null=true. FE still treats ArrayType.containsNull as part of exact type matching, so this is a real regression in the constant-folding path, not just an internal invariant cleanup. Please preserve the original child-nullability flag on the exported type metadata path or add a compatibility fix/test for it.

auto nested_type = std::dynamic_pointer_cast<const DataTypeNullable>(nullable_nested);
DORIS_CHECK(nested_type != nullptr);
nested = std::move(nested_type);
nested_as_base = nested;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nested_as_base now stores the nullable wrapper, so get_default() starts returning [NULL] for every array type because DataTypeNullable::get_default() is Field(). That default is used when BE backfills omitted values for non-nullable columns in partial-update/default-fill paths (be/src/storage/tablet/base_tablet.cpp:1060, be/src/storage/partial_update_info.cpp:451), so ARRAY<INT> can now be materialized as [NULL] instead of [0]. I think nested_as_base needs to preserve the original nested_ type (or remove_nullable(nested)) while nested keeps the nullable execution invariant.

}

MutableColumnPtr DataTypeArray::create_column() const {
return ColumnArray::create(nested->create_column(), ColumnArray::ColumnOffsets::create());
Expand All @@ -72,9 +78,10 @@ bool DataTypeArray::equals(const IDataType& rhs) const {

// here we should remove nullable, otherwise here always be 1
size_t DataTypeArray::get_number_of_dimensions() const {
const DataTypeArray* nested_array =
typeid_cast<const DataTypeArray*>(remove_nullable(nested).get());
if (!nested_array) return 1;
auto* nested_array = typeid_cast<const DataTypeArray*>(remove_nullable(nested).get());
if (!nested_array) {
return 1;
}
return 1 +
nested_array
->get_number_of_dimensions(); /// Every modern C++ compiler optimizes tail recursion.
Expand Down Expand Up @@ -133,7 +140,7 @@ const char* DataTypeArray::deserialize(const char* buf, MutableColumnPtr* column

void DataTypeArray::to_pb_column_meta(PColumnMeta* col_meta) const {
IDataType::to_pb_column_meta(col_meta);
auto children = col_meta->add_children();
auto* children = col_meta->add_children();
get_nested_type()->to_pb_column_meta(children);
}

Expand Down
11 changes: 7 additions & 4 deletions be/src/core/data_type/data_type_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "common/status.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/define_primitive_type.h"
#include "core/data_type_serde/data_type_array_serde.h"
#include "core/data_type_serde/data_type_serde.h"
Expand All @@ -47,7 +48,8 @@ namespace doris {
class DataTypeArray final : public IDataType {
private:
/// The type of array elements.
DataTypePtr nested;
DataTypeNullablePtr nested;
DataTypePtr nested_as_base;

public:
static constexpr PrimitiveType PType = TYPE_ARRAY;
Expand Down Expand Up @@ -79,7 +81,8 @@ class DataTypeArray final : public IDataType {

bool equals(const IDataType& rhs) const override;

const DataTypePtr& get_nested_type() const { return nested; }
const DataTypePtr& get_nested_type() const { return nested_as_base; }
const DataTypeNullablePtr& get_nullable_nested_type() const { return nested; }

/// 1 for plain array, 2 for array of arrays and so on.
size_t get_number_of_dimensions() const;
Expand All @@ -99,15 +102,15 @@ class DataTypeArray final : public IDataType {
void to_protobuf(PTypeDesc* ptype, PTypeNode* node, PScalarType* scalar_type) const override {
node->set_type(TTypeNodeType::ARRAY);
node->set_contains_null(nested->is_nullable());
nested->to_protobuf(ptype);
get_nested_type()->to_protobuf(ptype);
}

#ifdef BE_TEST
void to_thrift(TTypeDesc& thrift_type, TTypeNode& node) const override {
node.type = TTypeNodeType::ARRAY;
node.__isset.contains_nulls = true;
node.contains_nulls.push_back(nested->is_nullable());
nested->to_thrift(thrift_type);
get_nested_type()->to_thrift(thrift_type);
}
#endif
};
Expand Down
2 changes: 2 additions & 0 deletions be/src/core/data_type/primitive_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ class DataTypeHLL;
class DataTypeJsonb;
class DataTypeArray;
class DataTypeMap;
class DataTypeNullable;
class DataTypeVariant;
class DataTypeStruct;
class DataTypeBitMap;
class DataTypeQuantileState;
using DataTypeNullablePtr = std::shared_ptr<const DataTypeNullable>;
template <PrimitiveType T>
class ColumnVector;
using ColumnUInt8 = ColumnVector<TYPE_BOOLEAN>;
Expand Down
4 changes: 2 additions & 2 deletions be/src/core/data_type_serde/data_type_array_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
auto& array_column = assert_cast<ColumnArray&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_column = array_column.get_data();
DCHECK(nested_column.is_nullable());
DORIS_CHECK(nested_column.is_nullable());
if (slice[0] != '[') {
return Status::InvalidArgument("Array does not start with '[' character, found '{}'",
slice[0]);
Expand Down Expand Up @@ -162,7 +162,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text(
auto& array_column = assert_cast<ColumnArray&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_column = array_column.get_data();
DCHECK(nested_column.is_nullable());
DORIS_CHECK(nested_column.is_nullable());

char collection_delimiter =
options.get_collection_delimiter(hive_text_complex_type_delimiter_level);
Expand Down
10 changes: 8 additions & 2 deletions be/src/core/data_type_serde/data_type_array_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <utility>

#include "common/status.h"
#include "core/data_type_serde/data_type_nullable_serde.h"
#include "core/data_type_serde/data_type_serde.h"

namespace doris {
Expand All @@ -39,7 +40,12 @@ class IDataType;
class DataTypeArraySerDe : public DataTypeSerDe {
public:
DataTypeArraySerDe(DataTypeSerDeSPtr _nested_serde, int nesting_level = 1)
: DataTypeSerDe(nesting_level), nested_serde(std::move(_nested_serde)) {}
: DataTypeSerDe(nesting_level) {
auto nullable_serde =
std::dynamic_pointer_cast<DataTypeNullableSerDe>(std::move(_nested_serde));
DORIS_CHECK(nullable_serde != nullptr);
nested_serde = std::move(nullable_serde);
}

std::string get_name() const override { return "Array(" + nested_serde->get_name() + ")"; }

Expand Down Expand Up @@ -133,6 +139,6 @@ class DataTypeArraySerDe : public DataTypeSerDe {
template <bool is_strict_mode>
Status _from_string(StringRef& str, IColumn& column, const FormatOptions& options) const;

DataTypeSerDeSPtr nested_serde;
DataTypeNullableSerDeSPtr nested_serde;
};
} // namespace doris
3 changes: 3 additions & 0 deletions be/src/core/data_type_serde/data_type_nullable_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,7 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
private:
DataTypeSerDeSPtr nested_serde;
};

using DataTypeNullableSerDeSPtr = std::shared_ptr<DataTypeNullableSerDe>;

} // namespace doris
2 changes: 1 addition & 1 deletion be/src/exprs/function/cast/cast_to_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ WrapperType create_array_wrapper(FunctionContext* context, const DataTypePtr& fr
"CAST AS Array can only be performed between same-dimensional array types");
}

const DataTypePtr& to_nested_type = to_type.get_nested_type();
DataTypePtr to_nested_type = to_type.get_nested_type();

/// Prepare nested type conversion
const auto nested_function =
Expand Down
8 changes: 6 additions & 2 deletions be/test/core/data_type_serde/data_type_serde_string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "core/assert_cast.h"
#include "core/column/column.h"
#include "core/column/column_array.h"
#include "core/column/column_nullable.h"
#include "core/data_type/common_data_type_serder_test.h"
#include "core/data_type/common_data_type_test.h"
#include "core/data_type/data_type.h"
Expand Down Expand Up @@ -308,9 +309,12 @@ TEST_F(DataTypeStringSerDeTest, ArrowMemNotAlignedNestedArr) {
EXPECT_EQ(values_address % 4, 1);

// 5.Test read_column_from_arrow
auto ser_col = ColumnArray::create(ColumnString::create(), ColumnOffset64::create());
auto ser_col = ColumnArray::create(
ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()),
ColumnOffset64::create());
cctz::time_zone tz;
auto serde_list = std::make_shared<DataTypeArraySerDe>(serde_str);
auto serde_nullable_str = std::make_shared<DataTypeNullableSerDe>(serde_str);
auto serde_list = std::make_shared<DataTypeArraySerDe>(serde_nullable_str);
auto st = serde_list->read_column_from_arrow(*ser_col, arr.get(), 0, 1, tz);
EXPECT_TRUE(st.ok());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5259,7 +5259,7 @@ public DataType visitComplexDataType(ComplexDataTypeContext ctx) {
return ParserUtils.withOrigin(ctx, () -> {
switch (ctx.complex.getType()) {
case DorisParser.ARRAY:
return ArrayType.of(typedVisit(ctx.dataType(0)), true);
return ArrayType.of(typedVisit(ctx.dataType(0)));
case DorisParser.MAP:
return MapType.of(typedVisit(ctx.dataType(0)), typedVisit(ctx.dataType(1)));
case DorisParser.STRUCT:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -629,13 +629,13 @@ public static List<Literal> getResultExpression(DataType type, PValues resultCon

private static Pair<DataType, Integer> convertToNereidsType(List<PTypeNode> typeNodes, int start) {
PScalarType pScalarType = typeNodes.get(start).getScalarType();
boolean containsNull = typeNodes.get(start).getContainsNull();
TPrimitiveType tPrimitiveType = TPrimitiveType.findByValue(pScalarType.getType());
DataType type;
int parsedNodes;
if (tPrimitiveType == TPrimitiveType.ARRAY) {
Pair<DataType, Integer> itemType = convertToNereidsType(typeNodes, start + 1);
type = ArrayType.of(itemType.key(), containsNull);
// Array elements are always nullable
type = ArrayType.of(itemType.key());
parsedNodes = 1 + itemType.value();
} else if (tPrimitiveType == TPrimitiveType.MAP) {
Pair<DataType, Integer> keyType = convertToNereidsType(typeNodes, start + 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,7 @@ public DataType pruneCastType(DataTypeAccessTree origin, DataTypeAccessTree cast
children.values().iterator().next().pruneCastType(
origin.children.values().iterator().next(),
cast.children.values().iterator().next()
),
((ArrayType) cast.type).containsNull()
)
);
} else if (type instanceof MapType) {
return MapType.of(
Expand Down Expand Up @@ -717,7 +716,7 @@ private DataType pruneDataType(DataType dataType, List<Pair<String, DataType>> n
}
return new StructType(newFields);
} else if (dataType instanceof ArrayType) {
return ArrayType.of(newChildrenTypes.get(0).second, ((ArrayType) dataType).containsNull());
return ArrayType.of(newChildrenTypes.get(0).second);
} else if (dataType instanceof MapType) {
return MapType.of(newChildrenTypes.get(0).second, newChildrenTypes.get(1).second);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ public List<String> getQualifier() {

@Override
public boolean nullable() {
return ((ArrayType) (this.children.get(0).getDataType())).containsNull();
// Array elements are always nullable
return true;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ private AggregateFunction buildForEach(String nestedName, List<? extends Object>
"foreach must be input array type: '" + nestedName);
}
DataType itemType = ((ArrayType) arrayType).getItemType();
return new SlotReference("mocked", itemType, (((ArrayType) arrayType).containsNull()));
// Array elements are always nullable
return new SlotReference("mocked", itemType, true);
}).collect(Collectors.toList());
return (AggregateFunction) nestedBuilder.build(nestedName, forEachargs).first;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ public static FunctionSignature ensureNestedNullableOfArray(FunctionSignature si
return signature;
}
ArrayType arrayType = (ArrayType) signature.returnType;
return signature.withReturnType(ArrayType.of(arrayType.getItemType(), true));
return signature.withReturnType(ArrayType.of(arrayType.getItemType()));
}

// for time type with precision(now are DateTimeV2Type and TimeV2Type),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {

@Override
public DataType getDataType() {
return ArrayType.of(nested.getDataType(), true);
return ArrayType.of(nested.getDataType());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public ArrayMap withChildren(List<Expression> children) {
public DataType getDataType() {
Preconditions.checkArgument(children.get(0) instanceof Lambda,
"The first arg of array_map must be lambda");
return ArrayType.of(((Lambda) children.get(0)).getRetType(), true);
return ArrayType.of(((Lambda) children.get(0)).getRetType());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ public DataType getDataType() {
ArrayItemReference argRef = lambda.getLambdaArguments().get(0);
Expression arrayExpr = argRef.getArrayExpression();
ArrayType arrayType = (ArrayType) arrayExpr.getDataType();
return ArrayType.of(arrayType.getItemType(), true);
return ArrayType.of(arrayType.getItemType());
} else if (children.get(0).getDataType() instanceof ArrayType) {
Expression arrayExpr = children.get(0);
ArrayType arrayType = (ArrayType) arrayExpr.getDataType();
return ArrayType.of(arrayType.getItemType(), true);
return ArrayType.of(arrayType.getItemType());
} else {
throw new AnalysisException("The first arg of array_sort must be lambda or array");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,48 +24,43 @@

/**
* Array type in Nereids.
*
* <p>Note: Array elements are always nullable in Doris. The NOT NULL constraint on array elements
* (e.g., ARRAY&lt;INT NOT NULL&gt;) is not supported. This simplification aligns with the actual
* behavior where both FE planning and BE execution always treat array elements as nullable.</p>
*/
public class ArrayType extends DataType implements ComplexDataType, NestedColumnPrunable {

public static final ArrayType SYSTEM_DEFAULT = new ArrayType(NullType.INSTANCE, true);
public static final ArrayType SYSTEM_DEFAULT = new ArrayType(NullType.INSTANCE);

public static final int WIDTH = 64;

private final DataType itemType;
private final boolean containsNull;

private ArrayType(DataType itemType, boolean containsNull) {
private ArrayType(DataType itemType) {
this.itemType = Objects.requireNonNull(itemType, "itemType can not be null");
this.containsNull = containsNull;
}

public static ArrayType of(DataType itemType) {
return of(itemType, true);
}

public static ArrayType of(DataType itemType, boolean containsNull) {
if (itemType.equals(NullType.INSTANCE)) {
return SYSTEM_DEFAULT;
}
return new ArrayType(itemType, containsNull);
return new ArrayType(itemType);
}

@Override
public DataType conversion() {
return new ArrayType(itemType.conversion(), containsNull);
return new ArrayType(itemType.conversion());
}

public DataType getItemType() {
return itemType;
}

public boolean containsNull() {
return containsNull;
}

@Override
public Type toCatalogDataType() {
return new org.apache.doris.catalog.ArrayType(itemType.toCatalogDataType(), containsNull);
// Catalog ArrayType defaults containsNull to true via single-arg constructor
return new org.apache.doris.catalog.ArrayType(itemType.toCatalogDataType());
}

@Override
Expand All @@ -85,8 +80,7 @@ public boolean equals(Object o) {
return false;
}
ArrayType arrayType = (ArrayType) o;
return Objects.equals(itemType, arrayType.itemType)
&& Objects.equals(containsNull, arrayType.containsNull);
return Objects.equals(itemType, arrayType.itemType);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ public static DataType fromCatalogType(Type type) {
return MapType.of(fromCatalogType(mapType.getKeyType()), fromCatalogType(mapType.getValueType()));
} else if (type.isArrayType()) {
org.apache.doris.catalog.ArrayType arrayType = (org.apache.doris.catalog.ArrayType) type;
return ArrayType.of(fromCatalogType(arrayType.getItemType()), arrayType.getContainsNull());
return ArrayType.of(fromCatalogType(arrayType.getItemType()));
} else if (type.isVariantType()) {
// In the past, variant metadata used the ScalarType type.
// Now, we use VariantType, which inherits from ScalarType, as the new metadata storage.
Expand Down Expand Up @@ -803,7 +803,7 @@ public List<DataType> getAllPromotions() {
return arrayType.getItemType()
.getAllPromotions()
.stream()
.map(promotionType -> ArrayType.of(promotionType, arrayType.containsNull()))
.map(promotionType -> ArrayType.of(promotionType))
.collect(ImmutableList.toImmutableList());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,7 @@ public static DataType replaceTimesWithTargetPrecision(DataType dataType, int ta
public static DataType replaceSpecifiedType(DataType dataType,
Class<? extends DataType> specifiedType, DataType newType) {
if (dataType instanceof ArrayType) {
return ArrayType.of(replaceSpecifiedType(((ArrayType) dataType).getItemType(), specifiedType, newType),
((ArrayType) dataType).containsNull());
return ArrayType.of(replaceSpecifiedType(((ArrayType) dataType).getItemType(), specifiedType, newType));
} else if (dataType instanceof MapType) {
return MapType.of(replaceSpecifiedType(((MapType) dataType).getKeyType(), specifiedType, newType),
replaceSpecifiedType(((MapType) dataType).getValueType(), specifiedType, newType));
Expand Down
Loading
Loading