-
Notifications
You must be signed in to change notification settings - Fork 13.8k
[FLINK-17668][table] Fix shortcomings in new data structures #12130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0a645fd
3ce9117
98c8478
e562ee8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,11 +21,20 @@ | |
| import org.apache.flink.annotation.PublicEvolving; | ||
| import org.apache.flink.table.types.logical.ArrayType; | ||
| import org.apache.flink.table.types.logical.DecimalType; | ||
| import org.apache.flink.table.types.logical.DistinctType; | ||
| import org.apache.flink.table.types.logical.LocalZonedTimestampType; | ||
| import org.apache.flink.table.types.logical.LogicalType; | ||
| import org.apache.flink.table.types.logical.RowType; | ||
| import org.apache.flink.table.types.logical.TimestampType; | ||
|
|
||
| import javax.annotation.Nullable; | ||
|
|
||
| import java.io.Serializable; | ||
|
|
||
| import static org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getFieldCount; | ||
| import static org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getPrecision; | ||
| import static org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getScale; | ||
|
|
||
| /** | ||
| * Base interface of an internal data structure representing data of {@link ArrayType}. | ||
| * | ||
|
|
@@ -163,7 +172,9 @@ public interface ArrayData { | |
| * @param pos position of the element to return | ||
| * @param elementType the element type of the array | ||
| * @return the element object at the specified position in this array data | ||
| * @deprecated Use {@link #createElementGetter(LogicalType)} for avoiding logical types during runtime. | ||
| */ | ||
| @Deprecated | ||
| static Object get(ArrayData array, int pos, LogicalType elementType) { | ||
| if (array.isNullAt(pos)) { | ||
| return null; | ||
|
|
@@ -215,4 +226,103 @@ static Object get(ArrayData array, int pos, LogicalType elementType) { | |
| throw new UnsupportedOperationException("Unsupported type: " + elementType); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Creates an accessor for getting elements in an internal array data structure at the | ||
| * given position. | ||
| * | ||
| * @param elementType the element type of the array | ||
| */ | ||
| static ElementGetter createElementGetter(LogicalType elementType) { | ||
| final ElementGetter elementGetter; | ||
| // ordered by type root definition | ||
| switch (elementType.getTypeRoot()) { | ||
| case CHAR: | ||
| case VARCHAR: | ||
| elementGetter = ArrayData::getString; | ||
| break; | ||
| case BOOLEAN: | ||
| elementGetter = ArrayData::getBoolean; | ||
| break; | ||
| case BINARY: | ||
| case VARBINARY: | ||
| elementGetter = ArrayData::getBinary; | ||
| break; | ||
| case DECIMAL: | ||
| final int decimalPrecision = getPrecision(elementType); | ||
| final int decimalScale = getScale(elementType); | ||
| elementGetter = (array, pos) -> array.getDecimal(pos, decimalPrecision, decimalScale); | ||
| break; | ||
| case TINYINT: | ||
| elementGetter = ArrayData::getByte; | ||
| break; | ||
| case SMALLINT: | ||
| elementGetter = ArrayData::getShort; | ||
| break; | ||
| case INTEGER: | ||
| case DATE: | ||
| case TIME_WITHOUT_TIME_ZONE: | ||
| case INTERVAL_YEAR_MONTH: | ||
| elementGetter = ArrayData::getInt; | ||
| break; | ||
| case BIGINT: | ||
| case INTERVAL_DAY_TIME: | ||
| elementGetter = ArrayData::getLong; | ||
| break; | ||
| case FLOAT: | ||
| elementGetter = ArrayData::getFloat; | ||
| break; | ||
| case DOUBLE: | ||
| elementGetter = ArrayData::getDouble; | ||
| break; | ||
| case TIMESTAMP_WITHOUT_TIME_ZONE: | ||
| case TIMESTAMP_WITH_LOCAL_TIME_ZONE: | ||
| final int timestampPrecision = getPrecision(elementType); | ||
| elementGetter = (array, pos) -> array.getTimestamp(pos, timestampPrecision); | ||
| break; | ||
| case TIMESTAMP_WITH_TIME_ZONE: | ||
| throw new UnsupportedOperationException(); | ||
| case ARRAY: | ||
| elementGetter = ArrayData::getArray; | ||
| break; | ||
| case MULTISET: | ||
| case MAP: | ||
| elementGetter = ArrayData::getMap; | ||
| break; | ||
| case ROW: | ||
| case STRUCTURED_TYPE: | ||
| final int rowFieldCount = getFieldCount(elementType); | ||
| elementGetter = (array, pos) -> array.getRow(pos, rowFieldCount); | ||
| break; | ||
| case DISTINCT_TYPE: | ||
| elementGetter = createElementGetter(((DistinctType) elementType).getSourceType()); | ||
| break; | ||
| case RAW: | ||
| elementGetter = ArrayData::getRawValue; | ||
| break; | ||
| case NULL: | ||
| case SYMBOL: | ||
| case UNRESOLVED: | ||
| default: | ||
| throw new IllegalArgumentException(); | ||
| } | ||
| if (!elementType.isNullable()) { | ||
| return elementGetter; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the nullable attribute reliable now ?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. I have to say it's tricky to guarantee the NOT NULL attributes in runtime, especially in streaming. AFAIK,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A lot of locations ignore nullability. We should gradually fix this. By default a type is nullable, so the check is performed in most of the cases. The implementation of the method is correct (given its input parameters), if there is a problem it should be the callers task.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think we should be cautious here. As you said, most of the cases are nullable, so we don't get much performance improvement from this. However, it may occure unexpected bugs if it is called in planner. We can add a TODO on this. When we fix all the nullable problem, we can update this without breaking compatibility.
The problem is planner will also call this method, and I'm sure there are cases that null values exist in NOT NULL fields, then an exception will happen. This will be a bug then.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no strong guarantee that our runtime would not generate null value for a not null field based on current codebase, and if there was, the bug would probably hard to trace out. How much performance can we gain with the type nullable short-cut check ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So far only the converters will call this method. Let's fix upstream bugs gradually, once code is updated to this method.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is not only about performance but also correctness. A caller assumes the behavior when passing a nullable/not nullable type. It is harder to trace where a null comes from than a method that throws a hard exception when it encounters an unexpected null.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The nullble problems are known issues but can't be fixed easily, they may need major rework (e.g. Expand node). I don't think we should let the queries failed. If we want to keep this logic, then I would suggest only use this method in connectors, because we guarantee the NOT NULL constraint when writing values to sink (
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, we can add this information to the issue that removes the |
||
| } | ||
| return (array, pos) -> { | ||
| if (array.isNullAt(pos)) { | ||
| return null; | ||
| } | ||
| return elementGetter.getElementOrNull(array, pos); | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * Accessor for getting the elements of an array during runtime. | ||
| * | ||
| * @see #createElementGetter(LogicalType) | ||
| */ | ||
| interface ElementGetter extends Serializable { | ||
| @Nullable Object getElementOrNull(ArrayData array, int pos); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you create an issue to use
createElementGetterto replacegetutility and remove this method? I think we should provide a clean interface before release.