Skip to content

Commit

Permalink
apacheGH-35352: [Java] Fix issues with "semi complex" types.
Browse files Browse the repository at this point in the history
"semi complex" types like TimeStamp*TZ, Duration, and FixedSizeBinary
were missing implementations in UnionListWriter, UnionVector,
UnionReader and other associated classes.

This patch adds these missing methods so that these types can now be
written to things like ListVectors, whereas before it would throw an
exception because the methods were just not implemented.

For example, without this patch, one of the new tests added would fail:
```
    TestListVector.testWriterGetTimestampMilliTZField:913 ? IllegalArgument You tried to write a TimeStampMilliTZ type when you are using a ValueWriter of type UnionListWriter.
```

There are also fixes for get and set methods for holders for the respective
*Vectors classes for these types:
  - The get methods did not set fields like TimeStampMilliTZHolder.timezone,
    DurationHolder.unit, FixedSizeBinaryHolder.byteWidth.

  - The set methods did not all validate that those fields matched what
    the vector's ArrowType was set to. For example TimeStampMilliTZHolder.timezone should
    match ArrowType.Timestamp.timezone on the vector and should throw if it doesn't.
    Otherwise users would never get a signal that there is anything wrong with their code writing
    these holders with mismatching values.

This patch additionally marks some of the existing interfaces for writing these
"semi complex" types as deprecated, because they do not actually provide enough
context to properly write these parameterized ArrowTypes. Instead, users should
use the write methods that take *Holders that do provide enough context for the
writers to properly write these types. Also note that the equivalent write
methods for Decimal have already also been marked as deprecated, for the same
reasoning.
  • Loading branch information
henrymai committed Apr 29, 2023
1 parent 16dbd98 commit ebc3270
Show file tree
Hide file tree
Showing 17 changed files with 807 additions and 139 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@

<#include "/@includes/vv_imports.ftl" />

<#function is_timestamp_tz type>
<#return type?starts_with("TimeStamp") && type?ends_with("TZ")>
</#function>

/*
* A FieldWriter which delegates calls to another FieldWriter. The delegate FieldWriter can be promoted to a new type
* when necessary. Classes that extend this class are responsible for handling promotion.
Expand Down Expand Up @@ -105,17 +109,7 @@ public void endEntry() {

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#if minor.class != "Decimal" && minor.class != "Decimal256">
@Override
public void write(${name}Holder holder) {
getWriter(MinorType.${name?upper_case}).write(holder);
}

public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
}

<#elseif minor.class == "Decimal">
<#if minor.class == "Decimal">
@Override
public void write(DecimalHolder holder) {
getWriter(MinorType.DECIMAL).write(holder);
Expand Down Expand Up @@ -156,8 +150,72 @@ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
public void writeBigEndianBytesToDecimal256(byte[] value) {
getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value);
}
<#elseif is_timestamp_tz(minor.class)>
@Override
public void write(${name}Holder holder) {
ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType();
// Take the holder.timezone similar to how PromotableWriter.java:write(DecimalHolder) takes the scale from the holder.
ArrowType.Timestamp arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), holder.timezone);
getWriter(MinorType.${name?upper_case}, arrowType).write(holder);
}

/**
* This version is deprecated in favor of the holder version.
* The holder version is preferred because otherwise the timezone will default to UTC.
*/
@Deprecated
@Override
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType();
// Assumes UTC if no timezone is provided
ArrowType.Timestamp arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), "UTC");
getWriter(MinorType.${name?upper_case}, arrowType).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
}
<#elseif minor.class == "Duration">
@Override
public void write(${name}Holder holder) {
ArrowType.Duration arrowType = new ArrowType.Duration(holder.unit);
getWriter(MinorType.${name?upper_case}, arrowType).write(holder);
}

/**
* This version is deprecated in favor of the holder version.
* If you experience errors with using this version of the method, switch to the holder version.
* The errors occur when using an untyped or unioned PromotableWriter, because this version of the
* method does not have enough information to infer the ArrowType.
*/
@Deprecated
@Override
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
}
<#elseif minor.class == "FixedSizeBinary">
@Override
public void write(${name}Holder holder) {
ArrowType.FixedSizeBinary arrowType = new ArrowType.FixedSizeBinary(holder.byteWidth);
getWriter(MinorType.${name?upper_case}, arrowType).write(holder);
}

/**
* This version is deprecated in favor of the holder version.
* If you experience errors with using this version of the method, switch to the holder version.
* The errors occur when using an untyped or unioned PromotableWriter, because this version of the
* method does not have enough information to infer the ArrowType.
*/
@Deprecated
@Override
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
}
<#else>
@Override
public void write(${name}Holder holder) {
getWriter(MinorType.${name?upper_case}).write(holder);
}

public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
}
</#if>

</#list></#list>
Expand Down
8 changes: 7 additions & 1 deletion java/vector/src/main/codegen/templates/ComplexWriters.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@

<#include "/@includes/vv_imports.ftl" />

<#function is_timestamp_tz type>
<#return type?starts_with("TimeStamp") && type?ends_with("TZ")>
</#function>

/*
* This class is generated using FreeMarker on the ${.template_name} template.
*/
Expand Down Expand Up @@ -191,7 +195,9 @@ public void writeNull() {
public interface ${eName}Writer extends BaseWriter {
public void write(${minor.class}Holder h);

<#if minor.class?starts_with("Decimal")>@Deprecated</#if>
<#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary">
@Deprecated
</#if>
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>);
<#if minor.class?starts_with("Decimal")>

Expand Down
19 changes: 19 additions & 0 deletions java/vector/src/main/codegen/templates/StructWriters.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.complex.writer.FieldWriter;

<#function is_timestamp_tz type>
<#return type?starts_with("TimeStamp") && type?ends_with("TZ")>
</#function>

/*
* This class is generated using FreeMarker and the ${.template_name} template.
*/
Expand Down Expand Up @@ -314,7 +318,22 @@ public void end() {
} else {
if (writer instanceof PromotableWriter) {
// ensure writers are initialized
<#if minor.class?starts_with("Decimal")>
((PromotableWriter)writer).getWriter(MinorType.${upperName}<#if minor.class?starts_with("Decimal")>, new ${minor.arrowType}(precision, scale, ${vectName}Vector.TYPE_WIDTH * 8)</#if>);
<#elseif is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary">
<#if minor.arrowTypeConstructorParams??>
<#assign constructorParams = minor.arrowTypeConstructorParams />
<#else>
<#assign constructorParams = [] />
<#list minor.typeParams?reverse as typeParam>
<#assign constructorParams = constructorParams + [ typeParam.name ] />
</#list>
</#if>
ArrowType arrowType = new ${minor.arrowType}(${constructorParams?join(", ")});
((PromotableWriter)writer).getWriter(MinorType.${upperName}, arrowType);
<#else>
((PromotableWriter)writer).getWriter(MinorType.${upperName});
</#if>
}
}
return writer;
Expand Down
128 changes: 42 additions & 86 deletions java/vector/src/main/codegen/templates/UnionListWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
<#include "/@includes/vv_imports.ftl" />

<#function is_timestamp_tz type>
<#return type?starts_with("TimeStamp") && type?ends_with("TZ")>
</#function>

/*
* This class is generated using freemarker and the ${.template_name} template.
*/
Expand Down Expand Up @@ -103,55 +107,31 @@ public void setPosition(int index) {
super.setPosition(index);
}

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
<#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
<#if !minor.typeParams?? >

<#list vv.types as type><#list type.minor as minor>
<#assign lowerName = minor.class?uncap_first />
<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
<#assign upperName = minor.class?upper_case />
<#assign capName = minor.class?cap_first />
<#assign vectName = capName />
@Override
public ${name}Writer ${uncappedName}() {
public ${minor.class}Writer ${lowerName}() {
return this;
}

<#if minor.typeParams?? >
@Override
public ${name}Writer ${uncappedName}(String name) {
structName = name;
return writer.${uncappedName}(name);
public ${minor.class}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
return writer.${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
}
</#if>
</#list></#list>

@Override
public DecimalWriter decimal() {
return this;
}

@Override
public DecimalWriter decimal(String name, int scale, int precision) {
return writer.decimal(name, scale, precision);
}

@Override
public DecimalWriter decimal(String name) {
return writer.decimal(name);
}

@Override
public Decimal256Writer decimal256() {
return this;
}

@Override
public Decimal256Writer decimal256(String name, int scale, int precision) {
return writer.decimal256(name, scale, precision);
}

@Override
public Decimal256Writer decimal256(String name) {
return writer.decimal256(name);
public ${minor.class}Writer ${lowerName}(String name) {
structName = name;
return writer.${lowerName}(name);
}

</#list></#list>

@Override
public StructWriter struct() {
Expand Down Expand Up @@ -240,18 +220,6 @@ public void end() {
inStruct = false;
}

@Override
public void write(DecimalHolder holder) {
writer.write(holder);
writer.setPosition(writer.idx()+1);
}

@Override
public void write(Decimal256Holder holder) {
writer.write(holder);
writer.setPosition(writer.idx()+1);
}

@Override
public void writeNull() {
if (!listStarted){
Expand All @@ -261,65 +229,53 @@ public void writeNull() {
}
}

public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
writer.writeDecimal(start, buffer, arrowType);
writer.setPosition(writer.idx()+1);
}

public void writeDecimal(long start, ArrowBuf buffer) {
writer.writeDecimal(start, buffer);
<#list vv.types as type>
<#list type.minor as minor>
<#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
@Override
public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
writer.setPosition(writer.idx()+1);
}

public void writeDecimal(BigDecimal value) {
writer.writeDecimal(value);
<#if is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary">
@Override
public void write(${name}Holder holder) {
writer.write(holder);
writer.setPosition(writer.idx()+1);
}

public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType){
writer.writeBigEndianBytesToDecimal(value, arrowType);
writer.setPosition(writer.idx() + 1);
}

public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
writer.writeDecimal256(start, buffer, arrowType);
<#elseif minor.class?starts_with("Decimal")>
public void write${name}(long start, ArrowBuf buffer, ArrowType arrowType) {
writer.write${name}(start, buffer, arrowType);
writer.setPosition(writer.idx()+1);
}

public void writeDecimal256(long start, ArrowBuf buffer) {
writer.writeDecimal256(start, buffer);
@Override
public void write(${name}Holder holder) {
writer.write(holder);
writer.setPosition(writer.idx()+1);
}

public void writeDecimal256(BigDecimal value) {
writer.writeDecimal256(value);
public void write${name}(BigDecimal value) {
writer.write${name}(value);
writer.setPosition(writer.idx()+1);
}

public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType){
writer.writeBigEndianBytesToDecimal256(value, arrowType);
public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType){
writer.writeBigEndianBytesTo${name}(value, arrowType);
writer.setPosition(writer.idx() + 1);
}


<#list vv.types as type>
<#list type.minor as minor>
<#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
<#if !minor.typeParams?? >
<#else>
@Override
public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
writer.setPosition(writer.idx()+1);
}

public void write(${name}Holder holder) {
writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list>);
writer.setPosition(writer.idx()+1);
}
</#if>

</#if>
</#list>
</#list>
}
Expand Down
9 changes: 7 additions & 2 deletions java/vector/src/main/codegen/templates/UnionReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
package org.apache.arrow.vector.complex.impl;

<#include "/@includes/vv_imports.ftl" />

<#function is_timestamp_tz type>
<#return type?starts_with("TimeStamp") && type?ends_with("TZ")>
</#function>

/**
* Source code generated using FreeMarker template ${.template_name}
*/
Expand Down Expand Up @@ -90,7 +95,7 @@ private FieldReader getReaderForIndex(int index) {
<#list type.minor as minor>
<#assign name = minor.class?cap_first />
<#assign uncappedName = name?uncap_first/>
<#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
<#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary">
case ${name?upper_case}:
return (FieldReader) get${name}();
</#if>
Expand Down Expand Up @@ -170,7 +175,7 @@ public int size() {
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
<#assign safeType=friendlyType />
<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
<#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
<#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary">

private ${name}ReaderImpl ${uncappedName}Reader;

Expand Down

0 comments on commit ebc3270

Please sign in to comment.