Skip to content

Commit

Permalink
ARROW-7301: [Java] Sql type DATE should correspond to DateDayVector
Browse files Browse the repository at this point in the history
According to SQL convention, the SQL type DATE corresponds to the format YYYY-MM-DD, without components for hour/minute/second/millisecond.

Therefore, JDBC type DATE should correspond to DateDayVector, with a type width of 4, instead of 8.

Closes #5944 from liyafan82/fly_1203_date and squashes the following commits:

a6de377 <liyafan82>  Remove division in time conversion
be73192 <liyafan82>  Resolve comments
eea8b79 <liyafan82>  Sql type DATE should correspond to DateDayVector

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
  • Loading branch information
liyafan82 authored and kszucs committed Feb 7, 2020
1 parent 819b18d commit 273e1ce
Show file tree
Hide file tree
Showing 15 changed files with 110 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.Float4Vector;
Expand Down Expand Up @@ -210,7 +210,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig
* <li>BINARY --> ArrowType.Binary</li>
* <li>VARBINARY --> ArrowType.Binary</li>
* <li>LONGVARBINARY --> ArrowType.Binary</li>
* <li>DATE --> ArrowType.Date(DateUnit.MILLISECOND)</li>
* <li>DATE --> ArrowType.Date(DateUnit.DAY)</li>
* <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
* <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)</li>
* <li>CLOB --> ArrowType.Utf8</li>
Expand Down Expand Up @@ -265,7 +265,7 @@ public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calend
case Types.CLOB:
return new ArrowType.Utf8();
case Types.DATE:
return new ArrowType.Date(DateUnit.MILLISECOND);
return new ArrowType.Date(DateUnit.DAY);
case Types.TIME:
return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
case Types.TIMESTAMP:
Expand Down Expand Up @@ -402,7 +402,7 @@ static JdbcConsumer getConsumer(ResultSet resultSet, int columnIndex, int jdbcCo
case Types.LONGNVARCHAR:
return VarCharConsumer.createConsumer((VarCharVector) vector, columnIndex, nullable);
case Types.DATE:
return DateConsumer.createConsumer((DateMilliVector) vector, columnIndex, nullable, calendar);
return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar);
case Types.TIME:
return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar);
case Types.TIMESTAMP:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@

package org.apache.arrow.adapter.jdbc.consumer;

import java.sql.Date;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.concurrent.TimeUnit;

import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DateMilliVector;

/**
Expand All @@ -30,11 +34,23 @@
*/
public class DateConsumer {

public static final int MAX_DAY;

static {
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
try {
java.util.Date date = dateFormat.parse("9999-12-31");
MAX_DAY = (int) TimeUnit.MILLISECONDS.toDays(date.getTime());
} catch (ParseException e) {
throw new IllegalArgumentException("Failed to parse max day", e);
}
}

/**
* Creates a consumer for {@link DateDayVector}.
*/
public static JdbcConsumer<DateMilliVector> createConsumer(
DateMilliVector vector, int index, boolean nullable, Calendar calendar) {
public static JdbcConsumer<DateDayVector> createConsumer(
DateDayVector vector, int index, boolean nullable, Calendar calendar) {
if (nullable) {
return new NullableDateConsumer(vector, index, calendar);
} else {
Expand All @@ -45,21 +61,21 @@ public static JdbcConsumer<DateMilliVector> createConsumer(
/**
* Nullable consumer for date.
*/
static class NullableDateConsumer extends BaseConsumer<DateMilliVector> {
static class NullableDateConsumer extends BaseConsumer<DateDayVector> {

protected final Calendar calendar;

/**
* Instantiate a DateConsumer.
*/
public NullableDateConsumer(DateMilliVector vector, int index) {
public NullableDateConsumer(DateDayVector vector, int index) {
this(vector, index, /* calendar */null);
}

/**
* Instantiate a DateConsumer.
*/
public NullableDateConsumer(DateMilliVector vector, int index, Calendar calendar) {
public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) {
super(vector, index);
this.calendar = calendar;
}
Expand All @@ -69,7 +85,11 @@ public void consume(ResultSet resultSet) throws SQLException {
Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) :
resultSet.getDate(columnIndexInResultSet, calendar);
if (!resultSet.wasNull()) {
vector.setSafe(currentIndex, date.getTime());
int day = (int) TimeUnit.MILLISECONDS.toDays(date.getTime());
if (day < 0 || day > MAX_DAY) {
throw new IllegalArgumentException("Day overflow: " + day);
}
vector.setSafe(currentIndex, day);
}
currentIndex++;
}
Expand All @@ -78,21 +98,21 @@ public void consume(ResultSet resultSet) throws SQLException {
/**
* Non-nullable consumer for date.
*/
static class NonNullableDateConsumer extends BaseConsumer<DateMilliVector> {
static class NonNullableDateConsumer extends BaseConsumer<DateDayVector> {

protected final Calendar calendar;

/**
* Instantiate a DateConsumer.
*/
public NonNullableDateConsumer(DateMilliVector vector, int index) {
public NonNullableDateConsumer(DateDayVector vector, int index) {
this(vector, index, /* calendar */null);
}

/**
* Instantiate a DateConsumer.
*/
public NonNullableDateConsumer(DateMilliVector vector, int index, Calendar calendar) {
public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) {
super(vector, index);
this.calendar = calendar;
}
Expand All @@ -101,7 +121,11 @@ public NonNullableDateConsumer(DateMilliVector vector, int index, Calendar calen
public void consume(ResultSet resultSet) throws SQLException {
Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) :
resultSet.getDate(columnIndexInResultSet, calendar);
vector.setSafe(currentIndex, date.getTime());
int day = (int) TimeUnit.MILLISECONDS.toDays(date.getTime());
if (day < 0 || day > MAX_DAY) {
throw new IllegalArgumentException("Day overflow: " + day);
}
vector.setSafe(currentIndex, day);
currentIndex++;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
Expand Down Expand Up @@ -173,14 +173,14 @@ public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int r
}
}

public static void assertDateVectorValues(DateMilliVector dateMilliVector, int rowCount, Long[] values) {
assertEquals(rowCount, dateMilliVector.getValueCount());
public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCount, Integer[] values) {
assertEquals(rowCount, dateDayVector.getValueCount());

for (int j = 0; j < dateMilliVector.getValueCount(); j++) {
for (int j = 0; j < dateDayVector.getValueCount(); j++) {
if (values[j] == null) {
assertTrue(dateMilliVector.isNull(j));
assertTrue(dateDayVector.isNull(j));
} else {
assertEquals(values[j].longValue(), dateMilliVector.get(j));
assertEquals(values[j].longValue(), dateDayVector.get(j));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
Expand Down Expand Up @@ -202,8 +202,8 @@ public void testDataSets(VectorSchemaRoot root) {
table.getCharValues());
break;
case DATE:
assertDateVectorValues((DateMilliVector) root.getVector(table.getVector()), table.getValues().length,
table.getLongValues());
assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length,
table.getIntValues());
break;
case TIME:
assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
Expand Down Expand Up @@ -206,8 +206,8 @@ private void testAllVectorValues(VectorSchemaRoot root) {
assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(),
getBooleanValues(table.getValues(), BOOL));

assertDateVectorValues((DateMilliVector) root.getVector(DATE), table.getRowCount(),
getLongValues(table.getValues(), DATE));
assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(),
getIntValues(table.getValues(), DATE));

assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(),
getLongValues(table.getValues(), TIME));
Expand Down Expand Up @@ -242,7 +242,7 @@ public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, in
assertNullValues((Float8Vector) root.getVector(vectors[6]), rowCount);
assertNullValues((Float4Vector) root.getVector(vectors[7]), rowCount);
assertNullValues((TimeMilliVector) root.getVector(vectors[8]), rowCount);
assertNullValues((DateMilliVector) root.getVector(vectors[9]), rowCount);
assertNullValues((DateDayVector) root.getVector(vectors[9]), rowCount);
assertNullValues((TimeStampVector) root.getVector(vectors[10]), rowCount);
assertNullValues((VarBinaryVector) root.getVector(vectors[11]), rowCount);
assertNullValues((VarCharVector) root.getVector(vectors[12]), rowCount);
Expand All @@ -265,7 +265,7 @@ public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSche
assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount);
assertNullValues((Float4Vector) root.getVector(vectors[3]), rowCount);
assertNullValues((TimeMilliVector) root.getVector(vectors[4]), rowCount);
assertNullValues((DateMilliVector) root.getVector(vectors[5]), rowCount);
assertNullValues((DateDayVector) root.getVector(vectors[5]), rowCount);
assertNullValues((TimeStampVector) root.getVector(vectors[6]), rowCount);
assertNullValues((VarBinaryVector) root.getVector(vectors[7]), rowCount);
assertNullValues((VarCharVector) root.getVector(vectors[8]), rowCount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
Expand Down Expand Up @@ -202,8 +202,8 @@ public void testDataSets(VectorSchemaRoot root) {
assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(),
getBooleanValues(table.getValues(), BOOL));

assertDateVectorValues((DateMilliVector) root.getVector(DATE), table.getRowCount(),
getLongValues(table.getValues(), DATE));
assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(),
getIntValues(table.getValues(), DATE));

assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(),
getLongValues(table.getValues(), TIME));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.adapter.jdbc.Table;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.TimeMilliVector;
import org.apache.arrow.vector.TimeStampVector;
import org.apache.arrow.vector.VectorSchemaRoot;
Expand Down Expand Up @@ -145,8 +145,8 @@ public void testDataSets(VectorSchemaRoot root) {
case EST_DATE:
case GMT_DATE:
case PST_DATE:
assertDateVectorValues((DateMilliVector) root.getVector(table.getVector()), table.getValues().length,
table.getLongValues());
assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length,
table.getIntValues());
break;
case EST_TIME:
case GMT_TIME:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
Expand All @@ -51,6 +51,7 @@
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

Expand All @@ -66,6 +67,7 @@ public JdbcToArrowVectorIteratorTest(Table table) {
super(table);
}

@Test
@Override
public void testJdbcToArrowValues() throws SQLException, IOException {

Expand All @@ -91,7 +93,7 @@ private void validate(ArrowVectorIterator iterator) throws SQLException, IOExcep
List<VarCharVector> vectorsForChar = new ArrayList<>();
List<BitVector> vectorsForBit = new ArrayList<>();
List<BitVector> vectorsForBool = new ArrayList<>();
List<DateMilliVector> dateMilliVectors = new ArrayList<>();
List<DateDayVector> dateDayVectors = new ArrayList<>();
List<TimeMilliVector> timeMilliVectors = new ArrayList<>();
List<TimeStampVector> timeStampVectors = new ArrayList<>();
List<DecimalVector> decimalVectors = new ArrayList<>();
Expand All @@ -115,7 +117,7 @@ private void validate(ArrowVectorIterator iterator) throws SQLException, IOExcep
vectorsForChar.add((VarCharVector) root.getVector(CHAR));
vectorsForBit.add((BitVector) root.getVector(BIT));
vectorsForBool.add((BitVector) root.getVector(BOOL));
dateMilliVectors.add((DateMilliVector) root.getVector(DATE));
dateDayVectors.add((DateDayVector) root.getVector(DATE));
timeMilliVectors.add((TimeMilliVector) root.getVector(TIME));
timeStampVectors.add((TimeStampVector) root.getVector(TIMESTAMP));
decimalVectors.add((DecimalVector) root.getVector(DECIMAL));
Expand All @@ -134,7 +136,7 @@ private void validate(ArrowVectorIterator iterator) throws SQLException, IOExcep
assertVarCharVectorValues(vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR));
assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT));
assertBooleanVectorValues(vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL));
assertDateMilliVectorValues(dateMilliVectors, table.getRowCount(), getLongValues(table.getValues(), DATE));
assertDateDayVectorValues(dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE));
assertTimeMilliVectorValues(timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME));
assertTimeStampVectorValues(timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP));
assertDecimalVectorValues(decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL));
Expand Down Expand Up @@ -205,12 +207,12 @@ private void assertTimeMilliVectorValues(List<TimeMilliVector> vectors, int rowC
}
}

private void assertDateMilliVectorValues(List<DateMilliVector> vectors, int rowCount, Long[] values) {
private void assertDateDayVectorValues(List<DateDayVector> vectors, int rowCount, Long[] values) {
int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
assertEquals(rowCount, valueCount);

int index = 0;
for (DateMilliVector vector : vectors) {
for (DateDayVector vector : vectors) {
for (int i = 0; i < vector.getValueCount(); i++) {
assertEquals(values[index++].longValue(), vector.get(i));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ values:
- 'DECIMAL_FIELD6=17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23'
- 'DOUBLE_FIELD7=56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345'
- 'TIME_FIELD9=45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000'
- 'DATE_FIELD10=1518393600000,1518393600000,1518393600000,1518393600000,1518393600000,1518393600000,1518393600000,1518393600000,1518393600000,1518393600000'
- 'DATE_FIELD10=17574,17574,17574,17574,17574,17574,17574,17574,17574,17574'
- 'TIMESTAMP_FIELD11=1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000'
- 'CHAR_FIELD16=some char text,some char text,some char text,some char text,some char text,
some char text,some char text,some char text,some char text,some char text'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ values:
- 'DECIMAL_FIELD6=null,17345667789.23,null,17345667789.23,null'
- 'DOUBLE_FIELD7=null,56478356785.345,null,56478356785.345,null'
- 'TIME_FIELD9=null,45935000,null,45935000,null'
- 'DATE_FIELD10=null,1518393600000,null,1518393600000,null'
- 'DATE_FIELD10=null,17574,null,17574,null'
- 'TIMESTAMP_FIELD11=null,1518439535000,null,1518439535000,null'
- 'CHAR_FIELD16=null,some char text,null,some char text,null'
- 'VARCHAR_FIELD13=null,some text that needs to be converted to varchar,null,
Expand Down
20 changes: 10 additions & 10 deletions java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ query: 'select date_field10 from table1;'
drop: 'DROP table table1;'

values:
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '1518393600000'
- '17574'
- '17574'
- '17574'
- '17574'
- '17574'
- '17574'
- '17574'
- '17574'
- '17574'
- '17574'
Loading

0 comments on commit 273e1ce

Please sign in to comment.