Skip to content

Commit

Permalink
Modifying the jdbcToArrowSchema and jdbcToArrowVectors methods to receive JdbcToArrowConfig objects.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Mike Pigott committed Dec 8, 2018
1 parent 8d6cf00 commit 68c91e7
Showing 1 changed file with 49 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.util.Calendar;
import java.util.List;

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
Expand Down Expand Up @@ -90,6 +91,21 @@ public class JdbcToArrowUtils {
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;

/**
 * Builds the Arrow {@link Schema} for the given JDBC {@link ResultSetMetaData}.
 *
 * <p>Convenience overload: wraps the calendar in a {@link JdbcToArrowConfig} (paired with a
 * zero-limit {@link RootAllocator}) and delegates to the configuration-based overload.
 *
 * @param rsmd The ResultSetMetaData to read the JDBC column metadata from.
 * @param calendar The calendar whose time zone is used when constructing Timestamp fields.
 * @return {@link Schema}
 * @throws SQLException on error
 */
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
  Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
  Preconditions.checkNotNull(calendar, "Calendar object can't be null");

  final JdbcToArrowConfig schemaConfig = new JdbcToArrowConfig(new RootAllocator(0), calendar);
  return jdbcToArrowSchema(rsmd, schemaConfig);
}

/**
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
*
Expand Down Expand Up @@ -120,14 +136,15 @@ public class JdbcToArrowUtils {
* CLOB --> ArrowType.Utf8
* BLOB --> ArrowType.Binary
*
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
* @param config The configuration to use when constructing the schema.
* @return {@link Schema}
* @throws SQLException on error
*/
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {

public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
Preconditions.checkNotNull(config, "The configuration object must not be null");
Preconditions.checkArgument(config.isValid(), "The configuration object must be valid");

List<Field> fields = new ArrayList<>();
int columnCount = rsmd.getColumnCount();
Expand Down Expand Up @@ -179,7 +196,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
break;
case Types.TIMESTAMP:
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND,
calendar.getTimeZone().getID())), null));
config.getCalendar().getTimeZone().getID())), null));
break;
case Types.BINARY:
case Types.VARBINARY:
Expand Down Expand Up @@ -222,17 +239,38 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
* the given Arrow Vector objects.
*
* @param rs ResultSet to use to fetch the data from underlying database
* @param root Arrow {@link VectorSchemaRoot} object to populate
* @param calendar The calendar to use when reading time-based data.
* @throws SQLException on error
*/
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
    throws SQLException, IOException {

  // Validate each argument with a message that names the argument actually being checked.
  // (A stale duplicate check on root previously reported "JDBC ResultSet object can't be null",
  // which pointed callers at the wrong parameter.)
  Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
  Preconditions.checkNotNull(root, "Vector Schema cannot be null");
  Preconditions.checkNotNull(calendar, "Calendar object can't be null");

  // Convenience overload: wrap the calendar in a configuration object and delegate to the
  // JdbcToArrowConfig-based overload, which does the actual ResultSet-to-vector transposition.
  jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
}

/**
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
* the given Arrow Vector objects.
*
* @param rs ResultSet to use to fetch the data from underlying database
* @param root Arrow {@link VectorSchemaRoot} object to populate
* @param config The configuration to use when reading the data.
* @throws SQLException on error
*/
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config)
throws SQLException, IOException {

Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null");
Preconditions.checkArgument(config.isValid(), "JDBC-to-Arrow configuration must be valid");

ResultSetMetaData rsmd = rs.getMetaData();
int columnCount = rsmd.getColumnCount();

Expand Down Expand Up @@ -289,16 +327,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
break;
case Types.DATE:
updateVector((DateMilliVector) root.getVector(columnName),
rs.getDate(i, calendar), !rs.wasNull(), rowCount);
rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount);
break;
case Types.TIME:
updateVector((TimeMilliVector) root.getVector(columnName),
rs.getTime(i, calendar), !rs.wasNull(), rowCount);
rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount);
break;
case Types.TIMESTAMP:
// TODO: Need to handle precision such as milli, micro, nano
updateVector((TimeStampVector) root.getVector(columnName),
rs.getTimestamp(i, calendar), !rs.wasNull(), rowCount);
rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount);
break;
case Types.BINARY:
case Types.VARBINARY:
Expand Down

0 comments on commit 68c91e7

Please sign in to comment.