generated from finos/software-project-blueprint
-
Notifications
You must be signed in to change notification settings - Fork 230
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add arrow service test support (#2329)
* Update service test executor for arrow executions
- Loading branch information
Showing
6 changed files
with
350 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
229 changes: 229 additions & 0 deletions
229
...-arrow-runtime/src/main/java/org/apache/arrow/adapter/jdbc/LegendArrowVectorIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
// Copyright 2023 Goldman Sachs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License | ||
|
||
package org.apache.arrow.adapter.jdbc; | ||
|
||
import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.getJdbcFieldInfoForColumn; | ||
|
||
import java.sql.ResultSet; | ||
import java.sql.ResultSetMetaData; | ||
import java.sql.SQLException; | ||
import java.util.Iterator; | ||
|
||
import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; | ||
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; | ||
import org.apache.arrow.util.AutoCloseables; | ||
import org.apache.arrow.util.Preconditions; | ||
import org.apache.arrow.vector.FieldVector; | ||
import org.apache.arrow.vector.VectorSchemaRoot; | ||
import org.apache.arrow.vector.types.pojo.ArrowType; | ||
import org.apache.arrow.vector.types.pojo.Schema; | ||
import org.apache.arrow.vector.util.ValueVectorUtility; | ||
|
||
/** | ||
* temporary over ride of ArrowVectorIterator until https://github.com/apache/arrow/pull/37085 is released. This works around an long standing bug in H2 where ResultSetMetadata may incorrectly set a result column as not nullable | ||
*/ | ||
public class LegendArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable | ||
{ | ||
|
||
private final ResultSet resultSet; | ||
private final JdbcToArrowConfig config; | ||
|
||
private final Schema schema; | ||
private final ResultSetMetaData rsmd; | ||
|
||
private final JdbcConsumer[] consumers; | ||
final CompositeJdbcConsumer compositeConsumer; | ||
|
||
// this is used only if resuing vector schema root is enabled. | ||
private VectorSchemaRoot nextBatch; | ||
|
||
private final int targetBatchSize; | ||
|
||
// This is used to track whether the ResultSet has been fully read, and is needed spcifically for cases where there | ||
// is a ResultSet having zero rows (empty): | ||
private boolean readComplete = false; | ||
|
||
/** | ||
* Construct an instance. | ||
*/ | ||
private LegendArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException | ||
{ | ||
this.resultSet = resultSet; | ||
this.config = config; | ||
this.schema = JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config); | ||
this.targetBatchSize = config.getTargetBatchSize(); | ||
|
||
rsmd = resultSet.getMetaData(); | ||
consumers = new JdbcConsumer[rsmd.getColumnCount()]; | ||
this.compositeConsumer = new CompositeJdbcConsumer(consumers); | ||
this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null; | ||
} | ||
|
||
/** | ||
* Create a ArrowVectorIterator to partially convert data. | ||
*/ | ||
public static LegendArrowVectorIterator create( | ||
ResultSet resultSet, | ||
JdbcToArrowConfig config) | ||
throws SQLException | ||
{ | ||
LegendArrowVectorIterator iterator = null; | ||
try | ||
{ | ||
iterator = new LegendArrowVectorIterator(resultSet, config); | ||
} | ||
catch (Throwable e) | ||
{ | ||
AutoCloseables.close(e, iterator); | ||
throw new RuntimeException("Error occurred while creating iterator.", e); | ||
} | ||
return iterator; | ||
} | ||
|
||
private void consumeData(VectorSchemaRoot root) | ||
{ | ||
// consume data | ||
try | ||
{ | ||
int readRowCount = 0; | ||
if (targetBatchSize == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) | ||
{ | ||
while (resultSet.next()) | ||
{ | ||
ValueVectorUtility.ensureCapacity(root, readRowCount + 1); | ||
compositeConsumer.consume(resultSet); | ||
readRowCount++; | ||
} | ||
readComplete = true; | ||
} | ||
else | ||
{ | ||
while ((readRowCount < targetBatchSize) && !readComplete) | ||
{ | ||
if (resultSet.next()) | ||
{ | ||
compositeConsumer.consume(resultSet); | ||
readRowCount++; | ||
} | ||
else | ||
{ | ||
readComplete = true; | ||
} | ||
} | ||
} | ||
|
||
root.setRowCount(readRowCount); | ||
} | ||
catch (Throwable e) | ||
{ | ||
compositeConsumer.close(); | ||
throw new RuntimeException("Error occurred while consuming data.", e); | ||
} | ||
} | ||
|
||
private VectorSchemaRoot createVectorSchemaRoot() throws SQLException | ||
{ | ||
VectorSchemaRoot root = null; | ||
try | ||
{ | ||
root = VectorSchemaRoot.create(schema, config.getAllocator()); | ||
if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) | ||
{ | ||
ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); | ||
} | ||
} | ||
catch (Throwable e) | ||
{ | ||
if (root != null) | ||
{ | ||
root.close(); | ||
} | ||
throw new RuntimeException("Error occurred while creating schema root.", e); | ||
} | ||
initialize(root); | ||
return root; | ||
} | ||
|
||
private void initialize(VectorSchemaRoot root) throws SQLException | ||
{ | ||
for (int i = 1; i <= consumers.length; i++) | ||
{ | ||
final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); | ||
ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); | ||
consumers[i - 1] = JdbcToArrowUtils.getConsumer( | ||
arrowType, i, true, root.getVector(i - 1), config); | ||
} | ||
} | ||
|
||
// Loads the next schema root or null if no more rows are available. | ||
private void load(VectorSchemaRoot root) | ||
{ | ||
for (int i = 0; i < consumers.length; i++) | ||
{ | ||
FieldVector vec = root.getVector(i); | ||
if (config.isReuseVectorSchemaRoot()) | ||
{ | ||
// if we are reusing the vector schema root, | ||
// we must reset the vector before populating it with data. | ||
vec.reset(); | ||
} | ||
consumers[i].resetValueVector(vec); | ||
} | ||
|
||
consumeData(root); | ||
} | ||
|
||
@Override | ||
public boolean hasNext() | ||
{ | ||
return !readComplete; | ||
} | ||
|
||
/** | ||
* Gets the next vector. | ||
* If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, | ||
* the client is responsible for freeing its resources. | ||
*/ | ||
@Override | ||
public VectorSchemaRoot next() | ||
{ | ||
Preconditions.checkArgument(hasNext()); | ||
try | ||
{ | ||
VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); | ||
load(ret); | ||
return ret; | ||
} | ||
catch (Exception e) | ||
{ | ||
close(); | ||
throw new RuntimeException("Error occurred while getting next schema root.", e); | ||
} | ||
} | ||
|
||
/** | ||
* Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a new VectorSchemaRoot | ||
* is created for each batch, each root must be closed manually by the client code. | ||
*/ | ||
@Override | ||
public void close() | ||
{ | ||
if (config.isReuseVectorSchemaRoot()) | ||
{ | ||
nextBatch.close(); | ||
compositeConsumer.close(); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.