Skip to content

Commit

Permalink
Addressing Owen's comments and integrating proposed changes
Browse files Browse the repository at this point in the history
Change-Id: I631a8c6b2efde8ee7a07591bdeae6e12ac3e9eb3
  • Loading branch information
Panos Garefalakis committed Apr 20, 2020
1 parent 34565b4 commit d4ce960
Show file tree
Hide file tree
Showing 14 changed files with 324 additions and 388 deletions.
4 changes: 0 additions & 4 deletions java/bench/core/pom.xml
Expand Up @@ -112,10 +112,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
Expand Down
32 changes: 27 additions & 5 deletions java/core/src/java/org/apache/orc/Reader.java
Expand Up @@ -189,7 +189,7 @@ class Options implements Cloneable {
private Boolean useZeroCopy = null;
private Boolean skipCorruptRecords = null;
private TypeDescription schema = null;
private String[] skipRowColumns = null;
private String[] preFilterColumns = null;
Consumer<VectorizedRowBatch> skipRowCallback = null;
private DataReader dataReader = null;
private Boolean tolerateMissingSchema = null;
Expand Down Expand Up @@ -241,8 +241,30 @@ public Options schema(TypeDescription schema) {
return this;
}

public Options setFilter(String[] filterColumnNames, Consumer<VectorizedRowBatch> filterCallback) {
this.skipRowColumns = filterColumnNames;
/**
* Set a row level filter.
* This is an advanced feature that allows the caller to specify
* a list of columns that are read first and then a filter that
* is called to determine which rows, if any, should be read.
*
* Users should expect the batches that come from the reader
* to use the selected array set by their filter.
*
* Use cases for this are predicates that SearchArgs can't represent,
* such as relationships between columns (e.g. columnA == columnB).
* @param filterColumnNames an array of the column names that
* are read before the filter is applied. Only top
* level columns in the reader's schema can be used
* here and they must not be duplicated.
* @param filterCallback a function callback to perform filtering during the call to
* RecordReader.nextBatch. This function should neither reference
* any static fields nor modify the passed-in ColumnVectors, but
* should set the filter output using the selected array.
*
* @return this
*/
public Options setRowFilter(String[] filterColumnNames, Consumer<VectorizedRowBatch> filterCallback) {
this.preFilterColumns = filterColumnNames;
this.skipRowCallback = filterCallback;
return this;
}
Expand Down Expand Up @@ -350,8 +372,8 @@ public Consumer<VectorizedRowBatch> getFilterCallback() {
return skipRowCallback;
}

public String[] getFilterColumnNames(){
return skipRowColumns;
public String[] getPreFilterColumnNames(){
return preFilterColumns;
}

public String[] getColumnNames() {
Expand Down
10 changes: 5 additions & 5 deletions java/core/src/java/org/apache/orc/impl/BitFieldReader.java
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand All @@ -20,16 +20,16 @@
import java.io.EOFException;
import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.io.filter.FilterContext;

public final class BitFieldReader {
private final RunLengthByteReader input;
private int current;
private byte currentIdx = 8;

public BitFieldReader(InStream input, IOUtils ioUtilsInstance) {
this.input = new RunLengthByteReader(input, ioUtilsInstance);
public BitFieldReader(InStream input) {
this.input = new RunLengthByteReader(input);
}

private void readByte() throws IOException {
Expand All @@ -52,7 +52,7 @@ public int next() throws IOException {
}

public void nextVector(LongColumnVector previous,
TreeReaderFactory.FilterContext filterContext,
FilterContext filterContext,
long previousLen) throws IOException {
previous.isRepeating = false;
int previousIdx = 0;
Expand Down

0 comments on commit d4ce960

Please sign in to comment.