Skip to content

Commit

Permalink
Re-implement ORCRecordReader
Browse files Browse the repository at this point in the history
- Support batch read
- Support most value types
- Support reading only the required fields
- Enhance tests to cover most value types
  • Loading branch information
Jackie-Jiang committed Apr 17, 2020
1 parent e312a93 commit 5a80fad
Show file tree
Hide file tree
Showing 11 changed files with 501 additions and 449 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.commons.lang.StringUtils;


// TODO: Use pinot-spi StringUtils instead
public class StringUtil {
private static final char NULL_CHARACTER = '\0';
private static final String charSet = "UTF-8";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,18 @@
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.StringUtils;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.AbstractRecordExtractorTest;
import org.apache.pinot.spi.data.readers.AbstractRecordReaderTest;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.data.readers.RecordReader;
import org.testng.Assert;


/**
* Tests the {@link CSVRecordExtractor} using a schema containing Groovy transform functions
*/
Expand Down Expand Up @@ -86,7 +84,7 @@ protected void checkValue(Map<String, Object> inputRecord, GenericRow genericRow
Object expectedValue = entry.getValue();
Object actualValue = genericRow.getValue(columnName);
if (expectedValue instanceof Collection) {
List expectedArray = (ArrayList) expectedValue;
List expectedArray = (List) expectedValue;
if (expectedArray.size() == 1) {
// In CSV, a single-element array cannot be distinguished from a plain single value
Assert.assertEquals(actualValue, String.valueOf(expectedArray.get(0)));
Expand Down
28 changes: 6 additions & 22 deletions pinot-plugins/pinot-input-format/pinot-orc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,30 +40,14 @@
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-mapreduce</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
</exclusion>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
</exclusion>
</exclusions>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
</dependency>
</dependencies>
</project>

This file was deleted.

This file was deleted.

0 comments on commit 5a80fad

Please sign in to comment.