From f0c8e8b2bdf68d324e3fcc1d30e255f810707dec Mon Sep 17 00:00:00 2001 From: Mike Hurley Date: Fri, 3 Jun 2016 15:39:33 -0500 Subject: [PATCH 1/4] AVRO-1858: added --head option to the tojson operation --- .../apache/avro/tool/DataFileReadTool.java | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java index fcc89caf0b8..a9690815932 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java @@ -20,6 +20,7 @@ import java.io.BufferedInputStream; import java.io.InputStream; import java.io.PrintStream; +import java.util.ArrayList; import java.util.List; import joptsimple.OptionParser; @@ -36,6 +37,7 @@ /** Reads a data file and dumps to JSON */ public class DataFileReadTool implements Tool { + private static final int DEFAULT_HEAD_COUNT = 10; @Override public String getName() { @@ -53,10 +55,14 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, OptionParser optionParser = new OptionParser(); OptionSpec prettyOption = optionParser .accepts("pretty", "Turns on pretty printing."); + String headDesc = String.format("Converts the first X records (default is %d).", DEFAULT_HEAD_COUNT); + OptionSpec headOption = optionParser.accepts("head", headDesc).withOptionalArg(); OptionSet optionSet = optionParser.parse(args.toArray(new String[0])); Boolean pretty = optionSet.has(prettyOption); - List nargs = (List)optionSet.nonOptionArguments(); + List nargs = new ArrayList((List)optionSet.nonOptionArguments()); + + int headCount = getHeadCount(optionSet, headOption, nargs); if (nargs.size() != 1) { printHelp(err); @@ -73,8 +79,12 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, Schema schema = streamReader.getSchema(); DatumWriter writer = new GenericDatumWriter(schema); JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty); - for (Object datum : streamReader) + int recordCount = 0; + for (Object datum : streamReader) { writer.write(datum, encoder); + recordCount++; + if(recordCount == headCount) break; + } encoder.flush(); out.println(); out.flush(); @@ -84,8 +94,27 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, return 0; } + private static int getHeadCount(OptionSet optionSet, OptionSpec headOption, List nargs) { + int headCount = Integer.MAX_VALUE; + if(optionSet.has(headOption)) { + headCount = DEFAULT_HEAD_COUNT; + List headValues = optionSet.valuesOf(headOption); + if(headValues.size() > 0) { + // if the value parses to int, assume it's meant to go with --head + // otherwise assume it was an optionSet.nonOptionArgument and add back to the list + // TODO: support input filenames whose whole path+name is int parsable? + try { + headCount = Integer.parseInt(headValues.get(0)); + } catch(NumberFormatException ex) { + nargs.addAll(headValues); + } + } + } + return headCount; + } + private void printHelp(PrintStream ps) { - ps.println("tojson --pretty input-file"); + ps.println("tojson [--pretty] [--head[=X]] input-file"); ps.println(); ps.println(getShortDescription()); ps.println("A dash ('-') can be given as an input file to use stdin"); From fa7dc3aaa260ab6b89095f890ab081aa31beae43 Mon Sep 17 00:00:00 2001 From: Mike Hurley Date: Mon, 6 Jun 2016 09:46:53 -0500 Subject: [PATCH 2/4] AVRO-1858: added unit tests for tojson --head option --- .../apache/avro/tool/TestDataFileTools.java | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java index 0270b713fcf..e538e453d1d 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java @@ -47,7 +47,7 @@ @SuppressWarnings("deprecation") public class TestDataFileTools { - static final int COUNT = 10; + static final int COUNT = 15; static File sampleFile; static String jsonData; static Schema schema; @@ -117,6 +117,27 @@ public void testReadToJsonPretty() throws Exception { run(new DataFileReadTool(), "--pretty", sampleFile.getPath())); } + @Test + public void testReadHeadDefaultCount() throws Exception { + String expectedJson = jsonData.substring(0, 20); // first 10 numbers + assertEquals(expectedJson, + run(new DataFileReadTool(), "--head", sampleFile.getPath())); + } + + @Test + public void testReadHeadEquals3Count() throws Exception { + String expectedJson = jsonData.substring(0, 6); // first 3 numbers + assertEquals(expectedJson, + run(new DataFileReadTool(), "--head=3", sampleFile.getPath())); + } + + @Test + public void testReadHeadSpace5Count() throws Exception { + String expectedJson = jsonData.substring(0, 10); // first 5 numbers + assertEquals(expectedJson, + run(new DataFileReadTool(), "--head", "5", sampleFile.getPath())); + } + @Test public void testGetMeta() throws Exception { String output = run(new DataFileGetMetaTool(), sampleFile.getPath()); From 7d07f54758a9230795ced66cef968ef696ee3d8c Mon Sep 17 00:00:00 2001 From: Mike Hurley Date: Mon, 6 Jun 2016 17:11:41 -0500 Subject: [PATCH 3/4] AVRO-1858: head input and record counters are now longs --- .../java/org/apache/avro/tool/DataFileReadTool.java | 12 ++++++------ .../java/org/apache/avro/tool/TestDataFileTools.java | 6 ++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java index a9690815932..d7c9f731c15 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java @@ -37,7 +37,7 @@ /** Reads a data file and dumps to JSON */ public class DataFileReadTool implements Tool { - private static final int DEFAULT_HEAD_COUNT = 10; + private static final long DEFAULT_HEAD_COUNT = 10; @Override public String getName() { @@ -62,7 +62,7 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, Boolean pretty = optionSet.has(prettyOption); List nargs = new ArrayList((List)optionSet.nonOptionArguments()); - int headCount = getHeadCount(optionSet, headOption, nargs); + long headCount = getHeadCount(optionSet, headOption, nargs); if (nargs.size() != 1) { printHelp(err); @@ -79,7 +79,7 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, Schema schema = streamReader.getSchema(); DatumWriter writer = new GenericDatumWriter(schema); JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty); - int recordCount = 0; + long recordCount = 0; for (Object datum : streamReader) { writer.write(datum, encoder); recordCount++; @@ -94,8 +94,8 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, return 0; } - private static int getHeadCount(OptionSet optionSet, OptionSpec headOption, List nargs) { - int headCount = Integer.MAX_VALUE; + private static long getHeadCount(OptionSet optionSet, OptionSpec headOption, List nargs) { + long headCount = Long.MAX_VALUE; if(optionSet.has(headOption)) { headCount = DEFAULT_HEAD_COUNT; List headValues = optionSet.valuesOf(headOption); @@ -104,7 +104,7 @@ private static int getHeadCount(OptionSet optionSet, OptionSpec headOpti // otherwise assume it was an optionSet.nonOptionArgument and add back to the list // TODO: support input filenames whose whole path+name is int parsable? try { - headCount = Integer.parseInt(headValues.get(0)); + headCount = Long.parseLong(headValues.get(0)); } catch(NumberFormatException ex) { nargs.addAll(headValues); } diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java index e538e453d1d..7efa5afa9fd 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java @@ -138,6 +138,12 @@ public void testReadHeadSpace5Count() throws Exception { run(new DataFileReadTool(), "--head", "5", sampleFile.getPath())); } + @Test + public void testReadHeadLongCount() throws Exception { + assertEquals(jsonData, + run(new DataFileReadTool(), "--head=3000000000", sampleFile.getPath())); + } + @Test public void testGetMeta() throws Exception { String output = run(new DataFileGetMetaTool(), sampleFile.getPath()); From 8b379080fddb60394c40bf8e1b8e6ad721ace325 Mon Sep 17 00:00:00 2001 From: Mike Hurley Date: Tue, 5 Jul 2016 10:18:55 -0500 Subject: [PATCH 4/4] AVRO-1858: added tojson --head tests for zero and negative values. Negative head count is now an error. --- .../java/org/apache/avro/tool/DataFileReadTool.java | 8 ++++---- .../java/org/apache/avro/tool/TestDataFileTools.java | 11 +++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java index d7c9f731c15..79625e3cdce 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java @@ -27,6 +27,7 @@ import joptsimple.OptionSet; import joptsimple.OptionSpec; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.file.DataFileStream; import org.apache.avro.io.DatumWriter; @@ -79,11 +80,9 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, Schema schema = streamReader.getSchema(); DatumWriter writer = new GenericDatumWriter(schema); JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty); - long recordCount = 0; - for (Object datum : streamReader) { + for(long recordCount = 0; streamReader.hasNext() && recordCount < headCount; recordCount++) { + Object datum = streamReader.next(); writer.write(datum, encoder); - recordCount++; - if(recordCount == headCount) break; } encoder.flush(); out.println(); @@ -105,6 +104,7 @@ private static long getHeadCount(OptionSet optionSet, OptionSpec headOpt // TODO: support input filenames whose whole path+name is int parsable? try { headCount = Long.parseLong(headValues.get(0)); + if(headCount < 0) throw new AvroRuntimeException("--head count must not be negative"); } catch(NumberFormatException ex) { nargs.addAll(headValues); } diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java index 7efa5afa9fd..473ac2d4d7f 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java @@ -35,6 +35,7 @@ import java.util.Collections; import java.util.List; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroTestUtil; import org.apache.avro.Schema; import org.apache.avro.Schema.Type; @@ -144,6 +145,16 @@ public void testReadHeadLongCount() throws Exception { run(new DataFileReadTool(), "--head=3000000000", sampleFile.getPath())); } + @Test + public void testReadHeadEqualsZeroCount() throws Exception { + assertEquals("\n", run(new DataFileReadTool(), "--head=0", sampleFile.getPath())); + } + + @Test(expected = AvroRuntimeException.class) + public void testReadHeadNegativeCount() throws Exception { + assertEquals("\n", run(new DataFileReadTool(), "--head=-5", sampleFile.getPath())); + } + @Test public void testGetMeta() throws Exception { String output = run(new DataFileGetMetaTool(), sampleFile.getPath());