From 0926be039c107f2445589e84d6920bd57a53cc1d Mon Sep 17 00:00:00 2001 From: Jacob Tolar Date: Wed, 7 Nov 2018 09:53:51 -0600 Subject: [PATCH] [AVRO-2245] Improve java tests for compression codecs --- .../org/apache/avro/file/TestAllCodecs.java | 96 +++++++++++++++++++ .../org/apache/avro/file/TestBZip2Codec.java | 66 ------------- .../apache/avro/file/TestZstandardCodec.java | 62 ------------ 3 files changed, 96 insertions(+), 128 deletions(-) create mode 100644 lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java delete mode 100644 lang/java/avro/src/test/java/org/apache/avro/file/TestBZip2Codec.java delete mode 100644 lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java new file mode 100644 index 00000000000..0e531b7a598 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.file; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collection; + +import static org.junit.Assert.assertTrue; + +@RunWith(Parameterized.class) +public class TestAllCodecs { + + @Parameterized.Parameters(name = "{index}: codec={0}") + public static Collection<Object[]> data() { + return Arrays.asList(new Object[][] { + { "bzip2", BZip2Codec.class }, + { "zstandard", ZstandardCodec.class }, + { "null", NullCodec.class }, + { "xz", XZCodec.class }, + { "snappy", SnappyCodec.class }, + { "deflate", DeflateCodec.class }, + }); + } + + @Parameterized.Parameter(0) + public String codec; + + @Parameterized.Parameter(1) + public Class<?> codecClass; + + + @Test + public void testCodec() throws IOException { + int inputSize = 500_000; + + byte[] input = generateTestData(inputSize); + + Codec codecInstance = CodecFactory.fromString(codec).createInstance(); + assertTrue(codecClass.isInstance(codecInstance)); + Assert.assertEquals(codec, codecInstance.getName()); + + ByteBuffer inputByteBuffer = ByteBuffer.wrap(input); + ByteBuffer compressedBuffer = codecInstance.compress(inputByteBuffer); + + int compressedSize = compressedBuffer.remaining(); + + // Make sure something returned + assertTrue(compressedSize > 0); + + // While the compressed size could in many real cases + // *increase* compared to the input size, our input data + // is extremely easy to compress and all Avro's compression algorithms + // should have a compression ratio greater than 1 (except 'null'). + assertTrue(compressedSize < inputSize || codec.equals("null")); + + // Decompress the data + ByteBuffer decompressedBuffer = codecInstance.decompress(compressedBuffer); + + // Validate that the input and output are equal (ByteBuffer.equals compares only the remaining bytes, hence the rewind). 
+ inputByteBuffer.rewind(); + Assert.assertEquals(inputByteBuffer, decompressedBuffer); + } + + // Generate test data that compresses easily: the byte values 65..74 ('A'..'J') repeated cyclically + public static byte[] generateTestData(int inputSize) { + byte[] arr = new byte[inputSize]; + for (int i = 0; i < arr.length; i++) { + arr[i] = (byte) (65 + i % 10); + } + + return arr; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestBZip2Codec.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestBZip2Codec.java deleted file mode 100644 index dfa95afa113..00000000000 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestBZip2Codec.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.avro.file; - -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.nio.ByteBuffer; - -import org.junit.Test; - -public class TestBZip2Codec { - - @Test - public void testBZip2CompressionAndDecompression() throws IOException { - Codec codec = CodecFactory.fromString("bzip2").createInstance(); - assertTrue(codec instanceof BZip2Codec); - assertTrue(codec.getName().equals("bzip2")); - - //This is 3 times the byte buffer on the BZip2 decompress plus some extra - final int inputByteSize = BZip2Codec.DEFAULT_BUFFER_SIZE * 3 + 42; - - byte[] inputByteArray = new byte[inputByteSize]; - - //Generate something that will compress well - for (int i = 0; i < inputByteSize; i++) { - inputByteArray[i] = (byte)(65 + i % 10); - } - - ByteBuffer inputByteBuffer = ByteBuffer.allocate(inputByteSize * 2); - inputByteBuffer.put(inputByteArray); - - ByteBuffer compressedBuffer = codec.compress(inputByteBuffer); - - //Make sure something returned - assertTrue(compressedBuffer.array().length > 0); - //Make sure the compressed output is smaller then the original - assertTrue(compressedBuffer.array().length < inputByteArray.length); - - ByteBuffer decompressedBuffer = codec.decompress(compressedBuffer); - - //The original array should be the same length as the decompressed array - assertTrue(decompressedBuffer.array().length == inputByteArray.length); - - //Every byte in the outputByteArray should equal every byte in the input array - byte[] outputByteArray = decompressedBuffer.array(); - for (int i = 0; i < inputByteSize; i++) { - inputByteArray[i] = outputByteArray[i]; - } - } -} diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java deleted file mode 100644 index 62726f748a6..00000000000 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.avro.file; - -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.nio.ByteBuffer; - -import org.junit.Test; - -public class TestZstandardCodec { - - @Test - public void testZstandardCompressionAndDecompression() throws IOException { - Codec codec = CodecFactory.zstandardCodec().createInstance(); - assertTrue(codec instanceof ZstandardCodec); - assertTrue(codec.getName().equals("zstandard")); - - // Generate some random input. 
- final int inputByteSize = 64 * 1024; - byte[] inputByteArray = new byte[inputByteSize]; - for (int i = 0; i < inputByteSize; i++) { - inputByteArray[i] = (byte)(65 + i % 10); - } - - ByteBuffer inputByteBuffer = ByteBuffer.allocate(inputByteSize * 2); - inputByteBuffer.put(inputByteArray); - - ByteBuffer compressedBuffer = codec.compress(inputByteBuffer); - //Make sure something returned - assertTrue(compressedBuffer.array().length > 0); - //Make sure the compressed output is smaller then the original - assertTrue(compressedBuffer.array().length < inputByteArray.length); - - ByteBuffer decompressedBuffer = codec.decompress(compressedBuffer); - - //The original array should be the same length as the decompressed array - assertTrue(decompressedBuffer.array().length == inputByteArray.length); - - //Every byte in the outputByteArray should equal every byte in the input array - byte[] outputByteArray = decompressedBuffer.array(); - for (int i = 0; i < inputByteSize; i++) { - inputByteArray[i] = outputByteArray[i]; - } - } -}