From c49a9ae7e44a9caff61deab62487d92ac5c401cb Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 25 Apr 2026 11:08:12 -0700 Subject: [PATCH 1/3] HDDS-15115. CrcUtil/CrcComposer should not throw IOException for non-IO. --- .../ozone/client/checksum/CrcComposer.java | 24 +- .../hadoop/ozone/client/checksum/CrcUtil.java | 46 +--- .../client/checksum/TestCrcComposer.java | 235 ++++++++++++++++++ .../ozone/client/checksum/TestCrcUtil.java | 84 +++++++ 4 files changed, 340 insertions(+), 49 deletions(-) create mode 100644 hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java create mode 100644 hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java index bda93e3e3a47..f4ece694aba6 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java @@ -45,7 +45,7 @@ public class CrcComposer { private int curCompositeCrc = 0; private long curPositionInStripe = 0; - private ByteArrayOutputStream digestOut = new ByteArrayOutputStream(); + private final ByteArrayOutputStream digestOut = new ByteArrayOutputStream(); /** * Returns a CrcComposer which will collapse all ingested CRCs into a single @@ -53,12 +53,10 @@ public class CrcComposer { * * @param type type. * @param bytesPerCrcHint bytesPerCrcHint. - * @throws IOException raised on errors performing I/O. * @return a CrcComposer which will collapse all ingested CRCs into a single value. */ public static CrcComposer newCrcComposer( - DataChecksum.Type type, long bytesPerCrcHint) - throws IOException { + DataChecksum.Type type, long bytesPerCrcHint) { return newStripedCrcComposer(type, bytesPerCrcHint, Long.MAX_VALUE); } @@ -77,12 +75,10 @@ public static CrcComposer newCrcComposer( * @param stripeLength stripeLength. * @return a CrcComposer which will collapse CRCs for every combined. * underlying data size which aligns with the specified stripe boundary. - * @throws IOException raised on errors performing I/O. */ public static CrcComposer newStripedCrcComposer( - DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) - throws IOException { - int polynomial = org.apache.hadoop.ozone.client.checksum.CrcUtil.getCrcPolynomialForType(type); + DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) { + int polynomial = CrcUtil.getCrcPolynomialForType(type); return new CrcComposer( polynomial, org.apache.hadoop.ozone.client.checksum.CrcUtil.getMonomial(bytesPerCrcHint, polynomial), @@ -117,13 +113,10 @@ public static CrcComposer newStripedCrcComposer( * @param offset offset. * @param length must be a multiple of the expected byte-size of a CRC. * @param bytesPerCrc bytesPerCrc. - * @throws IOException raised on errors performing I/O. */ - public void update( - byte[] crcBuffer, int offset, int length, long bytesPerCrc) - throws IOException { + public void update(byte[] crcBuffer, int offset, int length, long bytesPerCrc) { if (length % CRC_SIZE_BYTES != 0) { - throw new IOException(String.format( + throw new IllegalArgumentException(String.format( "Trying to update CRC from byte array with length '%d' at offset " + "'%d' which is not a multiple of %d!", length, offset, CRC_SIZE_BYTES)); @@ -161,9 +154,8 @@ public void update( * * @param crcB crcB. * @param bytesPerCrc bytesPerCrc. - * @throws IOException raised on errors performing I/O. */ - public void update(int crcB, long bytesPerCrc) throws IOException { + public void update(int crcB, long bytesPerCrc) { if (curCompositeCrc == 0) { curCompositeCrc = crcB; } else if (bytesPerCrc == bytesPerCrcHint) { @@ -177,7 +169,7 @@ public void update(int crcB, long bytesPerCrc) throws IOException { curPositionInStripe += bytesPerCrc; if (curPositionInStripe > stripeLength) { - throw new IOException(String.format( + throw new IllegalStateException(String.format( "Current position in stripe '%d' after advancing by bytesPerCrc '%d' " + "exceeds stripeLength '%d' without stripe alignment.", curPositionInStripe, bytesPerCrc, stripeLength)); diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcUtil.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcUtil.java index ae3533b1a9c7..96eaeabeffff 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcUtil.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcUtil.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.client.checksum; -import java.io.IOException; import java.util.Arrays; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.annotation.InterfaceStability; @@ -45,17 +44,15 @@ private CrcUtil() { * @param type type. * @return the int representation of the polynomial associated with the * CRC {@code type}, suitable for use with further CRC arithmetic. - * @throws IOException if there is no CRC polynomial applicable - * to the given {@code type}. */ - public static int getCrcPolynomialForType(DataChecksum.Type type) throws IOException { + public static int getCrcPolynomialForType(DataChecksum.Type type) { switch (type) { case CRC32: return GZIP_POLYNOMIAL; case CRC32C: return CASTAGNOLI_POLYNOMIAL; default: - throw new IOException( + throw new IllegalArgumentException( "No CRC polynomial could be associated with type: " + type); } } @@ -132,15 +129,7 @@ public static int compose(int crcA, int crcB, long lengthB, int mod) { */ public static byte[] intToBytes(int value) { byte[] buf = new byte[4]; - try { - writeInt(buf, 0, value); - } catch (IOException ioe) { - // Since this should only be able to occur from code bugs within this - // class rather than user input, we throw as a RuntimeException - // rather than requiring this method to declare throwing IOException - // for something the caller can't control. - throw new RuntimeException(ioe); - } + writeInt(buf, 0, value); return buf; } @@ -152,16 +141,14 @@ public static byte[] intToBytes(int value) { * @param buf buf size. * @param offset offset. * @param value value. - * @throws IOException raised on errors performing I/O. */ - public static void writeInt(byte[] buf, int offset, int value) - throws IOException { + public static void writeInt(byte[] buf, int offset, int value) { if (offset + 4 > buf.length) { - throw new IOException(String.format( + throw new ArrayIndexOutOfBoundsException(String.format( "writeInt out of bounds: buf.length=%d, offset=%d", buf.length, offset)); } - buf[offset + 0] = (byte)((value >>> 24) & 0xff); + buf[offset ] = (byte)((value >>> 24) & 0xff); buf[offset + 1] = (byte)((value >>> 16) & 0xff); buf[offset + 2] = (byte)((value >>> 8) & 0xff); buf[offset + 3] = (byte)(value & 0xff); @@ -174,20 +161,17 @@ public static void writeInt(byte[] buf, int offset, int value) * @param offset offset. * @param buf buf. * @return int. - * @throws IOException raised on errors performing I/O. */ - public static int readInt(byte[] buf, int offset) - throws IOException { + public static int readInt(byte[] buf, int offset) { if (offset + 4 > buf.length) { - throw new IOException(String.format( + throw new ArrayIndexOutOfBoundsException(String.format( "readInt out of bounds: buf.length=%d, offset=%d", buf.length, offset)); } - int value = ((buf[offset + 0] & 0xff) << 24) | + return ((buf[offset ] & 0xff) << 24) | ((buf[offset + 1] & 0xff) << 16) | ((buf[offset + 2] & 0xff) << 8) | ((buf[offset + 3] & 0xff)); - return value; } /** @@ -196,13 +180,11 @@ public static int readInt(byte[] buf, int offset) * formatted value. * * @param bytes bytes. - * @throws IOException raised on errors performing I/O. * @return a list of hex formatted values. */ - public static String toSingleCrcString(final byte[] bytes) - throws IOException { + public static String toSingleCrcString(final byte[] bytes) { if (bytes.length != 4) { - throw new IOException((String.format( + throw new IllegalArgumentException((String.format( "Unexpected byte[] length '%d' for single CRC. Contents: %s", bytes.length, Arrays.toString(bytes)))); } @@ -215,13 +197,11 @@ public static String toSingleCrcString(final byte[] bytes) * hex formatted values. * * @param bytes bytes. - * @throws IOException raised on errors performing I/O. * @return a list of hex formatted values. */ - public static String toMultiCrcString(final byte[] bytes) - throws IOException { + public static String toMultiCrcString(final byte[] bytes) { if (bytes.length % 4 != 0) { - throw new IOException((String.format( + throw new IllegalArgumentException((String.format( "Unexpected byte[] length '%d' not divisible by 4. Contents: %s", bytes.length, Arrays.toString(bytes)))); } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java new file mode 100644 index 000000000000..9c846c3c9574 --- /dev/null +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client.checksum; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.Objects; +import java.util.Random; +import org.apache.hadoop.util.DataChecksum; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** Test {@link CrcComposer}. */ +@Timeout(10) +public class TestCrcComposer { + + private final Random rand = new Random(1234); + + private final DataChecksum.Type type = DataChecksum.Type.CRC32C; + private final DataChecksum checksum = Objects.requireNonNull( + DataChecksum.newDataChecksum(type, Integer.MAX_VALUE)); + private final int dataSize = 75; + private final byte[] data = new byte[dataSize]; + private final int chunkSize = 10; + private final int cellSize = 20; + + private int fullCrc; + private int[] crcsByChunk; + + private byte[] crcBytesByChunk; + private byte[] crcBytesByCell; + + @BeforeEach + public void setup() { + rand.nextBytes(data); + fullCrc = getRangeChecksum(data, 0, dataSize); + + // 7 chunks of size chunkSize, 1 chunk of size (dataSize % chunkSize). + crcsByChunk = new int[8]; + for (int i = 0; i < 7; ++i) { + crcsByChunk[i] = getRangeChecksum(data, i * chunkSize, chunkSize); + } + crcsByChunk[7] = getRangeChecksum( + data, (crcsByChunk.length - 1) * chunkSize, dataSize % chunkSize); + + // 3 cells of size cellSize, 1 cell of size (dataSize % cellSize). + int[] crcsByCell = new int[4]; + for (int i = 0; i < 3; ++i) { + crcsByCell[i] = getRangeChecksum(data, i * cellSize, cellSize); + } + crcsByCell[3] = getRangeChecksum( + data, (crcsByCell.length - 1) * cellSize, dataSize % cellSize); + + crcBytesByChunk = intArrayToByteArray(crcsByChunk); + crcBytesByCell = intArrayToByteArray(crcsByCell); + } + + private int getRangeChecksum(byte[] buf, int offset, int length) { + checksum.reset(); + checksum.update(buf, offset, length); + return (int) checksum.getValue(); + } + + private byte[] intArrayToByteArray(int[] values) { + byte[] bytes = new byte[values.length * 4]; + for (int i = 0; i < values.length; ++i) { + CrcUtil.writeInt(bytes, i * 4, values[i]); + } + return bytes; + } + + @Test + public void testUnstripedIncorrectChunkSize() { + CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + + // If we incorrectly specify that all CRCs ingested correspond to chunkSize + // when the last CRC in the array actually corresponds to + // dataSize % chunkSize then we expect the resulting CRC to not be equal to + // the fullCrc. + digester.update(crcBytesByChunk, 0, crcBytesByChunk.length, chunkSize); + byte[] digest = digester.digest(); + assertEquals(4, digest.length); + int calculatedCrc = CrcUtil.readInt(digest, 0); + assertNotEquals(fullCrc, calculatedCrc); + } + + @Test + public void testUnstripedByteArray() { + CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize); + digester.update(crcBytesByChunk, crcBytesByChunk.length - 4, 4, dataSize % chunkSize); + + byte[] digest = digester.digest(); + assertEquals(4, digest.length); + int calculatedCrc = CrcUtil.readInt(digest, 0); + assertEquals(fullCrc, calculatedCrc); + } + + @Test + public void testUnstripedDataInputStream() throws IOException { + CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + DataInputStream input = new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); + digester.update(input, crcsByChunk.length - 1, chunkSize); + digester.update(input, 1, dataSize % chunkSize); + + byte[] digest = digester.digest(); + assertEquals(4, digest.length); + int calculatedCrc = CrcUtil.readInt(digest, 0); + assertEquals(fullCrc, calculatedCrc); + } + + @Test + public void testUnstripedSingleCrcs() { + CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + for (int i = 0; i < crcsByChunk.length - 1; ++i) { + digester.update(crcsByChunk[i], chunkSize); + } + digester.update(crcsByChunk[crcsByChunk.length - 1], dataSize % chunkSize); + + byte[] digest = digester.digest(); + assertEquals(4, digest.length); + int calculatedCrc = CrcUtil.readInt(digest, 0); + assertEquals(fullCrc, calculatedCrc); + } + + @Test + public void testStripedByteArray() { + CrcComposer digester = + CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize); + digester.update( + crcBytesByChunk, crcBytesByChunk.length - 4, 4, dataSize % chunkSize); + + byte[] digest = digester.digest(); + assertArrayEquals(crcBytesByCell, digest); + } + + @Test + public void testStripedDataInputStream() throws IOException { + CrcComposer digester = + CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + DataInputStream input = + new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); + digester.update(input, crcsByChunk.length - 1, chunkSize); + digester.update(input, 1, dataSize % chunkSize); + + byte[] digest = digester.digest(); + assertArrayEquals(crcBytesByCell, digest); + } + + @Test + public void testStripedSingleCrcs() { + CrcComposer digester = + CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + for (int i = 0; i < crcsByChunk.length - 1; ++i) { + digester.update(crcsByChunk[i], chunkSize); + } + digester.update(crcsByChunk[crcsByChunk.length - 1], dataSize % chunkSize); + + byte[] digest = digester.digest(); + assertArrayEquals(crcBytesByCell, digest); + } + + @Test + public void testMultiStageMixed() throws IOException { + CrcComposer digester = + CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + + // First combine chunks into cells. + DataInputStream input = + new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); + digester.update(input, crcsByChunk.length - 1, chunkSize); + digester.update(input, 1, dataSize % chunkSize); + byte[] digest = digester.digest(); + + // Second, individually combine cells into full crc. + digester = + CrcComposer.newCrcComposer(type, cellSize); + for (int i = 0; i < digest.length - 4; i += 4) { + int cellCrc = CrcUtil.readInt(digest, i); + digester.update(cellCrc, cellSize); + } + digester.update(digest, digest.length - 4, 4, dataSize % cellSize); + digest = digester.digest(); + assertEquals(4, digest.length); + int calculatedCrc = CrcUtil.readInt(digest, 0); + assertEquals(fullCrc, calculatedCrc); + } + + @Test + public void testUpdateMismatchesStripe() { + CrcComposer digester = + CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + + digester.update(crcsByChunk[0], chunkSize); + + // Going from chunkSize to chunkSize + cellSize will cross a cellSize + // boundary in a single CRC, which is not allowed, since we'd lack a + // CRC corresponding to the actual cellSize boundary. + assertThrows(IllegalStateException.class, + () -> digester.update(crcsByChunk[1], cellSize), + "stripe"); + } + + @Test + public void testUpdateByteArrayLengthUnalignedWithCrcSize() { + CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + + assertThrows(IllegalArgumentException.class, + () -> digester.update(crcBytesByChunk, 0, 6, chunkSize), + "length"); + } +} diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java new file mode 100644 index 000000000000..f6a47cdb95d7 --- /dev/null +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client.checksum; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** Test {@link CrcUtil}. */ +@Timeout(10) +public class TestCrcUtil { + @Test + public void testIntSerialization() { + byte[] bytes = CrcUtil.intToBytes(0xCAFEBEEF); + assertEquals(0xCAFEBEEF, CrcUtil.readInt(bytes, 0)); + + bytes = new byte[8]; + CrcUtil.writeInt(bytes, 0, 0xCAFEBEEF); + assertEquals(0xCAFEBEEF, CrcUtil.readInt(bytes, 0)); + CrcUtil.writeInt(bytes, 4, 0xABCDABCD); + assertEquals(0xABCDABCD, CrcUtil.readInt(bytes, 4)); + + // Assert big-endian format for general Java consistency. + assertEquals(0xBEEFABCD, CrcUtil.readInt(bytes, 2)); + } + + @Test + public void testToSingleCrcStringBadLength() { + assertThrows(IllegalArgumentException.class, + () -> CrcUtil.toSingleCrcString(new byte[8]), + "length"); + } + + @Test + public void testToSingleCrcString() { + byte[] buf = CrcUtil.intToBytes(0xcafebeef); + assertEquals("0xcafebeef", CrcUtil.toSingleCrcString(buf)); + } + + @Test + public void testToMultiCrcStringBadLength() { + assertThrows(IllegalArgumentException.class, + () -> CrcUtil.toMultiCrcString(new byte[6]), + "length"); + } + + @Test + public void testToMultiCrcStringMultipleElements() { + byte[] buf = new byte[12]; + CrcUtil.writeInt(buf, 0, 0xcafebeef); + CrcUtil.writeInt(buf, 4, 0xababcccc); + CrcUtil.writeInt(buf, 8, 0xddddefef); + assertEquals("[0xcafebeef, 0xababcccc, 0xddddefef]", CrcUtil.toMultiCrcString(buf)); + } + + @Test + public void testToMultiCrcStringSingleElement() { + byte[] buf = new byte[4]; + CrcUtil.writeInt(buf, 0, 0xcafebeef); + assertEquals("[0xcafebeef]", CrcUtil.toMultiCrcString(buf)); + } + + @Test + public void testToMultiCrcStringNoElements() { + assertEquals("[]", CrcUtil.toMultiCrcString(new byte[0])); + } +} From d734e0c7f65cabd1e1df2e8684a775b77bbf34f3 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 25 Apr 2026 11:45:39 -0700 Subject: [PATCH 2/3] Fix findbugs --- .../client/checksum/TestCrcComposer.java | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java index 9c846c3c9574..ea241a750af5 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java @@ -35,16 +35,16 @@ /** Test {@link CrcComposer}. */ @Timeout(10) public class TestCrcComposer { + private static final int DATA_SIZE = 75; + private static final int CELL_SIZE = 20; + private static final int CHUNK_SIZE = 10; private final Random rand = new Random(1234); private final DataChecksum.Type type = DataChecksum.Type.CRC32C; private final DataChecksum checksum = Objects.requireNonNull( DataChecksum.newDataChecksum(type, Integer.MAX_VALUE)); - private final int dataSize = 75; - private final byte[] data = new byte[dataSize]; - private final int chunkSize = 10; - private final int cellSize = 20; + private final byte[] data = new byte[DATA_SIZE]; private int fullCrc; private int[] crcsByChunk; @@ -55,23 +55,23 @@ public class TestCrcComposer { @BeforeEach public void setup() { rand.nextBytes(data); - fullCrc = getRangeChecksum(data, 0, dataSize); + fullCrc = getRangeChecksum(data, 0, DATA_SIZE); // 7 chunks of size chunkSize, 1 chunk of size (dataSize % chunkSize). crcsByChunk = new int[8]; for (int i = 0; i < 7; ++i) { - crcsByChunk[i] = getRangeChecksum(data, i * chunkSize, chunkSize); + crcsByChunk[i] = getRangeChecksum(data, i * CHUNK_SIZE, CHUNK_SIZE); } crcsByChunk[7] = getRangeChecksum( - data, (crcsByChunk.length - 1) * chunkSize, dataSize % chunkSize); + data, (crcsByChunk.length - 1) * CHUNK_SIZE, DATA_SIZE % CHUNK_SIZE); // 3 cells of size cellSize, 1 cell of size (dataSize % cellSize). int[] crcsByCell = new int[4]; for (int i = 0; i < 3; ++i) { - crcsByCell[i] = getRangeChecksum(data, i * cellSize, cellSize); + crcsByCell[i] = getRangeChecksum(data, i * CELL_SIZE, CELL_SIZE); } crcsByCell[3] = getRangeChecksum( - data, (crcsByCell.length - 1) * cellSize, dataSize % cellSize); + data, (crcsByCell.length - 1) * CELL_SIZE, DATA_SIZE % CELL_SIZE); crcBytesByChunk = intArrayToByteArray(crcsByChunk); crcBytesByCell = intArrayToByteArray(crcsByCell); @@ -93,13 +93,13 @@ private byte[] intArrayToByteArray(int[] values) { @Test public void testUnstripedIncorrectChunkSize() { - CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + CrcComposer digester = CrcComposer.newCrcComposer(type, CHUNK_SIZE); // If we incorrectly specify that all CRCs ingested correspond to chunkSize // when the last CRC in the array actually corresponds to // dataSize % chunkSize then we expect the resulting CRC to not be equal to // the fullCrc. - digester.update(crcBytesByChunk, 0, crcBytesByChunk.length, chunkSize); + digester.update(crcBytesByChunk, 0, crcBytesByChunk.length, CHUNK_SIZE); byte[] digest = digester.digest(); assertEquals(4, digest.length); int calculatedCrc = CrcUtil.readInt(digest, 0); @@ -108,9 +108,9 @@ public void testUnstripedIncorrectChunkSize() { @Test public void testUnstripedByteArray() { - CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); - digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize); - digester.update(crcBytesByChunk, crcBytesByChunk.length - 4, 4, dataSize % chunkSize); + CrcComposer digester = CrcComposer.newCrcComposer(type, CHUNK_SIZE); + digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, CHUNK_SIZE); + digester.update(crcBytesByChunk, crcBytesByChunk.length - 4, 4, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertEquals(4, digest.length); @@ -120,10 +120,10 @@ public void testUnstripedByteArray() { @Test public void testUnstripedDataInputStream() throws IOException { - CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + CrcComposer digester = CrcComposer.newCrcComposer(type, CHUNK_SIZE); DataInputStream input = new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); - digester.update(input, crcsByChunk.length - 1, chunkSize); - digester.update(input, 1, dataSize % chunkSize); + digester.update(input, crcsByChunk.length - 1, CHUNK_SIZE); + digester.update(input, 1, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertEquals(4, digest.length); @@ -133,11 +133,11 @@ public void testUnstripedDataInputStream() throws IOException { @Test public void testUnstripedSingleCrcs() { - CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + CrcComposer digester = CrcComposer.newCrcComposer(type, CHUNK_SIZE); for (int i = 0; i < crcsByChunk.length - 1; ++i) { - digester.update(crcsByChunk[i], chunkSize); + digester.update(crcsByChunk[i], CHUNK_SIZE); } - digester.update(crcsByChunk[crcsByChunk.length - 1], dataSize % chunkSize); + digester.update(crcsByChunk[crcsByChunk.length - 1], DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertEquals(4, digest.length); @@ -148,10 +148,10 @@ public void testUnstripedSingleCrcs() { @Test public void testStripedByteArray() { CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); - digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize); + CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); + digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, CHUNK_SIZE); digester.update( - crcBytesByChunk, crcBytesByChunk.length - 4, 4, dataSize % chunkSize); + crcBytesByChunk, crcBytesByChunk.length - 4, 4, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertArrayEquals(crcBytesByCell, digest); @@ -160,11 +160,11 @@ public void testStripedByteArray() { @Test public void testStripedDataInputStream() throws IOException { CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); DataInputStream input = new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); - digester.update(input, crcsByChunk.length - 1, chunkSize); - digester.update(input, 1, dataSize % chunkSize); + digester.update(input, crcsByChunk.length - 1, CHUNK_SIZE); + digester.update(input, 1, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertArrayEquals(crcBytesByCell, digest); @@ -173,11 +173,11 @@ public void testStripedDataInputStream() throws IOException { @Test public void testStripedSingleCrcs() { CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); for (int i = 0; i < crcsByChunk.length - 1; ++i) { - digester.update(crcsByChunk[i], chunkSize); + digester.update(crcsByChunk[i], CHUNK_SIZE); } - digester.update(crcsByChunk[crcsByChunk.length - 1], dataSize % chunkSize); + digester.update(crcsByChunk[crcsByChunk.length - 1], DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertArrayEquals(crcBytesByCell, digest); @@ -186,23 +186,23 @@ public void testStripedSingleCrcs() { @Test public void testMultiStageMixed() throws IOException { CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); // First combine chunks into cells. DataInputStream input = new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); - digester.update(input, crcsByChunk.length - 1, chunkSize); - digester.update(input, 1, dataSize % chunkSize); + digester.update(input, crcsByChunk.length - 1, CHUNK_SIZE); + digester.update(input, 1, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); // Second, individually combine cells into full crc. digester = - CrcComposer.newCrcComposer(type, cellSize); + CrcComposer.newCrcComposer(type, CELL_SIZE); for (int i = 0; i < digest.length - 4; i += 4) { int cellCrc = CrcUtil.readInt(digest, i); - digester.update(cellCrc, cellSize); + digester.update(cellCrc, CELL_SIZE); } - digester.update(digest, digest.length - 4, 4, dataSize % cellSize); + digester.update(digest, digest.length - 4, 4, DATA_SIZE % CELL_SIZE); digest = digester.digest(); assertEquals(4, digest.length); int calculatedCrc = CrcUtil.readInt(digest, 0); @@ -212,24 +212,24 @@ public void testMultiStageMixed() throws IOException { @Test public void testUpdateMismatchesStripe() { CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); + CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); - digester.update(crcsByChunk[0], chunkSize); + digester.update(crcsByChunk[0], CHUNK_SIZE); // Going from chunkSize to chunkSize + cellSize will cross a cellSize // boundary in a single CRC, which is not allowed, since we'd lack a // CRC corresponding to the actual cellSize boundary. assertThrows(IllegalStateException.class, - () -> digester.update(crcsByChunk[1], cellSize), + () -> digester.update(crcsByChunk[1], CELL_SIZE), "stripe"); } @Test public void testUpdateByteArrayLengthUnalignedWithCrcSize() { - CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); + CrcComposer digester = CrcComposer.newCrcComposer(type, CHUNK_SIZE); assertThrows(IllegalArgumentException.class, - () -> digester.update(crcBytesByChunk, 0, 6, chunkSize), + () -> digester.update(crcBytesByChunk, 0, 6, CHUNK_SIZE), "length"); } } From b322a8f8f866544a5fbb24bb4a3dbdf3fb54be79 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sun, 26 Apr 2026 09:50:53 -0700 Subject: [PATCH 3/3] Address review comments. --- .../ozone/client/checksum/CrcComposer.java | 20 +++----- .../client/checksum/TestCrcComposer.java | 51 +++++++------------ .../ozone/client/checksum/TestCrcUtil.java | 18 ++++--- 3 files changed, 39 insertions(+), 50 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java index f4ece694aba6..ca00c61558b2 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/checksum/CrcComposer.java @@ -55,8 +55,7 @@ public class CrcComposer { * @param bytesPerCrcHint bytesPerCrcHint. * @return a CrcComposer which will collapse all ingested CRCs into a single value. */ - public static CrcComposer newCrcComposer( - DataChecksum.Type type, long bytesPerCrcHint) { + public static CrcComposer newCrcComposer(DataChecksum.Type type, long bytesPerCrcHint) { return newStripedCrcComposer(type, bytesPerCrcHint, Long.MAX_VALUE); } @@ -76,12 +75,11 @@ public static CrcComposer newCrcComposer( * @return a CrcComposer which will collapse CRCs for every combined. * underlying data size which aligns with the specified stripe boundary. */ - public static CrcComposer newStripedCrcComposer( - DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) { - int polynomial = CrcUtil.getCrcPolynomialForType(type); + public static CrcComposer newStripedCrcComposer(DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) { + final int polynomial = CrcUtil.getCrcPolynomialForType(type); return new CrcComposer( polynomial, - org.apache.hadoop.ozone.client.checksum.CrcUtil.getMonomial(bytesPerCrcHint, polynomial), + CrcUtil.getMonomial(bytesPerCrcHint, polynomial), bytesPerCrcHint, stripeLength); } @@ -123,7 +121,7 @@ public void update(byte[] crcBuffer, int offset, int length, long bytesPerCrc) { } int limit = offset + length; while (offset < limit) { - int crcB = org.apache.hadoop.ozone.client.checksum.CrcUtil.readInt(crcBuffer, offset); + final int crcB = CrcUtil.readInt(crcBuffer, offset); update(crcB, bytesPerCrc); offset += CRC_SIZE_BYTES; } @@ -159,11 +157,9 @@ public void update(int crcB, long bytesPerCrc) { if (curCompositeCrc == 0) { curCompositeCrc = crcB; } else if (bytesPerCrc == bytesPerCrcHint) { - curCompositeCrc = org.apache.hadoop.ozone.client.checksum.CrcUtil.composeWithMonomial( - curCompositeCrc, crcB, precomputedMonomialForHint, crcPolynomial); + curCompositeCrc = CrcUtil.composeWithMonomial(curCompositeCrc, crcB, precomputedMonomialForHint, crcPolynomial); } else { - curCompositeCrc = org.apache.hadoop.ozone.client.checksum.CrcUtil.compose( - curCompositeCrc, crcB, bytesPerCrc, crcPolynomial); + curCompositeCrc = CrcUtil.compose(curCompositeCrc, crcB, bytesPerCrc, crcPolynomial); } curPositionInStripe += bytesPerCrc; @@ -176,7 +172,7 @@ public void update(int crcB, long bytesPerCrc) { } else if (curPositionInStripe == stripeLength) { // Hit a stripe boundary; flush the curCompositeCrc and reset for next // stripe. - digestOut.write(org.apache.hadoop.ozone.client.checksum.CrcUtil.intToBytes(curCompositeCrc), 0, CRC_SIZE_BYTES); + digestOut.write(CrcUtil.intToBytes(curCompositeCrc), 0, CRC_SIZE_BYTES); curCompositeCrc = 0; curPositionInStripe = 0; } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java index ea241a750af5..b00621f6d40c 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcComposer.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.client.checksum; +import static org.apache.hadoop.ozone.client.checksum.TestCrcUtil.assertContains; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -25,7 +26,6 @@ import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; -import java.util.Objects; import java.util.Random; import org.apache.hadoop.util.DataChecksum; import org.junit.jupiter.api.BeforeEach; @@ -42,8 +42,7 @@ public class TestCrcComposer { private final Random rand = new Random(1234); private final DataChecksum.Type type = DataChecksum.Type.CRC32C; - private final DataChecksum checksum = Objects.requireNonNull( - DataChecksum.newDataChecksum(type, Integer.MAX_VALUE)); + private final DataChecksum checksum = DataChecksum.newDataChecksum(type, Integer.MAX_VALUE); private final byte[] data = new byte[DATA_SIZE]; private int fullCrc; @@ -62,16 +61,14 @@ public void setup() { for (int i = 0; i < 7; ++i) { crcsByChunk[i] = getRangeChecksum(data, i * CHUNK_SIZE, CHUNK_SIZE); } - crcsByChunk[7] = getRangeChecksum( - data, (crcsByChunk.length - 1) * CHUNK_SIZE, DATA_SIZE % CHUNK_SIZE); + crcsByChunk[7] = getRangeChecksum(data, (crcsByChunk.length - 1) * CHUNK_SIZE, DATA_SIZE % CHUNK_SIZE); // 3 cells of size cellSize, 1 cell of size (dataSize % cellSize). int[] crcsByCell = new int[4]; for (int i = 0; i < 3; ++i) { crcsByCell[i] = getRangeChecksum(data, i * CELL_SIZE, CELL_SIZE); } - crcsByCell[3] = getRangeChecksum( - data, (crcsByCell.length - 1) * CELL_SIZE, DATA_SIZE % CELL_SIZE); + crcsByCell[3] = getRangeChecksum(data, (crcsByCell.length - 1) * CELL_SIZE, DATA_SIZE % CELL_SIZE); crcBytesByChunk = intArrayToByteArray(crcsByChunk); crcBytesByCell = intArrayToByteArray(crcsByCell); @@ -147,11 +144,9 @@ public void testUnstripedSingleCrcs() { @Test public void testStripedByteArray() { - CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); + CrcComposer digester = CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, CHUNK_SIZE); - digester.update( - crcBytesByChunk, crcBytesByChunk.length - 4, 4, DATA_SIZE % CHUNK_SIZE); + digester.update(crcBytesByChunk, crcBytesByChunk.length - 4, 4, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); assertArrayEquals(crcBytesByCell, digest); @@ -159,10 +154,8 @@ public void testStripedByteArray() { @Test public void testStripedDataInputStream() throws IOException { - CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); - DataInputStream input = - new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); + CrcComposer digester = CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); + DataInputStream input = new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); digester.update(input, crcsByChunk.length - 1, CHUNK_SIZE); digester.update(input, 1, DATA_SIZE % CHUNK_SIZE); @@ -172,8 +165,7 @@ public void testStripedDataInputStream() throws IOException { @Test public void testStripedSingleCrcs() { - CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); + CrcComposer digester = CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); for (int i = 0; i < crcsByChunk.length - 1; ++i) { digester.update(crcsByChunk[i], CHUNK_SIZE); } @@ -185,19 +177,16 @@ public void testStripedSingleCrcs() { @Test public void testMultiStageMixed() throws IOException { - CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); + CrcComposer digester = CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); // First combine chunks into cells. - DataInputStream input = - new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); + DataInputStream input = new DataInputStream(new ByteArrayInputStream(crcBytesByChunk)); digester.update(input, crcsByChunk.length - 1, CHUNK_SIZE); digester.update(input, 1, DATA_SIZE % CHUNK_SIZE); byte[] digest = digester.digest(); // Second, individually combine cells into full crc. - digester = - CrcComposer.newCrcComposer(type, CELL_SIZE); + digester = CrcComposer.newCrcComposer(type, CELL_SIZE); for (int i = 0; i < digest.length - 4; i += 4) { int cellCrc = CrcUtil.readInt(digest, i); digester.update(cellCrc, CELL_SIZE); @@ -211,25 +200,23 @@ public void testMultiStageMixed() throws IOException { @Test public void testUpdateMismatchesStripe() { - CrcComposer digester = - CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); - + CrcComposer digester = CrcComposer.newStripedCrcComposer(type, CHUNK_SIZE, CELL_SIZE); digester.update(crcsByChunk[0], CHUNK_SIZE); // Going from chunkSize to chunkSize + cellSize will cross a cellSize // boundary in a single CRC, which is not allowed, since we'd lack a // CRC corresponding to the actual cellSize boundary. - assertThrows(IllegalStateException.class, - () -> digester.update(crcsByChunk[1], CELL_SIZE), - "stripe"); + final IllegalStateException e = assertThrows(IllegalStateException.class, + () -> digester.update(crcsByChunk[1], CELL_SIZE)); + assertContains(e, "stripe"); } @Test public void testUpdateByteArrayLengthUnalignedWithCrcSize() { CrcComposer digester = CrcComposer.newCrcComposer(type, CHUNK_SIZE); - assertThrows(IllegalArgumentException.class, - () -> digester.update(crcBytesByChunk, 0, 6, CHUNK_SIZE), - "length"); + final IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> digester.update(crcBytesByChunk, 0, 6, CHUNK_SIZE)); + assertContains(e, "length"); } } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java index f6a47cdb95d7..3b4cc2e9016c 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/client/checksum/TestCrcUtil.java @@ -19,6 +19,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; @@ -26,6 +27,11 @@ /** Test {@link CrcUtil}. */ @Timeout(10) public class TestCrcUtil { + /** Assert that the given throwable contains the given message. */ + public static void assertContains(Throwable t, String message) { + assertTrue(t.getMessage().contains(message), () -> "Message \"" + message + "\" not found: " + t); + } + @Test public void testIntSerialization() { byte[] bytes = CrcUtil.intToBytes(0xCAFEBEEF); @@ -43,9 +49,9 @@ public void testIntSerialization() { @Test public void testToSingleCrcStringBadLength() { - assertThrows(IllegalArgumentException.class, - () -> CrcUtil.toSingleCrcString(new byte[8]), - "length"); + final IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> CrcUtil.toSingleCrcString(new byte[8])); + assertContains(e, "length"); } @Test @@ -56,9 +62,9 @@ public void testToSingleCrcString() { @Test public void testToMultiCrcStringBadLength() { - assertThrows(IllegalArgumentException.class, - () -> CrcUtil.toMultiCrcString(new byte[6]), - "length"); + final IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> CrcUtil.toMultiCrcString(new byte[6])); + assertContains(e, "length"); } @Test