Skip to content

Commit

Permalink
[CODEC-280] Added strict decoding property to BaseNCodec. (#35)
Browse files Browse the repository at this point in the history
* [CODEC-280] Added strict decoding property to BaseNCodec.

The default is lenient encoding. Any trailing bits that cannot be
interpreted as part of the encoding are discarded. Combinations of
trailing characters that cannot be created by a valid encoding are
partially interpreted.

If set to strict encoding the codec will raise an exception when
trailing bits are present that are not part of a valid encoding.

Added tests to show that in lenient mode re-encoding will create a
different byte array.

In strict mode the re-encoding should create the same byte array as the
original (with appropriate pad characters on the input).

* [CODEC-280] Added strict decoding property to BCodec.

* [CODEC-280] Removed unused import @ignore from BCodecTest
  • Loading branch information
aherbert authored and garydgregory committed Jan 28, 2020
1 parent f5a61f0 commit 3c21223
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 35 deletions.
1 change: 1 addition & 0 deletions src/changes/changes.xml
Expand Up @@ -44,6 +44,7 @@ The <action> type attribute can be add,update,fix,remove.

<release version="1.15" date="YYYY-MM-DD" description="Feature and fix release.">
<action issue="CODEC-264" dev="aherbert" due-to="Andy Seaborne" type="fix">MurmurHash3: Ensure hash128 maintains the sign extension bug.</action>
<action issue="CODEC-280" dev="aherbert" type="update">Base32/Base64/BCodec: Added strict decoding property to control handling of trailing bits. Default lenient mode discards them without error. Strict mode raise an exception.</action>
</release>

<release version="1.14" date="2019-12-30" description="Feature and fix release.">
Expand Down
52 changes: 36 additions & 16 deletions src/main/java/org/apache/commons/codec/binary/Base32.java
Expand Up @@ -114,10 +114,6 @@ public class Base32 extends BaseNCodec {
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
};

/** Mask used to extract 7 bits, used when decoding final trailing character. */
private static final long MASK_7BITS = 0x7fL;
/** Mask used to extract 6 bits, used when decoding final trailing character. */
private static final long MASK_6BITS = 0x3fL;
/** Mask used to extract 5 bits, used when encoding Base32 bytes */
private static final int MASK_5BITS = 0x1f;
/** Mask used to extract 4 bits, used when decoding final trailing character. */
Expand Down Expand Up @@ -387,17 +383,26 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont
// Two forms of EOF as far as Base32 decoder is concerned: actual
// EOF (-1) and first time '=' character is encountered in stream.
// This approach makes the '=' padding characters completely optional.
if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do
if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
final byte[] buffer = ensureBufferSize(decodeSize, context);

// we ignore partial bytes, i.e. only multiples of 8 count
// We ignore partial bytes, i.e. only multiples of 8 count.
// Any combination not part of a valid encoding is either partially decoded
// or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
// It is not possible to encode with 1, 3, 6 trailing characters.
// For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
// See the encode(byte[]) method EOF section.
switch (context.modulus) {
// case 0 : // impossible, as excluded above
case 1 : // 5 bits - either ignore entirely, or raise an exception
validateTrailingCharacters();
case 2 : // 10 bits, drop 2 and output one byte
validateCharacter(MASK_2BITS, context);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS);
break;
case 3 : // 15 bits, drop 7 and output 1 byte
validateCharacter(MASK_7BITS, context);
case 3 : // 15 bits, drop 7 and output 1 byte, or raise an exception
validateTrailingCharacters();
// Not possible from a valid encoding but decode anyway
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS);
break;
case 4 : // 20 bits = 2*8 + 4
Expand All @@ -406,21 +411,22 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
case 5 : // 25bits = 3*8 + 1
case 5 : // 25 bits = 3*8 + 1
validateCharacter(MASK_1BITS, context);
context.lbitWorkArea = context.lbitWorkArea >> 1;
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
case 6 : // 30bits = 3*8 + 6
validateCharacter(MASK_6BITS, context);
case 6 : // 30 bits = 3*8 + 6, or raise an exception
validateTrailingCharacters();
// Not possible from a valid encoding but decode anyway
context.lbitWorkArea = context.lbitWorkArea >> 6;
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
break;
case 7 : // 35 = 4*8 +3
case 7 : // 35 bits = 4*8 +3
validateCharacter(MASK_3BITS, context);
context.lbitWorkArea = context.lbitWorkArea >> 3;
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
Expand Down Expand Up @@ -559,6 +565,20 @@ public boolean isInAlphabet(final byte octet) {
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
}

/**
* Validates whether decoding allows final trailing characters that cannot be
* created during encoding.
*
* @throws IllegalArgumentException if strict decoding is enabled
*/
private void validateTrailingCharacters() {
if (isStrictDecoding()) {
throw new IllegalArgumentException(
"Strict decoding: Last encoded character(s) (before the paddings if any) are valid base 32 alphabet but not a possible encoding. " +
"Decoding requries either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
}
}

/**
* Validates whether decoding the final trailing character is possible in the context
* of the set of possible base 32 values.
Expand All @@ -571,12 +591,12 @@ public boolean isInAlphabet(final byte octet) {
*
* @throws IllegalArgumentException if the bits being checked contain any non-zero value
*/
private static void validateCharacter(final long emptyBitsMask, final Context context) {
private void validateCharacter(final long emptyBitsMask, final Context context) {
// Use the long bit work area
if ((context.lbitWorkArea & emptyBitsMask) != 0) {
if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
throw new IllegalArgumentException(
"Last encoded character (before the paddings if any) is a valid base 32 alphabet but not a possible value. " +
"Expected the discarded bits to be zero.");
"Strict decoding: Last encoded character (before the paddings if any) is a valid base 32 alphabet but not a possible encoding. " +
"Expected the discarded bits from the character to be zero.");
}
}
}
26 changes: 20 additions & 6 deletions src/main/java/org/apache/commons/codec/binary/Base64.java
Expand Up @@ -470,8 +470,8 @@ void decode(final byte[] in, int inPos, final int inAvail, final Context context
// Output all whole multiples of 8 bits and ignore the rest
switch (context.modulus) {
// case 0 : // impossible, as excluded above
case 1 : // 6 bits - ignore entirely
// TODO not currently tested; perhaps it is impossible?
case 1 : // 6 bits - either ignore entirely, or raise an exception
validateTrailingCharacter();
break;
case 2 : // 12 bits = 8 + 4
validateCharacter(MASK_4BITS, context);
Expand Down Expand Up @@ -786,6 +786,20 @@ protected boolean isInAlphabet(final byte octet) {
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
}

/**
* Validates whether decoding allows an entire final trailing character that cannot be
* used for a complete byte.
*
* @throws IllegalArgumentException if strict decoding is enabled
*/
private void validateTrailingCharacter() {
if (isStrictDecoding()) {
throw new IllegalArgumentException(
"Strict decoding: Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible encoding. " +
"Decoding requries at least two trailing 6-bit characters to create bytes.");
}
}

/**
* Validates whether decoding the final trailing character is possible in the context
* of the set of possible base 64 values.
Expand All @@ -798,11 +812,11 @@ protected boolean isInAlphabet(final byte octet) {
*
* @throws IllegalArgumentException if the bits being checked contain any non-zero value
*/
private static void validateCharacter(final int emptyBitsMask, final Context context) {
if ((context.ibitWorkArea & emptyBitsMask) != 0) {
private void validateCharacter(final int emptyBitsMask, final Context context) {
if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
throw new IllegalArgumentException(
"Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible value. " +
"Expected the discarded bits to be zero.");
"Strict decoding: Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible encoding. " +
"Expected the discarded bits from the character to be zero.");
}
}
}
48 changes: 48 additions & 0 deletions src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
Expand Up @@ -23,6 +23,7 @@
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.binary.BaseNCodec.Context;

/**
* Abstract superclass for Base-N encoders and decoders.
Expand Down Expand Up @@ -190,6 +191,12 @@ public String toString() {
*/
private final int chunkSeparatorLength;

/**
* If true then decoding should throw an exception for impossible combinations of bits at the
* end of the byte input. The default is to decode as much of them as possible.
*/
private boolean strictDecoding;

/**
* Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
* If {@code chunkSeparatorLength} is zero, then chunking is disabled.
Expand Down Expand Up @@ -223,6 +230,47 @@ protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
this.pad = pad;
}

/**
* Sets the decoding behaviour when the input bytes contain leftover trailing bits that
* cannot be created by a valid encoding. These can be bits that are unused from the final
* character or entire characters. The default mode is lenient decoding. Set this to
* {@code true} to enable strict decoding.
* <ul>
* <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible.
* The remainder are discarded.
* <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits
* are not part of a valid encoding. Any unused bits from the final character must
* be zero. Impossible counts of entire final characters are not allowed.
* </ul>
*
* <p>When strict decoding is enabled it is expected that the decoded bytes will be re-encoded
* to a byte array that matches the original, i.e. no changes occur on the final
* character. This requires that the input bytes use the same padding and alphabet
* as the encoder.
*
* @param strictDecoding Set to true to enable strict decoding; otherwise use lenient decoding.
* @see #encode(byte[])
* @since 1.15
*/
public void setStrictDecoding(boolean strictDecoding) {
this.strictDecoding = strictDecoding;
}

/**
* Returns true if decoding behaviour is strict. Decoding will raise an
* {@link IllegalArgumentException} if trailing bits are not part of a valid encoding.
*
* <p>The default is false for lenient encoding. Decoding will compose trailing bits
* into 8-bit bytes and discard the remainder.
*
* @return true if using strict decoding
* @see #setStrictDecoding(boolean)
* @since 1.15
*/
public boolean isStrictDecoding() {
return strictDecoding;
}

/**
* Returns true if this object has buffered data for reading.
*
Expand Down
44 changes: 43 additions & 1 deletion src/main/java/org/apache/commons/codec/net/BCodec.java
Expand Up @@ -48,6 +48,12 @@ public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder
*/
private final Charset charset;

/**
* If true then decoding should throw an exception for impossible combinations of bits at the
* end of the byte input. The default is to decode as much of them as possible.
*/
private boolean strictDecoding;

/**
* Default constructor.
*/
Expand Down Expand Up @@ -82,6 +88,40 @@ public BCodec(final String charsetName) {
this(Charset.forName(charsetName));
}

/**
* Sets the decoding behaviour when the input bytes contain leftover trailing bits that
* cannot be created by a valid Base64 encoding. This setting is transferred to the instance
* of {@link Base64} used to perform decoding.
*
* <p>The default is false for lenient encoding. Decoding will compose trailing bits
* into 8-bit bytes and discard the remainder.
*
* <p>Set to true to enable strict decoding. Decoding will raise a
* {@link DecoderException} if trailing bits are not part of a valid Base64 encoding.
*
* @param strictDecoding Set to true to enable strict decoding; otherwise use lenient decoding.
* @see org.apache.commons.codec.binary.BaseNCodec#setStrictDecoding(boolean) BaseNCodec.setStrictDecoding(boolean)
* @since 1.15
*/
public void setStrictDecoding(boolean strictDecoding) {
this.strictDecoding = strictDecoding;
}

/**
* Returns true if decoding behaviour is strict. Decoding will raise a
* {@link DecoderException} if trailing bits are not part of a valid Base64 encoding.
*
* <p>The default is false for lenient encoding. Decoding will compose trailing bits
* into 8-bit bytes and discard the remainder.
*
* @return true if using strict decoding
* @see #setStrictDecoding(boolean)
* @since 1.15
*/
public boolean isStrictDecoding() {
return strictDecoding;
}

@Override
protected String getEncoding() {
return "B";
Expand All @@ -100,7 +140,9 @@ protected byte[] doDecoding(final byte[] bytes) {
if (bytes == null) {
return null;
}
return Base64.decodeBase64(bytes);
final Base64 codec = new Base64();
codec.setStrictDecoding(strictDecoding);
return codec.decode(bytes);
}

/**
Expand Down
36 changes: 31 additions & 5 deletions src/test/java/org/apache/commons/codec/binary/Base32Test.java
Expand Up @@ -19,8 +19,10 @@
package org.apache.commons.codec.binary;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.nio.charset.Charset;
Expand Down Expand Up @@ -299,6 +301,7 @@ public void testBase32HexImpossibleSamples() {
}

private void testImpossibleCases(final Base32 codec, final String[] impossible_cases) {
codec.setStrictDecoding(true);
for (final String impossible : impossible_cases) {
try {
codec.decode(impossible);
Expand All @@ -309,6 +312,11 @@ private void testImpossibleCases(final Base32 codec, final String[] impossible_c
}
}

@Test
public void testBase32DecodingOfTrailing5Bits() {
assertBase32DecodingOfTrailingBits(5);
}

@Test
public void testBase32DecodingOfTrailing10Bits() {
assertBase32DecodingOfTrailingBits(10);
Expand Down Expand Up @@ -349,31 +357,49 @@ public void testBase32DecodingOfTrailing35Bits() {
*/
private static void assertBase32DecodingOfTrailingBits(final int nbits) {
final Base32 codec = new Base32();
// Create the encoded bytes. The first characters must be valid so fill with 'zero'.
final byte[] encoded = new byte[nbits / 5];
Arrays.fill(encoded, ENCODE_TABLE[0]);
// Requires strict decoding
codec.setStrictDecoding(true);
assertTrue(codec.isStrictDecoding());
// A lenient decoder should not re-encode to the same bytes
final Base32 defaultCodec = new Base32();
assertFalse(defaultCodec.isStrictDecoding());

// Create the encoded bytes. The first characters must be valid so fill with 'zero'
// then pad to the block size.
final int length = nbits / 5;
final byte[] encoded = new byte[8];
Arrays.fill(encoded, 0, length, ENCODE_TABLE[0]);
Arrays.fill(encoded, length, encoded.length, (byte) '=');
// Compute how many bits would be discarded from 8-bit bytes
final int discard = nbits % 8;
final int emptyBitsMask = (1 << discard) - 1;
// Special case when an impossible number of trailing characters
final boolean invalid = length == 1 || length == 3 || length == 6;
// Enumerate all 32 possible final characters in the last position
final int last = encoded.length - 1;
final int last = length - 1;
for (int i = 0; i < 32; i++) {
encoded[last] = ENCODE_TABLE[i];
// If the lower bits are set we expect an exception. This is not a valid
// final character.
if ((i & emptyBitsMask) != 0) {
if (invalid || (i & emptyBitsMask) != 0) {
try {
codec.decode(encoded);
fail("Final base-32 digit should not be allowed");
} catch (final IllegalArgumentException ex) {
// expected
}
// The default lenient mode should decode this
final byte[] decoded = defaultCodec.decode(encoded);
// Re-encoding should not match the original array as it was invalid
assertFalse(Arrays.equals(encoded, defaultCodec.encode(decoded)));
} else {
// Otherwise this should decode
final byte[] decoded = codec.decode(encoded);
// Compute the bits that were encoded. This should match the final decoded byte.
final int bitsEncoded = i >> discard;
assertEquals("Invalid decoding of last character", bitsEncoded, decoded[decoded.length - 1]);
// Re-encoding should match the original array (requires the same padding character)
assertArrayEquals(encoded, codec.encode(decoded));
}
}
}
Expand Down

0 comments on commit 3c21223

Please sign in to comment.