diff --git a/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java index 805ceef4927..00e94a024ff 100644 --- a/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java +++ b/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java @@ -110,26 +110,21 @@ public int translate(final CharSequence input, final int index, final Writer out if (input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index + 1) == '#') { int start = index + 2; boolean isHex = false; - final char firstChar = input.charAt(start); if (firstChar == 'x' || firstChar == 'X') { start++; isHex = true; - // Check there's more than just an x after the &# if (start == seqEnd) { return 0; } } - int end = start; // Note that this supports character codes without a ; on the end while (end < seqEnd && CharUtils.isHex(input.charAt(end))) { end++; } - final boolean semiNext = end != seqEnd && input.charAt(end) == ';'; - if (!semiNext) { if (isSet(OPTION.semiColonRequired)) { return 0; @@ -138,7 +133,6 @@ public int translate(final CharSequence input, final int index, final Writer out throw new IllegalArgumentException("Semi-colon required at end of numeric entity"); } } - final int entityValue; try { if (isHex) { @@ -149,7 +143,9 @@ public int translate(final CharSequence input, final int index, final Writer out } catch (final NumberFormatException nfe) { return 0; } - + if (entityValue < Character.MIN_CODE_POINT || entityValue > Character.MAX_CODE_POINT) { + return 0; + } if (entityValue > 0xFFFF) { final char[] chars = Character.toChars(entityValue); out.write(chars[0]); @@ -157,7 +153,6 @@ public int translate(final CharSequence input, final int index, final Writer out } else { out.write(entityValue); } - return 2 + end - start + (isHex ? 1 : 0) + (semiNext ? 
1 : 0); } return 0; diff --git a/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java b/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java index 22c5639c760..dab4c884a93 100644 --- a/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java +++ b/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityEscaperTest.java @@ -19,6 +19,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.StringWriter; + import org.apache.commons.lang3.AbstractLangTest; import org.junit.jupiter.api.Test; @@ -31,7 +33,6 @@ class NumericEntityEscaperTest extends AbstractLangTest { @Test void testAbove() { final NumericEntityEscaper nee = NumericEntityEscaper.above('F'); - final String input = "ADFGZ"; final String result = nee.translate(input); assertEquals("ADF&#71;&#90;", result, "Failed to escape numeric entities via the above method"); @@ -40,7 +41,6 @@ void testAbove() { @Test void testBelow() { final NumericEntityEscaper nee = NumericEntityEscaper.below('F'); - final String input = "ADFGZ"; final String result = nee.translate(input); assertEquals("&#65;&#68;FGZ", result, "Failed to escape numeric entities via the below method"); @@ -49,7 +49,6 @@ void testBelow() { @Test void testBetween() { final NumericEntityEscaper nee = NumericEntityEscaper.between('F', 'L'); - final String input = "ADFGZ"; final String result = nee.translate(input); assertEquals("AD&#70;&#71;Z", result, "Failed to escape numeric entities via the between method"); @@ -61,10 +60,32 @@ void testSupplementary() { final NumericEntityEscaper nee = new NumericEntityEscaper(); final String input = "\uD803\uDC22"; final String expected = "&#68642;"; - final String result = nee.translate(input); assertEquals(expected, result, "Failed to escape numeric entities supplementary characters"); + } + @Test + void testNumericEntityOverflow() throws Exception { + // cp = 1234567890 > Character.MAX_CODE_POINT (0x10FFFF = 1114111). 
+ // Pre-patch: IAE escapes from Character.toChars. + // Post-patch: return 0, no write, no exception. + final NumericEntityUnescaper u = new NumericEntityUnescaper(); + final StringWriter sw = new StringWriter(); + int consumed = u.translate("&#1234567890;", 0, sw); + assertEquals(0, consumed); + assertEquals("", sw.toString()); + consumed = u.translate("---&#1234567890;---", 0, sw); + assertEquals(0, consumed); + assertEquals("", sw.toString()); } + @Test + void testValidCodePoint() throws Exception { + // Negative control: '&#65;' = 'A' must translate successfully. + final NumericEntityUnescaper u = new NumericEntityUnescaper(); + final StringWriter sw = new StringWriter(); + final int consumed = u.translate("&#65;", 0, sw); + assertEquals("A", sw.toString()); + assertEquals(5, consumed); + } }