Finds the first index in the {@code CharSequence} that matches the
- * specified character.
+ * Returns the index within cs of the first occurrence of the
+ * specified character, starting the search at the specified index.
+ *
+ * If a character with value searchChar occurs in the
+ * character sequence represented by the cs
+ * object at an index no smaller than start, then
+ * the index of the first such occurrence is returned. For values
+ * of searchChar in the range from 0 to 0xFFFF (inclusive),
+ * this is the smallest value k such that:
+ *
+ * (this.charAt(k) == searchChar) && (k >= start)
+ *
+ * is true. For other values of searchChar, it is the
+ * smallest value k such that:
+ *
+ * (this.codePointAt(k) == searchChar) && (k >= start)
+ *
+ * is true. In either case, if no such character occurs in cs
+ * at or after position start, then
+ * -1 is returned.
+ *
+ *
+ * There is no restriction on the value of start. If it
+ * is negative, it has the same effect as if it were zero: the entire
+ * CharSequence may be searched. If it is greater than
+ * the length of cs, it has the same effect as if it were
+ * equal to the length of cs: -1 is returned.
+ *
+ *
+ * All indices are specified in char values
+ * (Unicode code units).
*
* @param cs the {@code CharSequence} to be processed, not null
* @param searchChar the char to be searched for
* @param start the start index, negative starts at the string start
* @return the index where the search char was found, -1 if not found
+ * @since 3.6 updated to behave more like String
*/
static int indexOf(final CharSequence cs, final int searchChar, int start) {
if (cs instanceof String) {
@@ -75,9 +104,22 @@ static int indexOf(final CharSequence cs, final int searchChar, int start) {
if (start < 0) {
start = 0;
}
- for (int i = start; i < sz; i++) {
- if (cs.charAt(i) == searchChar) {
- return i;
+ if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+ for (int i = start; i < sz; i++) {
+ if (cs.charAt(i) == searchChar) {
+ return i;
+ }
+ }
+ }
+ //supplementary characters (LANG1300)
+ if (searchChar <= Character.MAX_CODE_POINT) {
+ char[] chars = Character.toChars(searchChar);
+ for (int i = start; i < sz - 1; i++) {
+ char high = cs.charAt(i);
+ char low = cs.charAt(i + 1);
+ if (high == chars[0] && low == chars[1]) {
+ return i;
+ }
}
}
return NOT_FOUND;
@@ -105,13 +147,30 @@ static int indexOf(final CharSequence cs, final CharSequence searchChar, final i
}
/**
- * Finds the last index in the {@code CharSequence} that matches the
- * specified character.
+ * Returns the index within cs of the last occurrence of
+ * the specified character, searching backward starting at the
+ * specified index. For values of searchChar in the range
+ * from 0 to 0xFFFF (inclusive), the index returned is the largest
+ * value k such that:
+ *
+ * (this.charAt(k) == searchChar) && (k <= start)
+ *
+ * is true. For other values of searchChar, it is the
+ * largest value k such that:
+ *
+ * (this.codePointAt(k) == searchChar) && (k <= start)
+ *
+ * is true. In either case, if no such character occurs in cs
+ * at or before position start, then -1 is returned.
+ *
+ * All indices are specified in char values
+ * (Unicode code units).
*
* @param cs the {@code CharSequence} to be processed
* @param searchChar the char to be searched for
* @param start the start index, negative returns -1, beyond length starts at end
* @return the index where the search char was found, -1 if not found
+ * @since 3.6 updated to behave more like String
*/
static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
if (cs instanceof String) {
@@ -124,9 +183,27 @@ static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
if (start >= sz) {
start = sz - 1;
}
- for (int i = start; i >= 0; --i) {
- if (cs.charAt(i) == searchChar) {
- return i;
+ if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+ for (int i = start; i >= 0; --i) {
+ if (cs.charAt(i) == searchChar) {
+ return i;
+ }
+ }
+ }
+ //supplementary characters (LANG1300)
+ //NOTE - we must do a forward traversal for this to avoid duplicating code points
+ if (searchChar <= Character.MAX_CODE_POINT) {
+ char[] chars = Character.toChars(searchChar);
+ //make sure it's not the last index
+ if (start == sz - 1) {
+ return NOT_FOUND;
+ }
+ for (int i = start; i >= 0; i--) {
+ char high = cs.charAt(i);
+ char low = cs.charAt(i + 1);
+ if (chars[0] == high && chars[1] == low) {
+ return i;
+ }
}
}
return NOT_FOUND;
diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java
index 2250595f5ab..dcb335dede7 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -1276,10 +1276,26 @@ public static boolean equalsAnyIgnoreCase(final CharSequence string, final CharS
// IndexOf
//-----------------------------------------------------------------------
/**
- * Finds the first index within a CharSequence, handling {@code null}.
- * This method uses {@link String#indexOf(int, int)} if possible.
- *
- * A {@code null} or empty ("") CharSequence will return {@code INDEX_NOT_FOUND (-1)}.
+ * Returns the index within seq of the first occurrence of
+ * the specified character. If a character with value
+ * searchChar occurs in the character sequence represented by
+ * seq CharSequence object, then the index (in Unicode
+ * code units) of the first such occurrence is returned. For
+ * values of searchChar in the range from 0 to 0xFFFF
+ * (inclusive), this is the smallest value k such that:
+ *
+ * this.charAt(k) == searchChar
+ *
+ * is true. For other values of searchChar, it is the
+ * smallest value k such that:
+ *
+ * this.codePointAt(k) == searchChar
+ *
+ * is true. In either case, if no such character occurs in seq,
+ * then {@code INDEX_NOT_FOUND (-1)} is returned.
+ *
+ * Furthermore, a {@code null} or empty ("") CharSequence will
+ * return {@code INDEX_NOT_FOUND (-1)}.
*
*
* StringUtils.indexOf(null, *) = -1
@@ -1294,6 +1310,7 @@ public static boolean equalsAnyIgnoreCase(final CharSequence string, final CharS
* -1 if no match or {@code null} string input
* @since 2.0
* @since 3.0 Changed signature from indexOf(String, int) to indexOf(CharSequence, int)
+ * @since 3.6 Updated {@link CharSequenceUtils} call to behave more like String
*/
public static int indexOf(final CharSequence seq, final int searchChar) {
if (isEmpty(seq)) {
@@ -1303,13 +1320,39 @@ public static int indexOf(final CharSequence seq, final int searchChar) {
}
/**
- * Finds the first index within a CharSequence from a start position,
- * handling {@code null}.
- * This method uses {@link String#indexOf(int, int)} if possible.
*
- * A {@code null} or empty ("") CharSequence will return {@code (INDEX_NOT_FOUND) -1}.
- * A negative start position is treated as zero.
- * A start position greater than the string length returns {@code -1}.
+ * Returns the index within seq of the first occurrence of the
+ * specified character, starting the search at the specified index.
+ *
+ * If a character with value searchChar occurs in the
+ * character sequence represented by the seq CharSequence
+ * object at an index no smaller than startPos, then
+ * the index of the first such occurrence is returned. For values
+ * of searchChar in the range from 0 to 0xFFFF (inclusive),
+ * this is the smallest value k such that:
+ *
+ * (this.charAt(k) == searchChar) && (k >= startPos)
+ *
+ * is true. For other values of searchChar, it is the
+ * smallest value k such that:
+ *
+ * (this.codePointAt(k) == searchChar) && (k >= startPos)
+ *
+ * is true. In either case, if no such character occurs in seq
+ * at or after position startPos, then
+ * -1 is returned.
+ *
+ *
+ * There is no restriction on the value of startPos. If it
+ * is negative, it has the same effect as if it were zero: this entire
+ * string may be searched. If it is greater than the length of this
+ * string, it has the same effect as if it were equal to the length of
+ * this string: {@code (INDEX_NOT_FOUND) -1} is returned. Furthermore, a
+ * {@code null} or empty ("") CharSequence will
+ * return {@code (INDEX_NOT_FOUND) -1}.
+ *
+ *
+ * All indices are specified in char values
+ * (Unicode code units).
*
*
* StringUtils.indexOf(null, *, *) = -1
@@ -1327,6 +1370,7 @@ public static int indexOf(final CharSequence seq, final int searchChar) {
* -1 if no match or {@code null} string input
* @since 2.0
* @since 3.0 Changed signature from indexOf(String, int, int) to indexOf(CharSequence, int, int)
+ * @since 3.6 Updated {@link CharSequenceUtils} call to behave more like String
*/
public static int indexOf(final CharSequence seq, final int searchChar, final int startPos) {
if (isEmpty(seq)) {
@@ -1586,10 +1630,23 @@ public static int indexOfIgnoreCase(final CharSequence str, final CharSequence s
// LastIndexOf
//-----------------------------------------------------------------------
/**
- * Finds the last index within a CharSequence, handling {@code null}.
- * This method uses {@link String#lastIndexOf(int)} if possible.
- *
- * A {@code null} or empty ("") CharSequence will return {@code -1}.
+ * Returns the index within seq of the last occurrence of
+ * the specified character. For values of searchChar in the
+ * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
+ * units) returned is the largest value k such that:
+ *
+ * this.charAt(k) == searchChar
+ *
+ * is true. For other values of searchChar, it is the
+ * largest value k such that:
+ *
+ * this.codePointAt(k) == searchChar
+ *
+ * is true. In either case, if no such character occurs in this
+ * string, then -1 is returned. Furthermore, a {@code null} or empty ("")
+ * CharSequence will return {@code -1}. The
+ * seq CharSequence object is searched backwards
+ * starting at the last character.
*
*
* StringUtils.lastIndexOf(null, *) = -1
@@ -1598,12 +1655,13 @@ public static int indexOfIgnoreCase(final CharSequence str, final CharSequence s
* StringUtils.lastIndexOf("aabaabaa", 'b') = 5
*
*
- * @param seq the CharSequence to check, may be null
+ * @param seq the CharSequence to check, may be null
* @param searchChar the character to find
* @return the last index of the search character,
* -1 if no match or {@code null} string input
* @since 2.0
* @since 3.0 Changed signature from lastIndexOf(String, int) to lastIndexOf(CharSequence, int)
+ * @since 3.6 Updated {@link CharSequenceUtils} call to behave more like String
*/
public static int lastIndexOf(final CharSequence seq, final int searchChar) {
if (isEmpty(seq)) {
@@ -1613,16 +1671,29 @@ public static int lastIndexOf(final CharSequence seq, final int searchChar) {
}
/**
- * Finds the last index within a CharSequence from a start position,
- * handling {@code null}.
- * This method uses {@link String#lastIndexOf(int, int)} if possible.
- *
- * A {@code null} or empty ("") CharSequence will return {@code -1}.
- * A negative start position returns {@code -1}.
- * A start position greater than the string length searches the whole string.
- * The search starts at the startPos and works backwards; matches starting after the start
- * position are ignored.
- *
+ * Returns the index within seq of the last occurrence of
+ * the specified character, searching backward starting at the
+ * specified index. For values of searchChar in the range
+ * from 0 to 0xFFFF (inclusive), the index returned is the largest
+ * value k such that:
+ *
+ * (this.charAt(k) == searchChar) && (k <= startPos)
+ *
+ * is true. For other values of searchChar, it is the
+ * largest value k such that:
+ *
+ * (this.codePointAt(k) == searchChar) && (k <= startPos)
+ *
+ * is true. In either case, if no such character occurs in seq
+ * at or before position startPos, then
+ * -1 is returned. Furthermore, a {@code null} or empty ("")
+ * CharSequence will return {@code -1}. A start position greater
+ * than the string length searches the whole string.
+ * The search starts at the startPos and works backwards;
+ * matches starting after the start position are ignored.
+ *
+ * All indices are specified in char values
+ * (Unicode code units).
*
*
* StringUtils.lastIndexOf(null, *, *) = -1
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
index 9014bfa9ee1..4635a54aa23 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java
@@ -294,6 +294,28 @@ public void testIndexOf_charInt() {
assertEquals(2, StringUtils.indexOf("aabaabaa", 'b', -1));
assertEquals(5, StringUtils.indexOf(new StringBuilder("aabaabaa"), 'b', 3));
+
+ //LANG-1300 tests go here
+ final int CODE_POINT = 0x2070E;
+ StringBuilder builder = new StringBuilder();
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(0, StringUtils.indexOf(builder, CODE_POINT, 0));
+ assertEquals(0, StringUtils.indexOf(builder.toString(), CODE_POINT, 0));
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(2, StringUtils.indexOf(builder, CODE_POINT, 1));
+ assertEquals(2, StringUtils.indexOf(builder.toString(), CODE_POINT, 1));
+ //inner branch on the supplementary character block
+ char[] tmp = {(char) 55361};
+ builder = new StringBuilder();
+ builder.append(tmp);
+ assertEquals(-1, StringUtils.indexOf(builder, CODE_POINT, 0));
+ assertEquals(-1, StringUtils.indexOf(builder.toString(), CODE_POINT, 0));
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(1, StringUtils.indexOf(builder, CODE_POINT, 0));
+ assertEquals(1, StringUtils.indexOf(builder.toString(), CODE_POINT, 0));
+ assertEquals(-1, StringUtils.indexOf(builder, CODE_POINT, 2));
+ assertEquals(-1, StringUtils.indexOf(builder.toString(), CODE_POINT, 2));
+
}
@Test
@@ -525,6 +547,33 @@ public void testLastIndexOf_charInt() {
assertEquals(0, StringUtils.lastIndexOf("aabaabaa", 'a', 0));
assertEquals(2, StringUtils.lastIndexOf(new StringBuilder("aabaabaa"), 'b', 2));
+
+ //LANG-1300 addition test
+ final int CODE_POINT = 0x2070E;
+ StringBuilder builder = new StringBuilder();
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(0, StringUtils.lastIndexOf(builder, CODE_POINT, 0));
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(0, StringUtils.lastIndexOf(builder, CODE_POINT, 0));
+ assertEquals(0, StringUtils.lastIndexOf(builder, CODE_POINT, 1));
+ assertEquals(2, StringUtils.lastIndexOf(builder, CODE_POINT, 2));
+
+
+
+ builder.append("aaaaa");
+ assertEquals(2, StringUtils.lastIndexOf(builder, CODE_POINT, 4));
+ //inner branch on the supplementary character block
+ char[] tmp = {(char) 55361};
+ builder = new StringBuilder();
+ builder.append(tmp);
+ assertEquals(-1, StringUtils.lastIndexOf(builder, CODE_POINT, 0));
+ builder.appendCodePoint(CODE_POINT);
+ assertEquals(-1, StringUtils.lastIndexOf(builder, CODE_POINT, 0 ));
+ assertEquals(1, StringUtils.lastIndexOf(builder, CODE_POINT, 1 ));
+ assertEquals(-1, StringUtils.lastIndexOf(builder.toString(), CODE_POINT, 0));
+ assertEquals(1, StringUtils.lastIndexOf(builder.toString(), CODE_POINT, 1));
+
+
}
@Test