Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@
<contributor>
<name>Adrian Ber</name>
</contributor>
<contributor>
<name>Mark Dacek</name>
</contributor>
</contributors>

<!-- Lang should depend on very little -->
Expand Down
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ The <action> type attribute can be add,update,fix,remove.
<body>

<release version="3.6" date="2017-MM-DD" description="TBD">
<action issue="LANG-1300" type="fix" dev="chtompki" due-to="Mark Dacek">Clarify or improve behaviour of int-based indexOf methods in StringUtils</action>
<action issue="LANG-1299" type="add" dev="djones">Add method for converting string to an array of code points</action>
<action issue="LANG-1286" type="fix" dev="djones">RandomStringUtils random method can overflow and return characters outside of specified range</action>
<action issue="LANG-660" type="add" dev="djones">Add methods to insert arrays into arrays at an index</action>
Expand Down
97 changes: 87 additions & 10 deletions src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,42 @@ public static CharSequence subSequence(final CharSequence cs, final int start) {

//-----------------------------------------------------------------------
/**
* <p>Finds the first index in the {@code CharSequence} that matches the
* specified character.</p>
* Returns the index within <code>cs</code> of the first occurrence of the
* specified character, starting the search at the specified index.
* <p>
* If a character with value <code>searchChar</code> occurs in the
* character sequence represented by the <code>cs</code>
* object at an index no smaller than <code>start</code>, then
* the index of the first such occurrence is returned. For values
* of <code>searchChar</code> in the range from 0 to 0xFFFF (inclusive),
* this is the smallest value <i>k</i> such that:
* <blockquote><pre>
* (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
* </pre></blockquote>
* is true. For other values of <code>searchChar</code>, it is the
* smallest value <i>k</i> such that:
* <blockquote><pre>
* (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
* </pre></blockquote>
* is true. In either case, if no such character occurs in <code>cs</code>
* at or after position <code>start</code>, then
* <code>-1</code> is returned.
*
* <p>
* There is no restriction on the value of <code>start</code>. If it
* is negative, it has the same effect as if it were zero: the entire
* <code>CharSequence</code> may be searched. If it is greater than
* the length of <code>cs</code>, it has the same effect as if it were
* equal to the length of <code>cs</code>: <code>-1</code> is returned.
*
* <p>All indices are specified in <code>char</code> values
* (Unicode code units).
*
* @param cs the {@code CharSequence} to be processed, not null
* @param searchChar the char to be searched for
* @param start the start index, negative starts at the string start
* @return the index where the search char was found, -1 if not found
* @since 3.6 updated to behave more like <code>String</code>
*/
static int indexOf(final CharSequence cs, final int searchChar, int start) {
if (cs instanceof String) {
Expand All @@ -75,9 +104,22 @@ static int indexOf(final CharSequence cs, final int searchChar, int start) {
if (start < 0) {
start = 0;
}
for (int i = start; i < sz; i++) {
if (cs.charAt(i) == searchChar) {
return i;
if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
for (int i = start; i < sz; i++) {
if (cs.charAt(i) == searchChar) {
return i;
}
}
}
//supplementary characters (LANG1300)
if (searchChar <= Character.MAX_CODE_POINT) {
char[] chars = Character.toChars(searchChar);
for (int i = start; i < sz - 1; i++) {
char high = cs.charAt(i);
char low = cs.charAt(i + 1);
if (high == chars[0] && low == chars[1]) {
return i;
}
}
}
return NOT_FOUND;
Expand Down Expand Up @@ -105,13 +147,30 @@ static int indexOf(final CharSequence cs, final CharSequence searchChar, final i
}

/**
* <p>Finds the last index in the {@code CharSequence} that matches the
* specified character.</p>
* Returns the index within <code>cs</code> of the last occurrence of
* the specified character, searching backward starting at the
* specified index. For values of <code>searchChar</code> in the range
* from 0 to 0xFFFF (inclusive), the index returned is the largest
* value <i>k</i> such that:
* <blockquote><pre>
* (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
* </pre></blockquote>
* is true. For other values of <code>searchChar</code>, it is the
* largest value <i>k</i> such that:
* <blockquote><pre>
* (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
* </pre></blockquote>
* is true. In either case, if no such character occurs in <code>cs</code>
* at or before position <code>start</code>, then <code>-1</code> is returned.
*
* <p>All indices are specified in <code>char</code> values
* (Unicode code units).
*
* @param cs the {@code CharSequence} to be processed
* @param searchChar the char to be searched for
* @param start the start index, negative returns -1, beyond length starts at end
* @return the index where the search char was found, -1 if not found
* @since 3.6 updated to behave more like <code>String</code>
*/
static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
if (cs instanceof String) {
Expand All @@ -124,9 +183,27 @@ static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
if (start >= sz) {
start = sz - 1;
}
for (int i = start; i >= 0; --i) {
if (cs.charAt(i) == searchChar) {
return i;
if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
for (int i = start; i >= 0; --i) {
if (cs.charAt(i) == searchChar) {
return i;
}
}
}
//supplementary characters (LANG1300)
//NOTE - the loop walks backward from start; each candidate surrogate pair is read at (i, i + 1)
if (searchChar <= Character.MAX_CODE_POINT) {
char[] chars = Character.toChars(searchChar);
//make sure it's not the last index
if (start == sz - 1) {
return NOT_FOUND;
}
for (int i = start; i >= 0; i--) {
char high = cs.charAt(i);
char low = cs.charAt(i + 1);
if (chars[0] == high && chars[1] == low) {
return i;
}
}
}
return NOT_FOUND;
Expand Down
121 changes: 96 additions & 25 deletions src/main/java/org/apache/commons/lang3/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -1276,10 +1276,26 @@ public static boolean equalsAnyIgnoreCase(final CharSequence string, final CharS
// IndexOf
//-----------------------------------------------------------------------
/**
* <p>Finds the first index within a CharSequence, handling {@code null}.
* This method uses {@link String#indexOf(int, int)} if possible.</p>
*
* <p>A {@code null} or empty ("") CharSequence will return {@code INDEX_NOT_FOUND (-1)}.</p>
* Returns the index within <code>seq</code> of the first occurrence of
* the specified character. If a character with value
* <code>searchChar</code> occurs in the character sequence represented by
* <code>seq</code> <code>CharSequence</code> object, then the index (in Unicode
* code units) of the first such occurrence is returned. For
* values of <code>searchChar</code> in the range from 0 to 0xFFFF
* (inclusive), this is the smallest value <i>k</i> such that:
* <blockquote><pre>
* this.charAt(<i>k</i>) == searchChar
* </pre></blockquote>
* is true. For other values of <code>searchChar</code>, it is the
* smallest value <i>k</i> such that:
* <blockquote><pre>
* this.codePointAt(<i>k</i>) == searchChar
* </pre></blockquote>
* is true. In either case, if no such character occurs in <code>seq</code>,
* then {@code INDEX_NOT_FOUND (-1)} is returned.
*
* <p>Furthermore, a {@code null} or empty ("") CharSequence will
* return {@code INDEX_NOT_FOUND (-1)}.</p>
*
* <pre>
* StringUtils.indexOf(null, *) = -1
Expand All @@ -1294,6 +1310,7 @@ public static boolean equalsAnyIgnoreCase(final CharSequence string, final CharS
* -1 if no match or {@code null} string input
* @since 2.0
* @since 3.0 Changed signature from indexOf(String, int) to indexOf(CharSequence, int)
* @since 3.6 Updated {@link CharSequenceUtils} call to behave more like <code>String</code>
*/
public static int indexOf(final CharSequence seq, final int searchChar) {
if (isEmpty(seq)) {
Expand All @@ -1303,13 +1320,39 @@ public static int indexOf(final CharSequence seq, final int searchChar) {
}

/**
* <p>Finds the first index within a CharSequence from a start position,
* handling {@code null}.
* This method uses {@link String#indexOf(int, int)} if possible.</p>
*
* <p>A {@code null} or empty ("") CharSequence will return {@code (INDEX_NOT_FOUND) -1}.
* A negative start position is treated as zero.
* A start position greater than the string length returns {@code -1}.</p>
* Returns the index within <code>seq</code> of the first occurrence of the
* specified character, starting the search at the specified index.
* <p>
* If a character with value <code>searchChar</code> occurs in the
* character sequence represented by the <code>seq</code> <code>CharSequence</code>
* object at an index no smaller than <code>startPos</code>, then
* the index of the first such occurrence is returned. For values
* of <code>searchChar</code> in the range from 0 to 0xFFFF (inclusive),
* this is the smallest value <i>k</i> such that:
* <blockquote><pre>
* (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= startPos)
* </pre></blockquote>
* is true. For other values of <code>searchChar</code>, it is the
* smallest value <i>k</i> such that:
* <blockquote><pre>
* (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= startPos)
* </pre></blockquote>
* is true. In either case, if no such character occurs in <code>seq</code>
* at or after position <code>startPos</code>, then
* <code>-1</code> is returned.
*
* <p>
* There is no restriction on the value of <code>startPos</code>. If it
* is negative, it has the same effect as if it were zero: the entire
* <code>CharSequence</code> may be searched. If it is greater than the length of
* <code>seq</code>, it has the same effect as if it were equal to the length of
* <code>seq</code>: {@code (INDEX_NOT_FOUND) -1} is returned. Furthermore, a
* {@code null} or empty ("") CharSequence will
* return {@code (INDEX_NOT_FOUND) -1}.
*
* <p>All indices are specified in <code>char</code> values
* (Unicode code units).
*
* <pre>
* StringUtils.indexOf(null, *, *) = -1
Expand All @@ -1327,6 +1370,7 @@ public static int indexOf(final CharSequence seq, final int searchChar) {
* -1 if no match or {@code null} string input
* @since 2.0
* @since 3.0 Changed signature from indexOf(String, int, int) to indexOf(CharSequence, int, int)
* @since 3.6 Updated {@link CharSequenceUtils} call to behave more like <code>String</code>
*/
public static int indexOf(final CharSequence seq, final int searchChar, final int startPos) {
if (isEmpty(seq)) {
Expand Down Expand Up @@ -1586,10 +1630,23 @@ public static int indexOfIgnoreCase(final CharSequence str, final CharSequence s
// LastIndexOf
//-----------------------------------------------------------------------
/**
* <p>Finds the last index within a CharSequence, handling {@code null}.
* This method uses {@link String#lastIndexOf(int)} if possible.</p>
*
* <p>A {@code null} or empty ("") CharSequence will return {@code -1}.</p>
* Returns the index within <code>seq</code> of the last occurrence of
* the specified character. For values of <code>searchChar</code> in the
* range from 0 to 0xFFFF (inclusive), the index (in Unicode code
* units) returned is the largest value <i>k</i> such that:
* <blockquote><pre>
* this.charAt(<i>k</i>) == searchChar
* </pre></blockquote>
* is true. For other values of <code>searchChar</code>, it is the
* largest value <i>k</i> such that:
* <blockquote><pre>
* this.codePointAt(<i>k</i>) == searchChar
* </pre></blockquote>
* is true. In either case, if no such character occurs in
* <code>seq</code>, then <code>-1</code> is returned. Furthermore, a {@code null} or empty ("")
* <code>CharSequence</code> will return {@code -1}. The
* <code>seq</code> <code>CharSequence</code> object is searched backwards
* starting at the last character.
*
* <pre>
* StringUtils.lastIndexOf(null, *) = -1
Expand All @@ -1598,12 +1655,13 @@ public static int indexOfIgnoreCase(final CharSequence str, final CharSequence s
* StringUtils.lastIndexOf("aabaabaa", 'b') = 5
* </pre>
*
* @param seq the CharSequence to check, may be null
* @param seq the <code>CharSequence</code> to check, may be null
* @param searchChar the character to find
* @return the last index of the search character,
* -1 if no match or {@code null} string input
* @since 2.0
* @since 3.0 Changed signature from lastIndexOf(String, int) to lastIndexOf(CharSequence, int)
* @since 3.6 Updated {@link CharSequenceUtils} call to behave more like <code>String</code>
*/
public static int lastIndexOf(final CharSequence seq, final int searchChar) {
if (isEmpty(seq)) {
Expand All @@ -1613,16 +1671,29 @@ public static int lastIndexOf(final CharSequence seq, final int searchChar) {
}

/**
* <p>Finds the last index within a CharSequence from a start position,
* handling {@code null}.
* This method uses {@link String#lastIndexOf(int, int)} if possible.</p>
*
* <p>A {@code null} or empty ("") CharSequence will return {@code -1}.
* A negative start position returns {@code -1}.
* A start position greater than the string length searches the whole string.
* The search starts at the startPos and works backwards; matches starting after the start
* position are ignored.
* </p>
* Returns the index within <code>seq</code> of the last occurrence of
* the specified character, searching backward starting at the
* specified index. For values of <code>searchChar</code> in the range
* from 0 to 0xFFFF (inclusive), the index returned is the largest
* value <i>k</i> such that:
* <blockquote><pre>
* (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= startPos)
* </pre></blockquote>
* is true. For other values of <code>searchChar</code>, it is the
* largest value <i>k</i> such that:
* <blockquote><pre>
* (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= startPos)
* </pre></blockquote>
* is true. In either case, if no such character occurs in <code>seq</code>
* at or before position <code>startPos</code>, then
* <code>-1</code> is returned. Furthermore, a {@code null} or empty ("")
* <code>CharSequence</code> will return {@code -1}. A start position greater
* than the string length searches the whole string.
* The search starts at the <code>startPos</code> and works backwards;
* matches starting after the start position are ignored.
*
* <p>All indices are specified in <code>char</code> values
* (Unicode code units).
*
* <pre>
* StringUtils.lastIndexOf(null, *, *) = -1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,28 @@ public void testIndexOf_charInt() {
assertEquals(2, StringUtils.indexOf("aabaabaa", 'b', -1));

assertEquals(5, StringUtils.indexOf(new StringBuilder("aabaabaa"), 'b', 3));

//LANG-1300 tests go here
final int CODE_POINT = 0x2070E;
StringBuilder builder = new StringBuilder();
builder.appendCodePoint(CODE_POINT);
assertEquals(0, StringUtils.indexOf(builder, CODE_POINT, 0));
assertEquals(0, StringUtils.indexOf(builder.toString(), CODE_POINT, 0));
builder.appendCodePoint(CODE_POINT);
assertEquals(2, StringUtils.indexOf(builder, CODE_POINT, 1));
assertEquals(2, StringUtils.indexOf(builder.toString(), CODE_POINT, 1));
//inner branch on the supplementary character block
char[] tmp = {(char) 55361};
builder = new StringBuilder();
builder.append(tmp);
assertEquals(-1, StringUtils.indexOf(builder, CODE_POINT, 0));
assertEquals(-1, StringUtils.indexOf(builder.toString(), CODE_POINT, 0));
builder.appendCodePoint(CODE_POINT);
assertEquals(1, StringUtils.indexOf(builder, CODE_POINT, 0));
assertEquals(1, StringUtils.indexOf(builder.toString(), CODE_POINT, 0));
assertEquals(-1, StringUtils.indexOf(builder, CODE_POINT, 2));
assertEquals(-1, StringUtils.indexOf(builder.toString(), CODE_POINT, 2));

}

@Test
Expand Down Expand Up @@ -525,6 +547,33 @@ public void testLastIndexOf_charInt() {
assertEquals(0, StringUtils.lastIndexOf("aabaabaa", 'a', 0));

assertEquals(2, StringUtils.lastIndexOf(new StringBuilder("aabaabaa"), 'b', 2));

//LANG-1300 addition test
final int CODE_POINT = 0x2070E;
StringBuilder builder = new StringBuilder();
builder.appendCodePoint(CODE_POINT);
assertEquals(0, StringUtils.lastIndexOf(builder, CODE_POINT, 0));
builder.appendCodePoint(CODE_POINT);
assertEquals(0, StringUtils.lastIndexOf(builder, CODE_POINT, 0));
assertEquals(0, StringUtils.lastIndexOf(builder, CODE_POINT, 1));
assertEquals(2, StringUtils.lastIndexOf(builder, CODE_POINT, 2));



builder.append("aaaaa");
assertEquals(2, StringUtils.lastIndexOf(builder, CODE_POINT, 4));
//inner branch on the supplementary character block
char[] tmp = {(char) 55361};
builder = new StringBuilder();
builder.append(tmp);
assertEquals(-1, StringUtils.lastIndexOf(builder, CODE_POINT, 0));
builder.appendCodePoint(CODE_POINT);
assertEquals(-1, StringUtils.lastIndexOf(builder, CODE_POINT, 0 ));
assertEquals(1, StringUtils.lastIndexOf(builder, CODE_POINT, 1 ));
assertEquals(-1, StringUtils.lastIndexOf(builder.toString(), CODE_POINT, 0));
assertEquals(1, StringUtils.lastIndexOf(builder.toString(), CODE_POINT, 1));


}

@Test
Expand Down