Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* Benchmarks for the ASCII class.
*
* @author Kevin Bourrillion
* @author François Martin
*/
public class AsciiBenchmark {
private static final String ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
Expand Down Expand Up @@ -169,4 +170,30 @@ static String charSequenceToUpperCase(CharSequence chars) {
}
return String.valueOf(newChars);
}

/**
 * Baseline for the case-insensitive search: lower-cases both operands on every repetition and
 * then runs a plain {@link String#indexOf(String)}.
 *
 * @param reps number of times to repeat the measured operation
 * @return the XOR of every search result, seeded with {@code -1}
 */
@Benchmark
int indexOfIgnoreCaseInAdvance(int reps) {
  int halfTestStringLength = testString.length() / 2;
  String lhs = testString;
  // The needle is the upper-cased second half of the haystack. The one-argument substring is
  // required here: substring(n, n) is always the empty string, which would turn the search
  // into a trivial match at index 0.
  String rhs = testString.toUpperCase().substring(halfTestStringLength);

  int dummy = -1;
  for (int i = 0; i < reps; i++) {
    dummy ^= Ascii.toLowerCase(lhs).indexOf(Ascii.toLowerCase(rhs));
  }
  return dummy;
}

/**
 * Measures {@code Ascii.indexOfIgnoreCase} searching the test string for the upper-cased
 * second half of itself.
 *
 * @param reps number of times to repeat the measured operation
 * @return the XOR of every search result, seeded with {@code -1}
 */
@Benchmark
int indexOfIgnoreCaseAscii(int reps) {
  int halfTestStringLength = testString.length() / 2;
  String lhs = testString;
  // The needle is the upper-cased second half of the haystack. The one-argument substring is
  // required here: substring(n, n) is always the empty string, which would turn the search
  // into a trivial match at index 0.
  String rhs = testString.toUpperCase().substring(halfTestStringLength);

  int dummy = -1;
  for (int i = 0; i < reps; i++) {
    dummy ^= Ascii.indexOfIgnoreCase(lhs, rhs);
  }
  return dummy;
}
}
139 changes: 139 additions & 0 deletions guava-tests/test/com/google/common/base/AsciiTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Unit test for {@link Ascii}.
*
* @author Craig Berry
* @author François Martin
*/
@GwtCompatible
public class AsciiTest extends TestCase {
Expand Down Expand Up @@ -151,4 +152,142 @@ public void testEqualsIgnoreCaseUnicodeEquivalence() {
assertFalse("pa\u00dfword".equalsIgnoreCase("PASSWORD")); // [*]
assertFalse(Ascii.equalsIgnoreCase("pa\u00dfword", "PASSWORD"));
}

/**
 * Exercises both {@code Ascii.indexOfIgnoreCase} overloads: empty operands, non-letter
 * neighbours of the alphabetic range, match positions, case folding, and {@code fromIndex}
 * values below, inside, at and beyond the bounds of the searched sequence.
 */
public void testIndexOfIgnoreCase() {
  assertEquals(0, Ascii.indexOfIgnoreCase("", ""));
  assertEquals(-1, Ascii.indexOfIgnoreCase("", "x"));
  assertEquals(0, Ascii.indexOfIgnoreCase("x", ""));
  assertEquals(0, Ascii.indexOfIgnoreCase(LOWER, UPPER));
  assertEquals(0, Ascii.indexOfIgnoreCase(UPPER, LOWER));
  // Create new strings here to avoid early-out logic.
  assertEquals(0, Ascii.indexOfIgnoreCase(new String(IGNORED), new String(IGNORED)));
  // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
  assertEquals(-1, Ascii.indexOfIgnoreCase("@", "`"));
  assertEquals(-1, Ascii.indexOfIgnoreCase("[", "{"));
  // Test matched substrings
  assertEquals(0, Ascii.indexOfIgnoreCase("abcd", "a")); // first
  assertEquals(0, Ascii.indexOfIgnoreCase("abcd", "abc")); // beginning
  assertEquals(1, Ascii.indexOfIgnoreCase("abcd", "bcd")); // end
  assertEquals(1, Ascii.indexOfIgnoreCase("abcd", "bc")); // middle
  assertEquals(-1, Ascii.indexOfIgnoreCase("abcd", "efgh")); // non-matching
  assertEquals(3, Ascii.indexOfIgnoreCase("abcd", "d")); // last
  // Test for case insensitivity
  assertEquals(0, Ascii.indexOfIgnoreCase("aBcD", "A")); // first
  assertEquals(0, Ascii.indexOfIgnoreCase("aBcD", "AbC")); // beginning
  assertEquals(1, Ascii.indexOfIgnoreCase("aBcD", "bCd")); // end
  assertEquals(1, Ascii.indexOfIgnoreCase("aBcD", "bC")); // middle
  assertEquals(-1, Ascii.indexOfIgnoreCase("aBcD", "EFGH")); // non-matching
  assertEquals(3, Ascii.indexOfIgnoreCase("aBcD", "d")); // last
  // Test with fromIndex < 0 (treated as 0)
  assertEquals(3, Ascii.indexOfIgnoreCase("aBcD", "d", -1));
  // Test with fromIndex inside the sequence: matches before fromIndex are skipped
  assertEquals(2, Ascii.indexOfIgnoreCase("abAB", "ab", 1));
  assertEquals(-1, Ascii.indexOfIgnoreCase("abAB", "ab", 3)); // not enough characters left
  // Test with fromIndex at or beyond the end: only the empty subsequence can match
  assertEquals(-1, Ascii.indexOfIgnoreCase("aBcD", "d", 4));
  assertEquals(4, Ascii.indexOfIgnoreCase("aBcD", "", 4));
  assertEquals(4, Ascii.indexOfIgnoreCase("aBcD", "", 10));
  // Test the empty subsequence with an in-range fromIndex: it matches right there
  assertEquals(2, Ascii.indexOfIgnoreCase("aBcD", "", 2));
}

/**
 * Checks {@code Ascii.containsIgnoreCase} against empty operands, the shared fixtures,
 * non-letter neighbours of the alphabetic range, and substrings at every position.
 */
public void testContainsIgnoreCase() {
  // Empty operands.
  assertTrue(Ascii.containsIgnoreCase("", ""));
  assertFalse(Ascii.containsIgnoreCase("", "x"));
  assertTrue(Ascii.containsIgnoreCase("x", ""));

  // The LOWER and UPPER fixtures must contain each other.
  assertTrue(Ascii.containsIgnoreCase(LOWER, UPPER));
  assertTrue(Ascii.containsIgnoreCase(UPPER, LOWER));

  // Use distinct instances so the same-reference shortcut cannot answer on our behalf.
  String ignoredCopy = new String(IGNORED);
  String secondIgnoredCopy = new String(IGNORED);
  CharSequence ignoredSlice = new String(IGNORED).subSequence(3, 6);
  assertTrue(Ascii.containsIgnoreCase(ignoredCopy, secondIgnoredCopy));
  assertTrue(Ascii.containsIgnoreCase(ignoredCopy, ignoredSlice));
  assertFalse(Ascii.containsIgnoreCase(ignoredSlice, ignoredCopy));

  // Characters adjacent to the letters ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1) must not be folded.
  assertFalse(Ascii.containsIgnoreCase("@", "`"));
  assertFalse(Ascii.containsIgnoreCase("[", "{"));

  // Same-case substrings.
  String lower = "abcd";
  assertTrue(Ascii.containsIgnoreCase(lower, "abc")); // beginning
  assertTrue(Ascii.containsIgnoreCase(lower, "bcd")); // end
  assertTrue(Ascii.containsIgnoreCase(lower, "bc")); // middle
  assertFalse(Ascii.containsIgnoreCase(lower, "efgh")); // non-matching

  // Substrings whose case differs from the searched sequence.
  String mixed = "aBcD";
  assertTrue(Ascii.containsIgnoreCase(mixed, "AbC")); // beginning
  assertTrue(Ascii.containsIgnoreCase(mixed, "bCd")); // end
  assertTrue(Ascii.containsIgnoreCase(mixed, "bC")); // middle
  assertFalse(Ascii.containsIgnoreCase(mixed, "EFGH")); // non-matching
}

/**
 * Exercises both {@code Ascii.startsWithIgnoreCase} overloads: empty operands, non-letter
 * neighbours of the alphabetic range, prefixes versus inner/trailing substrings, case folding,
 * and {@code fromIndex} values below, inside, at and beyond the bounds of the sequence.
 */
public void testStartsWithIgnoreCase() {
  assertTrue(Ascii.startsWithIgnoreCase("", ""));
  assertFalse(Ascii.startsWithIgnoreCase("", "x"));
  assertTrue(Ascii.startsWithIgnoreCase("x", ""));
  assertTrue(Ascii.startsWithIgnoreCase(LOWER, UPPER));
  assertTrue(Ascii.startsWithIgnoreCase(UPPER, LOWER));
  // Create new strings here to avoid early-out logic.
  assertTrue(Ascii.startsWithIgnoreCase(new String(IGNORED), new String(IGNORED)));
  assertFalse(
      Ascii.startsWithIgnoreCase(new String(IGNORED), new String(IGNORED).subSequence(3, 6)));
  assertFalse(
      Ascii.startsWithIgnoreCase(new String(IGNORED).subSequence(3, 6), new String(IGNORED)));
  assertTrue(
      Ascii.startsWithIgnoreCase(new String(IGNORED), new String(IGNORED).subSequence(0, 6)));
  assertFalse(
      Ascii.startsWithIgnoreCase(new String(IGNORED).subSequence(0, 6), new String(IGNORED)));
  // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
  assertFalse(Ascii.startsWithIgnoreCase("@", "`"));
  assertFalse(Ascii.startsWithIgnoreCase("[", "{"));
  // Test matched substrings
  assertTrue(Ascii.startsWithIgnoreCase("abcd", "abc")); // beginning
  assertFalse(Ascii.startsWithIgnoreCase("abcd", "bcd")); // end
  assertFalse(Ascii.startsWithIgnoreCase("abcd", "bc")); // middle
  assertFalse(Ascii.startsWithIgnoreCase("abcd", "efgh")); // non-matching
  // Test for case insensitivity
  assertTrue(Ascii.startsWithIgnoreCase("aBcD", "AbC")); // beginning
  assertFalse(Ascii.startsWithIgnoreCase("aBcD", "bCd")); // end
  assertFalse(Ascii.startsWithIgnoreCase("aBcD", "bC")); // middle
  assertFalse(Ascii.startsWithIgnoreCase("aBcD", "EFGH")); // non-matching
  // Test with different indices
  assertTrue(Ascii.startsWithIgnoreCase("aaa", "a", 1));
  assertTrue(Ascii.startsWithIgnoreCase("baa", "a", 1));
  assertTrue(Ascii.startsWithIgnoreCase("bba", "a", 2));
  assertTrue(Ascii.startsWithIgnoreCase("abcd", "BC", 1)); // case folding at an offset
  assertFalse(Ascii.startsWithIgnoreCase("baa", "a", 0)); // mismatch at the given index
  // Test with indices outside the usable range
  assertFalse(Ascii.startsWithIgnoreCase("abc", "a", -1)); // negative index never matches
  assertFalse(Ascii.startsWithIgnoreCase("abc", "c", 3)); // index == length, non-empty prefix
  assertFalse(Ascii.startsWithIgnoreCase("abc", "bc", 2)); // prefix would run past the end
  assertTrue(Ascii.startsWithIgnoreCase("abc", "", 3)); // empty prefix fits at index == length
  assertFalse(Ascii.startsWithIgnoreCase("abc", "", 4)); // but not beyond the end
}

/**
 * Checks {@code Ascii.endsWithIgnoreCase} against empty operands, the shared fixtures,
 * non-letter neighbours of the alphabetic range, and substrings at every position.
 */
public void testEndsWithIgnoreCase() {
  // Empty operands.
  assertTrue(Ascii.endsWithIgnoreCase("", ""));
  assertFalse(Ascii.endsWithIgnoreCase("", "x"));
  assertTrue(Ascii.endsWithIgnoreCase("x", ""));

  // The LOWER and UPPER fixtures must end with each other.
  assertTrue(Ascii.endsWithIgnoreCase(LOWER, UPPER));
  assertTrue(Ascii.endsWithIgnoreCase(UPPER, LOWER));

  // Use distinct instances so no same-reference shortcut can answer on our behalf.
  String ignoredCopy = new String(IGNORED);
  String secondIgnoredCopy = new String(IGNORED);
  assertTrue(Ascii.endsWithIgnoreCase(ignoredCopy, secondIgnoredCopy));

  // A slice from the middle is not a suffix, in either argument order.
  CharSequence innerSlice = new String(IGNORED).subSequence(3, 6);
  assertFalse(Ascii.endsWithIgnoreCase(ignoredCopy, innerSlice));
  assertFalse(Ascii.endsWithIgnoreCase(innerSlice, ignoredCopy));

  // A slice running to the end is a suffix of the whole, but the whole is not a suffix of it.
  CharSequence tailSlice = new String(IGNORED).subSequence(3, IGNORED.length());
  assertTrue(Ascii.endsWithIgnoreCase(ignoredCopy, tailSlice));
  assertFalse(Ascii.endsWithIgnoreCase(tailSlice, ignoredCopy));

  // Characters adjacent to the letters ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1) must not be folded.
  assertFalse(Ascii.endsWithIgnoreCase("@", "`"));
  assertFalse(Ascii.endsWithIgnoreCase("[", "{"));

  // Same-case substrings.
  String lower = "abcd";
  assertFalse(Ascii.endsWithIgnoreCase(lower, "abc")); // beginning
  assertTrue(Ascii.endsWithIgnoreCase(lower, "bcd")); // end
  assertFalse(Ascii.endsWithIgnoreCase(lower, "bc")); // middle
  assertFalse(Ascii.endsWithIgnoreCase(lower, "efgh")); // non-matching

  // Substrings whose case differs from the searched sequence.
  String mixed = "aBcD";
  assertFalse(Ascii.endsWithIgnoreCase(mixed, "AbC")); // beginning
  assertTrue(Ascii.endsWithIgnoreCase(mixed, "bCd")); // end
  assertFalse(Ascii.endsWithIgnoreCase(mixed, "bC")); // middle
  assertFalse(Ascii.endsWithIgnoreCase(mixed, "EFGH")); // non-matching

  // Multiple occurrences of the suffix in the sequence.
  assertTrue(Ascii.endsWithIgnoreCase("aaa", "a"));
}
}
183 changes: 183 additions & 0 deletions guava/src/com/google/common/base/Ascii.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
*
* @author Craig Berry
* @author Gregory Kick
* @author François Martin
* @since 7.0
*/
@GwtCompatible
Expand Down Expand Up @@ -629,4 +630,186 @@ private static int getAlphaIndex(char c) {
// Fold upper-case ASCII to lower-case and make zero-indexed and unsigned (by casting to char).
return (char) ((c | 0x20) - 'a');
}

/**
 * Finds the first occurrence of a region of {@code target} inside a region of {@code source},
 * treating the ASCII letters {@code 'a'}-{@code 'z'} and {@code 'A'}-{@code 'Z'} as equal
 * regardless of case.
 *
 * @param source the characters being searched.
 * @param sourceOffset index in {@code source} at which the searched region begins.
 * @param sourceCount number of characters in the searched region.
 * @param target the characters being searched for.
 * @param targetOffset index in {@code target} at which the sought region begins.
 * @param targetCount number of characters in the sought region.
 * @param fromIndex position, relative to {@code sourceOffset}, at which the search starts;
 *     negative values are treated as {@code 0}.
 * @return the position of the first match relative to {@code sourceOffset}, or {@code -1} if
 *     there is none. An empty target matches at the clamped start position, or at
 *     {@code sourceCount} when {@code fromIndex >= sourceCount}.
 */
private static int indexOfIgnoreCase(CharSequence source, int sourceOffset, int sourceCount,
    CharSequence target, int targetOffset, int targetCount,
    int fromIndex) {
  if (fromIndex >= sourceCount) {
    // Nothing left to scan: only an empty target can still "match", at the very end.
    return (targetCount == 0) ? sourceCount : -1;
  }
  int start = Math.max(fromIndex, 0);
  if (targetCount == 0) {
    return start;
  }

  char head = target.charAt(targetOffset);
  int headAlpha = getAlphaIndex(head);
  // Last absolute position at which the whole target still fits into the source region.
  int lastStart = sourceOffset + (sourceCount - targetCount);

  for (int pos = sourceOffset + start; pos <= lastStart; pos++) {
    // The head matches if it is the identical char, or both chars are the same ASCII letter
    // (alpha index below 26) in different case.
    char candidate = source.charAt(pos);
    if (candidate != head) {
      int candidateAlpha = getAlphaIndex(candidate);
      if (candidateAlpha >= 26 || candidateAlpha != headAlpha) {
        continue;
      }
    }

    // Head matched; compare the remaining characters of the target pairwise.
    int matched = 1;
    while (matched < targetCount) {
      char sourceChar = source.charAt(pos + matched);
      char targetChar = target.charAt(targetOffset + matched);
      if (sourceChar != targetChar) {
        int sourceAlpha = getAlphaIndex(sourceChar);
        if (sourceAlpha >= 26 || sourceAlpha != getAlphaIndex(targetChar)) {
          break;
        }
      }
      matched++;
    }

    if (matched == targetCount) {
      // Whole target found; report the position relative to the region start.
      return pos - sourceOffset;
    }
  }
  return -1;
}

/**
 * Locates the first occurrence of {@code subSequence} within {@code sequence} at or after
 * {@code fromIndex}. ASCII alphabetic characters between {@code 'a'} and {@code 'z'} or
 * {@code 'A'} and {@code 'Z'} inclusive are compared without regard to case.
 *
 * @param sequence the sequence to be searched in.
 * @param subSequence the subsequence to search for.
 * @param fromIndex the index from which to start the search.
 * @return the index of the first occurrence of the {@code subSequence}, or {@code -1} if there is
 *     no such occurrence.
 *
 * @since NEXT
 */
public static int indexOfIgnoreCase(
    CharSequence sequence, CharSequence subSequence, int fromIndex) {
  // Search the complete contents of both arguments: offset 0, full length.
  int sequenceLength = sequence.length();
  int subSequenceLength = subSequence.length();
  return indexOfIgnoreCase(
      sequence, 0, sequenceLength, subSequence, 0, subSequenceLength, fromIndex);
}

/**
 * Locates the first occurrence of {@code subSequence} within {@code sequence}, searching from
 * the beginning. ASCII alphabetic characters between {@code 'a'} and {@code 'z'} or
 * {@code 'A'} and {@code 'Z'} inclusive are compared without regard to case.
 *
 * @param sequence the sequence to be searched in.
 * @param subSequence the subsequence to search for.
 * @return the index of the first occurrence of the {@code subSequence},
 *     or {@code -1} if there is no such occurrence.
 *
 * @since NEXT
 */
public static int indexOfIgnoreCase(CharSequence sequence, CharSequence subSequence) {
  final int searchStart = 0;
  return indexOfIgnoreCase(sequence, subSequence, searchStart);
}

/**
 * Tells whether {@code subSequence} occurs anywhere inside {@code sequence}. ASCII alphabetic
 * characters between {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive are
 * compared without regard to case.
 *
 * @param sequence the sequence to be searched in.
 * @param subSequence the subsequence to search for.
 * @return {@code true} if both arguments are the same object or {@code subSequence} is found in
 *     {@code sequence}; {@code false} otherwise.
 *
 * @since NEXT
 */
public static boolean containsIgnoreCase(CharSequence sequence, CharSequence subSequence) {
  // Read the length up front: it doubles as the null check and must run before any early exit.
  int sequenceLength = sequence.length();
  if (sequence != subSequence) {
    // A subsequence longer than the sequence can never be contained, so skip the search.
    boolean fits = subSequence.length() <= sequenceLength;
    return fits && indexOfIgnoreCase(sequence, subSequence) >= 0;
  }
  return true;
}

/**
 * Tells whether the characters of {@code seq} beginning at {@code fromIndex} start with
 * {@code prefix}. ASCII alphabetic characters between {@code 'a'} and {@code 'z'} or
 * {@code 'A'} and {@code 'Z'} inclusive are compared without regard to case.
 *
 * @param seq the sequence to be examined.
 * @param prefix the prefix to look for.
 * @param fromIndex the index in {@code seq} at which the prefix is expected to begin.
 * @return {@code true} if {@code prefix} matches at {@code fromIndex}; {@code false} if it does
 *     not, if {@code fromIndex} is negative, or if {@code prefix} would extend past the end of
 *     {@code seq}.
 *
 * @since NEXT
 */
public static boolean startsWithIgnoreCase(CharSequence seq, CharSequence prefix, int fromIndex) {
  int prefixLength = prefix.length();
  // Comparing against (length - prefixLength) instead of adding to fromIndex keeps the check
  // safe even when fromIndex is close to Integer.MAX_VALUE.
  if (fromIndex < 0 || fromIndex > seq.length() - prefixLength) {
    return false;
  }
  for (int offset = 0; offset < prefixLength; offset++) {
    char actual = seq.charAt(fromIndex + offset);
    char expected = prefix.charAt(offset);
    if (actual != expected) {
      // Different chars still match when both are the same ASCII letter (alpha index < 26).
      int actualAlpha = getAlphaIndex(actual);
      if (actualAlpha >= 26 || actualAlpha != getAlphaIndex(expected)) {
        return false;
      }
    }
  }
  return true;
}

/**
 * Tells whether {@code seq} starts with {@code prefix}. ASCII alphabetic characters between
 * {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive are compared without
 * regard to case.
 *
 * @param seq the sequence to be examined.
 * @param prefix the prefix to look for.
 * @return {@code true} if {@code prefix} matches at the beginning of {@code seq}.
 *
 * @since NEXT
 */
public static boolean startsWithIgnoreCase(CharSequence seq, CharSequence prefix) {
  final int sequenceStart = 0;
  return startsWithIgnoreCase(seq, prefix, sequenceStart);
}

/**
 * Tells whether {@code seq} ends with {@code suffix}. ASCII alphabetic characters between
 * {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive are compared without
 * regard to case.
 *
 * @param seq the sequence to be examined.
 * @param suffix the suffix to look for.
 * @return {@code true} if {@code suffix} matches at the end of {@code seq}; {@code false}
 *     otherwise, including when {@code suffix} is longer than {@code seq}.
 *
 * @since NEXT
 */
public static boolean endsWithIgnoreCase(CharSequence seq, CharSequence suffix) {
  // A suffix is a prefix of the tail that starts here; a negative start (suffix longer than
  // seq) is rejected by startsWithIgnoreCase.
  int suffixStart = seq.length() - suffix.length();
  return startsWithIgnoreCase(seq, suffix, suffixStart);
}

}