diff --git a/guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java b/guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java
index 6ed8006ee903..55f90d0c7d13 100644
--- a/guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java
+++ b/guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java
@@ -30,6 +30,7 @@
  * Benchmarks for the ASCII class.
  *
  * @author Kevin Bourrillion
+ * @author François Martin
  */
 public class AsciiBenchmark {
   private static final String ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
@@ -169,4 +170,32 @@ static String charSequenceToUpperCase(CharSequence chars) {
     }
     return String.valueOf(newChars);
   }
+
+  // Baseline: lower-case both operands up front, then search with String.indexOf.
+  @Benchmark
+  int indexOfIgnoreCaseInAdvance(int reps) {
+    int halfTestStringLength = testString.length() / 2;
+    String lhs = testString;
+    String rhs = Ascii.toUpperCase(testString).substring(halfTestStringLength);
+
+    int dummy = -1;
+    for (int i = 0; i < reps; i++) {
+      dummy ^= Ascii.toLowerCase(lhs).indexOf(Ascii.toLowerCase(rhs));
+    }
+    return dummy;
+  }
+
+  // Candidate: search with Ascii.indexOfIgnoreCase, without creating lower-cased copies.
+  @Benchmark
+  int indexOfIgnoreCaseAscii(int reps) {
+    int halfTestStringLength = testString.length() / 2;
+    String lhs = testString;
+    String rhs = Ascii.toUpperCase(testString).substring(halfTestStringLength);
+
+    int dummy = -1;
+    for (int i = 0; i < reps; i++) {
+      dummy ^= Ascii.indexOfIgnoreCase(lhs, rhs);
+    }
+    return dummy;
+  }
 }
diff --git a/guava-tests/test/com/google/common/base/AsciiTest.java b/guava-tests/test/com/google/common/base/AsciiTest.java
index d3a1f8f6597e..c2b1d2d4d7ff 100644
--- a/guava-tests/test/com/google/common/base/AsciiTest.java
+++ b/guava-tests/test/com/google/common/base/AsciiTest.java
@@ -24,6 +24,7 @@
  * Unit test for {@link Ascii}.
  *
  * @author Craig Berry
+ * @author François Martin
  */
 @GwtCompatible
 public class AsciiTest extends TestCase {
@@ -151,4 +152,142 @@ public void testEqualsIgnoreCaseUnicodeEquivalence() {
     assertFalse("pa\u00dfword".equalsIgnoreCase("PASSWORD")); // [*]
     assertFalse(Ascii.equalsIgnoreCase("pa\u00dfword", "PASSWORD"));
   }
+
+  public void testIndexOfIgnoreCase() {
+    assertEquals(0, Ascii.indexOfIgnoreCase("", ""));
+    assertEquals(-1, Ascii.indexOfIgnoreCase("", "x"));
+    assertEquals(0, Ascii.indexOfIgnoreCase("x", ""));
+    assertEquals(0, Ascii.indexOfIgnoreCase(LOWER, UPPER));
+    assertEquals(0, Ascii.indexOfIgnoreCase(UPPER, LOWER));
+    // Create new strings here to avoid early-out logic.
+    assertEquals(0, Ascii.indexOfIgnoreCase(new String(IGNORED), new String(IGNORED)));
+    // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
+    assertEquals(-1, Ascii.indexOfIgnoreCase("@", "`"));
+    assertEquals(-1, Ascii.indexOfIgnoreCase("[", "{"));
+    // Test matched substrings
+    assertEquals(0, Ascii.indexOfIgnoreCase("abcd", "a")); // first
+    assertEquals(0, Ascii.indexOfIgnoreCase("abcd", "abc")); // beginning
+    assertEquals(1, Ascii.indexOfIgnoreCase("abcd", "bcd")); // end
+    assertEquals(1, Ascii.indexOfIgnoreCase("abcd", "bc")); // middle
+    assertEquals(-1, Ascii.indexOfIgnoreCase("abcd", "efgh")); // non-matching
+    assertEquals(3, Ascii.indexOfIgnoreCase("abcd", "d")); // last
+    // Test for case insensitivity
+    assertEquals(0, Ascii.indexOfIgnoreCase("aBcD", "A")); // first
+    assertEquals(0, Ascii.indexOfIgnoreCase("aBcD", "AbC")); // beginning
+    assertEquals(1, Ascii.indexOfIgnoreCase("aBcD", "bCd")); // end
+    assertEquals(1, Ascii.indexOfIgnoreCase("aBcD", "bC")); // middle
+    assertEquals(-1, Ascii.indexOfIgnoreCase("aBcD", "EFGH")); // non-matching
+    assertEquals(3, Ascii.indexOfIgnoreCase("aBcD", "d")); // last
+    // Test with fromIndex < 0
+    assertEquals(3, Ascii.indexOfIgnoreCase("aBcD", "d", -1));
+  }
+
+  public void testContainsIgnoreCase() {
+    assertTrue(Ascii.containsIgnoreCase("", ""));
+    assertFalse(Ascii.containsIgnoreCase("", "x"));
+    assertTrue(Ascii.containsIgnoreCase("x", ""));
+    assertTrue(Ascii.containsIgnoreCase(LOWER, UPPER));
+    assertTrue(Ascii.containsIgnoreCase(UPPER, LOWER));
+    // Create new strings here to avoid early-out logic.
+    assertTrue(Ascii.containsIgnoreCase(new String(IGNORED), new String(IGNORED)));
+    assertTrue(
+        Ascii.containsIgnoreCase(new String(IGNORED), new String(IGNORED).subSequence(3, 6))
+    );
+    assertFalse(
+        Ascii.containsIgnoreCase(new String(IGNORED).subSequence(3, 6), new String(IGNORED))
+    );
+    // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
+    assertFalse(Ascii.containsIgnoreCase("@", "`"));
+    assertFalse(Ascii.containsIgnoreCase("[", "{"));
+    // Test matched substrings
+    assertTrue(Ascii.containsIgnoreCase("abcd", "abc")); // beginning
+    assertTrue(Ascii.containsIgnoreCase("abcd", "bcd")); // end
+    assertTrue(Ascii.containsIgnoreCase("abcd", "bc")); // middle
+    assertFalse(Ascii.containsIgnoreCase("abcd", "efgh")); // non-matching
+    // Test for case insensitivity
+    assertTrue(Ascii.containsIgnoreCase("aBcD", "AbC")); // beginning
+    assertTrue(Ascii.containsIgnoreCase("aBcD", "bCd")); // end
+    assertTrue(Ascii.containsIgnoreCase("aBcD", "bC")); // middle
+    assertFalse(Ascii.containsIgnoreCase("aBcD", "EFGH")); // non-matching
+  }
+
+  public void testStartsWithIgnoreCase() {
+    assertTrue(Ascii.startsWithIgnoreCase("", ""));
+    assertFalse(Ascii.startsWithIgnoreCase("", "x"));
+    assertTrue(Ascii.startsWithIgnoreCase("x", ""));
+    assertTrue(Ascii.startsWithIgnoreCase(LOWER, UPPER));
+    assertTrue(Ascii.startsWithIgnoreCase(UPPER, LOWER));
+    // Create new strings here to avoid early-out logic.
+    assertTrue(Ascii.startsWithIgnoreCase(new String(IGNORED), new String(IGNORED)));
+    assertFalse(
+        Ascii.startsWithIgnoreCase(new String(IGNORED), new String(IGNORED).subSequence(3, 6))
+    );
+    assertFalse(
+        Ascii.startsWithIgnoreCase(new String(IGNORED).subSequence(3, 6), new String(IGNORED))
+    );
+    assertTrue(
+        Ascii.startsWithIgnoreCase(new String(IGNORED), new String(IGNORED).subSequence(0, 6))
+    );
+    assertFalse(
+        Ascii.startsWithIgnoreCase(new String(IGNORED).subSequence(0, 6), new String(IGNORED))
+    );
+    // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
+    assertFalse(Ascii.startsWithIgnoreCase("@", "`"));
+    assertFalse(Ascii.startsWithIgnoreCase("[", "{"));
+    // Test matched substrings
+    assertTrue(Ascii.startsWithIgnoreCase("abcd", "abc")); // beginning
+    assertFalse(Ascii.startsWithIgnoreCase("abcd", "bcd")); // end
+    assertFalse(Ascii.startsWithIgnoreCase("abcd", "bc")); // middle
+    assertFalse(Ascii.startsWithIgnoreCase("abcd", "efgh")); // non-matching
+    // Test for case insensitivity
+    assertTrue(Ascii.startsWithIgnoreCase("aBcD", "AbC")); // beginning
+    assertFalse(Ascii.startsWithIgnoreCase("aBcD", "bCd")); // end
+    assertFalse(Ascii.startsWithIgnoreCase("aBcD", "bC")); // middle
+    assertFalse(Ascii.startsWithIgnoreCase("aBcD", "EFGH")); // non-matching
+    // Test with different indices
+    assertTrue(Ascii.startsWithIgnoreCase("aaa", "a", 1));
+    assertTrue(Ascii.startsWithIgnoreCase("baa", "a", 1));
+    assertTrue(Ascii.startsWithIgnoreCase("bba", "a", 2));
+  }
+
+  public void testEndsWithIgnoreCase() {
+    assertTrue(Ascii.endsWithIgnoreCase("", ""));
+    assertFalse(Ascii.endsWithIgnoreCase("", "x"));
+    assertTrue(Ascii.endsWithIgnoreCase("x", ""));
+    assertTrue(Ascii.endsWithIgnoreCase(LOWER, UPPER));
+    assertTrue(Ascii.endsWithIgnoreCase(UPPER, LOWER));
+    // Create new strings here to avoid early-out logic.
+    assertTrue(Ascii.endsWithIgnoreCase(new String(IGNORED), new String(IGNORED)));
+    assertFalse(
+        Ascii.endsWithIgnoreCase(new String(IGNORED), new String(IGNORED).subSequence(3, 6))
+    );
+    assertFalse(
+        Ascii.endsWithIgnoreCase(new String(IGNORED).subSequence(3, 6), new String(IGNORED))
+    );
+    assertTrue(
+        Ascii.endsWithIgnoreCase(
+            new String(IGNORED), new String(IGNORED).subSequence(3, IGNORED.length())
+        )
+    );
+    assertFalse(
+        Ascii.endsWithIgnoreCase(
+            new String(IGNORED).subSequence(3, IGNORED.length()), new String(IGNORED)
+        )
+    );
+    // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
+    assertFalse(Ascii.endsWithIgnoreCase("@", "`"));
+    assertFalse(Ascii.endsWithIgnoreCase("[", "{"));
+    // Test matched substrings
+    assertFalse(Ascii.endsWithIgnoreCase("abcd", "abc")); // beginning
+    assertTrue(Ascii.endsWithIgnoreCase("abcd", "bcd")); // end
+    assertFalse(Ascii.endsWithIgnoreCase("abcd", "bc")); // middle
+    assertFalse(Ascii.endsWithIgnoreCase("abcd", "efgh")); // non-matching
+    // Test for case insensitivity
+    assertFalse(Ascii.endsWithIgnoreCase("aBcD", "AbC")); // beginning
+    assertTrue(Ascii.endsWithIgnoreCase("aBcD", "bCd")); // end
+    assertFalse(Ascii.endsWithIgnoreCase("aBcD", "bC")); // middle
+    assertFalse(Ascii.endsWithIgnoreCase("aBcD", "EFGH")); // non-matching
+    // Test for multiple occurrences
+    assertTrue(Ascii.endsWithIgnoreCase("aaa", "a"));
+  }
 }
diff --git a/guava/src/com/google/common/base/Ascii.java b/guava/src/com/google/common/base/Ascii.java
index 564230eff9e3..a7d357e9ee92 100644
--- a/guava/src/com/google/common/base/Ascii.java
+++ b/guava/src/com/google/common/base/Ascii.java
@@ -34,6 +34,7 @@
  *
  * @author Craig Berry
  * @author Gregory Kick
+ * @author François Martin
  * @since 7.0
  */
 @GwtCompatible
@@ -629,4 +630,186 @@ private static int getAlphaIndex(char c) {
     // Fold upper-case ASCII to lower-case and make zero-indexed and unsigned (by casting to char).
     return (char) ((c | 0x20) - 'a');
   }
+
+  /**
+   * Searches through {@code source} to find {@code target}, ignoring the case of
+   * any ASCII alphabetic characters between {@code 'a'} and {@code 'z'}
+   * or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @param source the characters being searched.
+   * @param sourceOffset offset of the source string.
+   * @param sourceCount count of the source string.
+   * @param target the characters being searched for.
+   * @param targetOffset offset of the target string.
+   * @param targetCount count of the target string.
+   * @param fromIndex the index to begin searching from.
+   */
+  private static int indexOfIgnoreCase(CharSequence source, int sourceOffset, int sourceCount,
+      CharSequence target, int targetOffset, int targetCount,
+      int fromIndex) {
+    if (fromIndex >= sourceCount) {
+      return (targetCount == 0 ? sourceCount : -1);
+    }
+    if (fromIndex < 0) {
+      fromIndex = 0;
+    }
+    if (targetCount == 0) {
+      return fromIndex;
+    }
+
+    char first = target.charAt(targetOffset);
+    int firstAlphaIndex = getAlphaIndex(first);
+    int max = sourceOffset + (sourceCount - targetCount);
+
+    for (int i = sourceOffset + fromIndex; i <= max; i++) {
+      /* Look for first character. */
+      while (i <= max) {
+        char sourceI = source.charAt(i);
+        if (sourceI == first) {
+          break;
+        }
+        int sourceIAlphaIndex = getAlphaIndex(sourceI);
+        if (sourceIAlphaIndex >= 26 || sourceIAlphaIndex != firstAlphaIndex) {
+          ++i;
+          continue;
+        }
+        break;
+      }
+
+      /* Found first character, now look at the rest of target. */
+      if (i <= max) {
+        int j = i + 1;
+        int end = j + targetCount - 1;
+        char sourceJ;
+        int sourceJAlphaIndex;
+        char targetK;
+        for (int k = targetOffset + 1; j < end; j++, k++) {
+          sourceJ = source.charAt(j);
+          targetK = target.charAt(k);
+          if (sourceJ == targetK) {
+            continue;
+          }
+          sourceJAlphaIndex = getAlphaIndex(sourceJ);
+          if (sourceJAlphaIndex < 26 && sourceJAlphaIndex == getAlphaIndex(targetK)) {
+            continue;
+          }
+          break;
+        }
+
+        if (j == end) {
+          /* Found whole string. */
+          return i - sourceOffset;
+        }
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Returns the index within the {@code sequence} of the first occurrence of {@code subSequence},
+   * starting at {@code fromIndex}, ignoring the case of any ASCII alphabetic characters
+   * between {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @param sequence the sequence to be searched in.
+   * @param subSequence the subsequence to search for.
+   * @param fromIndex the index from which to start the search.
+   * @return the index of the first occurrence of the {@code subSequence}, or {@code -1} if there is
+   * no such occurrence.
+   *
+   * @since NEXT
+   */
+  public static int indexOfIgnoreCase(
+      CharSequence sequence, CharSequence subSequence, int fromIndex) {
+    return indexOfIgnoreCase(sequence, 0, sequence.length(),
+        subSequence, 0, subSequence.length(), fromIndex);
+  }
+
+  /**
+   * Returns the index within the {@code sequence} of the first occurrence of {@code subSequence},
+   * ignoring the case of any ASCII alphabetic characters
+   * between {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @param sequence the sequence to be searched in.
+   * @param subSequence the subsequence to search for.
+   * @return the index of the first occurrence of the {@code subSequence},
+   * or {@code -1} if there is no such occurrence.
+   *
+   * @since NEXT
+   */
+  public static int indexOfIgnoreCase(CharSequence sequence, CharSequence subSequence) {
+    return indexOfIgnoreCase(sequence, subSequence, 0);
+  }
+
+  /**
+   * Indicates whether the character sequence {@code sequence} contains the {@code subSequence},
+   * ignoring the case of any ASCII alphabetic characters between {@code 'a'} and {@code 'z'}
+   * or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @since NEXT
+   */
+  public static boolean containsIgnoreCase(CharSequence sequence, CharSequence subSequence) {
+    // Calling length() is the null pointer check (so do it before we can exit early).
+    int length = sequence.length();
+    if (sequence == subSequence) {
+      return true;
+    }
+    // If subSequence is longer than sequence, sequence cannot possibly contain subSequence.
+    if (subSequence.length() > length) {
+      return false;
+    }
+    return indexOfIgnoreCase(sequence, subSequence) > -1;
+  }
+
+  /**
+   * Returns whether the character sequence {@code seq} starts with the character sequence
+   * {@code prefix} starting at {@code fromIndex}, ignoring the case of any ASCII alphabetic
+   * characters between {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @since NEXT
+   */
+  public static boolean startsWithIgnoreCase(CharSequence seq, CharSequence prefix, int fromIndex) {
+    int seqOffset = fromIndex;
+    int prefixOffset = 0;
+    int prefixCounter = prefix.length();
+    // Note: fromIndex might be near -1>>>1 (Integer.MAX_VALUE), so subtract instead of adding.
+    if ((fromIndex < 0) || (fromIndex > seq.length() - prefixCounter)) {
+      return false;
+    }
+    while (--prefixCounter >= 0) {
+      char charSeq = seq.charAt(seqOffset++);
+      char charPrefix = prefix.charAt(prefixOffset++);
+      if (charSeq == charPrefix) {
+        continue;
+      }
+      int seqAlphaIndex = getAlphaIndex(charSeq);
+      if (seqAlphaIndex < 26 && seqAlphaIndex == getAlphaIndex(charPrefix)) {
+        continue;
+      }
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Returns whether the character sequence {@code seq} starts with the character sequence
+   * {@code prefix}, ignoring the case of any ASCII alphabetic characters
+   * between {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @since NEXT
+   */
+  public static boolean startsWithIgnoreCase(CharSequence seq, CharSequence prefix) {
+    return startsWithIgnoreCase(seq, prefix, 0);
+  }
+
+  /**
+   * Returns whether the character sequence {@code seq} ends with the character sequence
+   * {@code suffix}, ignoring the case of any ASCII alphabetic characters
+   * between {@code 'a'} and {@code 'z'} or {@code 'A'} and {@code 'Z'} inclusive.
+   *
+   * @since NEXT
+   */
+  public static boolean endsWithIgnoreCase(CharSequence seq, CharSequence suffix) {
+    return startsWithIgnoreCase(seq, suffix, seq.length() - suffix.length());
+  }
+
 }