google · martinfrancois · Jan 3, 2018
diff --git a/guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java b/guava-tests/benchmark/com/google/common/base/AsciiBenchmark.java
@@ -30,6 +30,7 @@
  * Benchmarks for the ASCII class.
  *
  * @author Kevin Bourrillion
+ * @author François Martin
  */
 public class AsciiBenchmark {
   private static final String ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
@@ -169,4 +170,30 @@ static String charSequenceToUpperCase(CharSequence chars) {
     }
     return String.valueOf(newChars);
   }
+
+  @Benchmark
+  int indexOfIgnoreCaseInAdvance(int reps) {
+    // Baseline: lower-case both operands, then String.indexOf. The needle is the upper-cased
+    // second half of testString; it must be non-empty, since "" matches at index 0 at once.
+    String lhs = testString;
+    String rhs = Ascii.toUpperCase(testString).substring(testString.length() / 2);
+    int dummy = -1;
+    for (int i = 0; i < reps; i++) {
+      dummy ^= Ascii.toLowerCase(lhs).indexOf(Ascii.toLowerCase(rhs));
+    }
+    return dummy;
+  }
+
+  @Benchmark
+  int indexOfIgnoreCaseAscii(int reps) {
+    // Measures Ascii.indexOfIgnoreCase directly. The needle is the upper-cased second half of
+    // testString; it must be non-empty, since an empty needle returns before any searching.
+    String lhs = testString;
+    String rhs = Ascii.toUpperCase(testString).substring(testString.length() / 2);
+    int dummy = -1;
+    for (int i = 0; i < reps; i++) {
+      dummy ^= Ascii.indexOfIgnoreCase(lhs, rhs);
+    }
+    return dummy;
+  }
 }
diff --git a/guava-tests/test/com/google/common/base/AsciiTest.java b/guava-tests/test/com/google/common/base/AsciiTest.java
@@ -24,6 +24,7 @@
  * Unit test for {@link Ascii}.
  *
  * @author Craig Berry
+ * @author François Martin
  */
 @GwtCompatible
 public class AsciiTest extends TestCase {
@@ -151,4 +152,142 @@ public void testEqualsIgnoreCaseUnicodeEquivalence() {
     assertFalse("pa\u00dfword".equalsIgnoreCase("PASSWORD")); // [*]
     assertFalse(Ascii.equalsIgnoreCase("pa\u00dfword", "PASSWORD"));
   }
+
+  public void testIndexOfIgnoreCase() {
+    assertEquals(0, Ascii.indexOfIgnoreCase("", ""));
+    assertEquals(0, Ascii.indexOfIgnoreCase("x", ""));
+    assertEquals(-1, Ascii.indexOfIgnoreCase("", "x"));
+    assertEquals(0, Ascii.indexOfIgnoreCase(UPPER, LOWER));
+    assertEquals(0, Ascii.indexOfIgnoreCase(LOWER, UPPER));
+    // Equal content held in two distinct String instances.
+    assertEquals(0, Ascii.indexOfIgnoreCase(new String(IGNORED), new String(IGNORED)));
+    // Neighbours of the letter ranges ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1) must not be case-folded.
+    assertEquals(-1, Ascii.indexOfIgnoreCase("[", "{"));
+    assertEquals(-1, Ascii.indexOfIgnoreCase("@", "`"));
+    // Same-case targets, ordered by where they occur in the source
+    assertEquals(0, Ascii.indexOfIgnoreCase("abcd", "a")); // first char
+    assertEquals(0, Ascii.indexOfIgnoreCase("abcd", "abc")); // beginning
+    assertEquals(1, Ascii.indexOfIgnoreCase("abcd", "bc")); // middle
+    assertEquals(1, Ascii.indexOfIgnoreCase("abcd", "bcd")); // end
+    assertEquals(3, Ascii.indexOfIgnoreCase("abcd", "d")); // last char
+    assertEquals(-1, Ascii.indexOfIgnoreCase("abcd", "efgh")); // non-matching
+    // Targets whose letter case differs from the source
+    assertEquals(0, Ascii.indexOfIgnoreCase("aBcD", "A")); // first char
+    assertEquals(0, Ascii.indexOfIgnoreCase("aBcD", "AbC")); // beginning
+    assertEquals(1, Ascii.indexOfIgnoreCase("aBcD", "bC")); // middle
+    assertEquals(1, Ascii.indexOfIgnoreCase("aBcD", "bCd")); // end
+    assertEquals(3, Ascii.indexOfIgnoreCase("aBcD", "d")); // last char
+    assertEquals(-1, Ascii.indexOfIgnoreCase("aBcD", "EFGH")); // non-matching
+    // A negative fromIndex searches from the beginning
+    assertEquals(3, Ascii.indexOfIgnoreCase("aBcD", "d", -1));
+  }
+
+  public void testContainsIgnoreCase() {
+    // Distinct instances with equal content, so the identity shortcut cannot decide the result.
+    String whole = new String(IGNORED);
+    String copy = new String(IGNORED);
+    CharSequence middle = copy.subSequence(3, 6);
+    // Empty operands, whole alphabets, and a sequence against an equal copy or one of its slices
+    assertTrue(Ascii.containsIgnoreCase("", ""));
+    assertFalse(Ascii.containsIgnoreCase("", "x"));
+    assertTrue(Ascii.containsIgnoreCase("x", ""));
+    assertTrue(Ascii.containsIgnoreCase(LOWER, UPPER));
+    assertTrue(Ascii.containsIgnoreCase(UPPER, LOWER));
+    assertTrue(Ascii.containsIgnoreCase(whole, copy));
+    assertTrue(Ascii.containsIgnoreCase(whole, middle));
+    assertFalse(Ascii.containsIgnoreCase(middle, whole));
+    // Neighbours of the letter ranges ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1) must not be case-folded.
+    assertFalse(Ascii.containsIgnoreCase("@", "`"));
+    assertFalse(Ascii.containsIgnoreCase("[", "{"));
+    // Same-case targets
+    assertTrue(Ascii.containsIgnoreCase("abcd", "abc")); // beginning
+    assertTrue(Ascii.containsIgnoreCase("abcd", "bc")); // middle
+    assertTrue(Ascii.containsIgnoreCase("abcd", "bcd")); // end
+    assertFalse(Ascii.containsIgnoreCase("abcd", "efgh")); // non-matching
+    // Targets whose letter case differs from the source
+    assertTrue(Ascii.containsIgnoreCase("aBcD", "AbC")); // beginning
+    assertTrue(Ascii.containsIgnoreCase("aBcD", "bC")); // middle
+    assertTrue(Ascii.containsIgnoreCase("aBcD", "bCd")); // end
+    assertFalse(Ascii.containsIgnoreCase("aBcD", "EFGH")); // non-matching
+  }
+
+  public void testStartsWithIgnoreCase() {
+    // Distinct instances with equal content, plus an interior slice and a leading slice.
+    String whole = new String(IGNORED);
+    String copy = new String(IGNORED);
+    CharSequence middle = copy.subSequence(3, 6);
+    CharSequence head = copy.subSequence(0, 6);
+    assertTrue(Ascii.startsWithIgnoreCase("", ""));
+    assertFalse(Ascii.startsWithIgnoreCase("", "x"));
+    assertTrue(Ascii.startsWithIgnoreCase("x", ""));
+    assertTrue(Ascii.startsWithIgnoreCase(LOWER, UPPER));
+    assertTrue(Ascii.startsWithIgnoreCase(UPPER, LOWER));
+    assertTrue(Ascii.startsWithIgnoreCase(whole, copy));
+    assertFalse(Ascii.startsWithIgnoreCase(whole, middle));
+    assertFalse(Ascii.startsWithIgnoreCase(middle, whole));
+    assertTrue(Ascii.startsWithIgnoreCase(whole, head));
+    assertFalse(Ascii.startsWithIgnoreCase(head, whole));
+    // Test chars just outside the alphabetic range ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1)
+    assertFalse(Ascii.startsWithIgnoreCase("@", "`"));
+    assertFalse(Ascii.startsWithIgnoreCase("[", "{"));
+    // Test matched substrings
+    assertTrue(Ascii.startsWithIgnoreCase("abcd", "abc")); // beginning
+    assertFalse(Ascii.startsWithIgnoreCase("abcd", "bcd")); // end
+    assertFalse(Ascii.startsWithIgnoreCase("abcd", "bc")); // middle
+    assertFalse(Ascii.startsWithIgnoreCase("abcd", "efgh")); // non-matching
+    // Test for case insensitivity
+    assertTrue(Ascii.startsWithIgnoreCase("aBcD", "AbC")); // beginning
+    assertFalse(Ascii.startsWithIgnoreCase("aBcD", "bCd")); // end
+    assertFalse(Ascii.startsWithIgnoreCase("aBcD", "bC")); // middle
+    assertFalse(Ascii.startsWithIgnoreCase("aBcD", "EFGH")); // non-matching
+    // Test with different indices: prefix present at the given offset
+    assertTrue(Ascii.startsWithIgnoreCase("aaa", "a", 1));
+    assertTrue(Ascii.startsWithIgnoreCase("baa", "a", 1));
+    assertTrue(Ascii.startsWithIgnoreCase("bba", "a", 2));
+    // Test with different indices: mismatch at the offset, and offsets outside the sequence
+    assertFalse(Ascii.startsWithIgnoreCase("baa", "b", 1));
+    assertFalse(Ascii.startsWithIgnoreCase("aaa", "a", -1));
+    assertFalse(Ascii.startsWithIgnoreCase("aaa", "a", 3));
+  }
+
+  public void testEndsWithIgnoreCase() {
+    // Distinct instances with equal content, an interior slice, and the slice from 3 to the end.
+    String whole = new String(IGNORED);
+    String copy = new String(IGNORED);
+    CharSequence middle = copy.subSequence(3, 6);
+    CharSequence tail = copy.subSequence(3, IGNORED.length());
+
+    // Empty operands and whole alphabets
+    assertTrue(Ascii.endsWithIgnoreCase("", ""));
+    assertFalse(Ascii.endsWithIgnoreCase("", "x"));
+    assertTrue(Ascii.endsWithIgnoreCase("x", ""));
+    assertTrue(Ascii.endsWithIgnoreCase(LOWER, UPPER));
+    assertTrue(Ascii.endsWithIgnoreCase(UPPER, LOWER));
+
+    // A sequence against an equal copy and against its slices, in both argument orders
+    assertTrue(Ascii.endsWithIgnoreCase(whole, copy));
+    assertFalse(Ascii.endsWithIgnoreCase(whole, middle));
+    assertFalse(Ascii.endsWithIgnoreCase(middle, whole));
+    assertTrue(Ascii.endsWithIgnoreCase(whole, tail));
+    assertFalse(Ascii.endsWithIgnoreCase(tail, whole));
+
+    // Neighbours of the letter ranges ('A'-1 vs 'a'-1, 'Z'+1 vs 'z'+1) must not be case-folded.
+    assertFalse(Ascii.endsWithIgnoreCase("@", "`"));
+    assertFalse(Ascii.endsWithIgnoreCase("[", "{"));
+
+    // Same-case targets
+    assertFalse(Ascii.endsWithIgnoreCase("abcd", "abc")); // beginning
+    assertFalse(Ascii.endsWithIgnoreCase("abcd", "bc")); // middle
+    assertTrue(Ascii.endsWithIgnoreCase("abcd", "bcd")); // end
+    assertFalse(Ascii.endsWithIgnoreCase("abcd", "efgh")); // non-matching
+
+    // Targets whose letter case differs from the source
+    assertFalse(Ascii.endsWithIgnoreCase("aBcD", "AbC")); // beginning
+    assertFalse(Ascii.endsWithIgnoreCase("aBcD", "bC")); // middle
+    assertTrue(Ascii.endsWithIgnoreCase("aBcD", "bCd")); // end
+    assertFalse(Ascii.endsWithIgnoreCase("aBcD", "EFGH")); // non-matching
+
+    // Test for multiple occurrences of the suffix
+    assertTrue(Ascii.endsWithIgnoreCase("aaa", "a"));
+  }
 }
diff --git a/guava/src/com/google/common/base/Ascii.java b/guava/src/com/google/common/base/Ascii.java
@@ -34,6 +34,7 @@
  *
  * @author Craig Berry
  * @author Gregory Kick
+ * @author François Martin
  * @since 7.0
  */
 @GwtCompatible
@@ -629,4 +630,186 @@ private static int getAlphaIndex(char c) {
     // Fold upper-case ASCII to lower-case and make zero-indexed and unsigned (by casting to char).
     return (char) ((c | 0x20) - 'a');
   }
+
+  /**
+   * Finds the first position at which a region of {@code target} occurs inside a region of
+   * {@code source}, treating two characters as equal when they are identical or are the same
+   * ASCII letter ({@code 'a'}-{@code 'z'}, {@code 'A'}-{@code 'Z'}) in different case.
+   *
+   * <p>Each start position is tried in turn and the regions are compared character by
+   * character, so no lower- or upper-cased copy of either sequence is built.
+   *
+   * @param source the characters being searched
+   * @param sourceOffset index in {@code source} where the searched region begins
+   * @param sourceCount number of characters in the searched region
+   * @param target the characters being searched for
+   * @param targetOffset index in {@code target} where the wanted region begins
+   * @param targetCount number of characters in the wanted region
+   * @param fromIndex position within the searched region to start at; negative values act as 0
+   * @return the match position relative to {@code sourceOffset}, or {@code -1} if there is none;
+   *     an empty target matches at the (non-negative) start position, or at {@code sourceCount}
+   *     when {@code fromIndex} lies at or beyond the end of the region
+   */
+  private static int indexOfIgnoreCase(CharSequence source, int sourceOffset, int sourceCount,
+                                       CharSequence target, int targetOffset, int targetCount,
+                                       int fromIndex) {
+    if (fromIndex >= sourceCount) {
+      // Nothing left to search: only an empty target still matches, at the end of the region.
+      return (targetCount == 0) ? sourceCount : -1;
+    }
+    // A negative start position behaves like 0.
+    int start = (fromIndex < 0) ? 0 : fromIndex;
+    if (targetCount == 0) {
+      // An empty target matches wherever the search starts.
+      return start;
+    }
+
+    char wantedFirst = target.charAt(targetOffset);
+    int wantedFirstAlpha = getAlphaIndex(wantedFirst);
+    // Last source position at which the whole target still fits.
+    int lastCandidate = sourceOffset + (sourceCount - targetCount);
+
+    for (int candidate = sourceOffset + start; candidate <= lastCandidate; candidate++) {
+      // Cheap rejection: the candidate must begin with the target's first character.
+      char head = source.charAt(candidate);
+      if (head != wantedFirst) {
+        int headAlpha = getAlphaIndex(head);
+        // An alpha index of 26 or more means "not an ASCII letter": no case folding applies.
+        if (headAlpha >= 26 || headAlpha != wantedFirstAlpha) {
+          continue;
+        }
+      }
+
+      // Count how many leading target characters agree with the source from this position.
+      int matched = 1;
+      while (matched < targetCount) {
+        char sourceChar = source.charAt(candidate + matched);
+        char targetChar = target.charAt(targetOffset + matched);
+        if (sourceChar != targetChar) {
+          // Same rule as for the first character: equal, or one ASCII letter in either case.
+          int sourceAlpha = getAlphaIndex(sourceChar);
+          if (sourceAlpha >= 26 || sourceAlpha != getAlphaIndex(targetChar)) {
+            break;
+          }
+        }
+        matched++;
+      }
+
+      if (matched == targetCount) {
+        // Every target character agreed; report the position relative to the region start.
+        return candidate - sourceOffset;
+      }
+    }
+
+    // No start position produced a full match.
+    return -1;
+  }
+
+  /**
+   * Locates the first occurrence of {@code subSequence} within {@code sequence} at or after
+   * {@code fromIndex}. Letters in the ASCII ranges {@code 'a'}-{@code 'z'} and
+   * {@code 'A'}-{@code 'Z'} are compared without regard to case; every other character must
+   * match exactly.
+   *
+   * @param sequence the sequence to be searched in
+   * @param subSequence the subsequence to search for
+   * @param fromIndex index to start searching from; negative values act as {@code 0}
+   * @return the index of the first such occurrence, or {@code -1} if there is none
+   * @since NEXT
+   */
+  public static int indexOfIgnoreCase(
+      CharSequence sequence, CharSequence subSequence, int fromIndex) {
+    int seqLen = sequence.length();
+    int subLen = subSequence.length();
+    return indexOfIgnoreCase(sequence, 0, seqLen, subSequence, 0, subLen, fromIndex);
+  }
+
+  /**
+   * Locates the first occurrence of {@code subSequence} within {@code sequence}, searching from
+   * the beginning. Letters in the ASCII ranges {@code 'a'}-{@code 'z'} and
+   * {@code 'A'}-{@code 'Z'} are compared without regard to case; every other character must
+   * match exactly.
+   *
+   * @param sequence the sequence to be searched in
+   * @param subSequence the subsequence to search for
+   * @return the index of the first such occurrence, or {@code -1} if there is none
+   * @since NEXT
+   */
+  public static int indexOfIgnoreCase(CharSequence sequence, CharSequence subSequence) {
+    final int searchStart = 0;
+    return indexOfIgnoreCase(sequence, subSequence, searchStart);
+  }
+
+  /**
+   * Tells whether {@code subSequence} occurs anywhere within {@code sequence}. Letters in the
+   * ASCII ranges {@code 'a'}-{@code 'z'} and {@code 'A'}-{@code 'Z'} are compared without regard
+   * to case; every other character must match exactly.
+   *
+   * @param sequence the sequence to be searched in
+   * @param subSequence the subsequence to search for
+   * @return {@code true} if {@code sequence} contains {@code subSequence}
+   * @since NEXT
+   */
+  public static boolean containsIgnoreCase(CharSequence sequence, CharSequence subSequence) {
+    // Read the length first: it doubles as the null check, ahead of the identity shortcut.
+    int seqLength = sequence.length();
+    if (sequence == subSequence) {
+      return true;
+    }
+    // A longer subSequence cannot fit, so the search only runs when a match is possible.
+    return subSequence.length() <= seqLength && indexOfIgnoreCase(sequence, subSequence) >= 0;
+  }
+
+  /**
+   * Tells whether {@code seq}, read from {@code fromIndex} onward, starts with {@code prefix},
+   * ignoring the case of ASCII letters ({@code 'a'}-{@code 'z'} and {@code 'A'}-{@code 'Z'}).
+   *
+   * @param seq the sequence to look in
+   * @param prefix the characters expected at {@code fromIndex}
+   * @param fromIndex position in {@code seq} at which {@code prefix} must begin
+   * @return {@code true} on a match; {@code false} otherwise, including when {@code fromIndex}
+   *     is negative or leaves fewer characters than {@code prefix} has
+   * @since NEXT
+   */
+  public static boolean startsWithIgnoreCase(CharSequence seq, CharSequence prefix, int fromIndex) {
+    int prefixLength = prefix.length();
+    // fromIndex may be close to Integer.MAX_VALUE, so subtract lengths instead of adding to it.
+    if (fromIndex < 0 || fromIndex > seq.length() - prefixLength) {
+      return false;
+    }
+    for (int i = 0; i < prefixLength; i++) {
+      char actual = seq.charAt(fromIndex + i);
+      char expected = prefix.charAt(i);
+      if (actual != expected) {
+        int actualAlpha = getAlphaIndex(actual);
+        if (actualAlpha >= 26 || actualAlpha != getAlphaIndex(expected)) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Tells whether {@code seq} starts with {@code prefix}, ignoring the case of ASCII letters
+   * ({@code 'a'}-{@code 'z'} and {@code 'A'}-{@code 'Z'}).
+   *
+   * @since NEXT
+   */
+  public static boolean startsWithIgnoreCase(CharSequence seq, CharSequence prefix) {
+    final int seqStart = 0;
+    return startsWithIgnoreCase(seq, prefix, seqStart);
+  }
+
+  /**
+   * Tells whether {@code seq} ends with {@code suffix}, ignoring the case of ASCII letters
+   * ({@code 'a'}-{@code 'z'} and {@code 'A'}-{@code 'Z'}).
+   *
+   * @since NEXT
+   */
+  public static boolean endsWithIgnoreCase(CharSequence seq, CharSequence suffix) {
+    int suffixStart = seq.length() - suffix.length();
+    return startsWithIgnoreCase(seq, suffix, suffixStart);
+  }
+
 }