diff --git a/src/corefx/System.Globalization.Native/collation.cpp b/src/corefx/System.Globalization.Native/collation.cpp
index 7cf32b9419c9..ca82a1bca616 100644
--- a/src/corefx/System.Globalization.Native/collation.cpp
+++ b/src/corefx/System.Globalization.Native/collation.cpp
@@ -125,6 +125,79 @@ extern "C" int32_t LastIndexOf(
     return result;
 }
 
+/*
+Static Function:
+AreEqualOrdinalIgnoreCase: code point equality that ignores case by comparing u_toupper results.
+*/
+static bool AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two)
+{
+    // Return whether the two characters are identical or would be identical if they were upper-cased.
+
+    if (one == two) // identical code points need no case mapping
+    {
+        return true;
+    }
+
+    if (one == 0x0131 || two == 0x0131)
+    {
+        // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
+        // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
+        // We special case it to match the Windows invariant behavior.
+        return false; // the two differ (checked above), so U+0131 is equal to nothing but itself
+    }
+
+    return u_toupper(one) == u_toupper(two);
+}
+
+/*
+Function:
+IndexOfOrdinalIgnoreCase: finds lpTarget inside lpSource, comparing code points with AreEqualOrdinalIgnoreCase.
+*/
+extern "C" int32_t // index into lpSource where the reported match starts, or -1 when there is none
+IndexOfOrdinalIgnoreCase(
+    const UChar* lpTarget, int32_t cwTargetLength, // text to look for, and its length in UChars
+    const UChar* lpSource, int32_t cwSourceLength, // text to search, and its length in UChars
+    int32_t findLast) // 0: stop at the first match; non-zero: keep scanning and report the last match
+{
+    int32_t result = -1; // stays -1 when no start position matches
+
+    int32_t endIndex = cwSourceLength - cwTargetLength; // last start index that leaves room for the whole target
+    assert(endIndex >= 0); // callers are expected to pass a target no longer than the source
+
+    int32_t i = 0;
+    while (i <= endIndex)
+    {
+        int32_t srcIdx = i, trgIdx = 0;
+        const UChar *src = lpSource, *trg = lpTarget;
+        UChar32 srcCodepoint, trgCodepoint;
+
+        bool match = true;
+        while (trgIdx < cwTargetLength)
+        {
+            U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint); // decode one code point and move srcIdx past it
+            U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint); // same for the target
+            if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint))
+            {
+                match = false;
+                break;
+            }
+        }
+
+        if (match)
+        {
+            result = i; // with findLast set, a later match overwrites this one
+            if (!findLast)
+            {
+                break;
+            }
+        }
+
+        U16_FWD_1(lpSource, i, cwSourceLength); // step i to the start of the next code point
+    }
+
+    return result;
+}
+
 /*
 Return value is a "Win32 BOOL" (1 = true, 0 = false)
 */
diff --git 
a/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
index b600fa59ef1c..c236c03cfa27 100644
--- a/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
+++ b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
@@ -18,6 +18,9 @@ internal static partial class GlobalizationInterop
         [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
         internal unsafe static extern int LastIndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options);
 
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
+        internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast); // callers treat -1 as "no match"; any other result is an index relative to pSource
+
         [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
         [return: MarshalAs(UnmanagedType.Bool)]
         internal unsafe static extern bool StartsWith(byte[] localeName, string target, string source, int cwSourceLength, CompareOptions options);
diff --git a/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs b/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
index ba658d0f5e4b..9a2a35c04a8b 100644
--- a/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
+++ b/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
@@ -20,7 +20,7 @@ internal unsafe CompareInfo(CultureInfo culture)
             m_sortNameAsUtf8 = System.Text.Encoding.UTF8.GetBytes(m_sortName);
         }
 
-        internal static int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
+        internal static unsafe int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) // unsafe: the ignoreCase path pins source and passes a char* to the native shim
         {
             Contract.Assert(source != null);
             Contract.Assert(value != null);
@@ -30,33 +30,41 @@ internal static int IndexOfOrdinal(string source, string value, int startIndex,
                 return startIndex;
             }
 
-            // TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
+            if (count < value.Length) // value cannot fit inside the searched range
+            {
+                return -1;
+            }
 
             if (ignoreCase)
             {
-                source = source.ToUpper(CultureInfo.InvariantCulture);
-                value = value.ToUpper(CultureInfo.InvariantCulture);
+                fixed (char* pSource = source)
+                {
+                    int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
+                    return index != -1 ? // the shim's index is relative to pSource + startIndex, so rebase it onto source
+                        startIndex + index :
+                        -1;
+                }
             }
 
-            source = source.Substring(startIndex, count);
-
-            for (int i = 0; i + value.Length <= source.Length; i++)
+            int endIndex = startIndex + (count - value.Length); // last position where value still fits in the range
+            for (int i = startIndex; i <= endIndex; i++)
             {
-                for (int j = 0; j < value.Length; j++) {
-                    if (source[i + j] != value[j]) {
-                        break;
-                    }
+                int valueIndex, sourceIndex;
+
+                for (valueIndex = 0, sourceIndex = i;
+                     valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
+                     valueIndex++, sourceIndex++) ; // empty body: only advances while the characters keep matching
 
-                    if (j == value.Length - 1) {
-                        return i + startIndex;
-                    }
+                if (valueIndex == value.Length) // every character of value matched starting at i
+                {
+                    return i;
                 }
             }
 
             return -1;
         }
 
-        internal static int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
+        internal static unsafe int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) // unsafe: the ignoreCase path pins source and passes a char* to the native shim
         {
             Contract.Assert(source != null);
             Contract.Assert(value != null);
@@ -66,27 +74,41 @@ internal static int LastIndexOfOrdinal(string source, string value, int startInd
                 return startIndex;
             }
 
-            // TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
+            if (count < value.Length) // value cannot fit inside the searched range
+            {
+                return -1;
+            }
+
+            // startIndex is the index into source where we start searching backwards from.
+            // leftStartIndex is the index into source of the start of the string that is
+            // count characters away from startIndex.
+            int leftStartIndex = startIndex - count + 1;
 
             if (ignoreCase)
             {
-                source = source.ToUpper(CultureInfo.InvariantCulture);
-                value = value.ToUpper(CultureInfo.InvariantCulture);
+                fixed (char* pSource = source)
+                {
+                    int lastIndex = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true);
+                    return lastIndex != -1 ? // the shim's index is relative to pSource + leftStartIndex, so rebase it onto source
+                        leftStartIndex + lastIndex :
+                        -1;
+                }
             }
 
-            source = source.Substring(startIndex - count + 1, count);
+            for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--) // candidate start positions, right to left
+            {
+                int valueIndex, sourceIndex;
 
-            int last = -1;
+                for (valueIndex = 0, sourceIndex = i;
+                     valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
+                     valueIndex++, sourceIndex++) ; // empty body: only advances while the characters keep matching
 
-            int cur = 0;
-            while ((cur = IndexOfOrdinal(source, value, last + 1, source.Length - last - 1, false)) != -1)
-            {
-                last = cur;
+                if (valueIndex == value.Length) { // every character of value matched starting at i
+                    return i;
+                }
             }
 
-            return last >= 0 ?
-                last + startIndex - count + 1 :
-                -1;
+            return -1;
         }
 
         private unsafe int GetHashCodeOfStringCore(string source, CompareOptions options)
@@ -138,9 +160,9 @@ private unsafe int IndexOfCore(string source, string target, int startIndex, int
 
             fixed (char* pSource = source)
             {
-                int lastIndex = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options);
+                int index = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options);
 
-                return lastIndex != -1 ? lastIndex + startIndex : -1;
+                return index != -1 ? index + startIndex : -1;
             }
         }
 