Skip to content

Commit

Permalink
BUG: Lucene.Net.Analysis.Common: Fixed classes that were originally using invariant culture to do so again. J2N's Character class default is to use the current culture, which had changed from the prior Character class that used invariant culture. Fixes TestICUFoldingFilter::TestRandomStrings().
Browse files Browse the repository at this point in the history
  • Loading branch information
NightOwl888 committed Aug 2, 2020
1 parent 3c4cfa4 commit 9ce76e9
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using J2N;
using Lucene.Net.Util;
using System.Globalization;
using System.IO;

namespace Lucene.Net.Analysis.Core
Expand Down Expand Up @@ -73,11 +74,11 @@ public LowerCaseTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeF

/// <summary>
/// Converts char to lower case
/// <see cref="Character.ToLower(int)"/>.
/// <see cref="Character.ToLower(int, CultureInfo)"/> in the invariant culture.
/// </summary>
protected override int Normalize(int c)
{
return Character.ToLower(c);
return Character.ToLower(c, CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant culture to match Java
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using Lucene.Net.Util;
using Lucene.Net.Util.Fst;
using System.Collections.Generic;
using System.Globalization;
using System.IO;

namespace Lucene.Net.Analysis.Miscellaneous
Expand Down Expand Up @@ -134,7 +135,7 @@ public BytesRef Get(char[] buffer, int bufferLen, FST.Arc<BytesRef> scratchArc,
while (bufUpto < bufferLen)
{
int codePoint = Character.CodePointAt(buffer, bufUpto, bufferLen);
if (fst.FindTargetArc(ignoreCase ? Character.ToLower(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null)
if (fst.FindTargetArc(ignoreCase ? Character.ToLower(codePoint, CultureInfo.InvariantCulture) : codePoint, scratchArc, scratchArc, fstReader) == null)
{
return null;
}
Expand Down Expand Up @@ -192,7 +193,7 @@ public virtual bool Add(string input, string output)
char[] buffer = charsSpare.Chars;
for (int i = 0; i < length;)
{
i += Character.ToChars(Character.ToLower(Character.CodePointAt(input, i)), buffer, i);
i += Character.ToChars(Character.ToLower(Character.CodePointAt(input, i), CultureInfo.InvariantCulture), buffer, i);
}
UnicodeUtil.UTF16toUTF8(buffer, 0, length, spare);
}
Expand Down
10 changes: 6 additions & 4 deletions src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using Lucene.Net.Util.Fst;
using System;
using System.Diagnostics;
using System.Globalization;

namespace Lucene.Net.Analysis.Synonym
{
Expand Down Expand Up @@ -252,9 +253,10 @@ public virtual void Add(char[] output, int offset, int len, int endOffset, int p

/// <param name="input"> input tokenstream </param>
/// <param name="synonyms"> synonym map </param>
/// <param name="ignoreCase"> case-folds input for matching with <see cref="Character.ToLower(int)"/>.
/// Note, if you set this to true, its your responsibility to lowercase
/// the input entries when you create the <see cref="SynonymMap"/> </param>
/// <param name="ignoreCase"> case-folds input for matching with <see cref="Character.ToLower(int, CultureInfo)"/>
/// using <see cref="CultureInfo.InvariantCulture"/>.
/// Note, if you set this to <c>true</c>, it's your responsibility to lowercase
/// the input entries when you create the <see cref="SynonymMap"/>.</param>
public SynonymFilter(TokenStream input, SynonymMap synonyms, bool ignoreCase)
: base(input)
{
Expand Down Expand Up @@ -411,7 +413,7 @@ private void Parse()
while (bufUpto < bufferLen)
{
int codePoint = Character.CodePointAt(buffer, bufUpto, bufferLen);
if (fst.FindTargetArc(ignoreCase ? Character.ToLower(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null)
if (fst.FindTargetArc(ignoreCase ? Character.ToLower(codePoint, CultureInfo.InvariantCulture) : codePoint, scratchArc, scratchArc, fstReader) == null)
{
//System.out.println(" stop");
goto byTokenBreak;
Expand Down
12 changes: 6 additions & 6 deletions src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ private bool Equals(char[] text1, int offset, int length, char[] text2)
for (int i = 0; i < length;)
{
var codePointAt = charUtils.CodePointAt(text1, offset + i, limit);
if (Character.ToLower(codePointAt) != charUtils.CodePointAt(text2, i, text2.Length))
if (Character.ToLower(codePointAt, CultureInfo.InvariantCulture) != charUtils.CodePointAt(text2, i, text2.Length)) // LUCENENET specific - need to use invariant culture to match Java
{
return false;
}
Expand Down Expand Up @@ -696,7 +696,7 @@ private bool Equals(ICharSequence text1, char[] text2)
for (int i = 0; i < length;)
{
int codePointAt = charUtils.CodePointAt(text1, i);
if (Character.ToLower(codePointAt) != charUtils.CodePointAt(text2, i, text2.Length))
if (Character.ToLower(codePointAt, CultureInfo.InvariantCulture) != charUtils.CodePointAt(text2, i, text2.Length)) // LUCENENET specific - need to use invariant culture to match Java
{
return false;
}
Expand Down Expand Up @@ -728,7 +728,7 @@ private bool Equals(string text1, char[] text2)
for (int i = 0; i < length;)
{
int codePointAt = charUtils.CodePointAt(text1, i);
if (Character.ToLower(codePointAt) != charUtils.CodePointAt(text2, i, text2.Length))
if (Character.ToLower(codePointAt, CultureInfo.InvariantCulture) != charUtils.CodePointAt(text2, i, text2.Length)) // LUCENENET specific - need to use invariant culture to match Java
{
return false;
}
Expand Down Expand Up @@ -811,7 +811,7 @@ private int GetHashCode(char[] text, int offset, int length)
for (int i = offset; i < stop;)
{
int codePointAt = charUtils.CodePointAt(text, i, stop);
code = code * 31 + Character.ToLower(codePointAt);
code = code * 31 + Character.ToLower(codePointAt, CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant culture to match Java
i += Character.CharCount(codePointAt);
}
}
Expand Down Expand Up @@ -839,7 +839,7 @@ private int GetHashCode(ICharSequence text)
for (int i = 0; i < length;)
{
int codePointAt = charUtils.CodePointAt(text, i);
code = code * 31 + Character.ToLower(codePointAt);
code = code * 31 + Character.ToLower(codePointAt, CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant culture to match Java
i += Character.CharCount(codePointAt);
}
}
Expand Down Expand Up @@ -867,7 +867,7 @@ private int GetHashCode(string text)
for (int i = 0; i < length;)
{
int codePointAt = charUtils.CodePointAt(text, i);
code = code * 31 + Character.ToLower(codePointAt);
code = code * 31 + Character.ToLower(codePointAt, CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant culture to match Java
i += Character.CharCount(codePointAt);
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/Lucene.Net.TestFramework/Analysis/MockTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using Debug = Lucene.Net.Diagnostics.Debug; // LUCENENET NOTE: We cannot use System.Diagnostics.Debug because those calls will be optimized out of the release!
using RegExp = Lucene.Net.Util.Automaton.RegExp;
using Assert = Lucene.Net.TestFramework.Assert;
using System.Globalization;

namespace Lucene.Net.Analysis
{
Expand Down Expand Up @@ -290,7 +291,7 @@ protected virtual bool IsTokenChar(int c)

protected virtual int Normalize(int c)
{
return lowerCase ? Character.ToLower(c) : c;
return lowerCase ? Character.ToLower(c, CultureInfo.InvariantCulture) : c; // LUCENENET specific - need to use invariant culture to match Java
}

public override void Reset()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ public virtual void TestCopyCharArraySetBWCompat()
IList<string> stopwordsUpper = new List<string>();
foreach (string @string in stopwords)
{
stopwordsUpper.Add(@string.ToUpper());
stopwordsUpper.Add(@string.ToUpperInvariant());
}
setIngoreCase.addAll(TEST_STOP_WORDS);
setIngoreCase.Add(Convert.ToInt32(1));
Expand Down Expand Up @@ -472,7 +472,7 @@ public virtual void TestCopyCharArraySet()
IList<string> stopwordsUpper = new List<string>();
foreach (string @string in stopwords)
{
stopwordsUpper.Add(@string.ToUpper());
stopwordsUpper.Add(@string.ToUpperInvariant());
}
setIngoreCase.addAll(TEST_STOP_WORDS);
setIngoreCase.Add(Convert.ToInt32(1));
Expand Down Expand Up @@ -523,7 +523,7 @@ public virtual void TestCopyJDKSet()
IList<string> stopwordsUpper = new List<string>();
foreach (string @string in stopwords)
{
stopwordsUpper.Add(@string.ToUpper());
stopwordsUpper.Add(@string.ToUpperInvariant());
}
set.addAll(TEST_STOP_WORDS);

Expand Down

0 comments on commit 9ce76e9

Please sign in to comment.