diff --git a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/PresentationFramework.csproj b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/PresentationFramework.csproj
index 2336c88b662..4398e142e22 100644
--- a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/PresentationFramework.csproj
+++ b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/PresentationFramework.csproj
@@ -1085,6 +1085,7 @@
+
diff --git a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/NLGSpellerInterop.cs b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/NLGSpellerInterop.cs
index 2b4ac76b2f6..87b26d83ec3 100644
--- a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/NLGSpellerInterop.cs
+++ b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/NLGSpellerInterop.cs
@@ -643,6 +643,12 @@ private void EnumerateSubSegments()
#region SpellerInteropBase.ISpellerSegment
+ /// <inheritdoc/>
+ public string SourceString { get; }
+
+ /// <inheritdoc/>
+ public string Text => SourceString?.Substring(TextRange.Start, TextRange.Length);
+
///
/// Returns a read-only list of sub-segments of this segment
///
diff --git a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/SpellerInteropBase.cs b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/SpellerInteropBase.cs
index 56f926de50c..7016a856d4f 100644
--- a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/SpellerInteropBase.cs
+++ b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/SpellerInteropBase.cs
@@ -35,6 +35,11 @@ internal interface ITextRange
///
internal interface ISpellerSegment
{
+ ///
+ /// Source string within which <see cref="TextRange"/> provides a position
+ ///
+ string SourceString { get; }
+
///
/// Identifies sub-words, if any.
///
@@ -45,6 +50,11 @@ internal interface ISpellerSegment
///
ITextRange TextRange { get; }
+ ///
+ /// Text represented by <see cref="TextRange"/>
+ ///
+ string Text { get; }
+
///
/// Queries the spell-checker to obtain suggestions for this segment
///
diff --git a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInterop.cs b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInterop.cs
index 0f1ff2d0e34..460aa317e4d 100644
--- a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInterop.cs
+++ b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInterop.cs
@@ -852,9 +852,9 @@ private SpellChecker CurrentSpellChecker
#endregion Private Fields
- #region Private Types
+ #region Internal Types
- private struct TextRange: SpellerInteropBase.ITextRange
+ internal readonly struct TextRange: SpellerInteropBase.ITextRange
{
public TextRange(MS.Internal.WindowsRuntime.Windows.Data.Text.TextSegment textSegment)
{
@@ -862,6 +862,17 @@ public TextRange(MS.Internal.WindowsRuntime.Windows.Data.Text.TextSegment textSe
_start = (int)textSegment.StartPosition;
}
+ public TextRange(int start, int length)
+ {
+ _start = start;
+ _length = length;
+ }
+
+ public TextRange(ITextRange textRange) :
+ this(textRange.Start, textRange.Length)
+ {
+ }
+
public static explicit operator TextRange(MS.Internal.WindowsRuntime.Windows.Data.Text.TextSegment textSegment)
{
return new TextRange(textSegment);
@@ -886,16 +897,18 @@ public int Length
}
[DebuggerDisplay("SubSegments.Count = {SubSegments.Count} TextRange = {TextRange.Start},{TextRange.Length}")]
- private class SpellerSegment: ISpellerSegment
+ internal class SpellerSegment: ISpellerSegment
{
#region Constructor
- public SpellerSegment(WordSegment segment, SpellChecker spellChecker, WinRTSpellerInterop owner)
+ public SpellerSegment(string sourceString, ITextRange textRange, SpellChecker spellChecker, WinRTSpellerInterop owner)
{
- _segment = segment;
_spellChecker = spellChecker;
_suggestions = null;
- _owner = owner;
+ Owner = owner;
+
+ SourceString = sourceString;
+ TextRange = textRange;
}
static SpellerSegment()
@@ -920,9 +933,9 @@ private void EnumerateSuggestions()
List spellingErrors = null;
- using (new SpellerCOMActionTraceLogger(_owner, SpellerCOMActionTraceLogger.Actions.ComprehensiveCheck))
+ using (new SpellerCOMActionTraceLogger(Owner, SpellerCOMActionTraceLogger.Actions.ComprehensiveCheck))
{
- spellingErrors = _spellChecker.ComprehensiveCheck(_segment.Text);
+ spellingErrors = Text != null ? _spellChecker.ComprehensiveCheck(Text) : null;
}
if (spellingErrors == null)
@@ -947,6 +960,16 @@ private void EnumerateSuggestions()
#region SpellerInteropBase.ISpellerSegment
+ ///
+ ///
+ ///
+ public string SourceString { get; }
+
+ ///
+ ///
+ ///
+ public string Text => SourceString?.Substring(TextRange.Start, TextRange.Length);
+
///
/// Returns a read-only list of sub-segments of this segment
/// WinRT word-segmenter doesn't really support sub-segments,
@@ -960,13 +983,7 @@ public IReadOnlyList SubSegments
}
}
- public ITextRange TextRange
- {
- get
- {
- return new TextRange(_segment.SourceTextSegment);
- }
- }
+ public ITextRange TextRange { get; }
public IReadOnlyList Suggestions
{
@@ -994,6 +1011,13 @@ public bool IsClean
}
}
+ ///
+ /// This property is used only to support TraceLogging telemetry
+ /// logged using <see cref="SpellerCOMActionTraceLogger"/>. It
+ /// has no other functional use.
+ ///
+ internal WinRTSpellerInterop Owner { get; }
+
public void EnumSubSegments(EnumTextSegmentsCallback segmentCallback, object data)
{
bool result = true;
@@ -1008,7 +1032,6 @@ public void EnumSubSegments(EnumTextSegmentsCallback segmentCallback, object dat
#region Private Fields
- private WordSegment _segment;
SpellChecker _spellChecker;
private IReadOnlyList _suggestions;
@@ -1016,16 +1039,13 @@ public void EnumSubSegments(EnumTextSegmentsCallback segmentCallback, object dat
private static readonly IReadOnlyList _empty;
- ///
- /// This field is used only to support TraceLogging telemetry
- /// logged using . It
- /// has no other functional use.
- ///
- private WinRTSpellerInterop _owner;
-
#endregion Private Fields
}
+ #endregion Internal Types
+
+ #region Private Types
+
[DebuggerDisplay("Sentence = {_sentence}")]
private class SpellerSentence: ISpellerSentence
{
@@ -1046,14 +1066,7 @@ public IReadOnlyList Segments
{
if (_segments == null)
{
- List segments = new List();
-
- foreach (var wordSegment in _wordBreaker.GetTokens(_sentence))
- {
- segments.Add(new SpellerSegment(wordSegment, _spellChecker, _owner));
- }
-
- _segments = segments.AsReadOnly();
+ _segments = _wordBreaker.ComprehensiveGetTokens(_sentence, _spellChecker, _owner);
}
return _segments;
diff --git a/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInteropExtensions.cs b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInteropExtensions.cs
new file mode 100644
index 00000000000..37ea34c0d86
--- /dev/null
+++ b/src/Microsoft.DotNet.Wpf/src/PresentationFramework/System/Windows/Documents/WinRTSpellerInteropExtensions.cs
@@ -0,0 +1,191 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using MS.Internal.WindowsRuntime.Windows.Data.Text;
+using System.Collections.Generic;
+
+using System.Windows.Documents.MsSpellCheckLib;
+using System.Windows.Documents.Tracing;
+using static System.Windows.Documents.WinRTSpellerInterop;
+
+namespace System.Windows.Documents
+{
+ internal static class WinRTSpellerInteropExtensions
+ {
+        /// <summary>
+        /// Tokenizes <paramref name="text"/> using <see cref="WordsSegmenter.GetTokens"/>, and then fixes up
+        /// the tokens to account for any missed text "in-between" those tokens.
+        /// </summary>
+        /// <param name="segmenter">Word-breaker instance; when null, no tokens are produced</param>
+        /// <param name="text">The text being tokenized</param>
+        /// <param name="spellChecker">The spell-checker instance used to augment the tokenizing process</param>
+        /// <param name="owner">Calling <see cref="WinRTSpellerInterop"/> instance</param>
+        /// <returns>The fixed-up tokens, in document order; empty when the word-breaker yields no tokens</returns>
+        /// <remarks>
+        /// Windows.Data.Text.WordsSegmenter tends to drop punctuation characters like period ('.')
+        /// when tokenizing text. Though this behavior is compatible with a vast majority of text-processing
+        /// scenarios (like word-counting), it is not ideal for spell-checking.
+        ///
+        /// In this method, the following augmented heuristic is applied to update the token-list generated by
+        /// <see cref="WordsSegmenter.GetTokens"/>.
+        ///
+        ///  - Identify if any text 'missingFragment' has been dropped by the <see cref="WordsSegmenter"/>
+        ///  - If the token immediately preceding 'missingFragment', previousToken, has a spelling error, then attempt to
+        ///    create new candidate tokens in the following order:
+        ///
+        ///         previousToken + missingFragment[0..0]
+        ///         previousToken + missingFragment[0..1]
+        ///         previousToken + missingFragment[0..2]
+        ///         ...
+        ///         ...
+        ///         previousToken + missingFragment[0..LEN-1], where LEN = LEN(missingFragment)
+        ///
+        ///  - Select the first candidate token that is free of spelling errors, and replace 'previousToken' with it.
+        ///  - For performance reasons, we choose a constant MAXLEN = 4 such that when LEN > MAXLEN, only MAXLEN
+        ///    tokens are considered.
+        ///      - MAXLEN = 4 is a somewhat arbitrary choice, though it seems more than sufficient to address common
+        ///        problems this heuristic is intended to help with.
+        ///
+        ///      - Typical word-breaking problems that have been observed empirically involve only one missed character,
+        ///        for which MAXLEN=1 would be sufficient. MAXLEN=4 is chosen as a sufficiently-large tradeoff between
+        ///        correctness and performance.
+        ///
+        ///      - Also see https://github.com/dotnet/wpf/pull/2753#issuecomment-602120768 for a discussion related to this.
+        /// </remarks>
+        public static IReadOnlyList<SpellerSegment> ComprehensiveGetTokens(
+            this WordsSegmenter segmenter,
+            string text,
+            SpellChecker spellChecker,
+            WinRTSpellerInterop owner)
+        {
+            IReadOnlyList<WordSegment> tokens = segmenter?.GetTokens(text) ?? Array.Empty<WordSegment>();
+            if (tokens.Count == 0)
+            {
+                return Array.Empty<SpellerSegment>();
+            }
+
+            var allTokens = new List<SpellerSegment>(tokens.Count);
+            int predictedNextTokenStartPosition = 0;
+
+            foreach (WordSegment token in tokens)
+            {
+                int nextTokenStartPosition = (int)token.SourceTextSegment.StartPosition;
+                int nextTokenLength = (int)token.SourceTextSegment.Length;
+
+                if (nextTokenStartPosition > predictedNextTokenStartPosition)
+                {
+                    // There is a "gap" between the last recorded token and the current token, i.e. the
+                    // word-breaker dropped some text. Try to fold the start of that gap into the previous token.
+                    TryExtendLastToken(
+                        allTokens,
+                        text,
+                        predictedNextTokenStartPosition,
+                        nextTokenStartPosition,
+                        spellChecker,
+                        owner);
+                }
+
+                allTokens.Add(
+                    new SpellerSegment(
+                        text,
+                        new WinRTSpellerInterop.TextRange(nextTokenStartPosition, nextTokenLength),
+                        spellChecker,
+                        owner));
+                predictedNextTokenStartPosition = nextTokenStartPosition + nextTokenLength;
+            }
+
+            // Text may also have been dropped after the final token. The cheap length comparison runs first so
+            // that the spell-checker is consulted only when there actually is trailing text to recover.
+            if (predictedNextTokenStartPosition < text.Length &&
+                spellChecker?.ComprehensiveCheck(tokens[tokens.Count - 1].Text)?.Count != 0)
+            {
+                TryExtendLastToken(
+                    allTokens,
+                    text,
+                    predictedNextTokenStartPosition,
+                    text.Length,
+                    spellChecker,
+                    owner);
+            }
+
+            return allTokens.AsReadOnly();
+        }
+
+        /// <summary>
+        /// Replaces the last entry of <paramref name="allTokens"/> with a longer, spellcheck-clean token when the
+        /// dropped text in [<paramref name="gapStart"/>, <paramref name="gapEnd"/>) allows one to be formed.
+        /// </summary>
+        private static void TryExtendLastToken(
+            List<SpellerSegment> allTokens,
+            string text,
+            int gapStart,
+            int gapEnd,
+            SpellChecker spellChecker,
+            WinRTSpellerInterop owner)
+        {
+            // Without a spell-checker no candidate can be validated; without a previous token nothing can be extended.
+            if (spellChecker == null || allTokens.Count == 0)
+            {
+                return;
+            }
+
+            var missingFragment = new SpellerSegment(text, new WinRTSpellerInterop.TextRange(gapStart, gapEnd - gapStart), spellChecker, owner);
+            var substToken = GetSpellCheckCleanSubstitutionToken(spellChecker, text, allTokens[allTokens.Count - 1], missingFragment);
+            if (substToken != null)
+            {
+                allTokens[allTokens.Count - 1] = new SpellerSegment(text, substToken.Value, spellChecker, owner);
+            }
+        }
+
+        /// <summary>
+        /// Checks through combinations of <paramref name="lastToken"/> + substrings(<paramref name="missingFragment"/>) and
+        /// returns the first spellcheck-clean result.
+        /// </summary>
+        /// <param name="spellChecker">Spell-checker; when null, no substitution is attempted</param>
+        /// <param name="documentText">Overall document text within which the text-ranges are computed</param>
+        /// <param name="lastToken">Previous token immediately preceding <paramref name="missingFragment"/></param>
+        /// <param name="missingFragment">The missing-fragment identified immediately after <paramref name="lastToken"/></param>
+        /// <returns>The range of the first spellcheck-clean candidate, or null when there is none</returns>
+        /// <remarks>
+        /// See note about MAXLEN in <see cref="ComprehensiveGetTokens"/>
+        /// which explains the rationale behind the value of the constant AlternateFormsMaximumCount.
+        /// </remarks>
+        private static WinRTSpellerInterop.TextRange? GetSpellCheckCleanSubstitutionToken(
+            SpellChecker spellChecker,
+            string documentText,
+            SpellerSegment lastToken,
+            SpellerSegment missingFragment)
+        {
+            const int AlternateFormsMaximumCount = 4;
+
+            // Bail out when nothing can be validated or extended, or when 'lastToken' is already spellcheck-clean.
+            string lastTokenText = lastToken?.Text;
+            if (spellChecker == null ||
+                string.IsNullOrWhiteSpace(missingFragment?.Text) ||
+                string.IsNullOrWhiteSpace(lastTokenText) ||
+                string.IsNullOrWhiteSpace(documentText) ||
+                spellChecker.ComprehensiveCheck(lastTokenText)?.Count == 0)
+            {
+                return null;
+            }
+
+            int altFormsCount = Math.Min(missingFragment.TextRange.Length, AlternateFormsMaximumCount);
+            string previousCandidate = lastTokenText;
+            for (int i = 1; i <= altFormsCount; i++)
+            {
+                var altForm = documentText.Substring(lastToken.TextRange.Start, lastToken.TextRange.Length + i).TrimEnd();
+                // Trimming can reproduce the text that was just rejected; don't send it to the spell-checker again.
+                if (!string.Equals(altForm, previousCandidate, StringComparison.Ordinal) &&
+                    spellChecker.ComprehensiveCheck(altForm)?.Count == 0)
+                {
+                    return new WinRTSpellerInterop.TextRange(lastToken.TextRange.Start, altForm.Length);
+                }
+
+                previousCandidate = altForm;
+            }
+
+            return null;
+        }
+ }
+}