Skip to content

Commit 1dfd91b

Browse files
Stop filtering the results if the key is the same.
* When the key is the same, common-prefix results are not redundant.
  + e.g. さかい → 堺 and 堺井
* This fixes some over-filtered conversions.
* Added more comments.

#typingquality #codehealth

PiperOrigin-RevId: 780327139
1 parent fe703f8 commit 1dfd91b

File tree

2 files changed

+33
-13
lines changed

2 files changed

+33
-13
lines changed

src/prediction/dictionary_prediction_aggregator.cc

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,30 @@ constexpr size_t kSuggestionMaxResultsSize = 256;
8585
constexpr size_t kPredictionMaxResultsSize = 100000;
8686

8787
// Returns true if |target| may be a redundant result.
88-
bool MaybeRedundant(const absl::string_view reference,
89-
const absl::string_view target) {
88+
bool MaybeRedundant(const Result &reference_result,
89+
const Result &target_result) {
90+
const absl::string_view reference = reference_result.value;
91+
const absl::string_view target = target_result.value;
92+
93+
// Same value means the result is redundant.
94+
if (reference == target) {
95+
return true;
96+
}
97+
98+
// If the key is the same, the target is not redundant because its value differs.
99+
if (reference_result.key == target_result.key) {
100+
return false;
101+
}
102+
103+
// target is not an appended value of the reference.
90104
if (!target.starts_with(reference)) {
91105
return false;
92106
}
107+
108+
// If the suffix is Emoji or unknown script, the result is not redundant.
109+
// For example, if the reference is "東京", "東京🗼" is not redundant, but
110+
// "東京タワー" is redundant.
93111
const absl::string_view suffix = target.substr(reference.size());
94-
if (suffix.empty()) {
95-
return true;
96-
}
97112
const Util::ScriptType script_type = Util::GetScriptType(suffix);
98113
return (script_type != Util::EMOJI && script_type != Util::UNKNOWN_SCRIPT);
99114
}
@@ -949,17 +964,18 @@ void DictionaryPredictionAggregator::AggregateUnigramForMixedConversion(
949964

950965
// Traverse all remaining elements and check if each result is redundant.
951966
for (Iter iter = min_iter; iter != max_iter;) {
952-
// - We do not filter user dictionary word.
953-
const bool should_check_redundant =
954-
!(iter->candidate_attributes & converter::Candidate::USER_DICTIONARY);
955-
if (should_check_redundant &&
956-
MaybeRedundant(reference_result.value, iter->value)) {
957-
// Swap out the redundant result.
967+
// We do not filter user dictionary words.
968+
if (iter->candidate_attributes & converter::Candidate::USER_DICTIONARY) {
969+
++iter;
970+
continue;
971+
}
972+
// If the result is redundant, swap it out.
973+
if (MaybeRedundant(reference_result, *iter)) {
958974
--max_iter;
959975
std::iter_swap(iter, max_iter);
960-
} else {
961-
++iter;
976+
continue;
962977
}
978+
++iter;
963979
}
964980
}
965981

src/prediction/dictionary_prediction_aggregator_test.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,9 @@ TEST_F(DictionaryPredictionAggregatorTest, MobileUnigram) {
11591159
{"とうきょう!", "東京!", 1100, kPosId, kPosId, Token::NONE},
11601160
{"とうきょう!?", "東京!?", 1200, kPosId, kPosId, Token::NONE},
11611161
{"とうきょう", "東京❤", 1300, kPosId, kPosId, Token::NONE},
1162+
// "とうきょう → 東京宇" is not an actual word, but an emulation of
1163+
// "さかい → (堺, 堺井)" and "いずみ → (泉, 泉水)".
1164+
{"とうきょう", "東京宇", 1400, kPosId, kPosId, Token::NONE},
11621165
}});
11631166
}
11641167

@@ -1167,6 +1170,7 @@ TEST_F(DictionaryPredictionAggregatorTest, MobileUnigram) {
11671170
aggregator.AggregateUnigramForMixedConversion(convreq, &results);
11681171

11691172
EXPECT_TRUE(FindResultByValue(results, "東京"));
1173+
EXPECT_TRUE(FindResultByValue(results, "東京宇"));
11701174

11711175
int prefix_count = 0;
11721176
for (const auto &result : results) {

0 commit comments

Comments
 (0)