Skip to content

Commit

Permalink
[text-spacing-trim] Use a Trie for HanKerning::GetCharType
Browse files Browse the repository at this point in the history
The last few rules of "Fullwidth Punctuation Collapsing"[1]
require ASCII characters, such as U+0028[2]. This makes
optimizing `HanKerning::GetCharType` with early returns more
difficult.

This patch moves the logic into the ICU Trie data structure,
which is built at build time. The lookup cost should be almost
the same as getting one Unicode property.

This patch has no behavior changes.

[1] https://drafts.csswg.org/css-text-4/#fullwidth-collapsing
[2] https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3Agc%3DPs%3A%5D%5B%3Agc%3DPe%3A%5D&g=&i=

Bug: 1463891
Change-Id: I3b43ce33942e617839d0f2a1a05103a5cf4bf6c0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4950310
Commit-Queue: Koji Ishii <kojii@chromium.org>
Auto-Submit: Koji Ishii <kojii@chromium.org>
Commit-Queue: Kent Tamura <tkent@chromium.org>
Reviewed-by: Kent Tamura <tkent@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1211379}
  • Loading branch information
kojiishi authored and Chromium LUCI CQ committed Oct 18, 2023
1 parent e7ba811 commit 86afd63
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 67 deletions.
4 changes: 4 additions & 0 deletions third_party/blink/renderer/platform/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ executable("character_data_generator") {
"text/character_property.h",
"text/character_property_data.h",
"text/character_property_data_generator.cc",
"text/han_kerning_char_type.h",
"wtf/text/character_names.h",
"wtf/text/wtf_uchar.h",
]
configs += [ "//third_party/blink/renderer:config" ]
deps = [
Expand Down Expand Up @@ -1401,6 +1404,7 @@ component("platform") {
"text/date_time_format.cc",
"text/date_time_format.h",
"text/decode_escape_sequences.h",
"text/han_kerning_char_type.h",
"text/hyphenation.cc",
"text/hyphenation.h",
"text/icu_error.cc",
Expand Down
60 changes: 14 additions & 46 deletions third_party/blink/renderer/platform/fonts/shaping/han_kerning.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,53 +89,21 @@ void HanKerning::ResetFeatures() {
// https://drafts.csswg.org/css-text-4/#text-spacing-classes
HanKerning::CharType HanKerning::GetCharType(UChar ch,
const FontData& font_data) {
if (ch < kLeftSingleQuotationMarkCharacter) {
return CharType::kOther;
const CharType type = Character::GetHanKerningCharType(ch);
switch (type) {
case CharType::kOther:
case CharType::kOpen:
case CharType::kClose:
case CharType::kMiddle:
return type;
case CharType::kDot:
return font_data.type_for_dot;
case CharType::kColon:
return font_data.type_for_colon;
case CharType::kSemicolon:
return font_data.type_for_semicolon;
}
if (ch <= kRightDoubleQuotationMarkCharacter) {
switch (ch) {
case kLeftSingleQuotationMarkCharacter: // U+2018
case kLeftDoubleQuotationMarkCharacter: // U+201C
return CharType::kOpen;
case kRightSingleQuotationMarkCharacter: // U+2019
case kRightDoubleQuotationMarkCharacter: // U+201D
return CharType::kClose;
}
return CharType::kOther;
}
if (ch < kIdeographicSpaceCharacter) {
return CharType::kOther;
}
if (Character::IsBlockCjkSymbolsAndPunctuation(ch) ||
Character::IsEastAsianWidthFullwidth(ch)) {
switch (ch) {
case kIdeographicSpaceCharacter: // U+3000
return CharType::kMiddle;
case kIdeographicCommaCharacter: // U+3001
case kIdeographicFullStopCharacter: // U+3002
case kFullwidthComma: // U+FF0C
case kFullwidthFullStop: // U+FF0E
return font_data.type_for_dot;
case kFullwidthColon: // U+FF1A
return font_data.type_for_colon;
case kFullwidthSemicolon: // U+FF1B
return font_data.type_for_semicolon;
}
const auto gc = static_cast<UCharCategory>(u_charType(ch));
switch (gc) {
case UCharCategory::U_START_PUNCTUATION:
return CharType::kOpen;
case UCharCategory::U_END_PUNCTUATION:
return CharType::kClose;
default:
return CharType::kOther;
}
}
switch (ch) {
case kKatakanaMiddleDot: // U+30FB
return CharType::kMiddle;
}
return CharType::kOther;
NOTREACHED_NORETURN();
}

bool HanKerning::IsOpen(UChar ch) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_FONTS_SHAPING_HAN_KERNING_H_

#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
#include "third_party/blink/renderer/platform/text/han_kerning_char_type.h"
#include "third_party/blink/renderer/platform/wtf/forward.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"

Expand Down Expand Up @@ -68,12 +69,7 @@ class PLATFORM_EXPORT HanKerning {
}
}

enum class CharType : uint8_t {
kOther,
kOpen,
kClose,
kMiddle,
};
using CharType = HanKerningCharType;

// Data retrieved from fonts for `HanKerning`.
struct PLATFORM_EXPORT FontData {
Expand Down
22 changes: 16 additions & 6 deletions third_party/blink/renderer/platform/text/character.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@

namespace blink {

static UCPTrie* CreateTrie() {
namespace {

UCPTrie* CreateTrie() {
// Create a Trie from the value array.
ICUError error;
UCPTrie* trie = ucptrie_openFromBinary(
Expand All @@ -55,33 +57,41 @@ static UCPTrie* CreateTrie() {
return trie;
}

static bool HasProperty(UChar32 c, CharacterProperty property) {
unsigned GetProperty(UChar32 c, CharacterProperty property) {
static const UCPTrie* trie = CreateTrie();
return UCPTRIE_FAST_GET(trie, UCPTRIE_16, c) &
static_cast<CharacterPropertyType>(property);
}

} // namespace

bool Character::IsUprightInMixedVertical(UChar32 character) {
return u_getIntPropertyValue(character,
UProperty::UCHAR_VERTICAL_ORIENTATION) !=
UVerticalOrientation::U_VO_ROTATED;
}

bool Character::IsCJKIdeographOrSymbolSlow(UChar32 c) {
return HasProperty(c, CharacterProperty::kIsCJKIdeographOrSymbol);
return GetProperty(c, CharacterProperty::kIsCJKIdeographOrSymbol);
}

bool Character::IsPotentialCustomElementNameChar(UChar32 character) {
return HasProperty(character,
return GetProperty(character,
CharacterProperty::kIsPotentialCustomElementNameChar);
}

bool Character::IsBidiControl(UChar32 character) {
return HasProperty(character, CharacterProperty::kIsBidiControl);
return GetProperty(character, CharacterProperty::kIsBidiControl);
}

bool Character::IsHangulSlow(UChar32 character) {
return HasProperty(character, CharacterProperty::kIsHangul);
return GetProperty(character, CharacterProperty::kIsHangul);
}

// Returns the `HanKerningCharType` recorded for `character` in the character
// property data. The type is stored as a small bit field: the lookup keeps
// only the bits under `kHanKerningShiftedMask`, and shifting them down by
// `kHanKerningShift` yields the enum value.
HanKerningCharType Character::GetHanKerningCharType(UChar32 character) {
  constexpr unsigned kShift =
      static_cast<unsigned>(CharacterProperty::kHanKerningShift);
  const unsigned shifted_field =
      GetProperty(character, CharacterProperty::kHanKerningShiftedMask);
  return static_cast<HanKerningCharType>(shifted_field >> kShift);
}

unsigned Character::ExpansionOpportunityCount(
Expand Down
3 changes: 3 additions & 0 deletions third_party/blink/renderer/platform/text/character.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "base/containers/span.h"
#include "third_party/blink/renderer/platform/platform_export.h"
#include "third_party/blink/renderer/platform/text/character_property.h"
#include "third_party/blink/renderer/platform/text/han_kerning_char_type.h"
#include "third_party/blink/renderer/platform/text/text_direction.h"
#include "third_party/blink/renderer/platform/text/text_run.h"
#include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
Expand Down Expand Up @@ -123,6 +124,8 @@ class PLATFORM_EXPORT Character {
// http://unicode.org/reports/tr9/#Directional_Formatting_Characters
static bool IsBidiControl(UChar32 character);

static HanKerningCharType GetHanKerningCharType(UChar32 character);

// Collapsible white space characters defined in CSS:
// https://drafts.csswg.org/css-text-3/#collapsible-white-space
static bool IsCollapsibleSpace(UChar c) {
Expand Down
10 changes: 10 additions & 0 deletions third_party/blink/renderer/platform/text/character_property.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,17 @@ enum class CharacterProperty : CharacterPropertyType {
kIsPotentialCustomElementNameChar = 1 << 1,
kIsBidiControl = 1 << 2,
kIsHangul = 1 << 3,

// Bits to store `HanKerningCharType`.
kHanKerningShift = 4,
kHanKerningSize = 3,
kHanKerningMask = ((1 << kHanKerningSize) - 1),
kHanKerningShiftedMask = kHanKerningMask << kHanKerningShift,

kNumBits = kHanKerningShift + kHanKerningSize,
};
static_assert(static_cast<unsigned>(CharacterProperty::kNumBits) <=
sizeof(CharacterPropertyType) * 8);

inline CharacterProperty operator|(CharacterProperty a, CharacterProperty b) {
return static_cast<CharacterProperty>(static_cast<CharacterPropertyType>(a) |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <stdio.h>
#include <unicode/ucptrie.h>
#include <unicode/umutablecptrie.h>
#include <unicode/uniset.h>
#include <unicode/unistr.h>

#include <cassert>
#include <cstring>
Expand All @@ -15,6 +17,8 @@

#include "base/check_op.h"
#include "third_party/blink/renderer/platform/text/character_property.h"
#include "third_party/blink/renderer/platform/text/han_kerning_char_type.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"

namespace blink {
namespace {
Expand All @@ -34,21 +38,75 @@ class CharacterPropertyValues {
void Initialize() {
memset(values_.get(), 0, sizeof(CharacterProperty) * kSize);

#define SET(name) \
SetRanges(name##Ranges, std::size(name##Ranges), CharacterProperty::name); \
SetValues(name##Array, std::size(name##Array), CharacterProperty::name);
#define SET(name) \
SetForRanges(name##Ranges, std::size(name##Ranges), \
CharacterProperty::name); \
SetForValues(name##Array, std::size(name##Array), CharacterProperty::name);

SET(kIsCJKIdeographOrSymbol);
SET(kIsPotentialCustomElementNameChar);
SET(kIsBidiControl);
#undef SET
SetRanges(kIsHangulRanges, std::size(kIsHangulRanges),
CharacterProperty::kIsHangul);
SetForRanges(kIsHangulRanges, std::size(kIsHangulRanges),
CharacterProperty::kIsHangul);
SetHanKerning();
}

void SetRanges(const UChar32* ranges,
size_t length,
CharacterProperty value) {
void SetHanKerning() {
// https://drafts.csswg.org/css-text-4/#text-spacing-classes
Set(kLeftSingleQuotationMarkCharacter, HanKerningCharType::kOpen);
Set(kLeftDoubleQuotationMarkCharacter, HanKerningCharType::kOpen);
Set(kRightSingleQuotationMarkCharacter, HanKerningCharType::kClose);
Set(kRightDoubleQuotationMarkCharacter, HanKerningCharType::kClose);
Set(kIdeographicSpaceCharacter, HanKerningCharType::kMiddle);
Set(kIdeographicCommaCharacter, HanKerningCharType::kDot);
Set(kIdeographicFullStopCharacter, HanKerningCharType::kDot);
Set(kFullwidthComma, HanKerningCharType::kDot);
Set(kFullwidthFullStop, HanKerningCharType::kDot);
Set(kFullwidthColon, HanKerningCharType::kColon);
Set(kFullwidthSemicolon, HanKerningCharType::kSemicolon);
Set(kKatakanaMiddleDot, HanKerningCharType::kMiddle);
SetForUnicodeSet("[[:blk=CJK_Symbols:][:ea=F:] & [:gc=Ps:]]",
HanKerningCharType::kOpen);
SetForUnicodeSet("[[:blk=CJK_Symbols:][:ea=F:] & [:gc=Pe:]]",
HanKerningCharType::kClose);
}

// Converts `value` to the `CharacterProperty` bits that represent it, i.e.
// the enum value moved up into the Han-kerning bit field.
static CharacterProperty ToCharacterProperty(HanKerningCharType value) {
  // The value must fit in the bits reserved by `kHanKerningMask`; otherwise
  // it would spill into neighboring property bits once shifted.
  CHECK_EQ((static_cast<unsigned>(value) &
            ~static_cast<unsigned>(CharacterProperty::kHanKerningMask)),
           0u);
  const unsigned shift =
      static_cast<unsigned>(CharacterProperty::kHanKerningShift);
  const unsigned shifted_bits = static_cast<unsigned>(value) << shift;
  return static_cast<CharacterProperty>(shifted_bits);
}

void SetForUnicodeSet(const char* pattern, HanKerningCharType type) {
SetForUnicodeSet(pattern, ToCharacterProperty(type),
CharacterProperty::kHanKerningShiftedMask);
}

// For `patterns`, see:
// https://unicode-org.github.io/icu/userguide/strings/unicodeset.html#unicodeset-patterns
void SetForUnicodeSet(const char* pattern,
CharacterProperty value,
CharacterProperty mask) {
UErrorCode error = U_ZERO_ERROR;
icu::UnicodeSet set(icu::UnicodeString(pattern), error);
CHECK_EQ(error, U_ZERO_ERROR);
const int32_t range_count = set.getRangeCount();
for (int32_t i = 0; i < range_count; ++i) {
const UChar32 end = set.getRangeEnd(i);
for (UChar32 ch = set.getRangeStart(i); ch <= end; ++ch) {
CHECK_EQ(static_cast<unsigned>(values_[ch] & mask), 0u);
values_[ch] |= value;
}
}
}

void SetForRanges(const UChar32* ranges,
size_t length,
CharacterProperty value) {
CHECK_EQ(length % 2, 0u);
const UChar32* end = ranges + length;
for (; ranges != end; ranges += 2) {
Expand All @@ -60,14 +118,24 @@ class CharacterPropertyValues {
}
}

void SetValues(const UChar32* begin, size_t length, CharacterProperty value) {
void SetForValues(const UChar32* begin,
size_t length,
CharacterProperty value) {
const UChar32* end = begin + length;
for (; begin != end; begin++) {
CHECK_LE(*begin, kMaxCodepoint);
values_[*begin] |= value;
}
}

// Stores `type` in the Han-kerning bit field of the entry for `ch`.
void Set(UChar32 ch, HanKerningCharType type) {
  const CharacterProperty shifted_type = ToCharacterProperty(type);
  // A character may be given a Han-kerning type only once; a non-zero field
  // here means two rules claimed the same character.
  CHECK_EQ(static_cast<unsigned>(values_[ch] &
                                 CharacterProperty::kHanKerningShiftedMask),
           0u);
  values_[ch] = values_[ch] | shifted_type;
}

std::unique_ptr<CharacterProperty[]> values_;
};

Expand Down
19 changes: 19 additions & 0 deletions third_party/blink/renderer/platform/text/character_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,25 @@ TEST(CharacterTest, TestIsCJKIdeographOrSymbol) {
TestSpecificUChar32RangeIdeographSymbol(0x1F1E6, 0x1F6FF);
}

// Verifies `Character::GetHanKerningCharType` for at least one character of
// every `HanKerningCharType` value, covering both the individually listed
// characters and the ones selected by Unicode properties.
TEST(CharacterTest, HanKerning) {
  struct Data {
    UChar32 ch;
    HanKerningCharType type;
  } data_list[] = {
      // A character without any Han-kerning class.
      {'a', HanKerningCharType::kOther},
      // Individually listed characters.
      {kLeftSingleQuotationMarkCharacter, HanKerningCharType::kOpen},
      {kLeftDoubleQuotationMarkCharacter, HanKerningCharType::kOpen},
      {kRightSingleQuotationMarkCharacter, HanKerningCharType::kClose},
      {kRightDoubleQuotationMarkCharacter, HanKerningCharType::kClose},
      {kIdeographicSpaceCharacter, HanKerningCharType::kMiddle},
      {kKatakanaMiddleDot, HanKerningCharType::kMiddle},
      // Font-dependent types; they are returned unresolved at this layer.
      {kIdeographicCommaCharacter, HanKerningCharType::kDot},
      {kIdeographicFullStopCharacter, HanKerningCharType::kDot},
      {kFullwidthComma, HanKerningCharType::kDot},
      {kFullwidthFullStop, HanKerningCharType::kDot},
      {kFullwidthColon, HanKerningCharType::kColon},
      {kFullwidthSemicolon, HanKerningCharType::kSemicolon},
      // Start/end punctuation selected by Unicode properties: one pair from
      // the CJK Symbols block and one fullwidth pair.
      {0x3008, HanKerningCharType::kOpen},
      {0xFF5F, HanKerningCharType::kOpen},
      {0x3009, HanKerningCharType::kClose},
      {0xFF60, HanKerningCharType::kClose},
  };
  for (const Data& data : data_list) {
    // Report the code point (decimal) so a failing entry can be identified.
    EXPECT_EQ(Character::GetHanKerningCharType(data.ch), data.type)
        << "code point: " << data.ch;
  }
}

TEST(CharacterTest, CanTextDecorationSkipInk) {
// ASCII
EXPECT_TRUE(Character::CanTextDecorationSkipInk('a'));
Expand Down
34 changes: 34 additions & 0 deletions third_party/blink/renderer/platform/text/han_kerning_char_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_HAN_KERNING_CHAR_TYPE_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_HAN_KERNING_CHAR_TYPE_H_

#include <stdint.h>

namespace blink {

//
// Character types for the `HanKerning` class.
//
// https://drafts.csswg.org/css-text-4/#text-spacing-classes
//
// The values are stored per character in the generated character property
// data, in the bit field described by `CharacterProperty::kHanKerningShift`
// and `CharacterProperty::kHanKerningSize`.
//
enum class HanKerningCharType : uint8_t {
// Characters without a Han-kerning class. This is the zero value, which is
// also what characters never assigned a type by the data generator have.
kOther,
// Opening punctuation: left single/double quotation marks, and start
// punctuation (gc=Ps) in the CJK Symbols block or with fullwidth East Asian
// Width.
kOpen,
// Closing punctuation: right single/double quotation marks, and end
// punctuation (gc=Pe) in the CJK Symbols block or with fullwidth East Asian
// Width.
kClose,
// Ideographic space (U+3000) and katakana middle dot (U+30FB).
kMiddle,

// Following types depend on fonts. `HanKerning::GetCharType()` can resolve
// them to types above.
// Ideographic comma/full stop (U+3001, U+3002) and fullwidth comma/full stop
// (U+FF0C, U+FF0E).
kDot,
// Fullwidth colon (U+FF1A).
kColon,
// Fullwidth semicolon (U+FF1B).
kSemicolon,

// When adding values, ensure `CharacterProperty` has enough storage.
};

} // namespace blink

#endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_HAN_KERNING_CHAR_TYPE_H_

0 comments on commit 86afd63

Please sign in to comment.