Skip to content

Commit 300e661

Browse files
committed
Greatly speed up duospace-width
duospace-width() is getting really heavy use in Terminal::Widgets, to the point of being a performance bottleneck on high-cardinality non-cacheable inputs. This commit replaces the core of the implementation with a *much* faster NQP version, making the overall function ~7x faster on large inputs. If the SGR color escape stripping functionality is NOT needed, the core is also exported directly as duospace-width-core(), which is ~10x faster than the original implementation.
1 parent d79b5c6 commit 300e661

File tree

1 file changed

+51
-17
lines changed

1 file changed

+51
-17
lines changed

lib/Text/MiscUtils/Layout.rakumod

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,61 @@ sub horizontal-ruler(UInt:D $width = 80) is export {
1919
}
2020

2121

22-
#| Calculate monospaced width of a single line of text, ignoring ANSI colors
22+
#| Calculate monospaced width of a single line of text, accounting for
23+
#| narrow and wide characters, ignoring ANSI SGR color/attribute escapes
2324
# XXXX: Does not handle cursor-movement control characters such as TAB
2425
sub duospace-width(Str:D $text, Bool :$wide-context = False) is export {
25-
# OLD APPROXIMATION, simply counting NFG characters
26-
# colorstrip($text).chars
27-
28-
# Unicode TR11 approximation, based on legacy character set display width
29-
# compatibility and General_Category visibility -- first strip out ANSI
30-
# codes and likely invisible/non-spacing Unicode characters, then sum the
31-
# counts of remaining characters in each width category
32-
my constant %ignore = < Mn Mc Me Cc Cf Cs Co Cn > Z=> 1 xx *;
33-
my $counts = colorstrip($text)
34-
.ords
35-
.map({ .uniprop('East_Asian_Width') unless %ignore{.uniprop} })
36-
.Bag;
37-
38-
$counts<N> + $counts<Na> + $counts<H> # Generally narrow
39-
+ 2 * ($counts<F> + $counts<W>) # Always wide
40-
+ (1 + $wide-context) * $counts<A> # Context-dependent
26+
duospace-width-core((my str $str = colorstrip($text)),
27+
(my int $context = +$wide-context))
4128
}
4229

30+
#| Optimized core for duospace-width, when colorstrip is known NOT needed.
31+
#| If you're not sure which to use, use the regular duospace-width routine.
32+
#|
33+
#| Like duospace-width, calculates monospace width of a single line of text,
34+
#| while being aware of narrow and wide codepoints using the Unicode TR11
35+
#| width approximation: ignore likely invisible/non-spacing codepoints then
36+
#| sum the width of the remaining codepoints using their East_Asian_Width
37+
#| property and a flag for the interpretation of (A)mbiguous width codepoints.
38+
sub duospace-width-core(str $text, int $wide-context) is export {
39+
# Various chunks of this cribbed from Rakudo setting internals
40+
use nqp;
41+
my constant $gc-prop = nqp::unipropcode('General_Category');
42+
my constant $eaw-prop = nqp::unipropcode('East_Asian_Width');
43+
my constant $ignore = nqp::hash(
44+
'Mn', 1, 'Mc', 1, 'Me', 1, 'Cc', 1, 'Cf', 1, 'Cs', 1, 'Co', 1, 'Cn', 1);
45+
my constant $narrow = nqp::hash(
46+
'N', 1, 'Na', 1, 'H', 1, 'F', 2, 'W', 2, 'A', 1);
47+
my constant $wide = nqp::hash(
48+
'N', 1, 'Na', 1, 'H', 1, 'F', 2, 'W', 2, 'A', 2);
49+
50+
my $cells := $wide-context ?? $wide !! $narrow;
51+
my $codes := nqp::strtocodes(
52+
$text,
53+
nqp::const::NORMALIZE_NFC,
54+
nqp::create(array[uint32])
55+
);
56+
57+
my int $elems = nqp::elems($codes);
58+
my int $i = -1;
59+
my uint $width = 0;
60+
my uint $ord;
61+
62+
nqp::while(
63+
nqp::islt_i(++$i, $elems),
64+
nqp::stmts(
65+
($ord = nqp::atpos_u($codes, $i)),
66+
nqp::unless(
67+
nqp::atkey($ignore, nqp::getuniprop_str($ord, $gc-prop)),
68+
($width = nqp::add_i($width,
69+
nqp::atkey($cells,
70+
nqp::getuniprop_str($ord, $eaw-prop)))),
71+
)
72+
)
73+
);
74+
75+
$width
76+
}
4377

4478
#| Wrap a single line of (possibly ANSI colored) $text to a given $width
4579
# Returns an array of wrapped lines with no trailing newlines.

0 commit comments

Comments
 (0)