@@ -19,27 +19,61 @@ sub horizontal-ruler(UInt:D $width = 80) is export {
1919}
2020
2121
22- # | Calculate monospaced width of a single line of text, ignoring ANSI colors
22+ # | Calculate monospaced width of a single line of text, accounting for
23+ # | narrow and wide characters, ignoring ANSI SGR color/attribute escapes
2324# XXXX: Does not handle cursor-movement control characters such as TAB
2425sub duospace-width (Str : D $ text , Bool : $ wide-context = False ) is export {
25- # OLD APPROXIMATION, simply counting NFG characters
26- # colorstrip($text).chars
27-
28- # Unicode TR11 approximation, based on legacy character set display width
29- # compatibility and General_Category visibility -- first strip out ANSI
30- # codes and likely invisible/non-spacing Unicode characters, then sum the
31- # counts of remaining characters in each width category
32- my constant % ignore = < Mn Mc Me Cc Cf Cs Co Cn > Z => 1 xx * ;
33- my $ counts = colorstrip($ text )
34- . ords
35- . map ({ . uniprop (' East_Asian_Width' ) unless % ignore {. uniprop } })
36- . Bag ;
37-
38- $ counts <N > + $ counts <Na > + $ counts <H > # Generally narrow
39- + 2 * ($ counts <F > + $ counts <W >) # Always wide
40- + (1 + $ wide-context ) * $ counts <A > # Context-dependent
26+ duospace-width-core((my str $ str = colorstrip($ text )),
27+ (my int $ context = + $ wide-context ))
4128}
4229
30+ # | Optimized core for duospace-width, when colorstrip is known NOT needed.
31+ # | If you're not sure which to use, use the regular duospace-width routine.
32+ # |
33+ # | Like duospace-width, calculates monospace width of a single line of text,
34+ # | while being aware of narrow and wide codepoints using the Unicode TR11
35+ # | width approximation: ignore likely invisible/non-spacing codepoints then
36+ # | sum the width of the remaining codepoints using their East_Asian_Width
37+ # | property and a flag for the interpretation of (A)mbiguous width codepoints.
38+ sub duospace-width-core (str $ text , int $ wide-context ) is export {
39+ # Various chunks of this cribbed from Rakudo setting internals
40+ use nqp ;
41+ my constant $ gc-prop = nqp ::unipropcode(' General_Category' );
42+ my constant $ eaw-prop = nqp ::unipropcode(' East_Asian_Width' );
43+ my constant $ ignore = nqp ::hash(
44+ ' Mn' , 1 , ' Mc' , 1 , ' Me' , 1 , ' Cc' , 1 , ' Cf' , 1 , ' Cs' , 1 , ' Co' , 1 , ' Cn' , 1 );
45+ my constant $ narrow = nqp ::hash(
46+ ' N' , 1 , ' Na' , 1 , ' H' , 1 , ' F' , 2 , ' W' , 2 , ' A' , 1 );
47+ my constant $ wide = nqp ::hash(
48+ ' N' , 1 , ' Na' , 1 , ' H' , 1 , ' F' , 2 , ' W' , 2 , ' A' , 2 );
49+
50+ my $ cells := $ wide-context ?? $ wide !! $ narrow ;
51+ my $ codes := nqp ::strtocodes(
52+ $ text ,
53+ nqp ::const::NORMALIZE_NFC,
54+ nqp ::create(array[uint32 ])
55+ );
56+
57+ my int $ elems = nqp :: elems ($ codes );
58+ my int $ i = -1 ;
59+ my uint $ width = 0 ;
60+ my uint $ ord ;
61+
62+ nqp ::while (
63+ nqp ::islt_i(++ $ i , $ elems ),
64+ nqp ::stmts(
65+ ($ ord = nqp ::atpos_u($ codes , $ i )),
66+ nqp ::unless(
67+ nqp ::atkey($ ignore , nqp ::getuniprop_str($ ord , $ gc-prop )),
68+ ($ width = nqp ::add_i($ width ,
69+ nqp ::atkey($ cells ,
70+ nqp ::getuniprop_str($ ord , $ eaw-prop )))),
71+ )
72+ )
73+ );
74+
75+ $ width
76+ }
4377
4478# | Wrap a single line of (possibly ANSI colored) $text to a given $width
4579# Returns an array of wrapped lines with no trailing newlines.
0 commit comments