Skip to content

Commit

Permalink
canvas (GraphemeClusterBreak): handle surrogate pairs for UCS-2 wchar_t
Browse files Browse the repository at this point in the history
  • Loading branch information
akinomyoga committed Sep 25, 2022
1 parent 0b3e611 commit 18bf121
Show file tree
Hide file tree
Showing 9 changed files with 655 additions and 137 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Expand Up @@ -17,12 +17,17 @@
/test

# memo
/memo/*.exe
/memo/D0702.HISTFILE*
/memo/D0727.bind.*
/memo/D0857.unbind
/memo/D0857.bind
/memo/D0857.pipe
/memo/D0857.stderr
/memo/*.exe
/memo/D1881/pr227.*.txt
/memo/D1881/pr227.*.hist
/memo/D1881/pr227-sleep-delay.pdf

# make
/make/*.exe
/make/canvas.c2w.wcwidth.txt
107 changes: 71 additions & 36 deletions lib/test-canvas.sh
Expand Up @@ -352,7 +352,22 @@ function ble/test:canvas/textmap {
#------------------------------------------------------------------------------
# Grapheme_Cluster_Break

ble/test/start-section 'ble/unicode/GraphemeCluster/c2break' 72
ble/test/start-section 'ble/unicode/GraphemeCluster/c2break' 77

# Define the helper ".locate-code-point", which turns a position counted in
# code points into an index usable with the string in this bash.
#
# The probe runs in a subshell with stderr discarded: it assigns U+1F6D1 to "s"
# under LC_ALL=C.UTF-8 and tests whether the resulting string length is 2, i.e.
# whether a single non-BMP character is counted as two elements (presumably a
# surrogate pair on a platform with UCS-2 wchar_t).
if (LC_ALL=C.UTF-8 builtin eval "s=\$'\\U1F6D1'"; ((${#s}==2))) 2>/dev/null; then
# Variant used when the probe succeeds.
# @param[in] $1 string to scan
# @param[in] $2 number of steps to advance from the beginning of $1
# @var[out] ret resulting index
# Starting from index 0, at most $2 steps are taken, and stepping stops as
# soon as the index reaches the length of $1.
function ble/test:canvas/GraphemeCluster/.locate-code-point {
# "shift" is declared local here and is read back after each call below.
local s=$1 k=$2 len=${#1} i=0 shift
while ((k-->=1&&i<len)); do
# NOTE(review): presumably s2break-right, given the word "shift" as its third
# argument, stores in the variable "shift" the number of string elements
# occupied by the code point at index i -- confirm against its definition,
# which is not visible here.
ble/unicode/GraphemeCluster/s2break-right "$s" "$i" shift
((i+=shift))
done
ret=$i
}
else
# Variant used when the probe fails: the count is returned unchanged as the
# index ($1 is ignored).
function ble/test:canvas/GraphemeCluster/.locate-code-point {
ret=$2
}
fi

(
bleopt emoji_opts=ri:tpvs:epvs:zwj
Expand Down Expand Up @@ -396,44 +411,60 @@ ble/test/start-section 'ble/unicode/GraphemeCluster/c2break' 72
ble/test 'ble/unicode/GraphemeCluster/c2break "$((0x1F32B))"' ret="$_ble_unicode_GraphemeClusterBreak_Pictographic"

if ((_ble_bash>=40200)); then
# Run one ble/test check of ble/unicode/GraphemeCluster/find-previous-boundary
# where the positions are given as code-point counts.
# @param[in] $1 (str)   test string; it is embedded between single quotes in
#                       the tested command, so it must not contain "'"
# @param[in] $2 (index) position passed to find-previous-boundary
# @param[in] $3 (ans)   expected value of "ret"
# Both positions are converted by .locate-code-point before the test is run.
function ble/test:canvas/GraphemeClusterBreak/find-previous-boundary {
local str=$1 index=$2 ans=$3 ret=
# A "." is appended only for the conversion of index.
# NOTE(review): presumably this lets an index one past the last code point of
# str still be reached by .locate-code-point -- confirm.
ble/test:canvas/GraphemeCluster/.locate-code-point "$str." "$index"; index=$ret
ble/test:canvas/GraphemeCluster/.locate-code-point "$str" "$ans"; ans=$ret
ble/test "ble/unicode/GraphemeCluster/find-previous-boundary '$str' $index" ret="$ans"
}

# Regional_Indicator
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 1' ret="0"
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 2' ret="0"
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 3' ret="2"
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 4' ret="2"
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 5' ret="4"
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 2' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 3' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 4' ret=3
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 5' ret=3
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 6' ret=5
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 7' ret=6
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6Z" 7' ret=6
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 7' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 6' ret=1
ble/test $'bleopt_grapheme_cluster=legacy ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 7' ret=5
ble/test $'bleopt_grapheme_cluster=legacy ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 6' ret=5
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 1 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 2 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 3 2
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 4 2
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 5 4
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 2 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'B\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 3 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'C\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 4 3
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'D\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 5 3
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'E\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 6 5
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'F\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 7 6
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'G\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6Z' 7 6
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'H\u600\u600\u600\u600\U1F1E6\U1F1FF' 7 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'I\u600\u600\u600\u600\U1F1E6\U1F1FF' 6 1
bleopt_grapheme_cluster=legacy ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'J\u600\u600\u600\u600\U1F1E6\U1F1FF' 7 5
bleopt_grapheme_cluster=legacy ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'K\u600\u600\u600\u600\U1F1E6\U1F1FF' 6 5

# ZWJ sequence
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 1' ret=0
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 2' ret=0
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 3' ret=0
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 4' ret=0
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 5' ret=4
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 2' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 3' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 4' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 5' ret=1
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 6' ret=5
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 2' ret=0
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 3' ret=2
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 4' ret=2
ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 5' ret=4
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 1 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 2 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 3 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 4 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 5 4
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'a\U1F636\U200D\U1F32B\UFE0F' 2 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'b\U1F636\U200D\U1F32B\UFE0F' 3 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'c\U1F636\U200D\U1F32B\UFE0F' 4 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'d\U1F636\U200D\U1F32B\UFE0F' 5 1
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'e\U1F636\U200D\U1F32B\UFE0F' 6 5
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'f\U200D\U1F32B\UFE0F' 2 0
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'g\U200D\U1F32B\UFE0F' 3 2
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'h\U200D\U1F32B\UFE0F' 4 2
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'i\U200D\U1F32B\UFE0F' 5 4

ble/test "ble/test:canvas/textmap \$'@@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=1
ble/test "ble/test:canvas/textmap \$'@\u0308@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=2
ble/test "ble/test:canvas/textmap \$'@\u0308\u0308@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=3
ble/test "ble/test:canvas/textmap \$'@\u0308\u0308\u0308@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=4

# s2break-{right,left}
ble/test 'ble/util/is-unicode-output'
c1=$'\uFE0F'
ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-right "$c1" 0 code; ret=$code' ret="$((0xFE0F))"
ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-left "$c1" "${#c1}" code; ret=$code' ret="$((0xFE0F))"
c2=$'\U1F6D1'
ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-right "$c2" 0 code; ret=$code' ret="$((0x1F6D1))"
ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-left "$c2" "${#c2}" code; ret=$code' ret="$((0x1F6D1))"
fi
)

Expand All @@ -446,17 +477,21 @@ ble/test/start-section 'ble/unicode/GraphemeCluster/c2break (GraphemeBreakTest.t

function ble/test:canvas/GraphemeClusterBreak/find-previous-boundary {
local ans=${1%%:*} str=${1#*:}
eval "local s=\$'$str'"
ble/string#split ans , "$ans"
local i=0 b=0
local k=0 b=0
for k in "${!ans[@]}"; do
ble/test "ble/unicode/GraphemeCluster/find-previous-boundary \$'$str' $((k+1))" ret=${ans[k]}
if ((ans[k]>b)); then
ble/test:canvas/GraphemeCluster/.locate-code-point "$s." "$((k+1))"; local i=$ret
ble/test:canvas/GraphemeCluster/.locate-code-point "$s" "${ans[k]}"; local a=$ret
ble/test "ble/unicode/GraphemeCluster/find-previous-boundary \$'$str' $i" ret="$a"
if ((a>b)); then
local ret= c= w= cs= extend=
ble/test "ble/unicode/GraphemeCluster/match \$'$str' $b && ((ret=b+1+extend))" ret=${ans[k]}
((b=ans[k]))
ble/test "ble/unicode/GraphemeCluster/match \$'$str' $b && ((ret=b+1+extend))" ret="$a"
((b=a))
fi
done
}

if ((_ble_bash>=40200)); then
for spec in "${tests_cases[@]}"; do
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary "$spec"
Expand Down
76 changes: 44 additions & 32 deletions make_command.sh
Expand Up @@ -364,15 +364,22 @@ function sub:generate-grapheme-cluster-table {
v2c[10] = "v";
v2c[11] = "t";
v2c[12] = "G";
# [blesh extension] surrogate pair
PropertyCount += 2;
prop2v["HighSurrogate"] = HSG = 13;
prop2v["LowSurrogate"] = LSG = 14;
v2c[13] = "<";
v2c[14] = ">";
}
function process_GraphemeClusterBreak(_, v, m, b, e, i) {
v = prop2v[$3];
if (match($1, /([[:xdigit:]]+)\.\.([[:xdigit:]]+)/, m) > 0) {
function process_GraphemeClusterBreak(code, prop, _, v, m, b, e, i) {
v = prop2v[prop];
if (match(code, /([[:xdigit:]]+)\.\.([[:xdigit:]]+)/, m) > 0) {
b = strtonum("0x" m[1]);
e = strtonum("0x" m[2]);
} else {
b = e = strtonum("0x" $1);
b = e = strtonum("0x" code);
}
for (i = b; i <= e; i++)
Expand Down Expand Up @@ -401,38 +408,41 @@ function sub:generate-grapheme-cluster-table {
/__Grapheme_Cluster_Break__/ {mode = "break";}
/__Extended_Pictographic__/ {mode = "picto";}
/^[[:space:]]*(#|$)/ {next;}
mode == "break" && $2 == ";" { process_GraphemeClusterBreak(); }
mode == "break" && $2 == ";" { process_GraphemeClusterBreak($1, $3); }
mode == "picto" && /Extended_Pictographic/ { process_ExtendedPictographic(); }
function rule_add(i, j, value) {
if (rule[i, j] != "") return;
rule[i, j] = value;
if (rule[i, j] != "") return;
rule[i, j] = value;
}
function rule_initialize() {
for (i = 0; i < PropertyCount; i++) {
rule_add(Control, i, 0);
rule_add(i, Control, 0);
}
rule_add(L, L, 1);
rule_add(L, V, 1);
rule_add(L, LV, 1);
rule_add(L, LVT, 1);
rule_add(LV, V, 1);
rule_add(LV, T, 1);
rule_add(V, V, 1);
rule_add(V, T, 1);
rule_add(LVT, T, 1);
rule_add(T, T, 1);
for (i = 0; i < PropertyCount; i++) {
rule_add(i, Extend, 1);
rule_add(i, ZWJ, 1);
}
for (i = 0; i < PropertyCount; i++) {
rule_add(i, SpacingMark, 2);
rule_add(Prepend, i, 2);
}
rule_add(ZWJ, Pictographic, 3);
rule_add(Regional_Indicator, Regional_Indicator, 4);
for (i = 0; i < PropertyCount; i++) {
rule_add(Control, i, 0);
rule_add(i, Control, 0);
}
rule_add(L, L, 1);
rule_add(L, V, 1);
rule_add(L, LV, 1);
rule_add(L, LVT, 1);
rule_add(LV, V, 1);
rule_add(LV, T, 1);
rule_add(V, V, 1);
rule_add(V, T, 1);
rule_add(LVT, T, 1);
rule_add(T, T, 1);
for (i = 0; i < PropertyCount; i++) {
rule_add(i, Extend, 1);
rule_add(i, ZWJ, 1);
}
for (i = 0; i < PropertyCount; i++) {
rule_add(i, SpacingMark, 2);
rule_add(Prepend, i, 2);
}
rule_add(ZWJ, Pictographic, 3);
rule_add(Regional_Indicator, Regional_Indicator, 4);
# [blesh extension] surrogate pair
rule_add(HSG, LSG, 5);
}
function rule_print(_, i, j, t, out) {
out = "";
Expand Down Expand Up @@ -519,8 +529,10 @@ function sub:generate-grapheme-cluster-table {
}
END {
#print_table();
process_GraphemeClusterBreak("D800..DBFF", "HighSurrogate");
process_GraphemeClusterBreak("DC00..DFFF", "LowSurrogate");
#print_table();
prop_print();
print "_ble_unicode_GraphemeClusterBreak_MaxCode=" (max_code + 1);
Expand Down
46 changes: 46 additions & 0 deletions memo/D1881/pr227-measure.sh
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# Measure the wall-clock time taken by ":" and by "sleep" with various delays.
#
# Output: a header line "# <bash version> (<machine type>)" followed by one
# line per measurement of the form "<elapsed seconds> <label>".  The elapsed
# time is computed with bc from two timestamps taken around the command.

if [[ ${EPOCHREALTIME-} ]]; then
  # This bash provides EPOCHREALTIME, so timestamps are read from it directly.

  # measure COMMAND [LABEL]
  #   Evaluate COMMAND and print the elapsed time followed by LABEL.
  measure() {
    local beg=$EPOCHREALTIME
    eval "$1"
    local end=$EPOCHREALTIME
    echo "$(bc -l <<< "$end-$beg") $2"
  }

else
  # EPOCHREALTIME is unavailable: build a small helper program that prints the
  # current time as "<seconds>.<microseconds>".  The heredoc delimiter is
  # quoted so that the C source is passed to the compiler verbatim.
  cc -o epoch.tmp -x c - <<'EOF' || { echo 'pr227-measure: failed to compile epoch.tmp' >&2; exit 1; }
#include <sys/time.h>
#include <stdio.h>
int main() {
  struct timeval tv;
  gettimeofday(&tv, NULL);
  /* tv_sec and tv_usec have implementation-defined signed types (time_t and
   * suseconds_t), so cast both to long to match the %ld conversions. */
  printf("%ld.%06ld\n", (long) tv.tv_sec, (long) tv.tv_usec);
  return 0;
}
EOF

  chmod +x epoch.tmp

  # measure COMMAND [LABEL]
  #   Evaluate COMMAND and print the elapsed time followed by LABEL.  The
  #   timestamps come from the helper program, so the result also includes
  #   the cost of running it.
  measure() {
    local beg=$(./epoch.tmp)
    eval "$1"
    local end=$(./epoch.tmp)
    echo "$(bc -l <<< "$end-$beg") $2"
  }

  # Remove the helper on exit.  On SIGINT, remove it, restore the default
  # handler and re-send the signal to this shell.
  trap 'rm -f epoch.tmp' EXIT
  trap 'rm -f epoch.tmp; trap - INT; kill -INT $$' INT
fi

{
  echo "# $BASH_VERSION ($MACHTYPE)"
  # Measurements of the no-op command (printed with an empty label).
  for i in {0..100}; do
    measure ":"
  done
  # Repeated measurements of a 1 ms sleep.
  for i in {0..100}; do
    measure "sleep 0.001" 0.001
  done
  # One measurement for each delay from 0.002 s to 0.300 s in 1 ms steps.
  for i in {2..300}; do
    printf -v v '0.%03d' "$i"
    measure "sleep $v" "$v"
  done
}

0 comments on commit 18bf121

Please sign in to comment.