diff --git a/.gitignore b/.gitignore index 22c513ed..7ec149ca 100644 --- a/.gitignore +++ b/.gitignore @@ -17,12 +17,17 @@ /test # memo +/memo/*.exe /memo/D0702.HISTFILE* /memo/D0727.bind.* /memo/D0857.unbind /memo/D0857.bind /memo/D0857.pipe /memo/D0857.stderr -/memo/*.exe +/memo/D1881/pr227.*.txt +/memo/D1881/pr227.*.hist +/memo/D1881/pr227-sleep-delay.pdf + +# make /make/*.exe /make/canvas.c2w.wcwidth.txt diff --git a/lib/test-canvas.sh b/lib/test-canvas.sh index 3d865241..dce9c600 100644 --- a/lib/test-canvas.sh +++ b/lib/test-canvas.sh @@ -352,7 +352,22 @@ function ble/test:canvas/textmap { #------------------------------------------------------------------------------ # Grapheme_Cluster_Break -ble/test/start-section 'ble/unicode/GraphemeCluster/c2break' 72 +ble/test/start-section 'ble/unicode/GraphemeCluster/c2break' 77 + +if (LC_ALL=C.UTF-8 builtin eval "s=\$'\\U1F6D1'"; ((${#s}==2))) 2>/dev/null; then + function ble/test:canvas/GraphemeCluster/.locate-code-point { + local s=$1 k=$2 len=${#1} i=0 shift + while ((k-->=1&&i=40200)); then + function ble/test:canvas/GraphemeClusterBreak/find-previous-boundary { + local str=$1 index=$2 ans=$3 ret= + ble/test:canvas/GraphemeCluster/.locate-code-point "$str." 
"$index"; index=$ret + ble/test:canvas/GraphemeCluster/.locate-code-point "$str" "$ans"; ans=$ret + ble/test "ble/unicode/GraphemeCluster/find-previous-boundary '$str' $index" ret="$ans" + } + # Regional_Indicator - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 1' ret="0" - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 2' ret="0" - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 3' ret="2" - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 4' ret="2" - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F1E6\U1F1FF\U1F1E6\U1F1FF" 5' ret="4" - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 2' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 3' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 4' ret=3 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 5' ret=3 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 6' ret=5 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6" 7' ret=6 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6Z" 7' ret=6 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 7' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 6' ret=1 - ble/test $'bleopt_grapheme_cluster=legacy ble/unicode/GraphemeCluster/find-previous-boundary "A\u600\u600\u600\u600\U1F1E6\U1F1FF" 7' ret=5 - ble/test $'bleopt_grapheme_cluster=legacy ble/unicode/GraphemeCluster/find-previous-boundary 
"A\u600\u600\u600\u600\U1F1E6\U1F1FF" 6' ret=5 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 1 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 2 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 3 2 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 4 2 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F1E6\U1F1FF\U1F1E6\U1F1FF' 5 4 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'A\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 2 1 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'B\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 3 1 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'C\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 4 3 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'D\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 5 3 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'E\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 6 5 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'F\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6' 7 6 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'G\U1F1E6\U1F1FF\U1F1E6\U1F1FF\U1F1E6Z' 7 6 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'H\u600\u600\u600\u600\U1F1E6\U1F1FF' 7 1 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'I\u600\u600\u600\u600\U1F1E6\U1F1FF' 6 1 + bleopt_grapheme_cluster=legacy ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'J\u600\u600\u600\u600\U1F1E6\U1F1FF' 7 5 + bleopt_grapheme_cluster=legacy ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'K\u600\u600\u600\u600\U1F1E6\U1F1FF' 6 5 # ZWJ sequence - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 1' ret=0 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 2' ret=0 - ble/test 
$'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 3' ret=0 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 4' ret=0 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "\U1F636\U200D\U1F32B\UFE0F" 5' ret=4 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 2' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 3' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 4' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 5' ret=1 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U1F636\U200D\U1F32B\UFE0F" 6' ret=5 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 2' ret=0 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 3' ret=2 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 4' ret=2 - ble/test $'ble/unicode/GraphemeCluster/find-previous-boundary "a\U200D\U1F32B\UFE0F" 5' ret=4 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 1 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 2 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 3 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 4 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'\U1F636\U200D\U1F32B\UFE0F' 5 4 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'a\U1F636\U200D\U1F32B\UFE0F' 2 1 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'b\U1F636\U200D\U1F32B\UFE0F' 3 1 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'c\U1F636\U200D\U1F32B\UFE0F' 4 1 + 
ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'d\U1F636\U200D\U1F32B\UFE0F' 5 1 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'e\U1F636\U200D\U1F32B\UFE0F' 6 5 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'f\U200D\U1F32B\UFE0F' 2 0 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'g\U200D\U1F32B\UFE0F' 3 2 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'h\U200D\U1F32B\UFE0F' 4 2 + ble/test:canvas/GraphemeClusterBreak/find-previous-boundary $'i\U200D\U1F32B\UFE0F' 5 4 ble/test "ble/test:canvas/textmap \$'@@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=1 ble/test "ble/test:canvas/textmap \$'@\u0308@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=2 ble/test "ble/test:canvas/textmap \$'@\u0308\u0308@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=3 ble/test "ble/test:canvas/textmap \$'@\u0308\u0308\u0308@' stderr; ble/textmap#get-index-at -v ret 1 0" ret=4 + + # s2break-{right,left} + ble/test 'ble/util/is-unicode-output' + c1=$'\uFE0F' + ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-right "$c1" 0 code; ret=$code' ret="$((0xFE0F))" + ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-left "$c1" "${#c1}" code; ret=$code' ret="$((0xFE0F))" + c2=$'\U1F6D1' + ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-right "$c2" 0 code; ret=$code' ret="$((0x1F6D1))" + ble/test code:'code=; ble/unicode/GraphemeCluster/s2break-left "$c2" "${#c2}" code; ret=$code' ret="$((0x1F6D1))" fi ) @@ -446,17 +477,21 @@ ble/test/start-section 'ble/unicode/GraphemeCluster/c2break (GraphemeBreakTest.t function ble/test:canvas/GraphemeClusterBreak/find-previous-boundary { local ans=${1%%:*} str=${1#*:} + eval "local s=\$'$str'" ble/string#split ans , "$ans" - local i=0 b=0 + local k=0 b=0 for k in "${!ans[@]}"; do - ble/test "ble/unicode/GraphemeCluster/find-previous-boundary \$'$str' $((k+1))" ret=${ans[k]} - if ((ans[k]>b)); then + 
ble/test:canvas/GraphemeCluster/.locate-code-point "$s." "$((k+1))"; local i=$ret + ble/test:canvas/GraphemeCluster/.locate-code-point "$s" "${ans[k]}"; local a=$ret + ble/test "ble/unicode/GraphemeCluster/find-previous-boundary \$'$str' $i" ret="$a" + if ((a>b)); then local ret= c= w= cs= extend= - ble/test "ble/unicode/GraphemeCluster/match \$'$str' $b && ((ret=b+1+extend))" ret=${ans[k]} - ((b=ans[k])) + ble/test "ble/unicode/GraphemeCluster/match \$'$str' $b && ((ret=b+1+extend))" ret="$a" + ((b=a)) fi done } + if ((_ble_bash>=40200)); then for spec in "${tests_cases[@]}"; do ble/test:canvas/GraphemeClusterBreak/find-previous-boundary "$spec" diff --git a/make_command.sh b/make_command.sh index 56f2eec8..7433f7ae 100755 --- a/make_command.sh +++ b/make_command.sh @@ -364,15 +364,22 @@ function sub:generate-grapheme-cluster-table { v2c[10] = "v"; v2c[11] = "t"; v2c[12] = "G"; + + # [blesh extension] surrogate pair + PropertyCount += 2; + prop2v["HighSurrogate"] = HSG = 13; + prop2v["LowSurrogate"] = LSG = 14; + v2c[13] = "<"; + v2c[14] = ">"; } - function process_GraphemeClusterBreak(_, v, m, b, e, i) { - v = prop2v[$3]; - if (match($1, /([[:xdigit:]]+)\.\.([[:xdigit:]]+)/, m) > 0) { + function process_GraphemeClusterBreak(code, prop, _, v, m, b, e, i) { + v = prop2v[prop]; + if (match(code, /([[:xdigit:]]+)\.\.([[:xdigit:]]+)/, m) > 0) { b = strtonum("0x" m[1]); e = strtonum("0x" m[2]); } else { - b = e = strtonum("0x" $1); + b = e = strtonum("0x" code); } for (i = b; i <= e; i++) @@ -401,38 +408,41 @@ function sub:generate-grapheme-cluster-table { /__Grapheme_Cluster_Break__/ {mode = "break";} /__Extended_Pictographic__/ {mode = "picto";} /^[[:space:]]*(#|$)/ {next;} - mode == "break" && $2 == ";" { process_GraphemeClusterBreak(); } + mode == "break" && $2 == ";" { process_GraphemeClusterBreak($1, $3); } mode == "picto" && /Extended_Pictographic/ { process_ExtendedPictographic(); } function rule_add(i, j, value) { - if (rule[i, j] != "") return; - rule[i, j] = 
value; + if (rule[i, j] != "") return; + rule[i, j] = value; } function rule_initialize() { - for (i = 0; i < PropertyCount; i++) { - rule_add(Control, i, 0); - rule_add(i, Control, 0); - } - rule_add(L, L, 1); - rule_add(L, V, 1); - rule_add(L, LV, 1); - rule_add(L, LVT, 1); - rule_add(LV, V, 1); - rule_add(LV, T, 1); - rule_add(V, V, 1); - rule_add(V, T, 1); - rule_add(LVT, T, 1); - rule_add(T, T, 1); - for (i = 0; i < PropertyCount; i++) { - rule_add(i, Extend, 1); - rule_add(i, ZWJ, 1); - } - for (i = 0; i < PropertyCount; i++) { - rule_add(i, SpacingMark, 2); - rule_add(Prepend, i, 2); - } - rule_add(ZWJ, Pictographic, 3); - rule_add(Regional_Indicator, Regional_Indicator, 4); + for (i = 0; i < PropertyCount; i++) { + rule_add(Control, i, 0); + rule_add(i, Control, 0); + } + rule_add(L, L, 1); + rule_add(L, V, 1); + rule_add(L, LV, 1); + rule_add(L, LVT, 1); + rule_add(LV, V, 1); + rule_add(LV, T, 1); + rule_add(V, V, 1); + rule_add(V, T, 1); + rule_add(LVT, T, 1); + rule_add(T, T, 1); + for (i = 0; i < PropertyCount; i++) { + rule_add(i, Extend, 1); + rule_add(i, ZWJ, 1); + } + for (i = 0; i < PropertyCount; i++) { + rule_add(i, SpacingMark, 2); + rule_add(Prepend, i, 2); + } + rule_add(ZWJ, Pictographic, 3); + rule_add(Regional_Indicator, Regional_Indicator, 4); + + # [blesh extension] surrogate pair + rule_add(HSG, LSG, 5); } function rule_print(_, i, j, t, out) { out = ""; @@ -519,8 +529,10 @@ function sub:generate-grapheme-cluster-table { } END { - #print_table(); + process_GraphemeClusterBreak("D800..DBFF", "HighSurrogate"); + process_GraphemeClusterBreak("DC00..DFFF", "LowSurrogate"); + #print_table(); prop_print(); print "_ble_unicode_GraphemeClusterBreak_MaxCode=" (max_code + 1); diff --git a/memo/D1881/pr227-measure.sh b/memo/D1881/pr227-measure.sh new file mode 100755 index 00000000..21de93a2 --- /dev/null +++ b/memo/D1881/pr227-measure.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +if [[ ${EPOCHREALTIME-} ]]; then + measure() { + local 
beg=$EPOCHREALTIME + eval "$1" + local end=$EPOCHREALTIME + echo "$(bc -l <<< "$end-$beg") $2" + } + +else + cc -o epoch.tmp -x c - < +#include +int main() { + struct timeval tv; + gettimeofday(&tv, NULL); + printf("%ld.%06u\n", tv.tv_sec, tv.tv_usec); +} +EOF + + chmod +x epoch.tmp + measure() { + local beg=$(./epoch.tmp) + eval "$1" + local end=$(./epoch.tmp) + echo "$(bc -l <<< "$end-$beg") $2" + } + + trap 'rm -f epoch.tmp' EXIT + trap 'rm -f epoch.tmp; trap - INT; kill -INT $$' INT +fi + +{ + echo "# $BASH_VERSION ($MACHTYPE)" + for i in {0..100}; do + measure ":" + done + for i in {0..100}; do + measure "sleep 0.001" 0.001 + done + for i in {2..300}; do + printf -v v '0.%03d' "$i" + measure "sleep $v" "$v" + done +} diff --git a/memo/D1881/pr227-plot.gp b/memo/D1881/pr227-plot.gp new file mode 100755 index 00000000..f9c2eeb4 --- /dev/null +++ b/memo/D1881/pr227-plot.gp @@ -0,0 +1,84 @@ +#!/usr/bin/gnuplot + +set terminal pdfcairo size 4.5,4.5/sqrt(2) +set output 'pr227-sleep-delay.pdf' + +set xlabel 'Requested delay (argument of sleep) [sec]' +set ylabel 'Actual delay of sleep [sec]' + +avg_empty = 0.005270 +avg_0_001 = 0.0112548 +fcost = avg_0_001 - 0.001 - avg_empty + +set title 'Scatter plot of actual delay vs requested (Bash 3.2 / CI macOS)' +set key left top Left +plot [0:0.100]\ + 'pr227.ci-macos.txt' u 2:($1-avg_empty) lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x', \ + x+fcost lc rgb '#888888' dt (8,4) title 'y = x + (fork overhead)' + +plot \ + 'pr227.ci-macos.txt' u 2:($1-avg_empty) lc rgb '#FF0000' ps 0.5 lw 0.5 title 'Sample points', \ + x lc rgb '#000000' title 'y = x', \ + x+fcost lc rgb '#888888' dt (8,4) title 'y = x + (fork overhead)' + +set title 'Distribution of delay (requested = 0.001) (Bash 3.2 / CI macOS)' +set xlabel 'Actual delay [sec]' +set ylabel 'Histogram count' +set style fill solid +set boxwidth 0.8 relative +set yrange [0:60] +plot \ + 'pr227.sleep0001.hist' u 1:2 w boxes fc rgb '#AAAAFF' notitle, 
\ + 'pr227.sleep0001.hist' u 1:2:(sqrt($2)) w yerror lc rgb '#000088' notitle + +set title 'Distribution of extra delay (requested >= 0.020) (Bash 3.2 / CI macOS)' +set xlabel 'Extra delay [sec]' +set ylabel 'Histogram count' +set style fill solid +set boxwidth 0.8 relative +set yrange [0:*] +plot \ + 'pr227.sleep0020p.hist' u 1:2 w boxes fc rgb '#AAAAFF' notitle, \ + 'pr227.sleep0020p.hist' u 1:2:(sqrt($2)) w yerror lc rgb '#000088' notitle + +#------------------------------------------------------------------------------ + +set xlabel 'Requested delay (argument of sleep) [sec]' +set ylabel 'Actual delay of sleep [sec]' + +set title 'Scatter plot in Bash 3.2 (GNU/Linux)' +set key left top Left +plot [0:0.200]\ + 'pr227.linux32.txt' u 2:1 lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x' + +set title 'Scatter plot in Bash 5.2 (GNU/Linux)' +set key left top Left +plot [0:0.200]\ + 'pr227.linux52.txt' u 2:1 lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x' + +set title 'Scatter plot in Bash 5.1 (FreeBSD 13)' +set key left top Left +plot [0:0.200]\ + 'pr227.freebsd.txt' u 2:1 lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x' + +set title 'Scatter plot in Bash 4.4 (Cygwin)' +set key left top Left +plot [0:0.200]\ + 'pr227.cygwin.txt' u 2:1 lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x' + +set title 'Scatter plot in Bash 3.2 (macOS)' +set key left top Left +plot [0:0.200]\ + 'pr227.macos32.txt' u 2:1 lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x' + +set title 'Scatter plot in Bash 5.1 (macOS)' +set key left top Left +plot [0:0.200]\ + 'pr227.macos51.txt' u 2:1 lc rgb '#FF0000' title 'Sample points', \ + x lc rgb '#000000' title 'y = x' diff --git a/memo/D1881/pr227-plot.sh b/memo/D1881/pr227-plot.sh new file mode 100755 index 00000000..04ffff2a --- /dev/null +++ b/memo/D1881/pr227-plot.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + 
+awk ' + BEGIN { + bin_width = 0.0005; + ibin_min = 99999; + ibin_max = 0; + count = 0; + } + $2 == 0.001 { + v = $1; + ibin = int(v / bin_width) + hist[ibin]++; + if (ibin < ibin_min) ibin_min = ibin; + if (ibin > ibin_max) ibin_max = ibin; + count++; + } + END { + for (ibin = ibin_min; ibin <= ibin_max; ibin++) { + center = (ibin + 0.5) * bin_width; + print center, hist[ibin], hist[ibin] / (count * bin_width); + } + } +' pr227.ci-macos.txt > pr227.sleep0001.hist + +awk ' + BEGIN { + bin_width = 0.005; + ibin_min = 99999; + ibin_max = 0; + count = 0; + } + $2 >= 0.020 { + v = $1 - $2; + ibin = int(v / bin_width) + hist[ibin]++; + if (ibin < ibin_min) ibin_min = ibin; + if (ibin > ibin_max) ibin_max = ibin; + count++; + } + END { + for (ibin = ibin_min; ibin <= ibin_max; ibin++) { + center = (ibin + 0.5) * bin_width; + print center, hist[ibin], hist[ibin] / (count * bin_width); + } + } +' pr227.ci-macos.txt > pr227.sleep0020p.hist + +gnuplot pr227-plot.gp + diff --git a/note.txt b/note.txt index 8b48b6d7..ba401c7b 100644 --- a/note.txt +++ b/note.txt @@ -6604,6 +6604,112 @@ bash_tips Done (実装ログ) ------------------------------------------------------------------------------- +2022-09-25 + + * github: GitHub#227 CI tests in macOS/Windows (reported by aiotter) [#D1881] + https://github.com/akinomyoga/ble.sh/pull/227 + + * macOS の sleep の実装は此処にある。特に変な事をしている訳でもない。 + https://github.com/apple-oss-distributions/shell_cmds/blob/main/sleep/sleep.c + + * macOS の CI ではテストをスキップする様にする方法について模索する。 + + https://docs.github.com/ja/actions/learn-github-actions/environment-variables + + の環境変数 RUNNER_OS を参照すれば良い。github workflow の中で動いている事 + を確認する為に、CI == true 及び GITHUB_ACTION についても確認する。 + + * macOS で問題が発生した時に誰が解決するのかという問題が生じる気がする。修 + 正に時間が掛かるし実際の macOS で試してみないと分からない事もある。誰かを + 頼ろうと思っても誰に頼んだら良いのかも分からないしすぐ応答してくれるとは + 限らない。 + + その間 nightly が全くビルドされなくなる。という事を考えるとやはり nightly + と macOS のテストは少なくとも分離するべきである。 + + また直る迄の間ずっと X がならぶ事になる。直す事ができないとずっと失敗する + という事になる。うーん。一方で時々確認しておくという事はしたい気もする。 + 
確認する時だけ macOS をテストに含めるという使い方もあるだろうか?? + + * PR に対しては一応実行しておきたい気はする。と言っても minor change や + rebase に対して毎回テストを実行していたら clone stat が大変な事になる。テ + ストを実行する時に approve をする機能があるみたいなので、それをどの様にす + れば良いか確認する。 + + * msys のテストで色々失敗している。GraphemeCluster に関して調べてみようとし + たが、そもそも msys2 bash の上では $'\U1F6D1' が1文字ではなく2文字とカウ + ントされる様である。 + + $ a=$'\U1F6D1' + $ echo "${#a}" + 2 + + 確認してみた所、実は Cygwin でも同様の問題があるという事が判明した。うー + ん。これは Bash の側で修正するべき事の気がする。或いは Cygwin の側で修正 + するべき事の可能性もある。後で調べる事にする。 + + うーん subst.c:8046 の MB_STRLEN を呼び出している。そしてこの MB_STRLEN + は include/shmbutil.h で + + #define MBSLEN(s) (((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0) + #define MB_STRLEN(s) ((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s)) + + の様にして定義されている。mbstrlen は lib/sh/shmbchar.c で定義されている + 関数である。中では mbrlen を呼び出している。これは標準ライブラリから来て + いる。 + + ------------------------------------------------------------------------- + + cygwin の上で mbrlen を呼び出すコードで再現した。newlib-cygwin の + newlib/libc/stdlib/mbrlen.c (mbrlen) は単に mbrtowc を呼び出している。 + newlib/libc/stdlib/mbrtowc.c (mbrtowc) は wchar_t に変換する関数である。 + 一方で windows では wchar_t は 16bit である。なので surrogate を読み取る + しかないという事。うーん。この状況だとどのように直すのが正しいのか不明である。 + + * mbrlen の実装を弄って mbrtowc ではなくて Unicode code point を読み取っ + た時のバイト数を返す様に変更すると、今度は他の箇所で mbrtowc との不整合 + が問題になる可能性もある。 + + そもそも bash ですら途中で wchar_t に変換して行う処理があった筈なので問 + 題が起こる可能性が高い。 + + * 或いは mbrtowc に代わる mbrtoc32 的な物があれば良いのだが。然し + char32_t は C++ の物だし、もし一連の関数を提供するとしても bash の側で + も大幅な変更と検証が必要になるので色々難しい気がする。 + + だとすると ble.sh の側で上手に surrogate も処理できる様にする? 然し、 + UTF-8 で表されている文字列を bash でちゃんと切り取る事ができるのかも謎で + ある。切り取る事ができたとしてもちゃんと処理できるのだろうか。。。 + + ------------------------------------------------------------------------- + + 一つの手は Grapheme Cluster の判定に Surrogate pair も考慮に入れるという + 事。或いは既に考慮に入っている?
→確認してみたが考慮には入っていない様だ。 + + GraphemeClusterTable を確認すると surrogate pair U*D800..U+DFFF は全て 0 + になっている。つまり通常文字として取り扱われている。これを取り敢えず新し + いカテゴリとして登録する事にする。 + + 取り敢えず実装したが、実際に Cygwin の上で動かすと動かない。どうも + surrogate pair の後半に対して ble/util/s2c を実行しようとしても常に 0 に + なってしまう。これは printf '%s' に渡す前に文字を切断する必要があるが、文 + 字を切断する時点で空文字列になってしまうから? 調べてみると ${s:i} で切断 + した時点で 4byte 中の 3byte が処理された状態になっていて UTF-8 の最後の文 + 字が切り出されるという事態になっている。 + + ------------------------------------------------------------------------- + + うーん。また変な振る舞いを見つけてしまった。これは bash の側で修正した。 + + s=$'\U1F6D1' + printf '%d ' "'$s" "'$s" "'$s" + printf '%d ' "'$s" "'x" "'$s" "'x" "'$s" + echo + + https://lists.gnu.org/archive/html/bug-bash/2022-09/msg00055.html + + 他にも色々振る舞いについて修正などが必要だったが取り敢えず通る様になった。 + 2022-09-16 * canvas: Unicode version 更新 [#D1880] diff --git a/src/canvas.GraphemeClusterBreak.sh b/src/canvas.GraphemeClusterBreak.sh index 9b612517..f1a21b06 100644 --- a/src/canvas.GraphemeClusterBreak.sh +++ b/src/canvas.GraphemeClusterBreak.sh @@ -1,5 +1,7 @@ -_ble_unicode_GraphemeClusterBreak_Count=13 +_ble_unicode_GraphemeClusterBreak_Count=15 _ble_unicode_GraphemeClusterBreak_ZWJ=2 +_ble_unicode_GraphemeClusterBreak_LowSurrogate=14 +_ble_unicode_GraphemeClusterBreak_HighSurrogate=13 _ble_unicode_GraphemeClusterBreak_Regional_Indicator=6 _ble_unicode_GraphemeClusterBreak_Prepend=3 _ble_unicode_GraphemeClusterBreak_SpacingMark=5 @@ -84,7 +86,7 @@ _ble_unicode_GraphemeClusterBreak=( [119172]=0 [121461]=4 [121476]=4 [121504]=0 [122887]=0 [122905]=0 [122906]=0 [122914]=0 [122915]=4 [122916]=4 [122917]=0 [123023]=4 [123566]=4 [127279]=12 [127358]=12 [127359]=12 [127374]=12 [127375]=0 [127376]=0 [127488]=0 [127514]=12 [127535]=12 [127536]=0 [127537]=0 [127547]=0 [129339]=0 [129350]=0 - # Grapheme_Cluster_Break ranges (370 ranges) + # Grapheme_Cluster_Break ranges (373 ranges) [0]=1 [32]=0 [127]=1 [160]=0 [768]=4 [880]=0 [1155]=4 [1162]=0 [1425]=4 [1480]=0 [1536]=3 [1542]=0 [1552]=4 [1565]=0 [1611]=4 [1632]=0 [1750]=4 [1774]=0 [1840]=4 
[1867]=0 [1958]=4 [1969]=0 [2027]=4 [2036]=0 [2070]=4 [2094]=0 [2137]=4 [2140]=0 [2200]=4 [2208]=0 [2250]=4 [2308]=0 [2366]=5 [2369]=4 [2377]=5 [2385]=4 [2392]=0 [2497]=4 [2510]=0 [2622]=5 [2627]=0 [2635]=4 [2638]=0 [2750]=5 [2753]=4 [2766]=0 [2810]=4 [2820]=0 @@ -96,19 +98,19 @@ _ble_unicode_GraphemeClusterBreak=( [7680]=0 [8232]=1 [8239]=0 [8288]=1 [8304]=0 [8400]=4 [8433]=0 [8596]=12 [8602]=0 [9193]=12 [9204]=0 [9208]=12 [9211]=0 [9723]=12 [9862]=0 [9872]=12 [10007]=0 [10067]=12 [10072]=0 [10083]=12 [10088]=0 [10133]=12 [10136]=0 [11013]=12 [11016]=0 [11503]=4 [11506]=0 [11744]=4 [11776]=0 [12330]=4 [12337]=0 [42607]=4 [42622]=0 [43188]=5 [43206]=0 [43232]=4 [43250]=0 [43302]=4 [43310]=0 [43335]=4 [43348]=0 [43360]=7 [43389]=0 [43392]=4 [43396]=0 [43446]=4 [43454]=5 [43457]=0 - [43561]=4 [43575]=0 [43698]=4 [43705]=0 [44033]=11 [55204]=0 [55216]=8 [55239]=0 [55243]=9 [55292]=0 [65024]=4 [65040]=0 [65056]=4 [65072]=0 [65520]=1 [65532]=0 - [66422]=4 [66427]=0 [68097]=4 [68103]=0 [68108]=4 [68112]=0 [68152]=4 [68155]=0 [68900]=4 [68904]=0 [69373]=4 [69376]=0 [69446]=4 [69457]=0 [69506]=4 [69510]=0 - [69688]=4 [69703]=0 [69759]=4 [69763]=0 [69808]=5 [69811]=4 [69822]=0 [69888]=4 [69891]=0 [69927]=4 [69941]=0 [70067]=5 [70070]=4 [70084]=0 [70089]=4 [70096]=0 - [70188]=5 [70191]=4 [70200]=0 [70368]=5 [70371]=4 [70379]=0 [70465]=5 [70478]=0 [70502]=4 [70509]=0 [70512]=4 [70517]=0 [70709]=5 [70712]=4 [70727]=0 [70835]=4 - [70852]=0 [71090]=4 [71096]=5 [71105]=0 [71216]=5 [71219]=4 [71233]=0 [71344]=4 [71352]=0 [71453]=4 [71468]=0 [71724]=5 [71727]=4 [71739]=0 [71985]=5 [72004]=0 - [72145]=5 [72148]=4 [72156]=5 [72161]=0 [72193]=4 [72203]=0 [72243]=4 [72255]=0 [72273]=4 [72284]=0 [72324]=3 [72330]=4 [72346]=0 [72752]=4 [72768]=0 [72850]=4 - [72887]=0 [73009]=4 [73015]=0 [73023]=4 [73032]=0 [73098]=5 [73112]=0 [73526]=4 [73531]=0 [78896]=1 [78913]=0 [78919]=4 [78934]=0 [92912]=4 [92917]=0 [92976]=4 - [92983]=0 [94033]=5 [94088]=0 [94095]=4 [94099]=0 [113824]=1 [113828]=0 
[118528]=4 [118599]=0 [119143]=4 [119146]=0 [119150]=4 [119155]=1 [119163]=4 [119180]=0 [119210]=4 - [119214]=0 [119362]=4 [119365]=0 [121344]=4 [121399]=0 [121403]=4 [121453]=0 [121499]=4 [121520]=0 [122880]=4 [122923]=0 [123184]=4 [123191]=0 [123628]=4 [123632]=0 [124140]=4 - [124144]=0 [125136]=4 [125143]=0 [125252]=4 [125259]=0 [126976]=12 [127232]=0 [127245]=12 [127248]=0 [127340]=12 [127346]=0 [127377]=12 [127387]=0 [127405]=12 [127462]=6 [127489]=12 - [127504]=0 [127538]=12 [127552]=0 [127561]=12 [127995]=4 [128000]=12 [128318]=0 [128326]=12 [128592]=0 [128640]=12 [128768]=0 [128884]=12 [128896]=0 [128981]=12 [129024]=0 [129036]=12 - [129040]=0 [129096]=12 [129104]=0 [129114]=12 [129120]=0 [129160]=12 [129168]=0 [129198]=12 [129280]=0 [129292]=12 [129792]=0 [130048]=12 [131070]=0 [917504]=1 [917536]=4 [917632]=1 - [917760]=4 [918000]=1 + [43561]=4 [43575]=0 [43698]=4 [43705]=0 [44033]=11 [55204]=0 [55216]=8 [55239]=0 [55243]=9 [55292]=0 [55296]=13 [56320]=14 [57344]=0 [65024]=4 [65040]=0 [65056]=4 + [65072]=0 [65520]=1 [65532]=0 [66422]=4 [66427]=0 [68097]=4 [68103]=0 [68108]=4 [68112]=0 [68152]=4 [68155]=0 [68900]=4 [68904]=0 [69373]=4 [69376]=0 [69446]=4 + [69457]=0 [69506]=4 [69510]=0 [69688]=4 [69703]=0 [69759]=4 [69763]=0 [69808]=5 [69811]=4 [69822]=0 [69888]=4 [69891]=0 [69927]=4 [69941]=0 [70067]=5 [70070]=4 + [70084]=0 [70089]=4 [70096]=0 [70188]=5 [70191]=4 [70200]=0 [70368]=5 [70371]=4 [70379]=0 [70465]=5 [70478]=0 [70502]=4 [70509]=0 [70512]=4 [70517]=0 [70709]=5 + [70712]=4 [70727]=0 [70835]=4 [70852]=0 [71090]=4 [71096]=5 [71105]=0 [71216]=5 [71219]=4 [71233]=0 [71344]=4 [71352]=0 [71453]=4 [71468]=0 [71724]=5 [71727]=4 + [71739]=0 [71985]=5 [72004]=0 [72145]=5 [72148]=4 [72156]=5 [72161]=0 [72193]=4 [72203]=0 [72243]=4 [72255]=0 [72273]=4 [72284]=0 [72324]=3 [72330]=4 [72346]=0 + [72752]=4 [72768]=0 [72850]=4 [72887]=0 [73009]=4 [73015]=0 [73023]=4 [73032]=0 [73098]=5 [73112]=0 [73526]=4 [73531]=0 [78896]=1 [78913]=0 [78919]=4 [78934]=0 + 
[92912]=4 [92917]=0 [92976]=4 [92983]=0 [94033]=5 [94088]=0 [94095]=4 [94099]=0 [113824]=1 [113828]=0 [118528]=4 [118599]=0 [119143]=4 [119146]=0 [119150]=4 [119155]=1 + [119163]=4 [119180]=0 [119210]=4 [119214]=0 [119362]=4 [119365]=0 [121344]=4 [121399]=0 [121403]=4 [121453]=0 [121499]=4 [121520]=0 [122880]=4 [122923]=0 [123184]=4 [123191]=0 + [123628]=4 [123632]=0 [124140]=4 [124144]=0 [125136]=4 [125143]=0 [125252]=4 [125259]=0 [126976]=12 [127232]=0 [127245]=12 [127248]=0 [127340]=12 [127346]=0 [127377]=12 [127387]=0 + [127405]=12 [127462]=6 [127489]=12 [127504]=0 [127538]=12 [127552]=0 [127561]=12 [127995]=4 [128000]=12 [128318]=0 [128326]=12 [128592]=0 [128640]=12 [128768]=0 [128884]=12 [128896]=0 + [128981]=12 [129024]=0 [129036]=12 [129040]=0 [129096]=12 [129104]=0 [129114]=12 [129120]=0 [129160]=12 [129168]=0 [129198]=12 [129280]=0 [129292]=12 [129792]=0 [130048]=12 [131070]=0 + [917504]=1 [917536]=4 [917632]=1 [917760]=4 [918000]=1 ) _ble_unicode_GraphemeClusterBreak_ranges=( 0 32 127 160 768 880 1155 1162 1425 1480 1536 1542 1552 1565 1611 1632 1750 1774 1840 1867 1958 1969 2027 2036 2070 2094 2137 2140 2200 2208 2250 2308 @@ -116,26 +118,28 @@ _ble_unicode_GraphemeClusterBreak_ranges=( 3544 3552 3636 3643 3655 3663 3764 3773 3784 3791 3953 3976 3981 4029 4141 4159 4190 4193 4209 4213 4352 4448 4520 4608 4957 4960 5906 5910 6071 6078 6089 6100 6155 6160 6432 6435 6444 6451 6457 6460 6744 6765 6771 6784 6832 6863 6912 6917 6964 6973 6981 7019 7028 7074 7086 7146 7151 7156 7204 7212 7224 7376 7401 7616 7680 8232 8239 8288 8304 8400 8433 8596 8602 9193 9204 9208 9211 9723 9862 9872 10007 10067 10072 10083 10088 10133 10136 11013 11016 11503 11506 11744 11776 12330 12337 42607 - 42622 43188 43206 43232 43250 43302 43310 43335 43348 43360 43389 43392 43396 43446 43454 43457 43561 43575 43698 43705 44033 55204 55216 55239 55243 55292 65024 65040 65056 65072 65520 65532 - 66422 66427 68097 68103 68108 68112 68152 68155 68900 68904 69373 69376 69446 69457 69506 
69510 69688 69703 69759 69763 69808 69811 69822 69888 69891 69927 69941 70067 70070 70084 70089 70096 - 70188 70191 70200 70368 70371 70379 70465 70478 70502 70509 70512 70517 70709 70712 70727 70835 70852 71090 71096 71105 71216 71219 71233 71344 71352 71453 71468 71724 71727 71739 71985 72004 - 72145 72148 72156 72161 72193 72203 72243 72255 72273 72284 72324 72330 72346 72752 72768 72850 72887 73009 73015 73023 73032 73098 73112 73526 73531 78896 78913 78919 78934 92912 92917 92976 - 92983 94033 94088 94095 94099 113824 113828 118528 118599 119143 119146 119150 119155 119163 119180 119210 119214 119362 119365 121344 121399 121403 121453 121499 121520 122880 122923 123184 123191 123628 123632 124140 - 124144 125136 125143 125252 125259 126976 127232 127245 127248 127340 127346 127377 127387 127405 127462 127489 127504 127538 127552 127561 127995 128000 128318 128326 128592 128640 128768 128884 128896 128981 129024 129036 - 129040 129096 129104 129114 129120 129160 129168 129198 129280 129292 129792 130048 131070 917504 917536 917632 917760 918000 921600 + 42622 43188 43206 43232 43250 43302 43310 43335 43348 43360 43389 43392 43396 43446 43454 43457 43561 43575 43698 43705 44033 55204 55216 55239 55243 55292 55296 56320 57344 65024 65040 65056 + 65072 65520 65532 66422 66427 68097 68103 68108 68112 68152 68155 68900 68904 69373 69376 69446 69457 69506 69510 69688 69703 69759 69763 69808 69811 69822 69888 69891 69927 69941 70067 70070 + 70084 70089 70096 70188 70191 70200 70368 70371 70379 70465 70478 70502 70509 70512 70517 70709 70712 70727 70835 70852 71090 71096 71105 71216 71219 71233 71344 71352 71453 71468 71724 71727 + 71739 71985 72004 72145 72148 72156 72161 72193 72203 72243 72255 72273 72284 72324 72330 72346 72752 72768 72850 72887 73009 73015 73023 73032 73098 73112 73526 73531 78896 78913 78919 78934 + 92912 92917 92976 92983 94033 94088 94095 94099 113824 113828 118528 118599 119143 119146 119150 119155 119163 119180 119210 119214 119362 119365 
121344 121399 121403 121453 121499 121520 122880 122923 123184 123191 + 123628 123632 124140 124144 125136 125143 125252 125259 126976 127232 127245 127248 127340 127346 127377 127387 127405 127462 127489 127504 127538 127552 127561 127995 128000 128318 128326 128592 128640 128768 128884 128896 + 128981 129024 129036 129040 129096 129104 129114 129120 129160 129168 129198 129280 129292 129792 130048 131070 917504 917536 917632 917760 918000 921600 ) _ble_unicode_GraphemeClusterBreak_rule=( - 0 0 1 0 1 2 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 0 1 0 1 2 0 0 0 0 0 0 3 - 2 0 1 2 1 2 2 2 2 2 2 2 2 - 0 0 1 0 1 2 0 0 0 0 0 0 0 - 0 0 1 0 1 2 0 0 0 0 0 0 0 - 0 0 1 0 1 2 4 0 0 0 0 0 0 - 0 0 1 0 1 2 0 1 1 0 1 1 0 - 0 0 1 0 1 2 0 0 1 1 0 0 0 - 0 0 1 0 1 2 0 0 0 1 0 0 0 - 0 0 1 0 1 2 0 0 1 1 0 0 0 - 0 0 1 0 1 2 0 0 0 1 0 0 0 - 0 0 1 0 1 2 0 0 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 0 0 0 3 0 0 + 2 0 1 2 1 2 2 2 2 2 2 2 2 2 2 + 0 0 1 0 1 2 0 0 0 0 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 0 0 0 0 0 0 + 0 0 1 0 1 2 4 0 0 0 0 0 0 0 0 + 0 0 1 0 1 2 0 1 1 0 1 1 0 0 0 + 0 0 1 0 1 2 0 0 1 1 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 1 0 0 0 0 0 + 0 0 1 0 1 2 0 0 1 1 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 1 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 0 0 0 0 0 0 + 0 0 1 0 1 2 0 0 0 0 0 0 0 0 5 + 0 0 1 0 1 2 0 0 0 0 0 0 0 0 0 ) diff --git a/src/canvas.sh b/src/canvas.sh index f84bfabe..6e11ce04 100644 --- a/src/canvas.sh +++ b/src/canvas.sh @@ -543,6 +543,180 @@ function ble/unicode/GraphemeCluster/c2break { return 0 } +_ble_unicode_GraphemeCluster_bomlen=1 +_ble_unicode_GraphemeCluster_ucs4len=1 +function ble/unicode/GraphemeCluster/s2break/.initialize { + local LC_ALL=C.UTF-8 + builtin eval "local v1=\$'\\uFE0F' v2=\$'\\U1F6D1'" + _ble_unicode_GraphemeCluster_bomlen=${#v1} + _ble_unicode_GraphemeCluster_ucs4len=${#v2} + ble/util/unlocal LC_ALL + builtin unset -f "$FUNCNAME" +} 2>/dev/null # suppress locale error #D1440 +ble/unicode/GraphemeCluster/s2break/.initialize + 
+## @fn ble/unicode/GraphemeCluster/s2break/.combine-surrogate code1 code2 str +## @var[out] c +function ble/unicode/GraphemeCluster/s2break/.combine-surrogate { + local code1=$1 code2=$2 s=$3 + if ((0xDC00<=code2&&code2<=0xDFFF)); then + ((c=0x10000+(code1-0xD800)*1024+(code2&0x3FF))) + else + local ret + ble/util/s2bytes "$s" + ble/encoding:UTF-8/b2c "${ret[@]}" + c=$ret + fi +} +## @fn ble/unicode/GraphemeCluster/s2break/.wa-bash43bug-uFFFF code +## (#D1881) Bash 4.3, 4.4 [sizeof(wchar_t) == 2] で $'\uE000'.. $'\uFFFF' が +## 壊れたサロゲートになるバグに対する対策。この時、前半サロゲートは不正な値 +## U+D7F8..D7FF になるが、これはハングル字母などと被る。U+D7F8..D7FF の時は、 +## 次の文字が後半サロゲートの時に限り前半サロゲートとして取り扱う。 +## +## @param[in] code +## 壊れた前半サロゲータの可能性がある文字コード +## @var[in,out] ret +## 調整前後の GraphemeClusterBreak 値 +## @exit +## 調整が行われた時に成功です (0)。それ以外の時は失敗 (1) です。 +## +if ((_ble_unicode_GraphemeCluster_bomlen==2&&40300<=_ble_bash&&_ble_bash<50000)); then + function ble/unicode/GraphemeCluster/s2break/.wa-bash43bug-uFFFF { + local code=$1 + ((0xD7F8<=code&&code<0xD800)) && ble/util/is-unicode-output && + ret=$_ble_unicode_GraphemeClusterBreak_HighSurrogate + } +else + function ble/unicode/GraphemeCluster/s2break/.wa-bash43bug-uFFFF { ((0)); } +fi +## @fn ble/unicode/GraphemeCluster/s2break/.wa-cygwin-LSG code +## (#D1881) Cygwin では UCS-2 に入らないコードポイントの後半サロゲートをs2cで +## 取ろうとしても 0 になってしまう (Bash 5.0 以降では 4-byte UTF-8 の最後のバ +## イト値) ので、後半について code == 0 の場合も前半サロゲートをチェックする。 +## +## @param[in] code +## UCS-4 の後半サロゲートの可能性がある文字コード +## @var[in,out] ret +## 調整前後の GraphemeClusterBreak 値 +## @exit +## 調整が行われた時に成功です (0)。それ以外の時は失敗 (1) です。 +## +if ((_ble_unicode_GraphemeCluster_ucs4len==2)); then + if ((_ble_bash<50000)); then + function ble/unicode/GraphemeCluster/s2break/.wa-cygwin-LSG { + local code=$1 + ((code==0)) && ble/util/is-unicode-output && + ret=$_ble_unicode_GraphemeClusterBreak_LowSurrogate + } + else + function ble/unicode/GraphemeCluster/s2break/.wa-cygwin-LSG { + local code=$1 + ((0x80<=code&&code<0xC0)) && 
ble/util/is-unicode-output && + ret=$_ble_unicode_GraphemeClusterBreak_LowSurrogate + } + fi +else + function ble/unicode/GraphemeCluster/s2break/.wa-cygwin-LSG { ((0)); } +fi + +## @fn ble/unicode/GraphemeCluster/s2break-left str index [opts] +## @fn ble/unicode/GraphemeCluster/s2break-right str index [opts] +## 指定した文字列の指定した境界の左右の code point の GraphemeCulsterBreak 値 +## を求めます。単に bash の文字単位ではなく、サロゲートペアも考慮に入れたコー +## ドポイント単位で処理を行います。 +## +## @param str +## @param index +## @param[opt] opts +## @var[out] ret +## GraphemeCulsterBreak 値を返します。 +## @var[out,opt] shift +## opts に shift が指定された時に対象の code point の文字数を返します。 +## surrogate pair の時に 2 になります。それ以外の時は 1 です。 +## @var[out,opt] code +## opts に code が指定された時に対象の code point を返します。 +## +## * Note2 (#D1881): ${s:i-1:2} 等として 2 文字切り出すのは、Cygwin では +## ${s:i-1:1} として最初の文字を切り出そうとすると UCS-2 に入らない code +## point の文字が破壊されてしまって surrogate 前半すら取り出せなくなる為。少 +## なくとも wchar_t*2 の分だけ渡せば printf %d '$1 で surrogate 前半の code +## point を取り出す事ができる。 +function ble/unicode/GraphemeCluster/s2break-left { + ret=0 + local s=$1 N=${#1} i=$2 opts=$3 sh=1 + ((i>0)) && ble/util/s2c "${s:i-1:2}"; local c=$ret code2=$ret # Note2 (上述) + ble/unicode/GraphemeCluster/c2break "$code2"; local break=$ret + + # process surrogate pairs + ((i-1=0&&ret==_ble_unicode_GraphemeClusterBreak_LowSurrogate)); then + ble/util/s2c "${s:i-2:2}"; local code1=$ret # Note2 (上述) + ble/unicode/GraphemeCluster/c2break "$code1" + ble/unicode/GraphemeCluster/s2break/.wa-bash43bug-uFFFF "$code1" + if ((ret==_ble_unicode_GraphemeClusterBreak_HighSurrogate)); then + ble/unicode/GraphemeCluster/s2break/.combine-surrogate "$code1" "$code2" "${s:i-2:2}" + ble/unicode/GraphemeCluster/c2break "$c" + break=$ret + sh=2 + fi + elif ((i0;j--)); do - ble/util/s2c "${text:j-1:1}" - ble/unicode/GraphemeCluster/c2break "$ret" + local j=$((i-1)) shift=1 + for ((j=i-1;j>0;j-=shift)); do + ble/unicode/GraphemeCluster/s2break-left "$text" "$j" shift ((ret==_ble_unicode_GraphemeClusterBreak_Extend)) || break 
done @@ -568,49 +741,50 @@ function ble/unicode/GraphemeCluster/find-previous-boundary/.ZWJ { else # Pictographic | Extend* ZWJ | Pictographic # ^--- j ^--- i - ((i=j-1,b1=ret)) + ((i=j-shift,b1=ret)) return 1 fi } ## @fn ble/unicode/GraphemeCluster/find-previous-boundary/.RI -## @var[in] text i +## @var[in] text i shift ## @var[out] ret function ble/unicode/GraphemeCluster/find-previous-boundary/.RI { if [[ :$bleopt_emoji_opts: != *:ri:* ]]; then ((ret=i)) return 0 fi - - local j=$((i-1)) - for ((j=i-1;j>0;j--)); do - ble/util/s2c "${text:j-1:1}" - ble/unicode/GraphemeCluster/c2break "$ret" + local j1=$((i-shift)) + local j shift=1 countRI=1 + for ((j=j1;j>0;j-=shift,countRI++)); do + ble/unicode/GraphemeCluster/s2break-left "$text" "$j" shift ((ret==_ble_unicode_GraphemeClusterBreak_Regional_Indicator)) || break done - if ((i-j==1)); then + if ((j==j1)); then ((i=j,b1=_ble_unicode_GraphemeClusterBreak_Regional_Indicator)) return 1 else - ((ret=(i-j)%2==1?i-1:i)) + ((ret=countRI%2==1?j1:i)) return 0 fi } function ble/unicode/GraphemeCluster/find-previous-boundary { - local text=$1 i=$2 + local text=$1 i=$2 shift if [[ $bleopt_grapheme_cluster ]] && ((i&&--i)); then - ble/util/s2c "${text:i:1}" - ble/unicode/GraphemeCluster/c2break "$ret"; local b1=$ret + ble/unicode/GraphemeCluster/s2break-right "$text" "$i" shift; local b1=$ret while ((i>0)); do local b2=$b1 - ble/util/s2c "${text:i-1:1}" - ble/unicode/GraphemeCluster/c2break "$ret"; local b1=$ret + ble/unicode/GraphemeCluster/s2break-left "$text" "$i" shift; local b1=$ret case ${_ble_unicode_GraphemeClusterBreak_rule[b1*_ble_unicode_GraphemeClusterBreak_Count+b2]} in (0) break ;; - (1) ((i--)) ;; - (2) [[ $bleopt_grapheme_cluster != extended ]] && break; ((i--)) ;; + (1) ((i-=shift)) ;; + (2) [[ $bleopt_grapheme_cluster != extended ]] && break; ((i-=shift)) ;; (3) ble/unicode/GraphemeCluster/find-previous-boundary/.ZWJ && return 0 ;; (4) ble/unicode/GraphemeCluster/find-previous-boundary/.RI && return 0 ;; + (5) 
+ # surrogate pair の間にいた時は GraphemeClusterBreak を取得し直す + ((i-=shift)) + ble/unicode/GraphemeCluster/s2break-right "$text" "$i"; b1=$ret ;; esac done fi @@ -628,16 +802,16 @@ _ble_unicode_GraphemeClusterBreak_isCore[_ble_unicode_GraphemeClusterBreak_T]=1 _ble_unicode_GraphemeClusterBreak_isCore[_ble_unicode_GraphemeClusterBreak_LV]=1 _ble_unicode_GraphemeClusterBreak_isCore[_ble_unicode_GraphemeClusterBreak_LVT]=1 _ble_unicode_GraphemeClusterBreak_isCore[_ble_unicode_GraphemeClusterBreak_Pictographic]=1 +_ble_unicode_GraphemeClusterBreak_isCore[_ble_unicode_GraphemeClusterBreak_HighSurrogate]=1 ## @fn ble/unicode/GraphemeCluster/extend-ascii text i ## @var[out] extend function ble/unicode/GraphemeCluster/extend-ascii { extend=0 [[ $_ble_util_locale_encoding != UTF-8 || ! $bleopt_grapheme_cluster ]] && return 1 - local text=$1 iN=${#1} i=$2 ret - for ((;i=iN)); then c=0 w=0 cs= cb= extend=0 return 1 - elif [[ $_ble_util_locale_encoding != UTF-8 || ! $bleopt_grapheme_cluster ]]; then + elif ! ble/util/is-unicode-output || [[ ! 
$bleopt_grapheme_cluster ]]; then cs=${text:i:1} ble/util/s2c "$cs"; c=$ret if [[ $flags != *R* ]] && { @@ -694,9 +868,8 @@ function ble/unicode/GraphemeCluster/match { return 0 fi - local b0 b1 b2 c0 c2 - ble/util/s2c "${text:i:1}"; c0=$ret - ble/unicode/GraphemeCluster/c2break "$c0"; b0=$ret + local b0 b1 b2 c0 c2 shift code + ble/unicode/GraphemeCluster/s2break-right "$text" "$i" code:shift; c0=$code b0=$ret local coreb= corec= npre=0 vs= ri= c2=$c0 b2=$b0 @@ -711,10 +884,9 @@ function ble/unicode/GraphemeCluster/match { vs=epvs fi - ((j++)) + ((j+=shift)) b1=$b2 - ble/util/s2c "${text:j:1}"; c2=$ret - ble/unicode/GraphemeCluster/c2break "$c2"; b2=$ret + ble/unicode/GraphemeCluster/s2break-right "$text" "$j" code:shift; c2=$code b2=$ret case ${_ble_unicode_GraphemeClusterBreak_rule[b1*_ble_unicode_GraphemeClusterBreak_Count+b2]} in (0) break ;; (1) continue ;; @@ -722,6 +894,9 @@ function ble/unicode/GraphemeCluster/match { (3) [[ :$bleopt_emoji_opts: == *:zwj:* ]] && ((coreb==_ble_unicode_GraphemeClusterBreak_Pictographic)) || break ;; (4) [[ :$bleopt_emoji_opts: == *:ri:* && ! $ri ]] || break; ri=1 ;; + (5) + # surrogate pair の間にいた時は GraphemeClusterBreak を取得し直す + ble/unicode/GraphemeCluster/s2break-left "$text" "$((j+shift))" code; c2=$code b2=$ret ;; esac done @@ -733,8 +908,9 @@ function ble/unicode/GraphemeCluster/match { ble/util/c2s "$corec" cs=${text:i:npre}$ret${text:i+npre:j-i-npre} else - ble/util/s2c "$cs"; c=$ret corec=$ret - ble/unicode/GraphemeCluster/c2break "$c"; cb=$ret + local code + ble/unicode/GraphemeCluster/s2break-right "$cs" 0 code + c=$code corec=$code cb=$ret fi fi