From 45f3df375950ac7366c15258684e98a01ca5de6a Mon Sep 17 00:00:00 2001
From: Koichi Murase
Date: Sat, 24 Sep 2022 11:25:35 +0900
Subject: [PATCH] encoding: add minor fixes and workarounds

* util (ble/encoding:UTF-8/b2c): fix interpretation of leading byte
* util (ble/util/s2c): work around intermediate mbstate of bash <= 5.2
---
 src/util.sh | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/util.sh b/src/util.sh
index 705690ea..59a73417 100644
--- a/src/util.sh
+++ b/src/util.sh
@@ -3563,11 +3563,22 @@ _ble_util_s2c_table_enabled=
 ##   @param[in] text
 ##   @param[in,opt] index
 ##   @var[out] ret
-if ((_ble_bash>=40100)); then
-  # - printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる)
+if ((_ble_bash>=50300)); then
+  # printf "'c" can read Unicode (it yields a Unicode code point in any LC_CTYPE)
   function ble/util/s2c {
     builtin printf -v ret '%d' "'${1:$2:1}"
   }
+elif ((_ble_bash>=40100)); then
+  function ble/util/s2c {
+    # Note #D1881: in bash <= 5.2, printf %d "'x" can leave a stale mbstate_t
+    # behind, so convert a known character first to try to clear it.
+    if ble/util/is-unicode-output; then
+      builtin printf -v ret %d "'μ"
+    else
+      builtin printf -v ret %d "'x"
+    fi
+    builtin printf -v ret %d "'${1:$2:1}" # honor the optional index argument
+  }
 elif ((_ble_bash>=40000&&!_ble_bash_loaded_in_function)); then
   # - 連想配列にキャッシュできる
   # - printf "'c" で unicode が読める
@@ -3627,7 +3638,7 @@ if ((_ble_bash>=40200)); then
   # workarounds of bashbug that printf '\uFFFF' results in a broken surrogate
   # pair in systems where sizeof(wchar_t) == 2.
   function ble/util/.has-bashbug-printf-uffff {
-    ((40200<=_ble_bash&&_ble_bash<40500)) || return 1
+    ((40200<=_ble_bash&&_ble_bash<50000)) || return 1
     local LC_ALL=C.UTF-8 2>/dev/null # Workaround: CentOS 7 に C.UTF-8 がなかった
     local ret
     builtin printf -v ret '\uFFFF'
@@ -3732,6 +3743,12 @@ function ble/util/.cache/update-locale {
   fi
 }
 
+function ble/util/is-unicode-output {
+  [[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]] &&
+    ble/util/.cache/update-locale
+  [[ $_ble_util_locale_encoding == UTF-8 ]]
+}
+
 #------------------------------------------------------------------------------
 
 ## 関数 ble/util/s2chars text
@@ -3849,9 +3866,9 @@ function ble/encoding:UTF-8/b2c {
   bytes=("$@")
   ret=0
   ((b0=bytes[0]&0xFF))
-  ((n=b0>0xF0
-    ?(b0>0xFC?5:(b0>0xF8?4:3))
-    :(b0>0xE0?2:(b0>0xC0?1:0)),
+  ((n=b0>=0xF0
+    ?(b0>=0xFC?5:(b0>=0xF8?4:3))
+    :(b0>=0xE0?2:(b0>=0xC0?1:0)),
     ret=n?b0&0x7F>>n:b0))
   for ((i=1;i<=n;i++)); do
     ((ret=ret<<6|0x3F&bytes[i]))
@@ -3892,9 +3909,3 @@ function ble/encoding:C/c2b {
   local code=$1
   bytes=($((code&0xFF)))
 }
-
-function ble/util/is-unicode-output {
-  [[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]] &&
-    ble/util/.cache/update-locale
-  [[ $_ble_util_locale_encoding == UTF-8 ]]
-}