From 2e1a7c17b598b76777040f1f35715156ce227684 Mon Sep 17 00:00:00 2001 From: Koichi Murase Date: Sat, 24 Sep 2022 11:25:35 +0900 Subject: [PATCH] encoding: add minor fixes and workarounds * util (ble/encoding:UTF-8/b2c): fix interpretation of leading byte * util (ble/util/s2c): work around intermediate mbstate of bash <= 5.2 * util (ble/util/s2bytes): clear locale cache --- src/util.sh | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/util.sh b/src/util.sh index 43a34be8..c462f652 100644 --- a/src/util.sh +++ b/src/util.sh @@ -6208,11 +6208,22 @@ _ble_util_s2c_table_enabled= ## @param[in] text ## @param[in,opt] index ## @var[out] ret -if ((_ble_bash>=40100)); then - # - printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる) +if ((_ble_bash>=50300)); then + # printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる) function ble/util/s2c { builtin printf -v ret %d "'$1" } +elif ((_ble_bash>=40100)); then + function ble/util/s2c { + # Note #D1881: bash-5.2 以前では printf %d "'x" に対して mbstate_t 状態が + # 残ってしまう。なので一旦 clear を試みる。 + if ble/util/is-unicode-output; then + builtin printf -v ret %d "'μ" + else + builtin printf -v ret %d "'x" + fi + builtin printf -v ret %d "'$1" + } elif ((_ble_bash>=40000&&!_ble_bash_loaded_in_function)); then # - 連想配列にキャッシュできる # - printf "'c" で unicode が読める @@ -6272,7 +6283,7 @@ if ((_ble_bash>=40200)); then # workarounds of bashbug that printf '\uFFFF' results in a broken surrogate # pair in systems where sizeof(wchar_t) == 2. function ble/util/.has-bashbug-printf-uffff { - ((40200<=_ble_bash&&_ble_bash<40500)) || return 1 + ((40200<=_ble_bash&&_ble_bash<50000)) || return 1 local LC_ALL=C.UTF-8 2>/dev/null # Workaround: CentOS 7 に C.UTF-8 がなかった local ret builtin printf -v ret '\uFFFF' @@ -6426,6 +6437,12 @@ function ble/util/.cache/update-locale { fi } +function ble/util/is-unicode-output { + [[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]] && + ble/util/.cache/update-locale + [[ $_ble_util_locale_encoding == UTF-8 ]] +} + #------------------------------------------------------------------------------ ## @fn ble/util/s2chars text @@ -6441,9 +6458,11 @@ function ble/util/s2chars { } function ble/util/s2bytes { local LC_ALL= LC_CTYPE=C - ble/util/s2chars "$1" -} -ble/function#suppress-stderr ble/util/s2bytes + ble/util/s2chars "$1"; local ext=$? + ble/util/unlocal LC_ALL LC_CTYPE + ble/util/.cache/update-locale + return "$?" +} >/dev/null # bind で使用される keyseq の形式 @@ -6548,9 +6567,9 @@ function ble/encoding:UTF-8/b2c { bytes=("$@") ret=0 ((b0=bytes[0]&0xFF)) - ((n=b0>0xF0 - ?(b0>0xFC?5:(b0>0xF8?4:3)) - :(b0>0xE0?2:(b0>0xC0?1:0)), + ((n=b0>=0xF0 + ?(b0>=0xFC?5:(b0>=0xF8?4:3)) + :(b0>=0xE0?2:(b0>=0xC0?1:0)), ret=n?b0&0x7F>>n:b0)) for ((i=1;i<=n;i++)); do ((ret=ret<<6|0x3F&bytes[i])) @@ -6591,9 +6610,3 @@ function ble/encoding:C/c2b { local code=$1 bytes=("$((code&0xFF))") } - -function ble/util/is-unicode-output { - [[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]] && - ble/util/.cache/update-locale - [[ $_ble_util_locale_encoding == UTF-8 ]] -}