Skip to content

Commit

Permalink
encoding: add minor fixes and workarounds
Browse files Browse the repository at this point in the history
* util (ble/encoding:UTF-8/b2c): fix interpretation of leading byte
* util (ble/util/s2c): work around intermediate mbstate of bash <= 5.2
  • Loading branch information
akinomyoga committed Oct 2, 2022
1 parent e7c657c commit 45f3df3
Showing 1 changed file with 23 additions and 12 deletions.
35 changes: 23 additions & 12 deletions src/util.sh
Expand Up @@ -3563,11 +3563,22 @@ _ble_util_s2c_table_enabled=
## @param[in] text
## @param[in,opt] index
## @var[out] ret
if ((_ble_bash>=40100)); then
# - printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる)
if ((_ble_bash>=50300)); then
# printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる)
function ble/util/s2c {
  # Convert one character of a string to its decimal character code.
  #   $1  text
  #   $2  (optional) offset of the character inside text; empty means 0
  # The result is stored in the variable `ret`.

  # Narrow the positional parameters down to the single selected character.
  set -- "${1:$2:1}"

  # A leading single quote makes printf yield the numeric value of the
  # character that follows it.
  builtin printf -v ret %d "'$1"
}
elif ((_ble_bash>=40100)); then
function ble/util/s2c {
  # Convert one character of a string to its decimal character code.
  #   $1  text
  #   $2  (optional) offset of the character inside text; empty means 0
  # The result is stored in the variable `ret`.

  # Note #D1881: in bash-5.2 and earlier, printf %d "'x" can leave residual
  # mbstate_t state behind, so first attempt to clear it with a throwaway
  # conversion whose result is overwritten below.
  if ble/util/is-unicode-output; then
    # NOTE(review): this argument is an empty string, so no character is
    # converted here; presumably a multibyte character literal was intended.
    # Confirm against the upstream source.
    builtin printf -v ret %d ""
  else
    builtin printf -v ret %d "'x"
  fi

  # Honour the optional index ($2) by selecting exactly one character, as the
  # documented interface (text, index) requires; "'$1" alone always converted
  # the first character and silently ignored the index.
  builtin printf -v ret %d "'${1:$2:1}"
}
elif ((_ble_bash>=40000&&!_ble_bash_loaded_in_function)); then
# - 連想配列にキャッシュできる
# - printf "'c" で unicode が読める
Expand Down Expand Up @@ -3627,7 +3638,7 @@ if ((_ble_bash>=40200)); then
# workarounds of bashbug that printf '\uFFFF' results in a broken surrogate
# pair in systems where sizeof(wchar_t) == 2.
function ble/util/.has-bashbug-printf-uffff {
((40200<=_ble_bash&&_ble_bash<40500)) || return 1
((40200<=_ble_bash&&_ble_bash<50000)) || return 1
local LC_ALL=C.UTF-8 2>/dev/null # Workaround: CentOS 7 に C.UTF-8 がなかった
local ret
builtin printf -v ret '\uFFFF'
Expand Down Expand Up @@ -3732,6 +3743,12 @@ function ble/util/.cache/update-locale {
fi
}

function ble/util/is-unicode-output {
  # Succeeds (exit status 0) when the cached locale encoding is UTF-8.

  # Refresh the cached locale information when the current
  # LC_ALL/LC_CTYPE/LANG combination differs from the cached triple.
  if [[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]]; then
    ble/util/.cache/update-locale
  fi

  # The status of this comparison is the function's return status.
  [[ $_ble_util_locale_encoding == UTF-8 ]]
}

#------------------------------------------------------------------------------

## 関数 ble/util/s2chars text
Expand Down Expand Up @@ -3849,9 +3866,9 @@ function ble/encoding:UTF-8/b2c {
bytes=("$@")
ret=0
((b0=bytes[0]&0xFF))
((n=b0>0xF0
?(b0>0xFC?5:(b0>0xF8?4:3))
:(b0>0xE0?2:(b0>0xC0?1:0)),
((n=b0>=0xF0
?(b0>=0xFC?5:(b0>=0xF8?4:3))
:(b0>=0xE0?2:(b0>=0xC0?1:0)),
ret=n?b0&0x7F>>n:b0))
for ((i=1;i<=n;i++)); do
((ret=ret<<6|0x3F&bytes[i]))
Expand Down Expand Up @@ -3892,9 +3909,3 @@ function ble/encoding:C/c2b {
local code=$1
bytes=($((code&0xFF)))
}

function ble/util/is-unicode-output {
  # Succeeds (exit status 0) when the cached locale encoding is UTF-8.

  # Refresh the cached locale information when the current
  # LC_ALL/LC_CTYPE/LANG combination differs from the cached triple.
  if [[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]]; then
    ble/util/.cache/update-locale
  fi

  # The status of this comparison is the function's return status.
  [[ $_ble_util_locale_encoding == UTF-8 ]]
}

0 comments on commit 45f3df3

Please sign in to comment.