Skip to content

Commit

Permalink
encoding: add minor fixes and workarounds
Browse files Browse the repository at this point in the history
* util (ble/encoding:UTF-8/b2c): fix interpretation of leading byte
* util (ble/util/s2c): work around intermediate mbstate of bash <= 5.2
* util (ble/util/s2bytes): clear locale cache
  • Loading branch information
akinomyoga committed Sep 25, 2022
1 parent c5d1b82 commit 2e1a7c1
Showing 1 changed file with 28 additions and 15 deletions.
43 changes: 28 additions & 15 deletions src/util.sh
Expand Up @@ -6208,11 +6208,22 @@ _ble_util_s2c_table_enabled=
## @param[in] text
## @param[in,opt] index
## @var[out] ret
if ((_ble_bash>=40100)); then
# - printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる)
if ((_ble_bash>=50300)); then
# printf "'c" で Unicode が読める (どの LC_CTYPE でも Unicode になる)
function ble/util/s2c {
builtin printf -v ret %d "'$1"
}
elif ((_ble_bash>=40100)); then
function ble/util/s2c {
# Note #D1881: bash-5.2 以前では printf %d "'x" に対して mbstate_t 状態が
# 残ってしまう。なので一旦 clear を試みる。
if ble/util/is-unicode-output; then
builtin printf -v ret %d ""
else
builtin printf -v ret %d "'x"
fi
builtin printf -v ret %d "'$1"
}
elif ((_ble_bash>=40000&&!_ble_bash_loaded_in_function)); then
# - 連想配列にキャッシュできる
# - printf "'c" で unicode が読める
Expand Down Expand Up @@ -6272,7 +6283,7 @@ if ((_ble_bash>=40200)); then
# workarounds of bashbug that printf '\uFFFF' results in a broken surrogate
# pair in systems where sizeof(wchar_t) == 2.
function ble/util/.has-bashbug-printf-uffff {
((40200<=_ble_bash&&_ble_bash<40500)) || return 1
((40200<=_ble_bash&&_ble_bash<50000)) || return 1
local LC_ALL=C.UTF-8 2>/dev/null # Workaround: CentOS 7 に C.UTF-8 がなかった
local ret
builtin printf -v ret '\uFFFF'
Expand Down Expand Up @@ -6426,6 +6437,12 @@ function ble/util/.cache/update-locale {
fi
}

function ble/util/is-unicode-output {
[[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]] &&
ble/util/.cache/update-locale
[[ $_ble_util_locale_encoding == UTF-8 ]]
}

#------------------------------------------------------------------------------

## @fn ble/util/s2chars text
Expand All @@ -6441,9 +6458,11 @@ function ble/util/s2chars {
}
function ble/util/s2bytes {
local LC_ALL= LC_CTYPE=C
ble/util/s2chars "$1"
}
ble/function#suppress-stderr ble/util/s2bytes
ble/util/s2chars "$1"; local ext=$?
ble/util/unlocal LC_ALL LC_CTYPE
ble/util/.cache/update-locale
return "$?"
} >/dev/null

# bind で使用される keyseq の形式

Expand Down Expand Up @@ -6548,9 +6567,9 @@ function ble/encoding:UTF-8/b2c {
bytes=("$@")
ret=0
((b0=bytes[0]&0xFF))
((n=b0>0xF0
?(b0>0xFC?5:(b0>0xF8?4:3))
:(b0>0xE0?2:(b0>0xC0?1:0)),
((n=b0>=0xF0
?(b0>=0xFC?5:(b0>=0xF8?4:3))
:(b0>=0xE0?2:(b0>=0xC0?1:0)),
ret=n?b0&0x7F>>n:b0))
for ((i=1;i<=n;i++)); do
((ret=ret<<6|0x3F&bytes[i]))
Expand Down Expand Up @@ -6591,9 +6610,3 @@ function ble/encoding:C/c2b {
local code=$1
bytes=("$((code&0xFF))")
}

function ble/util/is-unicode-output {
[[ $_ble_util_locale_triple != "$LC_ALL:$LC_CTYPE:$LANG" ]] &&
ble/util/.cache/update-locale
[[ $_ble_util_locale_encoding == UTF-8 ]]
}

0 comments on commit 2e1a7c1

Please sign in to comment.