Skip to content

Commit

Permalink
canvas: update Unicode version 15.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
akinomyoga committed Sep 16, 2022
1 parent 60b6989 commit 49e55f4
Show file tree
Hide file tree
Showing 8 changed files with 591 additions and 501 deletions.
1 change: 1 addition & 0 deletions docs/ChangeLog.md
Expand Up @@ -199,6 +199,7 @@
- progcomp: reproduce arguments of completion functions passed by Bash `#D1872` 4d2dd35
- prompt: preserve transient prompt with `same-dir` after `clear-screen` `#D1876` xxxxxxx
- color: let `bleopt term_index_colors` override the default if specified `#D1878` xxxxxxx
- canvas: update Unicode version 15.0.0 `#D1880` xxxxxxx

## Fixes

Expand Down
31 changes: 31 additions & 0 deletions make/canvas.c2w.list-ucsver-detection-codes.sh
Expand Up @@ -43,5 +43,36 @@ function list-range-code-for-version-detection {
done
done

printf ' | %-*s|musl\n' "$((nversion*3))" '-----Unicode EAW+GeneralCategory'
local -a keys=(
U+9FBC U+9FC4 U+31B8 U+D7B0
U+3099 U+9FCD U+1F93B U+312E
U+312F U+16FE2 U+32FF U+31BB
U+9FFD U+1B132)
local code index=0 ret
for code in "${keys[@]}"; do
((code=16#${code#U+}))
ble/unicode/EmojiStatus "$code"
if ((ret)); then
printf 'U+%04X: emoji cannot be used to detect the Unicode version\n' "$code" >&2
continue
fi

local c2w=${_ble_unicode_c2w[code]}
if [[ ! $c2w ]]; then
local c=$code
until [[ $c2w || c -eq 0 ]]; do c2w=${_ble_unicode_c2w[--c]}; done
if [[ $c2w ]]; then
printf 'U+%04X: warning: not c2w boundary. borrow the data of boundary U+%04X\n' "$code" "$c" >&2
else
printf 'U+%04X: this is not c2w boundary\n' "$code" >&2
continue
fi
fi

local width_vec=$(printf ' %2d' "${_ble_unicode_c2w_UnicodeVersionMapping[@]:c2w*nversion:nversion}")
ble/util/c2w:musl "$code"; local c2w_musl=$ret
printf '%-6s U+%05X |%s |%2d\n' "ws[$((index++))]" "$code" "$width_vec" "$c2w_musl"
done
}
list-range-code-for-version-detection
23 changes: 13 additions & 10 deletions make_command.sh
Expand Up @@ -102,7 +102,7 @@ function sub:ignoreeof-messages {
function sub:generate-emoji-table {
local -x name=${1:-_ble_unicode_EmojiStatus}

local unicode_version=14.0
local unicode_version=15.0
local cache=out/data/unicode-emoji-$unicode_version.txt
download "https://unicode.org/Public/emoji/$unicode_version/emoji-test.txt" "$cache"

Expand Down Expand Up @@ -248,22 +248,25 @@ function sub:generate-emoji-table {
}

function sub:generate-grapheme-cluster-table {
local url=http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
local cache=out/data/unicode-GraphemeBreakProperty-latest.txt
#local unicode_version=latest base_url=http://www.unicode.org/Public/UCD/latest/ucd
local unicode_version=15.0.0 base_url=https://www.unicode.org/Public/15.0.0/ucd

local url=$base_url/auxiliary/GraphemeBreakProperty.txt
local cache=out/data/unicode-GraphemeBreakProperty-$unicode_version.txt
if [[ ! -s $cache ]]; then
mkd out/data
wget "$url" -O "$cache.part" && mv "$cache.part" "$cache"
fi

local url2=https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
local cache2=out/data/unicode-emoji-data-latest.txt
local url2=$base_url/emoji/emoji-data.txt
local cache2=out/data/unicode-emoji-data-$unicode_version.txt
if [[ ! -s $cache2 ]]; then
mkd out/data
wget "$url2" -O "$cache2.part" && mv "$cache2.part" "$cache2"
fi

local url3=http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
local cache3=out/data/unicode-GraphemeBreakTest-latest.txt
local url3=$base_url/auxiliary/GraphemeBreakTest.txt
local cache3=out/data/unicode-GraphemeBreakTest-$unicode_version.txt
if [[ ! -s $cache3 ]]; then
mkd out/data
wget "$url3" -O "$cache3.part" && mv "$cache3.part" "$cache3"
Expand Down Expand Up @@ -527,12 +530,12 @@ function sub:generate-grapheme-cluster-table {
rule_initialize();
rule_print();
}
' > src/canvas.GraphemeClusterBreak.sh
' | sed 's/[[:space:]]\{1,\}$//' > src/canvas.GraphemeClusterBreak.sh
}

function sub:update-EastAsianWidth {
local version
for version in {4.1,5.{0,1,2},6.{0..3},{7..11}.0,12.{0,1},13.0}.0; do
for version in {4.1,5.{0,1,2},6.{0..3},{7..11}.0,12.{0,1},13.0,14.0,15.0}.0; do
local data=out/data/unicode-EastAsianWidth-$version.txt
download http://www.unicode.org/Public/$version/ucd/EastAsianWidth.txt "$data"
gawk '
Expand Down Expand Up @@ -750,7 +753,7 @@ function sub:update-EastAsianWidth {

function sub:generate-c2w-table {
local version
for version in {4.1,5.{0,1,2},6.{0..3},{7..11}.0,12.{0,1},13.0,14.0}.0; do
for version in {4.1,5.{0,1,2},6.{0..3},{7..11}.0,12.{0,1},13.0,14.0,15.0}.0; do
local data=out/data/unicode-EastAsianWidth-$version.txt
download http://www.unicode.org/Public/$version/ucd/EastAsianWidth.txt "$data"
echo "__unicode_version__ $version"
Expand Down
35 changes: 35 additions & 0 deletions note.txt
Expand Up @@ -1895,8 +1895,15 @@ bash_tips
2022-08-31

* complete: 'a b c' というファイルがある状態で 'b [TAB] すると変な事になる。

→今試してみると再現しない。bash-completion をロードしていてもしていなくて
も問題なく補完される。

* complete: ~/b/c/スペースを含むファイル[TAB] としても requote されない。

* complete: 変数名の曖昧補完が効かない。一旦候補を生成したら menu-filter によ
る曖昧絞り込みはできている。

2022-08-29

* DEBUG trap についても関数呼び出しの階層を再現する?
Expand Down Expand Up @@ -6599,6 +6606,34 @@ bash_tips

2022-09-16

* canvas: Unicode version 更新 [#D1880]
15.0.0 が出ている。各テーブルを更新したが、更に version 判定ロジックを改めて
更新する必要がある。

既存の判定用テーブルは #D1668 にある。これを更新する。うーん。このテーブル
を生成するのに使ったスクリプトがある筈。だが見つからない。と思ったら

make/canvas.c2w.list-ucsver-detection-codes.sh

であった。うーん。U+1B132 (平仮名の小さな「こ」) を採用する。更新されたテー
ブルは以下の様な感じ。

               | -----Unicode EAW+GeneralCategory-------------------|musl
ws[0]  U+09FBC | -1  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2 | 2
ws[1]  U+09FC4 | -1 -1  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2 | 2
ws[2]  U+031B8 | -1 -1 -1  2  2  2  2  2  2  2  2  2  2  2  2  2  2 | 2
ws[3]  U+0D7B0 | -1 -1  2  2  2  1  1  1  1  1  1  1  1  1  1  1  1 | 2
ws[4]  U+03099 |  2  2  2  2  2  2  2  0  0  0  0  0  0  0  0  0  0 | 0
ws[5]  U+09FCD | -1 -1  2  2  2  2  2 -2  2  2  2  2  2  2  2  2  2 | 2
ws[6]  U+1F93B | -1 -1 -1 -1 -1 -1 -1 -1 -1  2  2  2  2  2  1  1  1 | 1
ws[7]  U+0312E | -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2  2  2  2  2  2  2 | 1
ws[8]  U+0312F | -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2  2  2  2  2  2 | 1
ws[9]  U+16FE2 | -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2  2  2  2  2 | 1
ws[10] U+032FF | -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2  2  2  2 | 1
ws[11] U+031BB | -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2  2  2 | 1
ws[12] U+09FFD | -1 -1  2  2  2  2  2 -2 -2 -2 -2 -2 -2 -2 -2  2  2 | 2
ws[13] U+1B132 | -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2 | 1

* mandb: longname の説明に対応する short option 名を併記する (suggested by bbyfacekiller) [#D1879]
https://github.com/akinomyoga/ble.sh/issues/231

Expand Down

0 comments on commit 49e55f4

Please sign in to comment.