Skip to content

Commit

Permalink
canvas: update emoji database and support "bleopt emoji_version"
Browse files Browse the repository at this point in the history
  • Loading branch information
akinomyoga committed Jan 30, 2021
1 parent 47a3301 commit d1f8c27
Show file tree
Hide file tree
Showing 6 changed files with 356 additions and 56 deletions.
9 changes: 8 additions & 1 deletion blerc
Expand Up @@ -271,11 +271,18 @@
#bleopt char_width_mode=auto


## "emoji_width" specifies the width of emoji characters.
## "emoji_width" specifies the width of emoji characters. If an empty value is
## specified, special treatment of emoji is disabled.

#bleopt emoji_width=2


## "emoji_version" specifies the version of Unicode Emoji. Available values
## are 1.0, 2.0, 3.0, 4.0, 5.0, 11.0, 12.0, 12.1, 13.0, and 13.1.

#bleopt emoji_version=13.1


##-----------------------------------------------------------------------------
## User input settings

Expand Down
108 changes: 108 additions & 0 deletions make_command.sh
Expand Up @@ -80,6 +80,114 @@ function sub:ignoreeof-messages {
) >| lib/core-edit.ignoreeof-messages.new
}

# Regenerate src/canvas.emoji.sh from the Unicode emoji test data.
#
# Steps:
#   1. Read the directory listing of https://unicode.org/Public/emoji/ and
#      pick the highest "<major>.<minor>/" entry as the data version.
#   2. Download emoji-test.txt of that version into
#      out/data/unicode-emoji-<version>.txt unless a non-empty copy is
#      already cached there.
#   3. Run gawk over the cached file and write the generated shell
#      definitions, folded by ifold, to src/canvas.emoji.sh.
#
# Requires: wget, gawk, sort -V, and the helpers "mkd" and "ifold" that are
# provided outside of this function.
# Returns: non-zero when the version cannot be determined, the download
# fails, or no "E<major>.<minor>" tag is found in the data file.
function sub:update-emoji-database {
  # The listing is piped directly into grep (no file operand), and the
  # entries are version-sorted because a listing sorted by name would put
  # "5.0/" after "13.1/".
  local unicode_version
  unicode_version=$(
    wget https://unicode.org/Public/emoji/ -O - |
      grep -Eo 'href="[0-9]+\.[0-9]+/"' |
      sed 's,^href=",,;s,/"$,,' |
      sort -Vu |
      tail -n 1)
  if [[ ! $unicode_version ]]; then
    printf '%s\n' 'update-emoji-database: failed to determine the latest emoji version' >&2
    return 1
  fi

  local cache=out/data/unicode-emoji-$unicode_version.txt
  if [[ ! -s $cache ]]; then
    mkd out/data
    # Download to a ".part" file first so that an interrupted download is
    # never mistaken for a valid cache.
    if ! wget "https://unicode.org/Public/emoji/$unicode_version/emoji-test.txt" -O "$cache.part"; then
      printf '%s\n' "update-emoji-database: failed to download emoji-test.txt ($unicode_version)" >&2
      return 1
    fi
    mv "$cache.part" "$cache" || return 1
  fi

  # A single quote, spliced into the gawk program below where the generated
  # shell code itself needs a quote.
  local q=\'

  # Space-separated, version-sorted list of every "E<major>.<minor>" tag
  # that occurs in the data file.
  local versions
  versions=$(gawk 'match($0, / E([0-9]+\.[0-9]+)/, m) > 0 { print m[1]; }' "$cache" | sort -Vu | tr '\n' ' ')
  if [[ ! ${versions// /} ]]; then
    printf '%s\n' "update-emoji-database: no emoji version tags found in $cache" >&2
    return 1
  fi

  gawk -v versions="$versions" '
    # Concatenate arr[1..length(arr)] with sep between the elements.
    function join(arr, sep, _, r, i, n) {
      r = "";
      n = length(arr);
      for (i = 1; i <= n; i++) {
        if (i > 1) r = r sep;
        r = r arr[i];
      }
      return r;
    }

    BEGIN {
      split(versions, vers);
      nvers = length(vers);
    }

    # Register only single-codepoint emoji (not sequences): for those lines
    # the third field is the qualification status.
    # Whether unqualified emoji should be included is debatable, but
    # existing terminals seem to include them.
    #$3 == "fully-qualified" && match($0, / E([0-9]+\.[0-9]+)/, m) > 0 {
    ($3 == "fully-qualified" || $3 == "unqualified") && match($0, / E([0-9]+\.[0-9]+)/, m) > 0 {
      char_code = strtonum("0x" $1);
      char_emoji_version = m[1];

      # Mark the character in its own version and in every later version.
      # Both ends of [char_code, char_code + 1) are counted so that a
      # position shared by two adjacent characters gets an even count and
      # only real range boundaries keep an odd count.
      for (i = nvers; i >= 1; i--) {
        emoji_version = vers[i];
        data[emoji_version, char_code]++;
        data[emoji_version, char_code + 1]++;
        if (char_emoji_version == emoji_version) break;
      }

      if (char_code <= 0x2000 || 0x2E80 <= char_code && char_code <= 0xA4D0) {
        # Codes in these ranges are listed individually as exceptions.
        printf("_ble_util_c2w_except[0x%04X]=-2\n", char_code);
      } else {
        # Track the extent of the remaining codes separately below and
        # from U+10000.
        if (char_code < 0x10000) {
          if (bmp_min == "" || char_code < bmp_min) bmp_min = char_code;
          if (bmp_max == "" || char_code > bmp_max) bmp_max = char_code;
        } else {
          if (smp_min == "" || char_code < smp_min) smp_min = char_code;
          if (smp_max == "" || char_code > smp_max) smp_max = char_code;
        }
      }
    }

    # Map "<major>.<minor>" to the array name
    # _ble_canvas_emoji_database_NNNN with NNNN = major * 100 + minor.
    # Returns "" for a malformed version.
    function get_database_name(version, _, m) {
      if (match(version, /^0*([0-9]+)\.0*([0-9]+)$/, m) > 0)
        return sprintf("_ble_canvas_emoji_database_%04d", m[1] * 100 + m[2]);
      else
        return "";
    }

    # Switch the version being accumulated, flushing the previous one.
    # Returns 1 when the version changed and 0 otherwise.
    function start_emoji_version(version) {
      if (version == g_emoji_version) return 0;
      end_emoji_version();
      g_emoji_version = version;
      return 1;
    }

    # Flush the boundaries accumulated in g_emoji_list into the array
    # definition of the current version, sorted numerically.
    function end_emoji_version(_, database_name, _list) {
      if (g_emoji_version == "") return;
      if ((database_name = get_database_name(g_emoji_version))) {
        asorti(g_emoji_list, _list, "@ind_num_asc");
        g_def_wranges[g_emoji_version] = database_name "=(" join(_list, " ") ")";
      }
      g_emoji_version = "";
      delete g_emoji_list;
    }

    END {
      printf("_ble_canvas_emoji_expr_maybe='$q'_ble_util_c2w_except[code]==-2||%d<=code&&code<=%d||%d<=code&&code<=%d'$q'\n", bmp_min, bmp_max, smp_min, smp_max);
      # printf("_ble_canvas_emoji_bmp_min=%-6d # U+%04X\n", bmp_min, bmp_min);
      # printf("_ble_canvas_emoji_bmp_max=%-6d # U+%04X\n", bmp_max, bmp_max);
      # printf("_ble_canvas_emoji_smp_min=%-6d # U+%04X\n", smp_min, smp_min);
      # printf("_ble_canvas_emoji_smp_max=%-6d # U+%04X\n", smp_max, smp_max);

      # The default (string) ordering keeps the entries of one version
      # contiguous; the codes are sorted numerically in end_emoji_version.
      n = asorti(data, boundaries);
      emoji_version = "";
      for (i = 1; i <= n; i++) {
        # An even count means that two adjacent ranges meet here.
        if (data[boundaries[i]] % 2 != 1) continue;
        split(boundaries[i], fields, SUBSEP);
        code = fields[2];
        start_emoji_version(fields[1]);
        g_emoji_list[code]++;
      }
      end_emoji_version();

      # Emit the tables of the versions 1.0 and later in version order.
      for (i = 1; i <= nvers; i++) {
        emoji_version = vers[i];
        if (emoji_version >= 1.0)
          print g_def_wranges[emoji_version];
      }

      # The newest version is the default of "bleopt emoji_version" and the
      # initial content of the active table.
      latest_version = vers[nvers];
      print "bleopt/declare -n emoji_version " latest_version;
      print "_ble_canvas_emoji_database=(\"${" get_database_name(latest_version) "[@]}\")";
    }
  ' "$cache" | ifold -w 119 --spaces --no-text-justify --indent=.. > src/canvas.emoji.sh
}

#------------------------------------------------------------------------------
# sub:check
# sub:check-all
Expand Down
1 change: 1 addition & 0 deletions memo/ChangeLog.md
Expand Up @@ -11,6 +11,7 @@
- complete: support new options `bleopt complete_limit{,_auto}` (contributed by timjrd) `#D1445` b13f114 5504bbc
- edit (kill/copy): combine multiple kills and copies (suggested by 3ximus) `#D1443` 66564e1
- edit (`{kill,copy}-region-or`): fix unconditionally combined kills/copies (reported by 3ximus) `#D1447` 0000000
- canvas: update emoji database and support `bleopt emoji_version` (motivated by endorfina) `#D1454` 0000000

## Changes

Expand Down
17 changes: 17 additions & 0 deletions note.txt
Expand Up @@ -3719,6 +3719,23 @@ bash_tips
Done (実装ログ)
-------------------------------------------------------------------------------

2021-01-30

* edit: emoji の表を更新する (reported by endorfina) [#D1455]
https://github.com/akinomyoga/ble.sh/issues/84

更新しようと思ったが現在のコードを確認すると色々工夫して高速化している。
少し面倒である。取り敢えずデータのダウンロードだけは行う事にする。

https://unicode.org/Public/emoji/

色々考えて現在のコードの構造に近い形で再実装した。実は以前の実装に
はバグがあった。

以前のテーブルは https://github.com/vim-jp/issues/issues/1086 から
拾ってきた物だったが、これは 2017 年の物だったので報告のあった
flamingo の絵文字 (2018) は含まれていなかったのだ。

2021-01-25

* edit: change default behavior of "C-w" and "M-w" to operate on backward words (reported by 3ximus) [#D1448]
Expand Down

0 comments on commit d1f8c27

Please sign in to comment.