Skip to content

Commit

Permalink
emoji: unify emoji tables of different versions
Browse files Browse the repository at this point in the history
  • Loading branch information
akinomyoga committed Oct 4, 2021
1 parent 430f449 commit af82662
Show file tree
Hide file tree
Showing 7 changed files with 374 additions and 572 deletions.
3 changes: 2 additions & 1 deletion blerc
Expand Up @@ -322,7 +322,8 @@


## "emoji_version" specifies the version of Unicode Emoji. Available values
## are 1.0, 2.0, 3.0, 4.0, 5.0, 11.0, 12.0, 12.1, 13.0, and 13.1.
## are 0.6, 0.7, 1.0, 2.0, 3.0, 4.0, 5.0, 11.0, 12.0, 12.1, 13.0, 13.1, and
## 14.0.

#bleopt emoji_version=13.1

Expand Down
21 changes: 11 additions & 10 deletions docs/ChangeLog.md
Expand Up @@ -13,6 +13,7 @@
- edit (kill/copy): combine multiple kills and copies (suggested by 3ximus) `#D1443` 66564e1
- edit (`{kill,copy}-region-or`): fix unconditionally combined kills/copies (reported by 3ximus) `#D1447` 1631751
- canvas: update emoji database and support `bleopt emoji_version` (motivated by endorfina) `#D1454` d1f8c27
- emoji: unify emoji tables of different versions `#D1671` 0000000
- canvas, edit: support `bleopt info_display` (suggested by 0neGuyDev) `#D1458` 69228fa
- canvas (panel): always call `panel::render` to update height `#D1472` 51d2c05
- util (visible-bell): work around coordinate mismatches in subshells `#D1495` 01cfb10
Expand All @@ -28,7 +29,7 @@
- decode: support `ble-bind -m KEYMAP --cursor DECSCUSR` (motivated by jmederosalvarado) `#D1514` `#D1515` `#D1516` 79d671d
- edit: support `nsearch` options (motivated by Alyetama, rashil2000, carv-silva) `#D1517` 9125795
- edit: support `nsearch` opts `empty=emulate-readline` (motivated by jainpratik163) `#D1661` d68ba61
- edit: support bash-5.2 binding of `prior/next` to `history-search-{for,back}ward` `#D1661` 0000000
- edit: support bash-5.2 binding of `prior/next` to `history-search-{for,back}ward` `#D1661` d26a6e1
- syntax: support the deprecated redirection `>& file` `#D1539` b9b0de4
- complete: complete file descriptors and heredoc words after redirections `#D1539` b9b0de4
- main: support `blehook ATTACH DETACH`, `BLE_ONLOAD`, `BLE_ATTACHED` `#D1543` 750ca38
Expand Down Expand Up @@ -64,16 +65,16 @@
- canvas: support grapheme clusters (motivated by huresche) `#D1619` c0d997b
- canvas (`ble/util/c2w`): use `EastAsianWidth` and `GeneralCategory` to mimic `wcwidth` `#D1645` 9a132b7
- canvas (c2w:auto): work around combining chars applied to the previous line `#D1649` 1cbbecb
- canvas (c2w:auto): avoid duplicate requests `#D1649` 1cbbecb 0000000
- canvas (c2w:auto): send <kbd>DSR(6)</kbd> in the internal state `#D1664` 0000000
- canvas (c2w): support `bleopt char_width_mode=musl` `#D1668` 0000000
- canvas (c2w:auto): detect `emacs` and `musl` `#D1668` 0000000
- canvas (c2w:auto): avoid duplicate requests `#D1649` 1cbbecb a3047f56
- canvas (c2w:auto): send <kbd>DSR(6)</kbd> in the internal state `#D1664` a3047f5
- canvas (c2w): support `bleopt char_width_mode=musl` `#D1668` 05b258f `#D1672` 0000000
- canvas (c2w:auto): detect `emacs` and `musl` `#D1668` 05b258f
- rlfunc: support vi word operations in `emacs` keymap (requested by SolarAquarion) `#D1624` 21d636a
- edit: support `TMOUT` for the session timeout `#D1631` 0e16dbd
- edit: support bash-5.2 `READLINE_ARGUMENT` `#D1638` d347fb3
- complete: support `complete [-DI]` in old versions of Bash through `_DefaultCmD_` and `_InitialWorD_` `#D1639` 925b2cd
- rlfunc: support nsearch widgets in `vi_nmap` keymap (requested by cornfeedhobo) `#D1651` 9a7c8b1
- prompt: support `bleopt prompt_ruler` `#D1666` 0000000
- prompt: support `bleopt prompt_ruler` `#D1666` 05cf638

## Changes

Expand Down Expand Up @@ -110,8 +111,8 @@
- prompt: do not evaluate `PROMPT_COMMAND` for subprompts `#D1654` 08e903e
- Makefile: work around the case where the repository is cloned without `--recursive` `#D1655` 22ace5f
- repo: add subdirectories `make` and `docs` `#D1657` 75bd04c
- blerc: add all the missing options `#D1667` 0000000
- util: time out <kbd>CPR</kbd> requests `#D1669` 00000000
- blerc: add all the missing options `#D1667` 0228d76
- util: time out <kbd>CPR</kbd> requests `#D1669` 1481d48

## Fixes

Expand Down Expand Up @@ -176,7 +177,7 @@
- complete: fix a task scheduling bug of referencing two different clocks (reported by rashil2000) `#D1636` fea5f5b
- canvas: update prompt trace on `char_width_mode` change (reported by Barbarossa93) `#D1642` 68ee111
- decode (`cmap/initialize`): fix unquoted special chars in the cmap cache `#D1647` 7434d2d
- decode: fix a bug that characters input during initialization are delayed `#D1670` 0000000
- decode: fix a bug that characters input during initialization are delayed `#D1670` 430f449

## Optimization

Expand Down Expand Up @@ -245,7 +246,7 @@
- util: fix `ble/util/dense-array#fill-range` a46fdaf
- util: fix leak variables `buff`, `trap`, `{x,y}{1,2}` `#D1572` 5967d6c
- util: fix leak variables `#D1643` fcf634b
- edit (`command-help`): use `ble/util/assign/.mktmp` to determine the temporary filename `#D1663`
- edit (`command-help`): use `ble/util/assign/.mktmp` to determine the temporary filename `#D1663` 1af0800

<!---------------------------------------------------------------------------->
# ble-0.4.0-devel2
Expand Down
175 changes: 99 additions & 76 deletions make/canvas.emoji.measure-width.sh → make/canvas.emoji.sh
Expand Up @@ -2,14 +2,68 @@

## @fn mkd dir
##   Create the directory "dir" together with its missing parents.  Nothing is
##   done when "dir" is already a directory.
function mkd {
  if [[ ! -d $1 ]]; then
    mkdir -p "$1"
  fi
}

function sub:compare {
## @fn make/canvas.emoji/get-emoji-data [emoji_version]
##   Make sure that a local copy of Unicode's emoji-test.txt exists and load
##   its entries.  The file is downloaded with wget only when the cached copy
##   is missing or empty.
##   @param[in,opt] emoji_version
##     The directory name under https://unicode.org/Public/emoji/ to fetch
##     from.  The default is 14.0.
##   @var[out] emoji_data
##     Filled through ble/util/assign-array with the gawk output: for each
##     non-comment line containing ";", the text preceding the first ";".
##     Emptied when the download fails.
##   @var[out] emoji_cache_file
##     The path of the cached file (out/data/unicode-emoji-VERSION.txt).
##   @exit
##     1 when the file can neither be downloaded nor moved into place.
##     Otherwise, the exit status of ble/util/assign-array.
function make/canvas.emoji/get-emoji-data {
  #local unicode_version=$(wget https://unicode.org/Public/emoji/ -O - | grep -Eo 'href="[0-9]+\.[0-9]+/"' | sed 's,^href=",,;s,/"$,,' | tail -n 1)
  local unicode_version=${1:-14.0}
  emoji_cache_file=out/data/unicode-emoji-$unicode_version.txt
  if [[ ! -s $emoji_cache_file ]]; then
    mkd out/data
    # The file is first written under a ".part" name and renamed afterwards so
    # that an incomplete download is never taken for a valid cache.
    if ! { wget "https://unicode.org/Public/emoji/$unicode_version/emoji-test.txt" -O "$emoji_cache_file.part" &&
             mv "$emoji_cache_file.part" "$emoji_cache_file"; }; then
      # Stop here instead of running gawk on a file that does not exist.  The
      # array is emptied so that callers that ignore the exit status do not
      # process stale entries.
      printf '%s\n' "make/canvas.emoji: failed to download emoji-test.txt of Unicode Emoji $unicode_version" >&2
      emoji_data=()
      return 1
    fi
  fi

  local gawk_script='
    /^[[:space:]]*#/ { next; }
    sub(/;.*$/, "") { print $0; }'
  ble/util/assign-array emoji_data 'gawk "$gawk_script" "$emoji_cache_file"'
}

## @fn make/canvas.emoji/sub:help
##   Print the usage line followed by the names of the available subcommands.
##   The subcommand names are taken from the currently defined functions whose
##   names start with "make/canvas.emoji/sub:".
function make/canvas.emoji/sub:help {
  # Only the basename of this script is shown.  The fixed name is used as a
  # fallback when BASH_SOURCE does not provide one.
  local script=${BASH_SOURCE##*/}
  ble/util/print "usage: source ${script:-canvas.emoji.sh} SUBCOMMAND ARGS..."
  ble/util/print
  ble/util/print "SUBCOMMAND"
  declare -F | sed -n 's/^declare -f make\/canvas.emoji\/sub:\([^[:space:]]*\)/ \1/p'
  ble/util/print
}

## @fn make/canvas.emoji/sub:save-emoji-type
##   Write the qualification type of every entry of the cached emoji-test.txt
##   to out/data/emoji.TYPE.txt.  Each output line has the form
##   "\UXXXXX...: TYPE" where the code points are formatted with at least five
##   hexadecimal digits and TYPE is UQ (unqualified), FQ (fully-qualified), MQ
##   (minimally-qualified), or XX (anything else).  The lines are sorted and
##   deduplicated by "sort -u".
function make/canvas.emoji/sub:save-emoji-type {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data

  local output=out/data/emoji.TYPE.txt
  local classify_script='
    /^[[:space:]]*#/ { next; }
    {
      if (/unqualified/) {
        type = "UQ";
      } else if (/fully-qualified/) {
        type = "FQ";
      } else if (/minimally-qualified/) {
        type = "MQ";
      } else {
        type = "XX";
      }
    }
    sub(/;.*$/, "") {
      s = "";
      for (i = 1; i <= NF; i++) {
        s = s sprintf("\\U%05X", strtonum("0x" $i));
      }
      print s ": " type;
    }
  '
  gawk "$classify_script" "$emoji_cache_file" | sort -u > "$output"
}

## @fn make/canvas.emoji/sub:compare
##   Print a table that joins, on the first field, the lines starting with a
##   backslash of out/data/emoji.TYPE.txt, out/data/emoji.blesh.txt, and
##   out/data/emoji.kitty.txt.  A leading "w=" of the second field is rewritten
##   to "blesh=" and "kitty=" for the latter two files, respectively.
function make/canvas.emoji/sub:compare {
  local dir=out/data
  local label_blesh='/^\\/ { sub(/^w=/, "blesh=", $2); print; }'
  local label_kitty='/^\\/ { sub(/^w=/, "kitty=", $2); print; }'
  join <(grep '^\\' "$dir/emoji.TYPE.txt") <(awk "$label_blesh" "$dir/emoji.blesh.txt") |
    join - <(awk "$label_kitty" "$dir/emoji.kitty.txt")
  # grep -E '^.{7}: UQ' | less
}

#------------------------------------------------------------------------------
# measure-emoji.impl1

_tool_emoji_width_code=()
_tool_emoji_width_gcb=()
Expand All @@ -26,7 +80,7 @@ function inspect1/proc {
ble/util/c2s "$c"
ble/canvas/put.draw "$ret"
done

ble/array#push _tool_emoji_width_code "${code[*]}"
ble/array#push _tool_emoji_width_gcb "${gcb[*]}"
ble/term/CPR/request.draw inspect1/callback
Expand All @@ -48,7 +102,20 @@ function inspect1/callback-final {
done | sort -u
} >> emoji.txt

## @fn make/canvas.emoji/sub:measure-emoji.impl1
##   Load the emoji data, call inspect1/proc once for every entry, and finally
##   register inspect1/callback-final through ble/term/CPR/request.buff.  The
##   output buffer is flushed to stderr before the first entry and after the
##   final request.
function make/canvas.emoji/sub:measure-emoji.impl1 {
local emoji_data emoji_cache_file
make/canvas.emoji/get-emoji-data
ble/util/buffer.flush >&2
local line
for line in "${emoji_data[@]}"; do
# "eval" re-parses the entry so that every whitespace-separated word of the
# entry is passed to inspect1/proc as a separate argument.
# NOTE(review): the entry comes from the downloaded emoji-test.txt and is
# executed as shell code here.  Presumably it only contains hexadecimal code
# points -- confirm before feeding data from any other source.
eval "inspect1/proc $line"
done
# The final callback is requested after all the entries have been queued.
ble/term/CPR/request.buff inspect1/callback-final
ble/util/buffer.flush >&2
}

#------------------------------------------------------------------------------
# measure-emoji

_term_emojiw_index_req=0
_term_emojiw_index_rcv=0
Expand All @@ -61,7 +128,7 @@ _term_emojiw_output=emoji.txt
function inspect2/start {
_term_emojiw_index_req=0
_term_emojiw_index_rcv=0
_term_emojiw_data=("${data[@]}")
_term_emojiw_data=("${emoji_data[@]}")
_term_emojiw_output=emoji.txt
: > "$_term_emojiw_output"
inspect2/next
Expand Down Expand Up @@ -124,14 +191,21 @@ function inspect2/final {
echo Done
}

## @fn make/canvas.emoji/sub:measure-emoji [emoji_version]
##   Load the emoji data of the specified version and start the measurement
##   implemented by inspect2/start, which reads the array "emoji_data".
##   @param[in,opt] emoji_version
##     The version passed to make/canvas.emoji/get-emoji-data.  The default is
##     14.0, which was formerly the only supported value.
function make/canvas.emoji/sub:measure-emoji {
  local emoji_data emoji_cache_file
  make/canvas.emoji/get-emoji-data "${1:-14.0}"
  inspect2/start
}

#------------------------------------------------------------------------------
# measure-emoji-sequences

## @fn ble/unicode/measure-emoji-sequences
## @var[in] data
## @var[in] emoji_data
function ble/unicode/measure-emoji-sequences {
local line words ret count=0
local -a codes=() gcbs=() widths=()
for line in "${data[@]}"; do
for line in "${emoji_data[@]}"; do
ble/string#split-words words "$line"

local s= word c
Expand Down Expand Up @@ -163,9 +237,11 @@ function ble/unicode/test-emoji-sequence-width {
diff -bwu <(grep '^\\U' out/data/emoji."blesh-$scheme".txt) <(grep '^\\U' out/data/emoji."$term".txt)
}

## @fn main/sub:measure-blesh term [scheme]
## @var[in] data cache
function main/sub:measure-blesh {
## @fn make/canvas.emoji/sub:measure-blesh term [scheme]
function make/canvas.emoji/sub:measure-blesh {
local emoji_data emoji_cache_file
make/canvas.emoji/get-emoji-data 14.0

local term=$1 scheme=${2:-$1}
case $scheme in
(blesh)
Expand Down Expand Up @@ -396,78 +472,25 @@ function main/sub:measure-blesh {
}

#------------------------------------------------------------------------------

function main {
local type=$1

#local unicode_version=$(wget https://unicode.org/Public/emoji/ -O - | grep -Eo 'href="[0-9]+\.[0-9]+/"' | sed 's,^href=",,;s,/"$,,' | tail -n 1)
local unicode_version=14.0
local cache=out/data/unicode-emoji-$unicode_version.txt
if [[ ! -s $cache ]]; then
mkd out/data
wget "https://unicode.org/Public/emoji/$unicode_version/emoji-test.txt" -O "$cache.part" &&
mv "$cache.part" "$cache"
fi
function make/canvas.emoji/sub:dump-EmojiStatus {
local emoji_data emoji_cache_file
make/canvas.emoji/get-emoji-data

local gawk_script='
/^[[:space:]]*#/ { next; }
sub(/;.*$/, "") { print $0; }'
local data
ble/util/assign-array data 'gawk "$gawk_script" "$cache"'

case $type in
(measure-at-once)
ble/util/buffer.flush >&2
local line
for line in "${data[@]}"; do
eval "inspect1/proc $line"
done
ble/term/CPR/request.buff inspect1/callback-final
ble/util/buffer.flush >&2 ;;
(measure-emoji-by-emoji)
inspect2/start ;;
(measure-blesh)
main/sub:measure-blesh "$2" "$3" ;;
(save-emoji-type)
gawk '
/^[[:space:]]*#/ { next; }
{
if (/unqualified/) {
type = "UQ";
} else if (/fully-qualified/) {
type = "FQ";
} else if (/minimally-qualified/) {
type = "MQ";
} else {
type = "XX";
}
}
sub(/;.*$/, "") {
s = "";
for (i = 1; i <= NF; i++) {
s = s sprintf("\\U%05X", strtonum("0x" $i));
}
print s ": " type;
}
' "$cache" | sort -u > out/data/emoji.TYPE.txt ;;
esac
}
local line words code
for line in "${emoji_data[@]}"; do
ble/string#split-words words "$line"

function sub:measure-emoji-impl1 {
main measure-at-once
}
function sub:measure-emoji {
main measure-emoji-by-emoji
}
function sub:measure-blesh {
main measure-blesh "$@"
}
function sub:save-emoji-type {
main save-emoji-type
((${#words[@]}==1)) || continue
((code=16#${words[0]}))
ble/unicode/EmojiStatus "$code"
printf 'U+%05X %d\n' "$code" "$ret"
done
}

if declare -F "sub:$1" &>/dev/null; then
"sub:$@"

if declare -F "make/canvas.emoji/sub:$1" &>/dev/null; then
"make/canvas.emoji/sub:$@"
else
sub:measure-emoji
make/canvas.emoji/sub:help
fi

0 comments on commit af82662

Please sign in to comment.