Skip to content

Commit

Permalink
wip - update emoji tooling to support tr51 sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
dougfelt committed Nov 4, 2015
1 parent deff1a6 commit 0f227e7
Show file tree
Hide file tree
Showing 6 changed files with 306 additions and 28 deletions.
3 changes: 2 additions & 1 deletion Makefile
Expand Up @@ -78,7 +78,8 @@ flag-symlinks: $(WAVED_FLAGS)

$(PNG128_FLAGS): flag-symlinks

EMOJI_PNG128 = ./png/128/emoji_u
#EMOJI_PNG128 = ./png/128/emoji_u
EMOJI_PNG128 = /tmp/placeholder_emoji_plus/emoji_u

EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py
ADD_GLYPHS = third_party/color_emoji/add_glyphs.py
Expand Down
11 changes: 11 additions & 0 deletions NotoColorEmoji.tmpl.ttx.tmpl
Expand Up @@ -4,6 +4,10 @@
<GlyphOrder>
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
<GlyphID id="0" name=".notdef"/>
<GlyphID id="1" name="null"/>
<GlyphID id="2" name="nonmarkingreturn"/>
<GlyphID id="3" name="space"/>
<GlyphID id="4" name="u200D"/>
</GlyphOrder>

<head>
Expand Down Expand Up @@ -119,12 +123,19 @@

<hmtx>
<mtx name=".notdef" width="2550" lsb="0"/>
<mtx name="null" width="0" lsb="0"/>
<mtx name="nonmarkingreturn" width="2550" lsb="0"/>
<mtx name="space" width="2550" lsb="0"/>
<mtx name="u200D" width="0" lsb="0"/>
</hmtx>

<cmap>
<tableVersion version="0"/>
<cmap_format_12 platformID="3" platEncID="10" language="0" format="12" reserved="0" length="1" nGroups="1">
<map code="0x0" name=".notdef"/><!-- &lt;control> -->
<map code="0xd" name="nonmarkingreturn"/>
<map code="0x20" name="space"/>
<map code="0x200d" name="u200D"/>
</cmap_format_12>
</cmap>

Expand Down
73 changes: 73 additions & 0 deletions build_emoji_set.py
@@ -0,0 +1,73 @@
# delete dst, then:
# copy the placeholders to dst
# then copy the noto images to dst
# then copy the draft images to dst, skipping names with parens and
# after fixing the case of the names

import glob
import os
from os import path
import re
import shutil

DST = "/tmp/placeholder_emoji_plus"

SRC_PLACEHOLDER = "/tmp/placeholder_emoji"
SRC_NOTO = "/usr/local/google/users/dougfelt/newnoto/noto-emoji/png/128"
SRC_DRAFT = "/usr/local/google/home/dougfelt/Downloads/PNG_latest_working_draft"

# First, scan the draft images and select which ones to use. This does
# two things:
# - The download package returns all the images, including previous versions.
# Ensure we use the one with the highest version.
# - The names often mix case. Make sure we have all lower case names.
#
# If something seems amiss, we fail.

# Lower-cased '<base>.png' name -> (highest version seen, source path), built
# from draft files whose names end in a '(N)' version suffix.
UPDATED_NAMES = {}
# Lower-cased file name -> source path of the draft image that will be copied.
FIXED_NAMES = {}
VAR_PAT = re.compile(r'(.*?)\((\d+)\)\.png')

for fname in glob.glob(path.join(SRC_DRAFT, '*.png')):
    name = path.basename(fname)
    m = VAR_PAT.match(name)
    if m is None:
        # Unversioned file: only the case of the name needs fixing.
        name = name.lower()
        FIXED_NAMES[name] = fname
        continue

    # Versioned file: remember it only if it beats the best version so far.
    name = '%s.png' % m.group(1).lower()
    version = int(m.group(2))
    best_version = UPDATED_NAMES.get(name, (0, None))[0]
    if version > best_version:
        print('update %s to version %d' % (name, version))
        UPDATED_NAMES[name] = (version, fname)

# Every versioned image must replace an unversioned one; anything else is
# treated as an error.
for name, versioned in UPDATED_NAMES.items():
    if name not in FIXED_NAMES:
        raise Exception('updated name %s not in names' % name)
    fname = versioned[1]
    print('using updated image %s for %s' % (fname, name))
    FIXED_NAMES[name] = fname

# Now, recreate the destination directory and copy the data into it.

# Start from an empty destination directory on every run.
if path.isdir(DST):
    shutil.rmtree(DST)
os.makedirs(DST)

# Placeholder images that are deliberately left out of the destination.
SKIP_PLACEHOLDERS = frozenset([
    'emoji_u1f468_200d_1f469_200d_1f466.png',
    'emoji_u1f469_200d_2764_fe0f_200d_1f468.png',
    'emoji_u1f469_200d_2764_fe0f_200d_1f48b_200d_1f468.png',
])

# The copy order matters: files copied later overwrite same-named files
# copied earlier (placeholders, then noto images, then draft images).
for fname in glob.glob(path.join(SRC_PLACEHOLDER, '*.png')):
    basename = path.basename(fname)
    if basename not in SKIP_PLACEHOLDERS:
        shutil.copy(fname, DST)
    else:
        print('skip %s' % basename)

for fname in glob.glob(path.join(SRC_NOTO, '*.png')):
    shutil.copy(fname, DST)

# Draft images are stored under their fixed (lower-case) names.
for name in FIXED_NAMES:
    shutil.copy(FIXED_NAMES[name], path.join(DST, name))
95 changes: 95 additions & 0 deletions generate_emoji_placeholders.py
@@ -0,0 +1,95 @@
import os
from os import path
import subprocess

OUTPUT_DIR = '/tmp/placeholder_emoji'

def generate_image(name, text):
    """Render text as a 100x100 label image called name in OUTPUT_DIR.

    Echoes the name and the text (newlines shown as '_'), then runs the
    external 'convert' command. subprocess.check_call raises
    CalledProcessError if that command exits with a non-zero status.
    """
    print('%s %s' % (name, text.replace('\n', '_')))
    target = '%s/%s' % (OUTPUT_DIR, name)
    command = ['convert', '-size', '100x100', 'label:%s' % text, target]
    subprocess.check_call(command)

def is_color_patch(cp):
    """Return True if cp lies in the inclusive range 0x1f3fb..0x1f3ff."""
    return 0x1f3fb <= cp <= 0x1f3ff

def has_color_patch(values):
    """Return True if at least one element of values is a color patch."""
    return any(is_color_patch(v) for v in values)

def regional_to_ascii(cp):
    """Return the letter whose offset from 'A' equals cp's offset from 0x1f1e6."""
    offset = cp - 0x1f1e6
    return unichr(ord('A') + offset)

def is_flag_sequence(values):
    """Return True if values is exactly two codepoints in 0x1f1e6..0x1f1ff.

    That range is 0x1f1e6 plus an offset of 0 through 25, i.e. one value per
    letter handled by regional_to_ascii.
    """
    if len(values) != 2:
        return False
    return all(0 <= v - 0x1f1e6 <= 25 for v in values)

def is_keycap_sequence(values):
    """Return True if values has two elements and the second is 0x20e3."""
    if len(values) != 2:
        return False
    return values[1] == 0x20e3

def get_keycap_text(values):
    """Return the character for values[0] wrapped in dashes, e.g. '-1-'."""
    key = unichr(values[0])
    # Dashes rather than brackets: convert gags on '['.
    return '-%c-' % key

# Codepoint -> single-letter abbreviation used when building placeholder
# text for a sequence. A '-' entry marks a codepoint that is recognised but
# contributes no letter to the text.
char_map = {
    # people
    0x1f468: 'M',
    0x1f469: 'W',
    0x1f466: 'B',
    0x1f467: 'G',
    # symbols
    0x2764: 'H',   # heavy black heart, no var sel
    0x1f48b: 'K',  # kiss mark
    0x1f441: 'I',  # Eye
    0x1f5e8: 'W',  # 'witness' (left speech bubble)
    # joiners / selectors
    0x200D: '-',   # zwj placeholder
    0xfe0f: '-',   # variation selector placeholder
}

def get_combining_text(values):
    """Return the char_map letters for values joined into one string.

    Codepoints mapped to '-' are recognised but add nothing to the result.
    Returns None as soon as a codepoint has no char_map entry.
    """
    letters = []
    for cp in values:
        letter = char_map.get(cp)
        if not letter:
            return None
        if letter == '-':
            continue
        letters.append(letter)
    return ''.join(letters)


# Script body: read one '_'-separated hex codepoint sequence per line from
# sequences.txt and generate a placeholder image for every sequence we know
# how to label.
if not path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

with open('sequences.txt', 'r') as f:
    for seq in f:
        seq = seq.strip()
        if not seq:
            # A blank line holds no sequence; int('', 16) below would raise
            # ValueError and abort the whole run.
            continue

        text = None
        values = [int(code, 16) for code in seq.split('_')]
        if len(values) == 1:
            text = '%04X' % values[0]  # ensure upper case format
        elif is_flag_sequence(values):
            text = ''.join(regional_to_ascii(cp) for cp in values)
        elif has_color_patch(values):
            print('skipping color patch sequence %s' % seq)
        elif is_keycap_sequence(values):
            text = get_keycap_text(values)
        else:
            text = get_combining_text(values)
            if not text:
                print('missing %s' % seq)

        if not text:
            continue

        # Labels longer than three characters are split over two lines:
        # 2+2 for four characters, otherwise the last three characters go
        # on the second line.
        if len(text) > 3:
            if len(text) == 4:
                hi, lo = text[:2], text[2:]
            else:
                hi, lo = text[:-3], text[-3:]
            text = '%s\n%s' % (hi, lo)
        generate_image('emoji_u%s.png' % seq, text)
120 changes: 100 additions & 20 deletions third_party/color_emoji/add_glyphs.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python

import glob, os, sys
import collections, glob, os, sys
from fontTools import ttx
from fontTools.ttLib.tables import otTables
from png import PNG
Expand All @@ -10,11 +10,31 @@
import add_emoji_gsub


def is_vs(cp):
    """Return True if cp lies in the inclusive range 0xfe00..0xfe0f."""
    return 0xfe00 <= cp <= 0xfe0f

def codes_to_string(codes):
    """Convert '_'-separated hex codepoints (e.g. '1F468_200D') to a string.

    A value without '_' is a single codepoint; splitting it yields a
    one-element list, so both forms share the same path.
    """
    return "".join(unichr(int(code, 16)) for code in codes.split("_"))


def glyph_sequence(string):
    """Return the glyph names of the characters that form a ligature.

    Each character becomes 'uXXXX' (upper-case hex, at least four digits).
    Variation selectors are stripped.
    """
    names = []
    for char in string:
        cp = ord(char)
        if is_vs(cp):
            continue
        names.append("u%04X" % cp)
    return names


def glyph_name(string):
    """Return the name of a ligature: 'uXXXX' parts joined with '_'.

    Unlike glyph_sequence, variation selectors are included when present.
    """
    return "_".join("u%04X" % ord(char) for char in string)


def add_ligature (font, string):
def add_ligature (font, seq, name):
if 'GSUB' not in font:
ligature_subst = otTables.LigatureSubst()
ligature_subst.ligatures = {}
Expand All @@ -34,17 +54,27 @@ def add_ligature (font, string):
ligatures = lookup.SubTable[0].ligatures

lig = otTables.Ligature()
lig.CompCount = len(string)
lig.Component = [glyph_name(ch) for ch in string[1:]]
lig.LigGlyph = glyph_name(string)
lig.CompCount = len(seq)
lig.Component = seq[1:]
lig.LigGlyph = name

first = glyph_name(string[0])
first = seq[0]
try:
ligatures[first].append(lig)
except KeyError:
ligatures[first] = [lig]


# Extra ligating sequences for emoji that already have a codepoint of their
# own, so that they match the sequences used by related emoji that have no
# codepoint.
# Key: name of the glyph that has the codepoint.
# Value: the sequence, written in filename form.
EXTRA_SEQUENCES = {
    'u1F46A': '1F468_200D_1F469_200D_1F466',  # MWB
    'u1F491': '1F469_200D_2764_FE0F_200D_1F468',  # WHM
    'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468',  # WHKM
}

if len (sys.argv) < 4:
print >>sys.stderr, """
Usage:
Expand All @@ -65,23 +95,22 @@ def add_ligature (font, string):

in_file = sys.argv[1]
out_file = sys.argv[2]
img_prefix = sys.argv[3]
img_prefixen = sys.argv[3:]
del sys.argv

font = ttx.TTFont()
font.importXML (in_file)

img_files = {}
glb = "%s*.png" % img_prefix
print "Looking for images matching '%s'." % glb
for img_file in glob.glob (glb):
codes = img_file[len (img_prefix):-4]
if "_" in codes:
pieces = codes.split ("_")
u = "".join ([unichr (int (code, 16)) for code in pieces])
else:
u = unichr (int (codes, 16))
img_files[u] = img_file
# Collect the image for every sequence found under each prefix. Prefixes are
# processed in command-line order, so an image from a later prefix replaces
# one from an earlier prefix for the same sequence.
for img_prefix in img_prefixen:
    glb = "%s*.png" % img_prefix
    print("Looking for images matching '%s'." % glb)
    for img_file in glob.glob (glb):
        # The part of the path between the prefix and '.png' is the
        # '_'-separated hex codepoint sequence.
        codes = img_file[len (img_prefix):-4]
        u = codes_to_string(codes)
        if u in img_files:
            # Report the replacement; this previously referenced the
            # undefined name 'imag_file' and raised NameError.
            print('overwriting %s with %s' % (img_files[u], img_file))
        img_files[u] = img_file
if not img_files:
raise Exception ("No image files found in '%s'." % glb)

Expand All @@ -98,20 +127,71 @@ def add_ligature (font, string):
img_pairs = img_files.items ()
img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0]))

glyph_names = set()
ligatures = {}

def add_lig_sequence(ligatures, seq, n):
    """Record in ligatures that the glyph-name sequence seq maps to n.

    ligatures maps tuples of glyph names to a ligature glyph name and is
    updated in place. An existing entry is replaced and the replacement is
    reported.

    Sequences with ZWJ ('u200D') are assumed to be emoji 'ligatures' for
    which rtl order is also valid, so the reversed sequence is recorded as
    well. Internal permutations are not.
    """
    forward = tuple(seq)
    if forward in ligatures:
        print('lig sequence %s, replace %s with %s' % (
            forward, ligatures[forward], n))
    ligatures[forward] = n

    if 'u200D' not in seq:
        return

    backward = forward[::-1]
    if backward in ligatures:
        print('rev lig sequence %s, replace %s with %s' % (
            backward, ligatures[backward], n))
    ligatures[backward] = n


for (u, filename) in img_pairs:
print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
n = glyph_name (u)
glyph_names.add(n)

g.append (n)
for char in u:
if char not in c:
cp = ord(char)
if cp not in c and not is_vs(cp):
name = glyph_name (char)
c[ord (char)] = name
c[cp] = name
if len (u) > 1:
h[name] = [0, 0]
(img_width, img_height) = PNG (filename).get_size ()
advance = int (round ((float (ascent+descent) * img_width / img_height)))
h[n] = [advance, 0]
if len (u) > 1:
add_ligature (font, u)
seq = glyph_sequence(u)
add_lig_sequence(ligatures, seq, n)

# Register the extra sequences, but only for glyphs that were actually added.
for n in EXTRA_SEQUENCES:
    if n not in glyph_names:
        print('extras: no glyph for %s' % n)
        continue
    seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n]))
    add_lig_sequence(ligatures, seq, n)


# Group the (sequence, ligature name) pairs by the first glyph of the
# sequence.
keyed_ligatures = collections.defaultdict(list)
for k in ligatures:
    keyed_ligatures[k[0]].append((k, ligatures[k]))

for base in sorted(keyed_ligatures):
    pairs = keyed_ligatures[base]
    print('base %s has %d sequences' % (base, len(pairs)))
    # Sort longest first, this ensures longer sequences with common prefixes
    # are handled before shorter ones.  It would be better to have multiple
    # lookups, most likely.
    ordered = sorted(
        pairs, key=lambda pair: (len(pair[0]), pair[0]), reverse=True)
    for seq, name in ordered:
        print('%s %s' % (seq, name))
        add_ligature(font, seq, name)

font.saveXML (out_file)

0 comments on commit 0f227e7

Please sign in to comment.