/
gftools-namelist.py
executable file
·227 lines (196 loc) · 7.48 KB
/
gftools-namelist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env python3
# Copyright 2015, Google Inc.
# Author: Dave Crossland (dave@understandinglimited.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# namelist.py: A fontTools python script for generating namelist files
#
# Usage:
# # To make a Namelist from a font file.
# $ namelist.py Font.ttf > NameList.nam
#
# # To reformat an existing Namelist.
# $ ./namelist.py reformat NameList.nam > NameList.nam
#
# # To reformat all Namelists in a directory.
# $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
# bash -c './namelist.py reformat "{}" > "{}__tmp" && mv "{}__tmp" "{}"' \;
#
# # To generate "uni names" and "nice names" filter lists a Namelist
# # This will create the needed directories if missing
# $ ./namelist.py generate-filter-lists NameList.nam
#
# # To generate "uni names" and "nice names" filter lists for all Namelists
# $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
# ./namelist.py generate-filter-lists "{}" \;
from __future__ import print_function, unicode_literals
import sys
import os
from fontTools.ttLib import TTFont
from fontTools.unicode import Unicode
import codecs
from gftools.util import google_fonts
from gftools.util import filter_lists
if sys.version[0] == '3':
unichr = chr
def _get_basechar_unicode(name):
codepoint = filter_lists.get_unicode_by_name(name)
if codepoint is not None:
return codepoint;
if '_' in name:
# use the first ligature component as base char
return _get_basechar_unicode(name.split('_')[0])
if '.' in name:
return _get_basechar_unicode(name.split('.')[0])
return None
def _sortkey_namelist_entries(entry):
codepoint, name, _, _ = entry
base_codepoint = None
extension = None
if name == 'NULL':
codepoint = -float('inf')
if name:
if '.' in name:
extension = name.split('.')[1]
base_codepoint = _get_basechar_unicode(name)
return tuple([
codepoint if codepoint is not None else float('inf')
# keep glyphs of one OT-Feature sticking together
, extension
, base_codepoint if base_codepoint is not None else float('inf')
, name
])
def reformat_namelist(filename, out=None):
if out is None:
out = sys.stdout
if filename == '-':
_reformat_namelist(codecs.getreader('utf8')(sys.stdin), out)
return
with codecs.open(filename, 'r', encoding='utf-8') as f:
_reformat_namelist(f, out)
def _reformat_namelist(f, out=None):
entries = []
before = []
header = []
for line in f:
line = line.rstrip()
if (not entries and not before) and line.startswith('#'):
header.append(line)
continue
entry = None
if line.startswith('0x'):
# uni chr
codepoint = google_fonts.get_codepoint_from_line(line)
entry = (codepoint, None, line)
elif line.startswith(' '):
# unencoded name
name = filter_lists.translate_name(line.rsplit(' ', 1)[1])
entry = (None, name, line)
if entry is not None:
entry += (before, )
before = []
entries.append(entry)
else:
# these lines will stick before the next entry
before.append(line)
entries.sort(key=_sortkey_namelist_entries)
_print = lambda *args: print(*args,file=out)
map(_print, header)
for codepoint, name, original, item_before in entries:
map(_print, item_before)
if codepoint is not None:
_print(format_codepoint(codepoint))
elif name is not None:
_print((' '*9 + name))
# output left over lines at the end of the file
map(_print, before)
def _names_generator(filename):
with codecs.open(filename, 'r', encoding='utf-8') as f:
for line in f:
line = line.rstrip()
if line.startswith('0x'):
# uni chr
codepoint = google_fonts.get_codepoint_from_line(line)
name = filter_lists.get_name_by_unicode(codepoint)
if name is None:
prefix = 'u' if codepoint > 0xFFFF else 'uni'
name = '{0}{1:04X}'.format(prefix, codepoint)
yield name
elif line.startswith(' ' * 6):
# unencoded name
yield line.rsplit(' ', 1)[1]
def _mkdir(path):
try:
os.makedirs(path)
except OSError as exc:
if not os.path.isdir(path):
raise exc
def generate_filter_lists(filename):
# 'GF-{script}-rest.nam' => {script}-rest
basename = os.path.basename(filename).split('.', 1)[0].split('-', 2)[-1]
filerListFileName = '{0}.txt'.format(basename)
dirname = os.path.dirname(filename)
nice_names_filename = os.path.join(dirname, 'filter lists', 'nice names', filerListFileName)
prod_names_filename = os.path.join(dirname, 'filter lists', 'uni names', filerListFileName)
_mkdir(os.path.dirname(nice_names_filename))
_mkdir(os.path.dirname(prod_names_filename))
with codecs.open(nice_names_filename, 'w', encoding='utf-8') as niceNamesFile, \
codecs.open(prod_names_filename, 'w', encoding='utf-8') as prodNamesFile:
for name in _names_generator(filename):
print(filter_lists.translate_name(name, production_name=False), file=niceNamesFile)
print(filter_lists.translate_name(name, production_name=True), file=prodNamesFile)
def _format_codepoint(codepoint):
if 0xE000 <= codepoint <= 0xF8FF:
item_description = 'PRIVATE USE AREA U+{0:04X}'.format(codepoint)
char = ' '
elif codepoint == 0x000D:
# Special case, this only happens in Latin-core.
# FIXME: we should consider remover CR from Latin-core
item_description = 'CR'
char = ' '
else:
item_description = Unicode[codepoint]
char = unichr(codepoint)
return ('0x{0:04X}'.format(codepoint)
, char
, item_description)
def format_codepoint(codepoint):
return ' '.join(_format_codepoint(codepoint))
def namelist_from_font(file_name, out=None):
if out is None:
out = sys.stdout
excluded_chars = ["????", "SPACE", "NO-BREAK SPACE"]
font = TTFont(file_name)
charcodes = set()
for cmap in font["cmap"].tables:
if not cmap.isUnicode():
continue
charcodes.update(cp for cp,name in cmap.cmap.items())
charcodes = sorted(charcodes)
for charcode in charcodes:
hexchar, char, item_description = _format_codepoint(charcode)
if item_description not in excluded_chars:
string = "{} {} {}".format(hexchar, char, item_description)
print(string, file=out)
return
font.close()
def main(*args):
if args[0] == 'reformat':
reformat_namelist(args[1])
elif args[0] == 'generate-filter-lists':
generate_filter_lists(args[1])
else:
namelist_from_font(args[0])
if __name__ == '__main__':
main(*sys.argv[1:])