Skip to content

Commit

Permalink
Merge 1be3499 into 2e29a40
Browse files Browse the repository at this point in the history
  • Loading branch information
dscorbett committed Mar 15, 2020
2 parents 2e29a40 + 1be3499 commit f147d30
Show file tree
Hide file tree
Showing 21 changed files with 3,541 additions and 3,220 deletions.
6 changes: 5 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,11 @@ unicode-tables: \
arabic-table \
emoji-table \
indic-table \
os2-table \
tag-table \
ucd-table \
use-table \
emoji-table \
vowel-constraints \
$(NULL)

arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
Expand All @@ -277,6 +278,9 @@ emoji-table: gen-emoji-table.py emoji-data.txt
indic-table: gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-indic-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-indic-table.cc; false)
# Regenerate hb-ot-os2-unicode-ranges.hh from OS2UnicodeRanges.txt.
# The generator script is the first prerequisite so that $^ expands to
# "<script> <input>"; it is named without a leading "./", like the other
# *-table rules, because the recipe already prefixes $(builddir)/.
# If the generator fails, the partially written header is removed and the
# recipe still reports failure.
os2-table: gen-os2-unicode-ranges.py OS2UnicodeRanges.txt
	$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-os2-unicode-ranges.hh \
	|| ($(RM) $(srcdir)/hb-ot-os2-unicode-ranges.hh; false)
tag-table: gen-tag-table.py languagetags language-subtag-registry
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-tag-table.hh \
|| ($(RM) $(srcdir)/hb-ot-tag-table.hh; false)
Expand Down
169 changes: 169 additions & 0 deletions src/OS2UnicodeRanges.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
0 Basic Latin 0000-007F
1 Latin-1 Supplement 0080-00FF
2 Latin Extended-A 0100-017F
3 Latin Extended-B 0180-024F
4 IPA Extensions 0250-02AF
Phonetic Extensions 1D00-1D7F
Phonetic Extensions Supplement 1D80-1DBF
5 Spacing Modifier Letters 02B0-02FF
Modifier Tone Letters A700-A71F
6 Combining Diacritical Marks 0300-036F
Combining Diacritical Marks Supplement 1DC0-1DFF
7 Greek and Coptic 0370-03FF
8 Coptic 2C80-2CFF
9 Cyrillic 0400-04FF
Cyrillic Supplement 0500-052F
Cyrillic Extended-A 2DE0-2DFF
Cyrillic Extended-B A640-A69F
10 Armenian 0530-058F
11 Hebrew 0590-05FF
12 Vai A500-A63F
13 Arabic 0600-06FF
Arabic Supplement 0750-077F
14 NKo 07C0-07FF
15 Devanagari 0900-097F
16 Bengali 0980-09FF
17 Gurmukhi 0A00-0A7F
18 Gujarati 0A80-0AFF
19 Oriya 0B00-0B7F
20 Tamil 0B80-0BFF
21 Telugu 0C00-0C7F
22 Kannada 0C80-0CFF
23 Malayalam 0D00-0D7F
24 Thai 0E00-0E7F
25 Lao 0E80-0EFF
26 Georgian 10A0-10FF
Georgian Supplement 2D00-2D2F
27 Balinese 1B00-1B7F
28 Hangul Jamo 1100-11FF
29 Latin Extended Additional 1E00-1EFF
Latin Extended-C 2C60-2C7F
Latin Extended-D A720-A7FF
30 Greek Extended 1F00-1FFF
31 General Punctuation 2000-206F
Supplemental Punctuation 2E00-2E7F
32 Superscripts And Subscripts 2070-209F
33 Currency Symbols 20A0-20CF
34 Combining Diacritical Marks For Symbols 20D0-20FF
35 Letterlike Symbols 2100-214F
36 Number Forms 2150-218F
37 Arrows 2190-21FF
Supplemental Arrows-A 27F0-27FF
Supplemental Arrows-B 2900-297F
Miscellaneous Symbols and Arrows 2B00-2BFF
38 Mathematical Operators 2200-22FF
Supplemental Mathematical Operators 2A00-2AFF
Miscellaneous Mathematical Symbols-A 27C0-27EF
Miscellaneous Mathematical Symbols-B 2980-29FF
39 Miscellaneous Technical 2300-23FF
40 Control Pictures 2400-243F
41 Optical Character Recognition 2440-245F
42 Enclosed Alphanumerics 2460-24FF
43 Box Drawing 2500-257F
44 Block Elements 2580-259F
45 Geometric Shapes 25A0-25FF
46 Miscellaneous Symbols 2600-26FF
47 Dingbats 2700-27BF
48 CJK Symbols And Punctuation 3000-303F
49 Hiragana 3040-309F
50 Katakana 30A0-30FF
Katakana Phonetic Extensions 31F0-31FF
51 Bopomofo 3100-312F
Bopomofo Extended 31A0-31BF
52 Hangul Compatibility Jamo 3130-318F
53 Phags-pa A840-A87F
54 Enclosed CJK Letters And Months 3200-32FF
55 CJK Compatibility 3300-33FF
56 Hangul Syllables AC00-D7AF
57 Non-Plane 0 10000-10FFFF
58 Phoenician 10900-1091F
59 CJK Unified Ideographs 4E00-9FFF
CJK Radicals Supplement 2E80-2EFF
Kangxi Radicals 2F00-2FDF
Ideographic Description Characters 2FF0-2FFF
CJK Unified Ideographs Extension A 3400-4DBF
CJK Unified Ideographs Extension B 20000-2A6DF
Kanbun 3190-319F
60 Private Use Area (plane 0) E000-F8FF
61 CJK Strokes 31C0-31EF
CJK Compatibility Ideographs F900-FAFF
CJK Compatibility Ideographs Supplement 2F800-2FA1F
62 Alphabetic Presentation Forms FB00-FB4F
63 Arabic Presentation Forms-A FB50-FDFF
64 Combining Half Marks FE20-FE2F
65 Vertical Forms FE10-FE1F
CJK Compatibility Forms FE30-FE4F
66 Small Form Variants FE50-FE6F
67 Arabic Presentation Forms-B FE70-FEFF
68 Halfwidth And Fullwidth Forms FF00-FFEF
69 Specials FFF0-FFFF
70 Tibetan 0F00-0FFF
71 Syriac 0700-074F
72 Thaana 0780-07BF
73 Sinhala 0D80-0DFF
74 Myanmar 1000-109F
75 Ethiopic 1200-137F
Ethiopic Supplement 1380-139F
Ethiopic Extended 2D80-2DDF
76 Cherokee 13A0-13FF
77 Unified Canadian Aboriginal Syllabics 1400-167F
78 Ogham 1680-169F
79 Runic 16A0-16FF
80 Khmer 1780-17FF
Khmer Symbols 19E0-19FF
81 Mongolian 1800-18AF
82 Braille Patterns 2800-28FF
83 Yi Syllables A000-A48F
Yi Radicals A490-A4CF
84 Tagalog 1700-171F
Hanunoo 1720-173F
Buhid 1740-175F
Tagbanwa 1760-177F
85 Old Italic 10300-1032F
86 Gothic 10330-1034F
87 Deseret 10400-1044F
88 Byzantine Musical Symbols 1D000-1D0FF
Musical Symbols 1D100-1D1FF
Ancient Greek Musical Notation 1D200-1D24F
89 Mathematical Alphanumeric Symbols 1D400-1D7FF
90 Private Use (plane 15) F0000-FFFFD
Private Use (plane 16) 100000-10FFFD
91 Variation Selectors FE00-FE0F
Variation Selectors Supplement E0100-E01EF
92 Tags E0000-E007F
93 Limbu 1900-194F
94 Tai Le 1950-197F
95 New Tai Lue 1980-19DF
96 Buginese 1A00-1A1F
97 Glagolitic 2C00-2C5F
98 Tifinagh 2D30-2D7F
99 Yijing Hexagram Symbols 4DC0-4DFF
100 Syloti Nagri A800-A82F
101 Linear B Syllabary 10000-1007F
Linear B Ideograms 10080-100FF
Aegean Numbers 10100-1013F
102 Ancient Greek Numbers 10140-1018F
103 Ugaritic 10380-1039F
104 Old Persian 103A0-103DF
105 Shavian 10450-1047F
106 Osmanya 10480-104AF
107 Cypriot Syllabary 10800-1083F
108 Kharoshthi 10A00-10A5F
109 Tai Xuan Jing Symbols 1D300-1D35F
110 Cuneiform 12000-123FF
Cuneiform Numbers and Punctuation 12400-1247F
111 Counting Rod Numerals 1D360-1D37F
112 Sundanese 1B80-1BBF
113 Lepcha 1C00-1C4F
114 Ol Chiki 1C50-1C7F
115 Saurashtra A880-A8DF
116 Kayah Li A900-A92F
117 Rejang A930-A95F
118 Cham AA00-AA5F
119 Ancient Symbols 10190-101CF
120 Phaistos Disc 101D0-101FF
121 Carian 102A0-102DF
Lycian 10280-1029F
Lydian 10920-1093F
122 Domino Tiles 1F030-1F09F
Mahjong Tiles 1F000-1F02F
4 changes: 2 additions & 2 deletions src/gen-arabic-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
if len (sys.argv) != 4:
print ("""usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
Input files, as of Unicode 12:
Input files:
* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
* https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
Expand Down Expand Up @@ -65,7 +65,7 @@ def print_joining_table(f):
values[u] = value

short_value = {}
for value in set([v for v in values.values()] + ['JOINING_TYPE_X']):
for value in sorted (set ([v for v in values.values ()] + ['JOINING_TYPE_X'])):
short = ''.join(x[0] for x in value.split('_')[2:])
assert short not in short_value.values()
short_value[value] = short
Expand Down
4 changes: 2 additions & 2 deletions src/gen-emoji-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
if len (sys.argv) != 2:
print("""usage: ./gen-emoji-table.py emoji-data.txt
Input file, as of Unicode 12:
* https://www.unicode.org/Public/emoji/12.0/emoji-data.txt""", file=sys.stderr)
Input file:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt""", file=sys.stderr)
sys.exit (1)

f = open(sys.argv[1])
Expand Down
2 changes: 1 addition & 1 deletion src/gen-indic-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
if len (sys.argv) != 4:
print ("""usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
Input files, as of Unicode 12:
Input files:
* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt""", file=sys.stderr)
Expand Down
46 changes: 45 additions & 1 deletion src/gen-os2-unicode-ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,32 @@
except NameError:
pass # Python 3

print ("""static OS2Range _hb_os2_unicode_ranges[] =
# Emit the fixed prologue of the generated C++ header, line by line, on stdout.
# NOTE(review): leading whitespace inside these string literals may have been
# collapsed in this view of the file -- confirm against the checked-in script.

# Banner comment recording the command that regenerates the file.
print ("/* == Start of generated table == */")
print ("/*")
print (" * The following table is generated by running:")
print (" *")
print (" * ./gen-os2-unicode-ranges.py OS2UnicodeRanges.txt")
print (" */")
print ()
# Include guard and the one header the generated code includes.
print ("#ifndef HB_OT_OS2_UNICODE_RANGES_HH")
print ("#define HB_OT_OS2_UNICODE_RANGES_HH")
print ()
print ('#include "hb.hh"')
print ()
print ("namespace OT {")
print ()
# OS2Range: an inclusive [start, end] code point range tagged with a bit
# number. cmp() yields -1 / 0 / +1 for a key below / inside / above the range.
print ("struct OS2Range")
print ("{")
print (" int cmp (hb_codepoint_t key) const")
print (" { return (key < start) ? -1 : key <= end ? 0 : +1; }")
print ()
print (" hb_codepoint_t start;")
print (" hb_codepoint_t end;")
print (" unsigned int bit;")
print ("};")
print ()
print ("/* Note: The contents of this array was generated using gen-os2-unicode-ranges.py. */")
# Open the table definition; the initializer rows and the closing "};" are
# printed further down in the script.
print ("""static const OS2Range _hb_os2_unicode_ranges[] =
{""")

args = sys.argv[1:]
Expand Down Expand Up @@ -53,3 +78,22 @@
print (" {%s, %s, %s}, // %s" % (start, end, bit, ranges[3]))

print ("""};""")
# Emit the fixed epilogue of the generated C++ header: the lookup helper,
# then the closing namespace, include guard and end banner.
print ()
print ("/**")
print (" * _hb_ot_os2_get_unicode_range_bit:")
print (" * Returns the bit to be set in os/2 ulUnicodeOS2Range for a given codepoint.")
print (" **/")
# The emitted helper looks cp up in _hb_os2_unicode_ranges with hb_bsearch and
# returns the matching entry's bit.
print ("static unsigned int")
print ("_hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp)")
print ("{")
print (" auto* range = hb_bsearch (cp, _hb_os2_unicode_ranges, ARRAY_LENGTH (_hb_os2_unicode_ranges));")
print (" if (range != nullptr)")
print (" return range->bit;")
# NOTE(review): the emitted function is declared `unsigned int` yet returns -1
# on a miss, so the caller sees the wrapped maximum value; presumably callers
# treat any out-of-range bit as "not found" -- confirm at the call site.
print (" return -1;")
print ("}")
print ("")
# Close what the prologue opened: namespace OT and the include guard.
print ("} /* namespace OT */")
print ()
print ("#endif /* HB_OT_OS2_UNICODE_RANGES_HH */")
print ()
print ("/* == End of generated table == */")
2 changes: 1 addition & 1 deletion src/gen-tag-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def write (s):
if len (sys.argv) != 3:
print ('''usage: ./gen-tag-table.py languagetags language-subtag-registry
Input files, as of Unicode 12:
Input files:
* https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
* https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry''', file=sys.stderr)
sys.exit (1)
Expand Down
2 changes: 1 addition & 1 deletion src/gen-ucd-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
if len (sys.argv) not in (2, 3):
print("""usage: ./gen-ucd-table ucd.nounihan.grouped.xml [/path/to/hb-common.h]
Input file, as of Unicode 12:
Input file:
* https://unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip""", file=sys.stderr)
sys.exit(1)

Expand Down
5 changes: 3 additions & 2 deletions src/gen-use-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
if len (sys.argv) != 5:
print ("""usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt
Input file, as of Unicode 12:
Input file:
* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
Expand Down Expand Up @@ -302,7 +302,7 @@ def is_VOWEL_MOD(U, UISC, UGC):
},
'M': {
'Abv': [Top],
'Blw': [Bottom, Bottom_And_Left],
'Blw': [Bottom, Bottom_And_Left, Bottom_And_Right],
'Pst': [Right],
'Pre': [Left],
},
Expand Down Expand Up @@ -399,6 +399,7 @@ def map_to_use(data):
if 0x1CF8 <= U <= 0x1CF9: UIPC = Top

assert (UIPC in [Not_Applicable, Visual_Order_Left] or
USE == 'R' or
USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

pos_mapping = use_positions.get(USE, None)
Expand Down
2 changes: 1 addition & 1 deletion src/gen-vowel-constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def write (s):
if len (sys.argv) != 3:
print ("""usage: ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt
Input file, as of Unicode 12:
Input file:
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt""", file=sys.stderr)
sys.exit (1)

Expand Down
8 changes: 8 additions & 0 deletions src/hb-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,14 @@ typedef enum
/*12.0*/HB_SCRIPT_NYIAKENG_PUACHUE_HMONG = HB_TAG ('H','m','n','p'),
/*12.0*/HB_SCRIPT_WANCHO = HB_TAG ('W','c','h','o'),

/*
* Since REPLACEME
*/
/*13.0*/HB_SCRIPT_CHORASMIAN = HB_TAG ('C','h','r','s'),
/*13.0*/HB_SCRIPT_DIVES_AKURU = HB_TAG ('D','i','a','k'),
/*13.0*/HB_SCRIPT_KHITAN_SMALL_SCRIPT = HB_TAG ('K','i','t','s'),
/*13.0*/HB_SCRIPT_YEZIDI = HB_TAG ('Y','e','z','i'),

/* No script set. */
HB_SCRIPT_INVALID = HB_TAG_NONE,

Expand Down
Loading

0 comments on commit f147d30

Please sign in to comment.